From: Ashish Kalra <ashish.kalra(a)amd.com>
When the shared pages are being made private during kdump preparation
there are additional checks to handle shared GHCB pages.
These additional checks include handling the case of GHCB page being
contained within a huge page.
There is a bug in this additional check for GHCB page contained
within a huge page which causes any shared page just below the
per-cpu GHCB getting skipped from being transitioned back to private
before kdump preparation which subsequently causes a 0x404 #VC
exception when this shared page is accessed later while dumping guest
memory during vmcore generation via kdump.
Correct the detection and handling of GHCB pages contained within
a huge page.
Cc: stable(a)vger.kernel.org
Fixes: 3074152e56c9 ("x86/sev: Convert shared memory back to private on kexec")
Signed-off-by: Ashish Kalra <ashish.kalra(a)amd.com>
---
arch/x86/coco/sev/core.c | 14 ++++++++++++--
1 file changed, 12 insertions(+), 2 deletions(-)
diff --git a/arch/x86/coco/sev/core.c b/arch/x86/coco/sev/core.c
index 870f4994a13d..ba601ef5242d 100644
--- a/arch/x86/coco/sev/core.c
+++ b/arch/x86/coco/sev/core.c
@@ -961,7 +961,13 @@ static void unshare_all_memory(void)
data = per_cpu(runtime_data, cpu);
ghcb = (unsigned long)&data->ghcb_page;
- if (addr <= ghcb && ghcb <= addr + size) {
+ /* Handle the case of a huge page containing the GHCB page */
+ if (level == PG_LEVEL_4K && addr == ghcb) {
+ skipped_addr = true;
+ break;
+ }
+ if (level > PG_LEVEL_4K && addr <= ghcb &&
+ ghcb < addr + size) {
skipped_addr = true;
break;
}
@@ -1074,8 +1080,8 @@ static void snp_shutdown_all_aps(void)
void snp_kexec_finish(void)
{
struct sev_es_runtime_data *data;
+ unsigned long size, mask;
unsigned int level, cpu;
- unsigned long size;
struct ghcb *ghcb;
pte_t *pte;
@@ -1103,6 +1109,10 @@ void snp_kexec_finish(void)
ghcb = &data->ghcb_page;
pte = lookup_address((unsigned long)ghcb, &level);
size = page_level_size(level);
+ mask = page_level_mask(level);
+ /* Handle the case of a huge page containing the GHCB page */
+ if (level > PG_LEVEL_4K)
+ ghcb = (struct ghcb *)((unsigned long)ghcb & mask);
set_pte_enc(pte, level, (void *)ghcb);
snp_set_memory_private((unsigned long)ghcb, (size / PAGE_SIZE));
}
--
2.34.1
From: Dmitry Bogdanov <d.bogdanov(a)yadro.com>
[ Upstream commit 7f533cc5ee4c4436cee51dc58e81dfd9c3384418 ]
NOPIN response timer may expire on a deleted connection and crash with
such logs:
Did not receive response to NOPIN on CID: 0, failing connection for I_T Nexus (null),i,0x00023d000125,iqn.2017-01.com.iscsi.target,t,0x3d
BUG: Kernel NULL pointer dereference on read at 0x00000000
NIP strlcpy+0x8/0xb0
LR iscsit_fill_cxn_timeout_err_stats+0x5c/0xc0 [iscsi_target_mod]
Call Trace:
iscsit_handle_nopin_response_timeout+0xfc/0x120 [iscsi_target_mod]
call_timer_fn+0x58/0x1f0
run_timer_softirq+0x740/0x860
__do_softirq+0x16c/0x420
irq_exit+0x188/0x1c0
timer_interrupt+0x184/0x410
That is because nopin response timer may be re-started on nopin timer
expiration.
Stop nopin timer before stopping the nopin response timer to be sure
that no one of them will be re-started.
Signed-off-by: Dmitry Bogdanov <d.bogdanov(a)yadro.com>
Link: https://lore.kernel.org/r/20241224101757.32300-1-d.bogdanov@yadro.com
Reviewed-by: Maurizio Lombardi <mlombard(a)redhat.com>
Signed-off-by: Martin K. Petersen <martin.petersen(a)oracle.com>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
drivers/target/iscsi/iscsi_target.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c
index 07e196b44b91d..04d40e76772b3 100644
--- a/drivers/target/iscsi/iscsi_target.c
+++ b/drivers/target/iscsi/iscsi_target.c
@@ -4314,8 +4314,8 @@ int iscsit_close_connection(
spin_unlock(&iscsit_global->ts_bitmap_lock);
iscsit_stop_timers_for_cmds(conn);
- iscsit_stop_nopin_response_timer(conn);
iscsit_stop_nopin_timer(conn);
+ iscsit_stop_nopin_response_timer(conn);
if (conn->conn_transport->iscsit_wait_conn)
conn->conn_transport->iscsit_wait_conn(conn);
--
2.39.5
On SNDRV_PCM_TRIGGER_START event, audio data pointers are not reset.
This leads to wrong data buffer usage when multiple TRIGGER_START are
received and ends to incorrect buffer usage between the user-space and
the driver. Indeed, the driver can read data that are not already set by
the user-space or the user-space and the driver are writing and reading
the same area.
Fix that resetting data pointers on each SNDRV_PCM_TRIGGER_START events.
Fixes: 075c7125b11c ("ASoC: fsl: Add support for QMC audio")
Cc: stable(a)vger.kernel.org
Signed-off-by: Herve Codina <herve.codina(a)bootlin.com>
---
sound/soc/fsl/fsl_qmc_audio.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/sound/soc/fsl/fsl_qmc_audio.c b/sound/soc/fsl/fsl_qmc_audio.c
index b2979290c973..5614a8b909ed 100644
--- a/sound/soc/fsl/fsl_qmc_audio.c
+++ b/sound/soc/fsl/fsl_qmc_audio.c
@@ -250,6 +250,9 @@ static int qmc_audio_pcm_trigger(struct snd_soc_component *component,
switch (cmd) {
case SNDRV_PCM_TRIGGER_START:
bitmap_zero(prtd->chans_pending, 64);
+ prtd->buffer_ended = 0;
+ prtd->ch_dma_addr_current = prtd->ch_dma_addr_start;
+
if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) {
for (i = 0; i < prtd->channels; i++)
prtd->qmc_dai->chans[i].prtd_tx = prtd;
--
2.49.0
From: Karol Wachowski <karol.wachowski(a)intel.com>
commit dad945c27a42dfadddff1049cf5ae417209a8996 upstream.
Trigger recovery of the NPU upon receiving HW context violation from
the firmware. The context violation error is a fatal error that prevents
any subsequent jobs from being executed. Without this fix it is
necessary to reload the driver to restore the NPU operational state.
This is simplified version of upstream commit as the full implementation
would require all engine reset/resume logic to be backported.
Signed-off-by: Karol Wachowski <karol.wachowski(a)intel.com>
Signed-off-by: Maciej Falkowski <maciej.falkowski(a)linux.intel.com>
Reviewed-by: Jacek Lawrynowicz <jacek.lawrynowicz(a)linux.intel.com>
Signed-off-by: Jacek Lawrynowicz <jacek.lawrynowicz(a)linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20250107173238.381120-13-maci…
Fixes: 0adff3b0ef12 ("accel/ivpu: Share NPU busy time in sysfs")
Cc: <stable(a)vger.kernel.org> # v6.11+
---
drivers/accel/ivpu/ivpu_job.c | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/drivers/accel/ivpu/ivpu_job.c b/drivers/accel/ivpu/ivpu_job.c
index be2e2bf0f43f0..70b3676974407 100644
--- a/drivers/accel/ivpu/ivpu_job.c
+++ b/drivers/accel/ivpu/ivpu_job.c
@@ -482,6 +482,8 @@ static struct ivpu_job *ivpu_job_remove_from_submitted_jobs(struct ivpu_device *
return job;
}
+#define VPU_JSM_STATUS_MVNCI_CONTEXT_VIOLATION_HW 0xEU
+
static int ivpu_job_signal_and_destroy(struct ivpu_device *vdev, u32 job_id, u32 job_status)
{
struct ivpu_job *job;
@@ -490,6 +492,9 @@ static int ivpu_job_signal_and_destroy(struct ivpu_device *vdev, u32 job_id, u32
if (!job)
return -ENOENT;
+ if (job_status == VPU_JSM_STATUS_MVNCI_CONTEXT_VIOLATION_HW)
+ ivpu_pm_trigger_recovery(vdev, "HW context violation");
+
if (job->file_priv->has_mmu_faults)
job_status = DRM_IVPU_JOB_STATUS_ABORTED;
--
2.45.1
The following commit has been merged into the perf/urgent branch of tip:
Commit-ID: 58f6217e5d0132a9f14e401e62796916aa055c1b
Gitweb: https://git.kernel.org/tip/58f6217e5d0132a9f14e401e62796916aa055c1b
Author: Sean Christopherson <seanjc(a)google.com>
AuthorDate: Fri, 25 Apr 2025 17:13:55 -07:00
Committer: Peter Zijlstra <peterz(a)infradead.org>
CommitterDate: Wed, 30 Apr 2025 13:58:29 +02:00
perf/x86/intel: KVM: Mask PEBS_ENABLE loaded for guest with vCPU's value.
When generating the MSR_IA32_PEBS_ENABLE value that will be loaded on
VM-Entry to a KVM guest, mask the value with the vCPU's desired PEBS_ENABLE
value. Consulting only the host kernel's host vs. guest masks results in
running the guest with PEBS enabled even when the guest doesn't want to use
PEBS. Because KVM uses perf events to proxy the guest virtual PMU, simply
looking at exclude_host can't differentiate between events created by host
userspace, and events created by KVM on behalf of the guest.
Running the guest with PEBS unexpectedly enabled typically manifests as
crashes due to a near-infinite stream of #PFs. E.g. if the guest hasn't
written MSR_IA32_DS_AREA, the CPU will hit page faults on address '0' when
trying to record PEBS events.
The issue is most easily reproduced by running `perf kvm top` from before
commit 7b100989b4f6 ("perf evlist: Remove __evlist__add_default") (after
which, `perf kvm top` effectively stopped using PEBS). The userspace side
of perf creates a guest-only PEBS event, which intel_guest_get_msrs()
misconstrues a guest-*owned* PEBS event.
Arguably, this is a userspace bug, as enabling PEBS on guest-only events
simply cannot work, and userspace can kill VMs in many other ways (there
is no danger to the host). However, even if this is considered to be bad
userspace behavior, there's zero downside to perf/KVM restricting PEBS to
guest-owned events.
Note, commit 854250329c02 ("KVM: x86/pmu: Disable guest PEBS temporarily
in two rare situations") fixed the case where host userspace is profiling
KVM *and* userspace, but missed the case where userspace is profiling only
KVM.
Fixes: c59a1f106f5c ("KVM: x86/pmu: Add IA32_PEBS_ENABLE MSR emulation for extended PEBS")
Closes: https://lore.kernel.org/all/Z_VUswFkWiTYI0eD@do-x1carbon
Reported-by: Seth Forshee <sforshee(a)kernel.org>
Signed-off-by: Sean Christopherson <seanjc(a)google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz(a)infradead.org>
Reviewed-by: Dapeng Mi <dapeng1.mi(a)linux.intel.com>
Tested-by: "Seth Forshee (DigitalOcean)" <sforshee(a)kernel.org>
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/r/20250426001355.1026530-1-seanjc@google.com
---
arch/x86/events/intel/core.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 00dfe48..c5f3854 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -4395,7 +4395,7 @@ static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr, void *data)
arr[pebs_enable] = (struct perf_guest_switch_msr){
.msr = MSR_IA32_PEBS_ENABLE,
.host = cpuc->pebs_enabled & ~cpuc->intel_ctrl_guest_mask,
- .guest = pebs_mask & ~cpuc->intel_ctrl_host_mask,
+ .guest = pebs_mask & ~cpuc->intel_ctrl_host_mask & kvm_pmu->pebs_enable,
};
if (arr[pebs_enable].host) {
From: Matthew Brost <matthew.brost(a)intel.com>
Add timeslicing support to GPU SVM which will guarantee the GPU a
minimum execution time on piece of physical memory before migration back
to CPU. Intended to implement strict migration policies which require
memory to be in a certain placement for correct execution.
Required for shared CPU and GPU atomics on certain devices.
Fixes: 99624bdff867 ("drm/gpusvm: Add support for GPU Shared Virtual Memory")
Cc: stable(a)vger.kernel.org
Signed-off-by: Matthew Brost <matthew.brost(a)intel.com>
---
drivers/gpu/drm/drm_gpusvm.c | 9 +++++++++
include/drm/drm_gpusvm.h | 5 +++++
2 files changed, 14 insertions(+)
diff --git a/drivers/gpu/drm/drm_gpusvm.c b/drivers/gpu/drm/drm_gpusvm.c
index a58d03e6cac2..c94a8d7a293d 100644
--- a/drivers/gpu/drm/drm_gpusvm.c
+++ b/drivers/gpu/drm/drm_gpusvm.c
@@ -1770,6 +1770,8 @@ int drm_gpusvm_migrate_to_devmem(struct drm_gpusvm *gpusvm,
goto err_finalize;
/* Upon success bind devmem allocation to range and zdd */
+ devmem_allocation->timeslice_expiration = get_jiffies_64() +
+ msecs_to_jiffies(ctx->timeslice_ms);
zdd->devmem_allocation = devmem_allocation; /* Owns ref */
err_finalize:
@@ -1990,6 +1992,13 @@ static int __drm_gpusvm_migrate_to_ram(struct vm_area_struct *vas,
void *buf;
int i, err = 0;
+ if (page) {
+ zdd = page->zone_device_data;
+ if (time_before64(get_jiffies_64(),
+ zdd->devmem_allocation->timeslice_expiration))
+ return 0;
+ }
+
start = ALIGN_DOWN(fault_addr, size);
end = ALIGN(fault_addr + 1, size);
diff --git a/include/drm/drm_gpusvm.h b/include/drm/drm_gpusvm.h
index 653d48dbe1c1..eaf704d3d05e 100644
--- a/include/drm/drm_gpusvm.h
+++ b/include/drm/drm_gpusvm.h
@@ -89,6 +89,7 @@ struct drm_gpusvm_devmem_ops {
* @ops: Pointer to the operations structure for GPU SVM device memory
* @dpagemap: The struct drm_pagemap of the pages this allocation belongs to.
* @size: Size of device memory allocation
+ * @timeslice_expiration: Timeslice expiration in jiffies
*/
struct drm_gpusvm_devmem {
struct device *dev;
@@ -97,6 +98,7 @@ struct drm_gpusvm_devmem {
const struct drm_gpusvm_devmem_ops *ops;
struct drm_pagemap *dpagemap;
size_t size;
+ u64 timeslice_expiration;
};
/**
@@ -295,6 +297,8 @@ struct drm_gpusvm {
* @check_pages_threshold: Check CPU pages for present if chunk is less than or
* equal to threshold. If not present, reduce chunk
* size.
+ * @timeslice_ms: The timeslice MS which in minimum time a piece of memory
+ * remains with either exclusive GPU or CPU access.
* @in_notifier: entering from a MMU notifier
* @read_only: operating on read-only memory
* @devmem_possible: possible to use device memory
@@ -304,6 +308,7 @@ struct drm_gpusvm {
*/
struct drm_gpusvm_ctx {
unsigned long check_pages_threshold;
+ unsigned long timeslice_ms;
unsigned int in_notifier :1;
unsigned int read_only :1;
unsigned int devmem_possible :1;
--
2.34.1