Top of the day to you, I'm Yuliia Kadulina Deputy Chairman of the Management Board, and Personal Finance Director at Ukrsibbank Ukraine. I have a proposal for you. Kindly revert so I can fill you in on the details.
Thanks,
Yuliia
From: Valentin Schneider <vschneid(a)redhat.com>
commit 811d581194f7412eda97acc03d17fc77824b561f upstream.
Attempting to get a crash dump out of a debug PREEMPT_RT kernel via an NMI
panic() doesn't work. The cause of that lies in the PREEMPT_RT definition
of mutex_trylock():
if (IS_ENABLED(CONFIG_DEBUG_RT_MUTEXES) && WARN_ON_ONCE(!in_task()))
return 0;
This prevents an nmi_panic() from executing the main body of
__crash_kexec() which does the actual kexec into the kdump kernel. The
warning and return are explained by:
6ce47fd961fa ("rtmutex: Warn if trylock is called from hard/softirq context")
[...]
The reasons for this are:
1) There is a potential deadlock in the slowpath
2) Another cpu which blocks on the rtmutex will boost the task
which allegedly locked the rtmutex, but that cannot work
because the hard/softirq context borrows the task context.
Furthermore, grabbing the lock isn't NMI safe, so do away with kexec_mutex
and replace it with an atomic variable. This is somewhat overzealous as
*some* callsites could keep using a mutex (e.g. the sysfs-facing ones
like crash_shrink_memory()), but this has the benefit of involving a
single unified lock and preventing any future NMI-related surprises.
Tested by triggering NMI panics via:
$ echo 1 > /proc/sys/kernel/panic_on_unrecovered_nmi
$ echo 1 > /proc/sys/kernel/unknown_nmi_panic
$ echo 1 > /proc/sys/kernel/panic
$ ipmitool power diag
Link: https://lkml.kernel.org/r/20220630223258.4144112-3-vschneid@redhat.com
Fixes: 6ce47fd961fa ("rtmutex: Warn if trylock is called from hard/softirq context")
Signed-off-by: Valentin Schneider <vschneid(a)redhat.com>
Cc: Arnd Bergmann <arnd(a)arndb.de>
Cc: Baoquan He <bhe(a)redhat.com>
Cc: "Eric W . Biederman" <ebiederm(a)xmission.com>
Cc: Juri Lelli <jlelli(a)redhat.com>
Cc: Luis Claudio R. Goncalves <lgoncalv(a)redhat.com>
Cc: Miaohe Lin <linmiaohe(a)huawei.com>
Cc: Petr Mladek <pmladek(a)suse.com>
Cc: Sebastian Andrzej Siewior <bigeasy(a)linutronix.de>
Cc: Thomas Gleixner <tglx(a)linutronix.de>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
Cc: stable(a)vger.kernel.org # 5.10+
Signed-off-by: Wen Yang <wenyang.linux(a)foxmail.com>
---
kernel/kexec.c | 11 ++++-------
kernel/kexec_core.c | 20 ++++++++++----------
kernel/kexec_file.c | 4 ++--
kernel/kexec_internal.h | 15 ++++++++++++++-
4 files changed, 30 insertions(+), 20 deletions(-)
diff --git a/kernel/kexec.c b/kernel/kexec.c
index 9c7aef8f4bb6..f0f0c6555454 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -112,13 +112,10 @@ static int do_kexec_load(unsigned long entry, unsigned long nr_segments,
/*
* Because we write directly to the reserved memory region when loading
- * crash kernels we need a mutex here to prevent multiple crash kernels
- * from attempting to load simultaneously, and to prevent a crash kernel
- * from loading over the top of a in use crash kernel.
- *
- * KISS: always take the mutex.
+ * crash kernels we need a serialization here to prevent multiple crash
+ * kernels from attempting to load simultaneously.
*/
- if (!mutex_trylock(&kexec_mutex))
+ if (!kexec_trylock())
return -EBUSY;
if (flags & KEXEC_ON_CRASH) {
@@ -184,7 +181,7 @@ static int do_kexec_load(unsigned long entry, unsigned long nr_segments,
kimage_free(image);
out_unlock:
- mutex_unlock(&kexec_mutex);
+ kexec_unlock();
return ret;
}
diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c
index e47870f30728..7a8104d48997 100644
--- a/kernel/kexec_core.c
+++ b/kernel/kexec_core.c
@@ -45,7 +45,7 @@
#include <crypto/sha.h>
#include "kexec_internal.h"
-DEFINE_MUTEX(kexec_mutex);
+atomic_t __kexec_lock = ATOMIC_INIT(0);
/* Per cpu memory for storing cpu states in case of system crash. */
note_buf_t __percpu *crash_notes;
@@ -943,7 +943,7 @@ int kexec_load_disabled;
*/
void __noclone __crash_kexec(struct pt_regs *regs)
{
- /* Take the kexec_mutex here to prevent sys_kexec_load
+ /* Take the kexec_lock here to prevent sys_kexec_load
* running on one cpu from replacing the crash kernel
* we are using after a panic on a different cpu.
*
@@ -951,7 +951,7 @@ void __noclone __crash_kexec(struct pt_regs *regs)
* of memory the xchg(&kexec_crash_image) would be
* sufficient. But since I reuse the memory...
*/
- if (mutex_trylock(&kexec_mutex)) {
+ if (kexec_trylock()) {
if (kexec_crash_image) {
struct pt_regs fixed_regs;
@@ -960,7 +960,7 @@ void __noclone __crash_kexec(struct pt_regs *regs)
machine_crash_shutdown(&fixed_regs);
machine_kexec(kexec_crash_image);
}
- mutex_unlock(&kexec_mutex);
+ kexec_unlock();
}
}
STACK_FRAME_NON_STANDARD(__crash_kexec);
@@ -993,13 +993,13 @@ ssize_t crash_get_memory_size(void)
{
ssize_t size = 0;
- if (!mutex_trylock(&kexec_mutex))
+ if (!kexec_trylock())
return -EBUSY;
if (crashk_res.end != crashk_res.start)
size = resource_size(&crashk_res);
- mutex_unlock(&kexec_mutex);
+ kexec_unlock();
return size;
}
@@ -1019,7 +1019,7 @@ int crash_shrink_memory(unsigned long new_size)
unsigned long old_size;
struct resource *ram_res;
- if (!mutex_trylock(&kexec_mutex))
+ if (!kexec_trylock())
return -EBUSY;
if (kexec_crash_image) {
@@ -1058,7 +1058,7 @@ int crash_shrink_memory(unsigned long new_size)
insert_resource(&iomem_resource, ram_res);
unlock:
- mutex_unlock(&kexec_mutex);
+ kexec_unlock();
return ret;
}
@@ -1130,7 +1130,7 @@ int kernel_kexec(void)
{
int error = 0;
- if (!mutex_trylock(&kexec_mutex))
+ if (!kexec_trylock())
return -EBUSY;
if (!kexec_image) {
error = -EINVAL;
@@ -1205,7 +1205,7 @@ int kernel_kexec(void)
#endif
Unlock:
- mutex_unlock(&kexec_mutex);
+ kexec_unlock();
return error;
}
diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c
index fff11916aba3..b9c857782ada 100644
--- a/kernel/kexec_file.c
+++ b/kernel/kexec_file.c
@@ -343,7 +343,7 @@ SYSCALL_DEFINE5(kexec_file_load, int, kernel_fd, int, initrd_fd,
image = NULL;
- if (!mutex_trylock(&kexec_mutex))
+ if (!kexec_trylock())
return -EBUSY;
dest_image = &kexec_image;
@@ -415,7 +415,7 @@ SYSCALL_DEFINE5(kexec_file_load, int, kernel_fd, int, initrd_fd,
if ((flags & KEXEC_FILE_ON_CRASH) && kexec_crash_image)
arch_kexec_protect_crashkres();
- mutex_unlock(&kexec_mutex);
+ kexec_unlock();
kimage_free(image);
return ret;
}
diff --git a/kernel/kexec_internal.h b/kernel/kexec_internal.h
index 39d30ccf8d87..49d4e3ab9c96 100644
--- a/kernel/kexec_internal.h
+++ b/kernel/kexec_internal.h
@@ -15,7 +15,20 @@ int kimage_is_destination_range(struct kimage *image,
int machine_kexec_post_load(struct kimage *image);
-extern struct mutex kexec_mutex;
+/*
+ * Whatever is used to serialize accesses to the kexec_crash_image needs to be
+ * NMI safe, as __crash_kexec() can happen during nmi_panic(), so here we use a
+ * "simple" atomic variable that is acquired with a cmpxchg().
+ */
+extern atomic_t __kexec_lock;
+static inline bool kexec_trylock(void)
+{
+ return atomic_cmpxchg_acquire(&__kexec_lock, 0, 1) == 0;
+}
+static inline void kexec_unlock(void)
+{
+ atomic_set_release(&__kexec_lock, 0);
+}
#ifdef CONFIG_KEXEC_FILE
#include <linux/purgatory.h>
--
2.37.2
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.14.y
git checkout FETCH_HEAD
git cherry-pick -x 8e47363588377e1bdb65e2b020b409cfb44dd260
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '167819253422103(a)kroah.com' --subject-prefix 'PATCH 4.14.y' HEAD^..
Possible dependencies:
8e4736358837 ("thermal: intel: powerclamp: Fix cur_state for multi package system")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 8e47363588377e1bdb65e2b020b409cfb44dd260 Mon Sep 17 00:00:00 2001
From: Srinivas Pandruvada <srinivas.pandruvada(a)linux.intel.com>
Date: Wed, 1 Feb 2023 12:39:41 -0800
Subject: [PATCH] thermal: intel: powerclamp: Fix cur_state for multi package
system
The powerclamp cooling device cur_state shows actual idle observed by
package C-state idle counters. But the implementation is not sufficient
for multi package or multi die system. The cur_state value is incorrect.
On these systems, these counters must be read from each package/die and
somehow aggregate them. But there is no good method for aggregation.
It was not a problem when explicit CPU model addition was required to
enable intel powerclamp. In this way certain CPU models could have
been avoided. But with the removal of CPU model check with the
availability of Package C-state counters, the driver is loaded on most
of the recent systems.
For multi package/die systems, just show the actual target idle state,
the system is trying to achieve. In powerclamp this is the user set
state minus one.
Also there is no use of starting a worker thread for polling package
C-state counters and applying any compensation for multiple package
or multiple die systems.
Fixes: b721ca0d1927 ("thermal/powerclamp: remove cpu whitelist")
Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada(a)linux.intel.com>
Cc: 4.14+ <stable(a)vger.kernel.org> # 4.14+
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki(a)intel.com>
diff --git a/drivers/thermal/intel/intel_powerclamp.c b/drivers/thermal/intel/intel_powerclamp.c
index b80e25ec1261..2f4cbfdf26a0 100644
--- a/drivers/thermal/intel/intel_powerclamp.c
+++ b/drivers/thermal/intel/intel_powerclamp.c
@@ -57,6 +57,7 @@
static unsigned int target_mwait;
static struct dentry *debug_dir;
+static bool poll_pkg_cstate_enable;
/* user selected target */
static unsigned int set_target_ratio;
@@ -261,6 +262,9 @@ static unsigned int get_compensation(int ratio)
{
unsigned int comp = 0;
+ if (!poll_pkg_cstate_enable)
+ return 0;
+
/* we only use compensation if all adjacent ones are good */
if (ratio == 1 &&
cal_data[ratio].confidence >= CONFIDENCE_OK &&
@@ -519,7 +523,8 @@ static int start_power_clamp(void)
control_cpu = cpumask_first(cpu_online_mask);
clamping = true;
- schedule_delayed_work(&poll_pkg_cstate_work, 0);
+ if (poll_pkg_cstate_enable)
+ schedule_delayed_work(&poll_pkg_cstate_work, 0);
/* start one kthread worker per online cpu */
for_each_online_cpu(cpu) {
@@ -585,11 +590,15 @@ static int powerclamp_get_max_state(struct thermal_cooling_device *cdev,
static int powerclamp_get_cur_state(struct thermal_cooling_device *cdev,
unsigned long *state)
{
- if (true == clamping)
- *state = pkg_cstate_ratio_cur;
- else
+ if (clamping) {
+ if (poll_pkg_cstate_enable)
+ *state = pkg_cstate_ratio_cur;
+ else
+ *state = set_target_ratio;
+ } else {
/* to save power, do not poll idle ratio while not clamping */
*state = -1; /* indicates invalid state */
+ }
return 0;
}
@@ -712,6 +721,9 @@ static int __init powerclamp_init(void)
goto exit_unregister;
}
+ if (topology_max_packages() == 1 && topology_max_die_per_package() == 1)
+ poll_pkg_cstate_enable = true;
+
cooling_dev = thermal_cooling_device_register("intel_powerclamp", NULL,
&powerclamp_cooling_ops);
if (IS_ERR(cooling_dev)) {
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.19.y
git checkout FETCH_HEAD
git cherry-pick -x 8e47363588377e1bdb65e2b020b409cfb44dd260
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '16781926219650(a)kroah.com' --subject-prefix 'PATCH 4.19.y' HEAD^..
Possible dependencies:
8e4736358837 ("thermal: intel: powerclamp: Fix cur_state for multi package system")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 8e47363588377e1bdb65e2b020b409cfb44dd260 Mon Sep 17 00:00:00 2001
From: Srinivas Pandruvada <srinivas.pandruvada(a)linux.intel.com>
Date: Wed, 1 Feb 2023 12:39:41 -0800
Subject: [PATCH] thermal: intel: powerclamp: Fix cur_state for multi package
system
The powerclamp cooling device cur_state shows actual idle observed by
package C-state idle counters. But the implementation is not sufficient
for multi package or multi die system. The cur_state value is incorrect.
On these systems, these counters must be read from each package/die and
somehow aggregate them. But there is no good method for aggregation.
It was not a problem when explicit CPU model addition was required to
enable intel powerclamp. In this way certain CPU models could have
been avoided. But with the removal of CPU model check with the
availability of Package C-state counters, the driver is loaded on most
of the recent systems.
For multi package/die systems, just show the actual target idle state,
the system is trying to achieve. In powerclamp this is the user set
state minus one.
Also there is no use of starting a worker thread for polling package
C-state counters and applying any compensation for multiple package
or multiple die systems.
Fixes: b721ca0d1927 ("thermal/powerclamp: remove cpu whitelist")
Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada(a)linux.intel.com>
Cc: 4.14+ <stable(a)vger.kernel.org> # 4.14+
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki(a)intel.com>
diff --git a/drivers/thermal/intel/intel_powerclamp.c b/drivers/thermal/intel/intel_powerclamp.c
index b80e25ec1261..2f4cbfdf26a0 100644
--- a/drivers/thermal/intel/intel_powerclamp.c
+++ b/drivers/thermal/intel/intel_powerclamp.c
@@ -57,6 +57,7 @@
static unsigned int target_mwait;
static struct dentry *debug_dir;
+static bool poll_pkg_cstate_enable;
/* user selected target */
static unsigned int set_target_ratio;
@@ -261,6 +262,9 @@ static unsigned int get_compensation(int ratio)
{
unsigned int comp = 0;
+ if (!poll_pkg_cstate_enable)
+ return 0;
+
/* we only use compensation if all adjacent ones are good */
if (ratio == 1 &&
cal_data[ratio].confidence >= CONFIDENCE_OK &&
@@ -519,7 +523,8 @@ static int start_power_clamp(void)
control_cpu = cpumask_first(cpu_online_mask);
clamping = true;
- schedule_delayed_work(&poll_pkg_cstate_work, 0);
+ if (poll_pkg_cstate_enable)
+ schedule_delayed_work(&poll_pkg_cstate_work, 0);
/* start one kthread worker per online cpu */
for_each_online_cpu(cpu) {
@@ -585,11 +590,15 @@ static int powerclamp_get_max_state(struct thermal_cooling_device *cdev,
static int powerclamp_get_cur_state(struct thermal_cooling_device *cdev,
unsigned long *state)
{
- if (true == clamping)
- *state = pkg_cstate_ratio_cur;
- else
+ if (clamping) {
+ if (poll_pkg_cstate_enable)
+ *state = pkg_cstate_ratio_cur;
+ else
+ *state = set_target_ratio;
+ } else {
/* to save power, do not poll idle ratio while not clamping */
*state = -1; /* indicates invalid state */
+ }
return 0;
}
@@ -712,6 +721,9 @@ static int __init powerclamp_init(void)
goto exit_unregister;
}
+ if (topology_max_packages() == 1 && topology_max_die_per_package() == 1)
+ poll_pkg_cstate_enable = true;
+
cooling_dev = thermal_cooling_device_register("intel_powerclamp", NULL,
&powerclamp_cooling_ops);
if (IS_ERR(cooling_dev)) {
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 194b3348bdbb7db65375c72f3f774aee4cc6614e
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '1678206664158120(a)kroah.com' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
194b3348bdbb ("iommu/vt-d: Fix PASID directory pointer coherency")
803766cbf85f ("iommu/vt-d: Fix lockdep splat in intel_pasid_get_entry()")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 194b3348bdbb7db65375c72f3f774aee4cc6614e Mon Sep 17 00:00:00 2001
From: Jacob Pan <jacob.jun.pan(a)linux.intel.com>
Date: Thu, 16 Feb 2023 21:08:15 +0800
Subject: [PATCH] iommu/vt-d: Fix PASID directory pointer coherency
On platforms that do not support IOMMU Extended capability bit 0
Page-walk Coherency, CPU caches are not snooped when IOMMU is accessing
any translation structures. IOMMU access goes only directly to
memory. Intel IOMMU code was missing a flush for the PASID table
directory that resulted in the unrecoverable fault as shown below.
This patch adds clflush calls whenever allocating and updating
a PASID table directory to ensure cache coherency.
On the reverse direction, there's no need to clflush the PASID directory
pointer when we deactivate a context entry in that IOMMU hardware will
not see the old PASID directory pointer after we clear the context entry.
PASID directory entries are also never freed once allocated.
DMAR: DRHD: handling fault status reg 3
DMAR: [DMA Read NO_PASID] Request device [00:0d.2] fault addr 0x1026a4000
[fault reason 0x51] SM: Present bit in Directory Entry is clear
DMAR: Dump dmar1 table entries for IOVA 0x1026a4000
DMAR: scalable mode root entry: hi 0x0000000102448001, low 0x0000000101b3e001
DMAR: context entry: hi 0x0000000000000000, low 0x0000000101b4d401
DMAR: pasid dir entry: 0x0000000101b4e001
DMAR: pasid table entry[0]: 0x0000000000000109
DMAR: pasid table entry[1]: 0x0000000000000001
DMAR: pasid table entry[2]: 0x0000000000000000
DMAR: pasid table entry[3]: 0x0000000000000000
DMAR: pasid table entry[4]: 0x0000000000000000
DMAR: pasid table entry[5]: 0x0000000000000000
DMAR: pasid table entry[6]: 0x0000000000000000
DMAR: pasid table entry[7]: 0x0000000000000000
DMAR: PTE not present at level 4
Cc: <stable(a)vger.kernel.org>
Fixes: 0bbeb01a4faf ("iommu/vt-d: Manage scalalble mode PASID tables")
Reviewed-by: Kevin Tian <kevin.tian(a)intel.com>
Reported-by: Sukumar Ghorai <sukumar.ghorai(a)intel.com>
Signed-off-by: Ashok Raj <ashok.raj(a)intel.com>
Signed-off-by: Jacob Pan <jacob.jun.pan(a)linux.intel.com>
Link: https://lore.kernel.org/r/20230209212843.1788125-1-jacob.jun.pan@linux.inte…
Signed-off-by: Lu Baolu <baolu.lu(a)linux.intel.com>
Signed-off-by: Joerg Roedel <jroedel(a)suse.de>
diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c
index ec964ac7d797..9d2f05cf6164 100644
--- a/drivers/iommu/intel/pasid.c
+++ b/drivers/iommu/intel/pasid.c
@@ -128,6 +128,9 @@ int intel_pasid_alloc_table(struct device *dev)
pasid_table->max_pasid = 1 << (order + PAGE_SHIFT + 3);
info->pasid_table = pasid_table;
+ if (!ecap_coherent(info->iommu->ecap))
+ clflush_cache_range(pasid_table->table, size);
+
return 0;
}
@@ -215,6 +218,10 @@ static struct pasid_entry *intel_pasid_get_entry(struct device *dev, u32 pasid)
free_pgtable_page(entries);
goto retry;
}
+ if (!ecap_coherent(info->iommu->ecap)) {
+ clflush_cache_range(entries, VTD_PAGE_SIZE);
+ clflush_cache_range(&dir[dir_index].val, sizeof(*dir));
+ }
}
return &entries[index];
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 194b3348bdbb7db65375c72f3f774aee4cc6614e
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '167820662222233(a)kroah.com' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
194b3348bdbb ("iommu/vt-d: Fix PASID directory pointer coherency")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 194b3348bdbb7db65375c72f3f774aee4cc6614e Mon Sep 17 00:00:00 2001
From: Jacob Pan <jacob.jun.pan(a)linux.intel.com>
Date: Thu, 16 Feb 2023 21:08:15 +0800
Subject: [PATCH] iommu/vt-d: Fix PASID directory pointer coherency
On platforms that do not support IOMMU Extended capability bit 0
Page-walk Coherency, CPU caches are not snooped when IOMMU is accessing
any translation structures. IOMMU access goes only directly to
memory. Intel IOMMU code was missing a flush for the PASID table
directory that resulted in the unrecoverable fault as shown below.
This patch adds clflush calls whenever allocating and updating
a PASID table directory to ensure cache coherency.
On the reverse direction, there's no need to clflush the PASID directory
pointer when we deactivate a context entry in that IOMMU hardware will
not see the old PASID directory pointer after we clear the context entry.
PASID directory entries are also never freed once allocated.
DMAR: DRHD: handling fault status reg 3
DMAR: [DMA Read NO_PASID] Request device [00:0d.2] fault addr 0x1026a4000
[fault reason 0x51] SM: Present bit in Directory Entry is clear
DMAR: Dump dmar1 table entries for IOVA 0x1026a4000
DMAR: scalable mode root entry: hi 0x0000000102448001, low 0x0000000101b3e001
DMAR: context entry: hi 0x0000000000000000, low 0x0000000101b4d401
DMAR: pasid dir entry: 0x0000000101b4e001
DMAR: pasid table entry[0]: 0x0000000000000109
DMAR: pasid table entry[1]: 0x0000000000000001
DMAR: pasid table entry[2]: 0x0000000000000000
DMAR: pasid table entry[3]: 0x0000000000000000
DMAR: pasid table entry[4]: 0x0000000000000000
DMAR: pasid table entry[5]: 0x0000000000000000
DMAR: pasid table entry[6]: 0x0000000000000000
DMAR: pasid table entry[7]: 0x0000000000000000
DMAR: PTE not present at level 4
Cc: <stable(a)vger.kernel.org>
Fixes: 0bbeb01a4faf ("iommu/vt-d: Manage scalalble mode PASID tables")
Reviewed-by: Kevin Tian <kevin.tian(a)intel.com>
Reported-by: Sukumar Ghorai <sukumar.ghorai(a)intel.com>
Signed-off-by: Ashok Raj <ashok.raj(a)intel.com>
Signed-off-by: Jacob Pan <jacob.jun.pan(a)linux.intel.com>
Link: https://lore.kernel.org/r/20230209212843.1788125-1-jacob.jun.pan@linux.inte…
Signed-off-by: Lu Baolu <baolu.lu(a)linux.intel.com>
Signed-off-by: Joerg Roedel <jroedel(a)suse.de>
diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c
index ec964ac7d797..9d2f05cf6164 100644
--- a/drivers/iommu/intel/pasid.c
+++ b/drivers/iommu/intel/pasid.c
@@ -128,6 +128,9 @@ int intel_pasid_alloc_table(struct device *dev)
pasid_table->max_pasid = 1 << (order + PAGE_SHIFT + 3);
info->pasid_table = pasid_table;
+ if (!ecap_coherent(info->iommu->ecap))
+ clflush_cache_range(pasid_table->table, size);
+
return 0;
}
@@ -215,6 +218,10 @@ static struct pasid_entry *intel_pasid_get_entry(struct device *dev, u32 pasid)
free_pgtable_page(entries);
goto retry;
}
+ if (!ecap_coherent(info->iommu->ecap)) {
+ clflush_cache_range(entries, VTD_PAGE_SIZE);
+ clflush_cache_range(&dir[dir_index].val, sizeof(*dir));
+ }
}
return &entries[index];