Hi,
Memory corruption may occur if the location of the HFI memory buffer is not restored when resuming from hibernation or suspend-to-memory.
During a normal boot, the kernel allocates a memory buffer and gives it to the hardware for reporting updates in the HFI table. The same allocation process is done by a restore kernel when resuming from suspend or hibernation.
The location of the memory that the restore kernel allocates may differ from that allocated by the image kernel. To prevent memory corruption (the hardware keeps using the memory buffer from the restore kernel), it is necessary to disable HFI before transferring control to the image kernel. Once running, the image kernel must restore the location of the HFI memory and enable HFI.
The patchset addresses the described bug on systems with one or more HFI instances (i.e., packages) using CPU hotplug callbacks and a suspend notifier.
I tested this patchset on Meteor Lake and Sapphire Rapids. The systems completed 3500 (in two separate tests of 1500 and 2000 repeats) and 1000 hibernate-resume cycles, respectively. I tested it using Rafael's testing branch as on 20th December 2023.
Thanks and BR, Ricardo
Ricardo Neri (4): thermal: intel: hfi: Refactor enabling code into helper functions thermal: intel: hfi: Enable an HFI instance from its first online CPU thermal: intel: hfi: Disable an HFI instance when all its CPUs go offline thermal: intel: hfi: Add a suspend notifier
drivers/thermal/intel/intel_hfi.c | 142 ++++++++++++++++++++++++------ 1 file changed, 116 insertions(+), 26 deletions(-)
In preparation to add a suspend notifier, wrap the logic to enable HFI and program its memory buffer into helper functions. Both the CPU hotplug callback and the suspend notifier will use it.
Cc: Chen Yu yu.c.chen@intel.com Cc: Len Brown len.brown@intel.com Cc: Srinivas Pandruvada srinivas.pandruvada@linux.intel.com Cc: Stanislaw Gruszka stanislaw.gruszka@linux.intel.com Cc: Zhang Rui rui.zhang@intel.com Cc: Zhao Liu zhao1.liu@linux.intel.com Cc: linux-pm@vger.kernel.org Cc: stable@vger.kernel.org Signed-off-by: Ricardo Neri ricardo.neri-calderon@linux.intel.com --- drivers/thermal/intel/intel_hfi.c | 46 +++++++++++++++++-------------- 1 file changed, 25 insertions(+), 21 deletions(-)
diff --git a/drivers/thermal/intel/intel_hfi.c b/drivers/thermal/intel/intel_hfi.c index c69db6c90869..87ac7b196981 100644 --- a/drivers/thermal/intel/intel_hfi.c +++ b/drivers/thermal/intel/intel_hfi.c @@ -347,6 +347,25 @@ static void init_hfi_instance(struct hfi_instance *hfi_instance) hfi_instance->data = hfi_instance->hdr + hfi_features.hdr_size; }
+static void hfi_enable(void) +{ + u64 msr_val; + + rdmsrl(MSR_IA32_HW_FEEDBACK_CONFIG, msr_val); + msr_val |= HW_FEEDBACK_CONFIG_HFI_ENABLE_BIT; + wrmsrl(MSR_IA32_HW_FEEDBACK_CONFIG, msr_val); +} + +static void hfi_set_hw_table(struct hfi_instance *hfi_instance) +{ + phys_addr_t hw_table_pa; + u64 msr_val; + + hw_table_pa = virt_to_phys(hfi_instance->hw_table); + msr_val = hw_table_pa | HW_FEEDBACK_PTR_VALID_BIT; + wrmsrl(MSR_IA32_HW_FEEDBACK_PTR, msr_val); +} + /** * intel_hfi_online() - Enable HFI on @cpu * @cpu: CPU in which the HFI will be enabled @@ -364,8 +383,6 @@ void intel_hfi_online(unsigned int cpu) { struct hfi_instance *hfi_instance; struct hfi_cpu_info *info; - phys_addr_t hw_table_pa; - u64 msr_val; u16 die_id;
/* Nothing to do if hfi_instances are missing. */ @@ -403,14 +420,16 @@ void intel_hfi_online(unsigned int cpu) /* * Hardware is programmed with the physical address of the first page * frame of the table. Hence, the allocated memory must be page-aligned. + * + * Some processors do not forget the initial address of the HFI table + * even after having been reprogrammed. Keep using the same pages. Do + * not free them. */ hfi_instance->hw_table = alloc_pages_exact(hfi_features.nr_table_pages, GFP_KERNEL | __GFP_ZERO); if (!hfi_instance->hw_table) goto unlock;
- hw_table_pa = virt_to_phys(hfi_instance->hw_table); - /* * Allocate memory to keep a local copy of the table that * hardware generates. @@ -420,16 +439,6 @@ void intel_hfi_online(unsigned int cpu) if (!hfi_instance->local_table) goto free_hw_table;
- /* - * Program the address of the feedback table of this die/package. On - * some processors, hardware remembers the old address of the HFI table - * even after having been reprogrammed and re-enabled. Thus, do not free - * the pages allocated for the table or reprogram the hardware with a - * new base address. Namely, program the hardware only once. - */ - msr_val = hw_table_pa | HW_FEEDBACK_PTR_VALID_BIT; - wrmsrl(MSR_IA32_HW_FEEDBACK_PTR, msr_val); - init_hfi_instance(hfi_instance);
INIT_DELAYED_WORK(&hfi_instance->update_work, hfi_update_work_fn); @@ -438,13 +447,8 @@ void intel_hfi_online(unsigned int cpu)
cpumask_set_cpu(cpu, hfi_instance->cpus);
- /* - * Enable the hardware feedback interface and never disable it. See - * comment on programming the address of the table. - */ - rdmsrl(MSR_IA32_HW_FEEDBACK_CONFIG, msr_val); - msr_val |= HW_FEEDBACK_CONFIG_HFI_ENABLE_BIT; - wrmsrl(MSR_IA32_HW_FEEDBACK_CONFIG, msr_val); + hfi_set_hw_table(hfi_instance); + hfi_enable();
unlock: mutex_unlock(&hfi_instance_lock);
Previously, HFI instances were never disabled once enabled. A CPU in an instance only had to check during boot whether another CPU had previously initialized the instance and its corresponding data structure.
A subsequent changeset will add functionality to disable instances to support hibernation. Such change will also make possible to disable an HFI instance during runtime via CPU hotplug.
Enable an HFI instance from the first of its CPUs that comes online. This covers the boot, CPU hotplug, and resume-from-suspend cases. It also covers systems with one or more HFI instances (i.e., packages).
Cc: Chen Yu yu.c.chen@intel.com Cc: Len Brown len.brown@intel.com Cc: Srinivas Pandruvada srinivas.pandruvada@linux.intel.com Cc: Stanislaw Gruszka stanislaw.gruszka@linux.intel.com Cc: Zhang Rui rui.zhang@intel.com Cc: Zhao Liu zhao1.liu@linux.intel.com Cc: linux-pm@vger.kernel.org Cc: stable@vger.kernel.org Signed-off-by: Ricardo Neri ricardo.neri-calderon@linux.intel.com --- drivers/thermal/intel/intel_hfi.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-)
diff --git a/drivers/thermal/intel/intel_hfi.c b/drivers/thermal/intel/intel_hfi.c index 87ac7b196981..15c8c3b841d2 100644 --- a/drivers/thermal/intel/intel_hfi.c +++ b/drivers/thermal/intel/intel_hfi.c @@ -409,13 +409,12 @@ void intel_hfi_online(unsigned int cpu) /* * Now check if the HFI instance of the package/die of @cpu has been * initialized (by checking its header). In such case, all we have to - * do is to add @cpu to this instance's cpumask. + * do is to add @cpu to this instance's cpumask and enable the instance + * if needed. */ mutex_lock(&hfi_instance_lock); - if (hfi_instance->hdr) { - cpumask_set_cpu(cpu, hfi_instance->cpus); - goto unlock; - } + if (hfi_instance->hdr) + goto enable;
/* * Hardware is programmed with the physical address of the first page @@ -445,10 +444,14 @@ void intel_hfi_online(unsigned int cpu) raw_spin_lock_init(&hfi_instance->table_lock); raw_spin_lock_init(&hfi_instance->event_lock);
+enable: cpumask_set_cpu(cpu, hfi_instance->cpus);
- hfi_set_hw_table(hfi_instance); - hfi_enable(); + /* Enable this HFI instance if this is its first online CPU. */ + if (cpumask_weight(hfi_instance->cpus) == 1) { + hfi_set_hw_table(hfi_instance); + hfi_enable(); + }
unlock: mutex_unlock(&hfi_instance_lock);
In preparation to support hibernation, add functionality to disable an HFI instance during CPU offline. The last CPU of an instance that goes offline will disable such instance.
The Intel Software Development Manual states that the operating system must wait for the hardware to set MSR_IA32_PACKAGE_THERM_STATUS[26] after disabling an HFI instance to ensure that it will no longer write on the HFI memory. Some processors, however, do not ever set such bit. Wait a minimum of 2ms to give time hardware to complete any pending memory writes.
Cc: Chen Yu yu.c.chen@intel.com Cc: Len Brown len.brown@intel.com Cc: Srinivas Pandruvada srinivas.pandruvada@linux.intel.com Cc: Stanislaw Gruszka stanislaw.gruszka@linux.intel.com Cc: Zhang Rui rui.zhang@intel.com Cc: Zhao Liu zhao1.liu@linux.intel.com Cc: linux-pm@vger.kernel.org Cc: stable@vger.kernel.org Signed-off-by: Ricardo Neri ricardo.neri-calderon@linux.intel.com --- drivers/thermal/intel/intel_hfi.c | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+)
diff --git a/drivers/thermal/intel/intel_hfi.c b/drivers/thermal/intel/intel_hfi.c index 15c8c3b841d2..d2c874f43786 100644 --- a/drivers/thermal/intel/intel_hfi.c +++ b/drivers/thermal/intel/intel_hfi.c @@ -24,6 +24,7 @@ #include <linux/bitops.h> #include <linux/cpufeature.h> #include <linux/cpumask.h> +#include <linux/delay.h> #include <linux/gfp.h> #include <linux/io.h> #include <linux/kernel.h> @@ -366,6 +367,31 @@ static void hfi_set_hw_table(struct hfi_instance *hfi_instance) wrmsrl(MSR_IA32_HW_FEEDBACK_PTR, msr_val); }
+static void hfi_disable(void) +{ + u64 msr_val; + int i; + + rdmsrl(MSR_IA32_HW_FEEDBACK_CONFIG, msr_val); + msr_val &= ~HW_FEEDBACK_CONFIG_HFI_ENABLE_BIT; + wrmsrl(MSR_IA32_HW_FEEDBACK_CONFIG, msr_val); + + /* + * Wait for hardware to acknowledge the disabling of HFI. Some + * processors may not do it. Wait for ~2ms. This is a reasonable + * time for hardware to complete any pending actions on the HFI + * memory. + */ + for (i = 0; i < 2000; i++) { + rdmsrl(MSR_IA32_PACKAGE_THERM_STATUS, msr_val); + if (msr_val & PACKAGE_THERM_STATUS_HFI_UPDATED) + break; + + udelay(1); + cpu_relax(); + } +} + /** * intel_hfi_online() - Enable HFI on @cpu * @cpu: CPU in which the HFI will be enabled @@ -491,6 +517,10 @@ void intel_hfi_offline(unsigned int cpu)
mutex_lock(&hfi_instance_lock); cpumask_clear_cpu(cpu, hfi_instance->cpus); + + if (!cpumask_weight(hfi_instance->cpus)) + hfi_disable(); + mutex_unlock(&hfi_instance_lock); }
The kernel gives the HFI hardware a memory region that the latter uses to provide updates to the HFI table. The kernel allocates this memory region at boot. It remains constant throughout runtime time.
When resuming from suspend or hibernation, the restore kernel allocates a second memory buffer and reprograms the HFI hardware with the new location as part of a normal boot. The location of the second memory buffer may differ from the one allocated by the image kernel. Subsequently, when the restore kernel transfers control to the image kernel, the second buffer becomes invalid, potentially leading to memory corruption if the hardware writes to it (hardware continues using the buffer from the restore kernel).
Add a suspend notifier to disable all HFI instances before jumping to the image kernel and enable them once the image kernel has been restored. Use the memory buffer that the image kernel allocated.
For non-boot CPUs, rely on the CPU hotplug callbacks as CPUs are disabled and enabled during suspend and resume, respectively.
The CPU hotplug callbacks do not cover the boot CPU. Handle the HFI instance of the boot CPU from the suspend notifier callback.
Cc: Chen Yu yu.c.chen@intel.com Cc: Len Brown len.brown@intel.com Cc: Srinivas Pandruvada srinivas.pandruvada@linux.intel.com Cc: Stanislaw Gruszka stanislaw.gruszka@linux.intel.com Cc: Zhang Rui rui.zhang@intel.com Cc: Zhao Liu zhao1.liu@linux.intel.com Cc: linux-pm@vger.kernel.org Cc: stable@vger.kernel.org Signed-off-by: Ricardo Neri ricardo.neri-calderon@linux.intel.com --- drivers/thermal/intel/intel_hfi.c | 53 +++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+)
diff --git a/drivers/thermal/intel/intel_hfi.c b/drivers/thermal/intel/intel_hfi.c index d2c874f43786..965c245e5e78 100644 --- a/drivers/thermal/intel/intel_hfi.c +++ b/drivers/thermal/intel/intel_hfi.c @@ -30,11 +30,13 @@ #include <linux/kernel.h> #include <linux/math.h> #include <linux/mutex.h> +#include <linux/notifier.h> #include <linux/percpu-defs.h> #include <linux/printk.h> #include <linux/processor.h> #include <linux/slab.h> #include <linux/spinlock.h> +#include <linux/suspend.h> #include <linux/string.h> #include <linux/topology.h> #include <linux/workqueue.h> @@ -569,11 +571,62 @@ static __init int hfi_parse_features(void) return 0; }
+static void hfi_do_pm_enable(void *info) +{ + struct hfi_instance *hfi_instance = info; + + hfi_set_hw_table(hfi_instance); + hfi_enable(); +} + +static void hfi_do_pm_disable(void *info) +{ + hfi_disable(); +} + +static int hfi_pm_notify(struct notifier_block *nb, + unsigned long mode, void *unused) +{ + struct hfi_cpu_info *info = &per_cpu(hfi_cpu_info, 0); + struct hfi_instance *hfi_instance = info->hfi_instance; + + /* HFI may not be in use. */ + if (!hfi_instance) + return 0; + + /* + * Only handle the HFI instance of the package of the boot CPU. The + * instances of other packages are handled in the CPU hotplug callbacks. + */ + switch (mode) { + case PM_HIBERNATION_PREPARE: + case PM_SUSPEND_PREPARE: + case PM_RESTORE_PREPARE: + return smp_call_function_single(0, hfi_do_pm_disable, + NULL, true); + + case PM_POST_RESTORE: + case PM_POST_HIBERNATION: + case PM_POST_SUSPEND: + return smp_call_function_single(0, hfi_do_pm_enable, + hfi_instance, true); + default: + return -EINVAL; + } +} + +static struct notifier_block hfi_pm_nb = { + .notifier_call = hfi_pm_notify, +}; + void __init intel_hfi_init(void) { struct hfi_instance *hfi_instance; int i, j;
+ if (register_pm_notifier(&hfi_pm_nb)) + return; + if (hfi_parse_features()) return;
On Wed, Dec 27, 2023 at 7:28 AM Ricardo Neri ricardo.neri-calderon@linux.intel.com wrote:
The kernel gives the HFI hardware a memory region that the latter uses to provide updates to the HFI table. The kernel allocates this memory region at boot. It remains constant throughout runtime time.
When resuming from suspend or hibernation, the restore kernel allocates a second memory buffer and reprograms the HFI hardware with the new location as part of a normal boot. The location of the second memory buffer may differ from the one allocated by the image kernel. Subsequently, when the restore kernel transfers control to the image kernel, the second buffer becomes invalid, potentially leading to memory corruption if the hardware writes to it (hardware continues using the buffer from the restore kernel).
Add a suspend notifier to disable all HFI instances before jumping to the image kernel and enable them once the image kernel has been restored. Use the memory buffer that the image kernel allocated.
For non-boot CPUs, rely on the CPU hotplug callbacks as CPUs are disabled and enabled during suspend and resume, respectively.
The CPU hotplug callbacks do not cover the boot CPU. Handle the HFI instance of the boot CPU from the suspend notifier callback.
Cc: Chen Yu yu.c.chen@intel.com Cc: Len Brown len.brown@intel.com Cc: Srinivas Pandruvada srinivas.pandruvada@linux.intel.com Cc: Stanislaw Gruszka stanislaw.gruszka@linux.intel.com Cc: Zhang Rui rui.zhang@intel.com Cc: Zhao Liu zhao1.liu@linux.intel.com Cc: linux-pm@vger.kernel.org Cc: stable@vger.kernel.org Signed-off-by: Ricardo Neri ricardo.neri-calderon@linux.intel.com
drivers/thermal/intel/intel_hfi.c | 53 +++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+)
diff --git a/drivers/thermal/intel/intel_hfi.c b/drivers/thermal/intel/intel_hfi.c index d2c874f43786..965c245e5e78 100644 --- a/drivers/thermal/intel/intel_hfi.c +++ b/drivers/thermal/intel/intel_hfi.c @@ -30,11 +30,13 @@ #include <linux/kernel.h> #include <linux/math.h> #include <linux/mutex.h> +#include <linux/notifier.h> #include <linux/percpu-defs.h> #include <linux/printk.h> #include <linux/processor.h> #include <linux/slab.h> #include <linux/spinlock.h> +#include <linux/suspend.h> #include <linux/string.h> #include <linux/topology.h> #include <linux/workqueue.h> @@ -569,11 +571,62 @@ static __init int hfi_parse_features(void) return 0; }
+static void hfi_do_pm_enable(void *info) +{
struct hfi_instance *hfi_instance = info;
hfi_set_hw_table(hfi_instance);
hfi_enable();
The above do RMW, so should locking be used here?
+}
+static void hfi_do_pm_disable(void *info) +{
hfi_disable();
+}
And here?
+static int hfi_pm_notify(struct notifier_block *nb,
unsigned long mode, void *unused)
+{
struct hfi_cpu_info *info = &per_cpu(hfi_cpu_info, 0);
struct hfi_instance *hfi_instance = info->hfi_instance;
/* HFI may not be in use. */
if (!hfi_instance)
return 0;
/*
* Only handle the HFI instance of the package of the boot CPU. The
* instances of other packages are handled in the CPU hotplug callbacks.
*/
switch (mode) {
case PM_HIBERNATION_PREPARE:
case PM_SUSPEND_PREPARE:
case PM_RESTORE_PREPARE:
return smp_call_function_single(0, hfi_do_pm_disable,
NULL, true);
case PM_POST_RESTORE:
case PM_POST_HIBERNATION:
case PM_POST_SUSPEND:
return smp_call_function_single(0, hfi_do_pm_enable,
hfi_instance, true);
default:
return -EINVAL;
}
+}
+static struct notifier_block hfi_pm_nb = {
.notifier_call = hfi_pm_notify,
+};
void __init intel_hfi_init(void) { struct hfi_instance *hfi_instance; int i, j;
if (register_pm_notifier(&hfi_pm_nb))
return;
if (hfi_parse_features()) return;
--
On Fri, Dec 29, 2023 at 06:27:30PM +0100, Rafael J. Wysocki wrote:
On Wed, Dec 27, 2023 at 7:28 AM Ricardo Neri ricardo.neri-calderon@linux.intel.com wrote:
The kernel gives the HFI hardware a memory region that the latter uses to provide updates to the HFI table. The kernel allocates this memory region at boot. It remains constant throughout runtime time.
When resuming from suspend or hibernation, the restore kernel allocates a second memory buffer and reprograms the HFI hardware with the new location as part of a normal boot. The location of the second memory buffer may differ from the one allocated by the image kernel. Subsequently, when the restore kernel transfers control to the image kernel, the second buffer becomes invalid, potentially leading to memory corruption if the hardware writes to it (hardware continues using the buffer from the restore kernel).
Add a suspend notifier to disable all HFI instances before jumping to the image kernel and enable them once the image kernel has been restored. Use the memory buffer that the image kernel allocated.
For non-boot CPUs, rely on the CPU hotplug callbacks as CPUs are disabled and enabled during suspend and resume, respectively.
The CPU hotplug callbacks do not cover the boot CPU. Handle the HFI instance of the boot CPU from the suspend notifier callback.
Cc: Chen Yu yu.c.chen@intel.com Cc: Len Brown len.brown@intel.com Cc: Srinivas Pandruvada srinivas.pandruvada@linux.intel.com Cc: Stanislaw Gruszka stanislaw.gruszka@linux.intel.com Cc: Zhang Rui rui.zhang@intel.com Cc: Zhao Liu zhao1.liu@linux.intel.com Cc: linux-pm@vger.kernel.org Cc: stable@vger.kernel.org Signed-off-by: Ricardo Neri ricardo.neri-calderon@linux.intel.com
drivers/thermal/intel/intel_hfi.c | 53 +++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+)
diff --git a/drivers/thermal/intel/intel_hfi.c b/drivers/thermal/intel/intel_hfi.c index d2c874f43786..965c245e5e78 100644 --- a/drivers/thermal/intel/intel_hfi.c +++ b/drivers/thermal/intel/intel_hfi.c @@ -30,11 +30,13 @@ #include <linux/kernel.h> #include <linux/math.h> #include <linux/mutex.h> +#include <linux/notifier.h> #include <linux/percpu-defs.h> #include <linux/printk.h> #include <linux/processor.h> #include <linux/slab.h> #include <linux/spinlock.h> +#include <linux/suspend.h> #include <linux/string.h> #include <linux/topology.h> #include <linux/workqueue.h> @@ -569,11 +571,62 @@ static __init int hfi_parse_features(void) return 0; }
+static void hfi_do_pm_enable(void *info) +{
struct hfi_instance *hfi_instance = info;
hfi_set_hw_table(hfi_instance);
hfi_enable();
The above do RMW, so should locking be used here?
+}
+static void hfi_do_pm_disable(void *info) +{
hfi_disable();
+}
And here?
On single-package systems, HFI enable/disable only happens on the boot CPU, via either the CPU hotplug callbacks or the suspend notifier. It is unlikely they will run concurrently, IMO. It also looks as good hygiene to me to use locking, just in case. I will use the hfi_instance_lock in my next version.
On multi-package systems, HFI instance of non-boot packages are always handled via the CPU hotplug callbacks.
+static int hfi_pm_notify(struct notifier_block *nb,
unsigned long mode, void *unused)
+{
struct hfi_cpu_info *info = &per_cpu(hfi_cpu_info, 0);
struct hfi_instance *hfi_instance = info->hfi_instance;
/* HFI may not be in use. */
if (!hfi_instance)
return 0;
/*
* Only handle the HFI instance of the package of the boot CPU. The
* instances of other packages are handled in the CPU hotplug callbacks.
*/
switch (mode) {
case PM_HIBERNATION_PREPARE:
case PM_SUSPEND_PREPARE:
case PM_RESTORE_PREPARE:
return smp_call_function_single(0, hfi_do_pm_disable,
NULL, true);
case PM_POST_RESTORE:
case PM_POST_HIBERNATION:
case PM_POST_SUSPEND:
return smp_call_function_single(0, hfi_do_pm_enable,
hfi_instance, true);
default:
return -EINVAL;
}
+}
+static struct notifier_block hfi_pm_nb = {
.notifier_call = hfi_pm_notify,
+};
void __init intel_hfi_init(void) { struct hfi_instance *hfi_instance; int i, j;
if (register_pm_notifier(&hfi_pm_nb))
return;
if (hfi_parse_features()) return;
--
linux-stable-mirror@lists.linaro.org