From: Rui Zhang zr.zhang@vivo.com
[ Upstream commit 7993d3a9c34f609c02171e115fd12c10e2105ff4 ]
The use_count of a regulator should only be incremented when the enable_count changes from 0 to 1. Similarly, the use_count should only be decremented when the enable_count changes from 1 to 0.
In the previous implementation, use_count was sometimes decremented to 0 when some consumer called unbalanced disable, leading to unexpected disable even the regulator is enabled by other consumers. With this change, the use_count accurately reflects the number of users which the regulator is enabled.
This should make things more robust in the case where a consumer does leak references.
Signed-off-by: Rui Zhang zr.zhang@vivo.com Link: https://lore.kernel.org/r/20231103074231.8031-1-zr.zhang@vivo.com Signed-off-by: Mark Brown broonie@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/regulator/core.c | 56 +++++++++++++++++++++------------------- 1 file changed, 30 insertions(+), 26 deletions(-)
diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index 3137e40fcd3e..a7b3e548ea5a 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -2918,7 +2918,8 @@ static int _regulator_enable(struct regulator *regulator) /* Fallthrough on positive return values - already enabled */ }
- rdev->use_count++; + if (regulator->enable_count == 1) + rdev->use_count++;
return 0;
@@ -2993,37 +2994,40 @@ static int _regulator_disable(struct regulator *regulator)
lockdep_assert_held_once(&rdev->mutex.base);
- if (WARN(rdev->use_count <= 0, + if (WARN(regulator->enable_count == 0, "unbalanced disables for %s\n", rdev_get_name(rdev))) return -EIO;
- /* are we the last user and permitted to disable ? */ - if (rdev->use_count == 1 && - (rdev->constraints && !rdev->constraints->always_on)) { - - /* we are last user */ - if (regulator_ops_is_valid(rdev, REGULATOR_CHANGE_STATUS)) { - ret = _notifier_call_chain(rdev, - REGULATOR_EVENT_PRE_DISABLE, - NULL); - if (ret & NOTIFY_STOP_MASK) - return -EINVAL; - - ret = _regulator_do_disable(rdev); - if (ret < 0) { - rdev_err(rdev, "failed to disable: %pe\n", ERR_PTR(ret)); - _notifier_call_chain(rdev, - REGULATOR_EVENT_ABORT_DISABLE, + if (regulator->enable_count == 1) { + /* disabling last enable_count from this regulator */ + /* are we the last user and permitted to disable ? */ + if (rdev->use_count == 1 && + (rdev->constraints && !rdev->constraints->always_on)) { + + /* we are last user */ + if (regulator_ops_is_valid(rdev, REGULATOR_CHANGE_STATUS)) { + ret = _notifier_call_chain(rdev, + REGULATOR_EVENT_PRE_DISABLE, + NULL); + if (ret & NOTIFY_STOP_MASK) + return -EINVAL; + + ret = _regulator_do_disable(rdev); + if (ret < 0) { + rdev_err(rdev, "failed to disable: %pe\n", ERR_PTR(ret)); + _notifier_call_chain(rdev, + REGULATOR_EVENT_ABORT_DISABLE, + NULL); + return ret; + } + _notifier_call_chain(rdev, REGULATOR_EVENT_DISABLE, NULL); - return ret; } - _notifier_call_chain(rdev, REGULATOR_EVENT_DISABLE, - NULL); - }
- rdev->use_count = 0; - } else if (rdev->use_count > 1) { - rdev->use_count--; + rdev->use_count = 0; + } else if (rdev->use_count > 1) { + rdev->use_count--; + } }
if (ret == 0)
From: Chris Riches chris.riches@nutanix.com
[ Upstream commit 022732e3d846e197539712e51ecada90ded0572a ]
When auditd_set sets the auditd_conn pointer, audit messages can immediately be put on the socket by other kernel threads. If the backlog is large or the rate is high, this can immediately fill the socket buffer. If the audit daemon requested an ACK for this operation, a full socket buffer causes the ACK to get dropped, also setting ENOBUFS on the socket.
To avoid this race and ensure ACKs get through, fast-track the ACK in this specific case to ensure it is sent before auditd_conn is set.
Signed-off-by: Chris Riches chris.riches@nutanix.com [PM: fix some tab vs space damage] Signed-off-by: Paul Moore paul@paul-moore.com Signed-off-by: Sasha Levin sashal@kernel.org --- kernel/audit.c | 31 ++++++++++++++++++++++++------- 1 file changed, 24 insertions(+), 7 deletions(-)
diff --git a/kernel/audit.c b/kernel/audit.c index 16205dd29843..9c8e5f732c4c 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -487,15 +487,19 @@ static void auditd_conn_free(struct rcu_head *rcu) * @pid: auditd PID * @portid: auditd netlink portid * @net: auditd network namespace pointer + * @skb: the netlink command from the audit daemon + * @ack: netlink ack flag, cleared if ack'd here * * Description: * This function will obtain and drop network namespace references as * necessary. Returns zero on success, negative values on failure. */ -static int auditd_set(struct pid *pid, u32 portid, struct net *net) +static int auditd_set(struct pid *pid, u32 portid, struct net *net, + struct sk_buff *skb, bool *ack) { unsigned long flags; struct auditd_connection *ac_old, *ac_new; + struct nlmsghdr *nlh;
if (!pid || !net) return -EINVAL; @@ -507,6 +511,13 @@ static int auditd_set(struct pid *pid, u32 portid, struct net *net) ac_new->portid = portid; ac_new->net = get_net(net);
+ /* send the ack now to avoid a race with the queue backlog */ + if (*ack) { + nlh = nlmsg_hdr(skb); + netlink_ack(skb, nlh, 0, NULL); + *ack = false; + } + spin_lock_irqsave(&auditd_conn_lock, flags); ac_old = rcu_dereference_protected(auditd_conn, lockdep_is_held(&auditd_conn_lock)); @@ -1200,7 +1211,8 @@ static int audit_replace(struct pid *pid) return auditd_send_unicast_skb(skb); }
-static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) +static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh, + bool *ack) { u32 seq; void *data; @@ -1293,7 +1305,8 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) /* register a new auditd connection */ err = auditd_set(req_pid, NETLINK_CB(skb).portid, - sock_net(NETLINK_CB(skb).sk)); + sock_net(NETLINK_CB(skb).sk), + skb, ack); if (audit_enabled != AUDIT_OFF) audit_log_config_change("audit_pid", new_pid, @@ -1538,9 +1551,10 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) * Parse the provided skb and deal with any messages that may be present, * malformed skbs are discarded. */ -static void audit_receive(struct sk_buff *skb) +static void audit_receive(struct sk_buff *skb) { struct nlmsghdr *nlh; + bool ack; /* * len MUST be signed for nlmsg_next to be able to dec it below 0 * if the nlmsg_len was not aligned @@ -1553,9 +1567,12 @@ static void audit_receive(struct sk_buff *skb)
audit_ctl_lock(); while (nlmsg_ok(nlh, len)) { - err = audit_receive_msg(skb, nlh); - /* if err or if this message says it wants a response */ - if (err || (nlh->nlmsg_flags & NLM_F_ACK)) + ack = nlh->nlmsg_flags & NLM_F_ACK; + err = audit_receive_msg(skb, nlh, &ack); + + /* send an ack if the user asked for one and audit_receive_msg + * didn't already do it, or if there was an error. */ + if (ack || err) netlink_ack(skb, nlh, err, NULL);
nlh = nlmsg_next(nlh, &len);
From: Avadhut Naik Avadhut.Naik@amd.com
[ Upstream commit 0706526ec7704dcd046239078ac175d11a88a95e ]
The debugfs_create_blob() function has been used to create read-only binary blobs in debugfs. The function filters out permissions, other than S_IRUSR, S_IRGRP and S_IROTH, provided while creating the blobs.
The very behavior though is being changed through previous patch in the series (fs: debugfs: Add write functionality to debugfs blobs) which makes the binary blobs writable by owners. Thus, all permissions provided while creating the blobs, except S_IRUSR,S_IWUSR, S_IRGRP, S_IROTH, will be filtered by debugfs_create_blob().
As such, rectify the permissions of panicinfo file since the S_IFREG flag was anyways being filtered out by debugfs_create_blob(). Moreover, the very flag will always be set be set for the panicinfo file through __debugfs_create_file().
Signed-off-by: Avadhut Naik Avadhut.Naik@amd.com Reviewed-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Reviewed-by: Tony Luck tony.luck@intel.com Signed-off-by: Rafael J. Wysocki rafael.j.wysocki@intel.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/platform/chrome/cros_ec_debugfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/platform/chrome/cros_ec_debugfs.c b/drivers/platform/chrome/cros_ec_debugfs.c index 091fdc154d79..6bf6f0e7b597 100644 --- a/drivers/platform/chrome/cros_ec_debugfs.c +++ b/drivers/platform/chrome/cros_ec_debugfs.c @@ -454,7 +454,7 @@ static int cros_ec_create_panicinfo(struct cros_ec_debugfs *debug_info) debug_info->panicinfo_blob.data = data; debug_info->panicinfo_blob.size = ret;
- debugfs_create_blob("panicinfo", S_IFREG | 0444, debug_info->dir, + debugfs_create_blob("panicinfo", 0444, debug_info->dir, &debug_info->panicinfo_blob);
return 0;
From: Yuntao Wang ytcoode@gmail.com
[ Upstream commit 4b3805daaacb2168665c6222f261e68accb120dc ]
The original intention of acpi_parse_entries_array() is to return the number of all matching entries on success. This number may be greater than the value of the max_entries parameter. When this happens, the function will output a warning message, indicating that `count - max_entries` matching entries remain unprocessed and have been ignored.
However, commit 4ceacd02f5a1 ("ACPI / table: Always count matched and successfully parsed entries") changed this logic to return the number of entries successfully processed by the handler. In this case, when the max_entries parameter is not zero, the number of entries successfully processed can never be greater than the value of max_entries. In other words, the expression `count > max_entries` will always evaluate to false. This means that the logic in the final if statement will never be executed.
Commit 99b0efd7c886 ("ACPI / tables: do not report the number of entries ignored by acpi_parse_entries()") mentioned this issue, but it tried to fix it by removing part of the warning message. This is meaningless because the pr_warn statement will never be executed in the first place.
Commit 8726d4f44150 ("ACPI / tables: fix acpi_parse_entries_array() so it traverses all subtables") introduced an errs variable, which is intended to make acpi_parse_entries_array() always traverse all of the subtables, calling as many of the callbacks as possible. However, it seems that the commit does not achieve this goal. For example, when a handler returns an error, none of the handlers will be called again in the subsequent iterations. This result appears to be no different from before the change.
This patch corrects and cleans up the logic of acpi_parse_entries_array(), making it return the number of all matching entries, rather than the number of entries successfully processed by handlers. Additionally, if an error occurs when executing a handler, the function will return -EINVAL immediately.
This patch should not affect existing users of acpi_parse_entries_array().
Signed-off-by: Yuntao Wang ytcoode@gmail.com Reviewed-by: Dave Jiang dave.jiang@intel.com Signed-off-by: Rafael J. Wysocki rafael.j.wysocki@intel.com Signed-off-by: Sasha Levin sashal@kernel.org --- lib/fw_table.c | 30 +++++++++--------------------- 1 file changed, 9 insertions(+), 21 deletions(-)
diff --git a/lib/fw_table.c b/lib/fw_table.c index 294df54e33b6..c49a09ee3853 100644 --- a/lib/fw_table.c +++ b/lib/fw_table.c @@ -85,11 +85,6 @@ acpi_get_subtable_type(char *id) return ACPI_SUBTABLE_COMMON; }
-static __init_or_acpilib bool has_handler(struct acpi_subtable_proc *proc) -{ - return proc->handler || proc->handler_arg; -} - static __init_or_acpilib int call_handler(struct acpi_subtable_proc *proc, union acpi_subtable_headers *hdr, unsigned long end) @@ -133,7 +128,6 @@ acpi_parse_entries_array(char *id, unsigned long table_size, unsigned long table_end, subtable_len, entry_len; struct acpi_subtable_entry entry; int count = 0; - int errs = 0; int i;
table_end = (unsigned long)table_header + table_header->length; @@ -145,25 +139,19 @@ acpi_parse_entries_array(char *id, unsigned long table_size, ((unsigned long)table_header + table_size); subtable_len = acpi_get_subtable_header_length(&entry);
- while (((unsigned long)entry.hdr) + subtable_len < table_end) { - if (max_entries && count >= max_entries) - break; - + while (((unsigned long)entry.hdr) + subtable_len < table_end) { for (i = 0; i < proc_num; i++) { if (acpi_get_entry_type(&entry) != proc[i].id) continue; - if (!has_handler(&proc[i]) || - (!errs && - call_handler(&proc[i], entry.hdr, table_end))) { - errs++; - continue; - } + + if (!max_entries || count < max_entries) + if (call_handler(&proc[i], entry.hdr, table_end)) + return -EINVAL;
proc[i].count++; + count++; break; } - if (i != proc_num) - count++;
/* * If entry->length is 0, break from this loop to avoid @@ -180,9 +168,9 @@ acpi_parse_entries_array(char *id, unsigned long table_size, }
if (max_entries && count > max_entries) { - pr_warn("[%4.4s:0x%02x] found the maximum %i entries\n", - id, proc->id, count); + pr_warn("[%4.4s:0x%02x] ignored %i entries of %i found\n", + id, proc->id, count - max_entries, count); }
- return errs ? -EINVAL : count; + return count; }
From: Srikar Srimath Tirumala srikars@nvidia.com
[ Upstream commit 310293a2b94197f3d75e65ab22672287a7938a00 ]
Current implementation of processor_thermal performs software throttling in fixed steps of "20%" which can be too coarse for some platforms. We observed some performance gain after reducing the throttle percentage. Change the CPUFREQ thermal reduction percentage and maximum thermal steps to be configurable. Also, update the default values of both for Nvidia Tegra241 (Grace) SoC. The thermal reduction percentage is reduced to "5%" and accordingly the maximum number of thermal steps are increased as they are derived from the reduction percentage.
Signed-off-by: Srikar Srimath Tirumala srikars@nvidia.com Co-developed-by: Sumit Gupta sumitg@nvidia.com Signed-off-by: Sumit Gupta sumitg@nvidia.com Acked-by: Sudeep Holla sudeep.holla@arm.com Acked-by: Hanjun Guo guohanjun@huawei.com Signed-off-by: Rafael J. Wysocki rafael.j.wysocki@intel.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/acpi/arm64/Makefile | 1 + drivers/acpi/arm64/thermal_cpufreq.c | 20 ++++++++++++ drivers/acpi/internal.h | 9 +++++ drivers/acpi/processor_thermal.c | 49 +++++++++++++++++++++++----- 4 files changed, 70 insertions(+), 9 deletions(-) create mode 100644 drivers/acpi/arm64/thermal_cpufreq.c
diff --git a/drivers/acpi/arm64/Makefile b/drivers/acpi/arm64/Makefile index 143debc1ba4a..726944648c9b 100644 --- a/drivers/acpi/arm64/Makefile +++ b/drivers/acpi/arm64/Makefile @@ -5,3 +5,4 @@ obj-$(CONFIG_ACPI_GTDT) += gtdt.o obj-$(CONFIG_ACPI_APMT) += apmt.o obj-$(CONFIG_ARM_AMBA) += amba.o obj-y += dma.o init.o +obj-y += thermal_cpufreq.o diff --git a/drivers/acpi/arm64/thermal_cpufreq.c b/drivers/acpi/arm64/thermal_cpufreq.c new file mode 100644 index 000000000000..d524f2cd6044 --- /dev/null +++ b/drivers/acpi/arm64/thermal_cpufreq.c @@ -0,0 +1,20 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include <linux/acpi.h> + +#include "../internal.h" + +#define SMCCC_SOC_ID_T241 0x036b0241 + +int acpi_arch_thermal_cpufreq_pctg(void) +{ + s32 soc_id = arm_smccc_get_soc_id_version(); + + /* + * Check JEP106 code for NVIDIA Tegra241 chip (036b:0241) and + * reduce the CPUFREQ Thermal reduction percentage to 5%. + */ + if (soc_id == SMCCC_SOC_ID_T241) + return 5; + + return 0; +} diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h index 866c7c4ed233..af1198e2b526 100644 --- a/drivers/acpi/internal.h +++ b/drivers/acpi/internal.h @@ -85,6 +85,15 @@ bool acpi_scan_is_offline(struct acpi_device *adev, bool uevent); acpi_status acpi_sysfs_table_handler(u32 event, void *table, void *context); void acpi_scan_table_notify(void);
+#ifdef CONFIG_ARM64 +int acpi_arch_thermal_cpufreq_pctg(void); +#else +static inline int acpi_arch_thermal_cpufreq_pctg(void) +{ + return 0; +} +#endif + /* -------------------------------------------------------------------------- Device Node Initialization / Removal -------------------------------------------------------------------------- */ diff --git a/drivers/acpi/processor_thermal.c b/drivers/acpi/processor_thermal.c index b7c6287eccca..1219adb11ab9 100644 --- a/drivers/acpi/processor_thermal.c +++ b/drivers/acpi/processor_thermal.c @@ -17,6 +17,8 @@ #include <acpi/processor.h> #include <linux/uaccess.h>
+#include "internal.h" + #ifdef CONFIG_CPU_FREQ
/* If a passive cooling situation is detected, primarily CPUfreq is used, as it @@ -26,12 +28,21 @@ */
#define CPUFREQ_THERMAL_MIN_STEP 0 -#define CPUFREQ_THERMAL_MAX_STEP 3
-static DEFINE_PER_CPU(unsigned int, cpufreq_thermal_reduction_pctg); +static int cpufreq_thermal_max_step __read_mostly = 3; + +/* + * Minimum throttle percentage for processor_thermal cooling device. + * The processor_thermal driver uses it to calculate the percentage amount by + * which cpu frequency must be reduced for each cooling state. This is also used + * to calculate the maximum number of throttling steps or cooling states. + */ +static int cpufreq_thermal_reduction_pctg __read_mostly = 20;
-#define reduction_pctg(cpu) \ - per_cpu(cpufreq_thermal_reduction_pctg, phys_package_first_cpu(cpu)) +static DEFINE_PER_CPU(unsigned int, cpufreq_thermal_reduction_step); + +#define reduction_step(cpu) \ + per_cpu(cpufreq_thermal_reduction_step, phys_package_first_cpu(cpu))
/* * Emulate "per package data" using per cpu data (which should really be @@ -71,7 +82,7 @@ static int cpufreq_get_max_state(unsigned int cpu) if (!cpu_has_cpufreq(cpu)) return 0;
- return CPUFREQ_THERMAL_MAX_STEP; + return cpufreq_thermal_max_step; }
static int cpufreq_get_cur_state(unsigned int cpu) @@ -79,7 +90,7 @@ static int cpufreq_get_cur_state(unsigned int cpu) if (!cpu_has_cpufreq(cpu)) return 0;
- return reduction_pctg(cpu); + return reduction_step(cpu); }
static int cpufreq_set_cur_state(unsigned int cpu, int state) @@ -92,7 +103,7 @@ static int cpufreq_set_cur_state(unsigned int cpu, int state) if (!cpu_has_cpufreq(cpu)) return 0;
- reduction_pctg(cpu) = state; + reduction_step(cpu) = state;
/* * Update all the CPUs in the same package because they all @@ -113,7 +124,8 @@ static int cpufreq_set_cur_state(unsigned int cpu, int state) if (!policy) return -EINVAL;
- max_freq = (policy->cpuinfo.max_freq * (100 - reduction_pctg(i) * 20)) / 100; + max_freq = (policy->cpuinfo.max_freq * + (100 - reduction_step(i) * cpufreq_thermal_reduction_pctg)) / 100;
cpufreq_cpu_put(policy);
@@ -126,10 +138,29 @@ static int cpufreq_set_cur_state(unsigned int cpu, int state) return 0; }
+static void acpi_thermal_cpufreq_config(void) +{ + int cpufreq_pctg = acpi_arch_thermal_cpufreq_pctg(); + + if (!cpufreq_pctg) + return; + + cpufreq_thermal_reduction_pctg = cpufreq_pctg; + + /* + * Derive the MAX_STEP from minimum throttle percentage so that the reduction + * percentage doesn't end up becoming negative. Also, cap the MAX_STEP so that + * the CPU performance doesn't become 0. + */ + cpufreq_thermal_max_step = (100 / cpufreq_pctg) - 2; +} + void acpi_thermal_cpufreq_init(struct cpufreq_policy *policy) { unsigned int cpu;
+ acpi_thermal_cpufreq_config(); + for_each_cpu(cpu, policy->related_cpus) { struct acpi_processor *pr = per_cpu(processors, cpu); int ret; @@ -190,7 +221,7 @@ static int acpi_processor_max_state(struct acpi_processor *pr)
/* * There exists four states according to - * cpufreq_thermal_reduction_pctg. 0, 1, 2, 3 + * cpufreq_thermal_reduction_step. 0, 1, 2, 3 */ max_state += cpufreq_get_max_state(pr->id); if (pr->flags.throttling)
From: Yuluo Qiu qyl27@outlook.com
[ Upstream commit 143176a46bdd3bfbe9ba2462bf94458e80d65ebf ]
The Colorful X15 AT 23 ACPI video-bus device report spurious ACPI_VIDEO_NOTIFY_CYCLE events resulting in spurious KEY_SWITCHVIDEOMODE events being reported to userspace (and causing trouble there) when an external screen plugged in.
Add a quirk setting the report_key_events mask to REPORT_BRIGHTNESS_KEY_EVENTS so that the ACPI_VIDEO_NOTIFY_CYCLE events will be ignored, while still reporting brightness up/down hotkey-presses to userspace normally.
Signed-off-by: Yuluo Qiu qyl27@outlook.com Co-developed-by: Celeste Liu CoelacanthusHex@gmail.com Signed-off-by: Celeste Liu CoelacanthusHex@gmail.com Signed-off-by: Rafael J. Wysocki rafael.j.wysocki@intel.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/acpi/acpi_video.c | 9 +++++++++ 1 file changed, 9 insertions(+)
diff --git a/drivers/acpi/acpi_video.c b/drivers/acpi/acpi_video.c index 6cee536c229a..551e372cb518 100644 --- a/drivers/acpi/acpi_video.c +++ b/drivers/acpi/acpi_video.c @@ -500,6 +500,15 @@ static const struct dmi_system_id video_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "Vostro 3350"), }, }, + { + .callback = video_set_report_key_events, + .driver_data = (void *)((uintptr_t)REPORT_BRIGHTNESS_KEY_EVENTS), + .ident = "COLORFUL X15 AT 23", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "COLORFUL"), + DMI_MATCH(DMI_PRODUCT_NAME, "X15 AT 23"), + }, + }, /* * Some machines change the brightness themselves when a brightness * hotkey gets pressed, despite us telling them not to. In this case
From: Dmitry Antipov dmantipov@yandex.ru
[ Upstream commit ba3f5058db437d919f8468db50483dd9028ff688 ]
When compiling with gcc version 14.0.0 20231126 (experimental) and CONFIG_FORTIFY_SOURCE=y, I've noticed the following:
In file included from ./include/linux/string.h:295, from ./include/linux/bitmap.h:12, from ./include/linux/cpumask.h:12, from ./arch/x86/include/asm/paravirt.h:17, from ./arch/x86/include/asm/cpuid.h:62, from ./arch/x86/include/asm/processor.h:19, from ./arch/x86/include/asm/cpufeature.h:5, from ./arch/x86/include/asm/thread_info.h:53, from ./include/linux/thread_info.h:60, from ./arch/x86/include/asm/preempt.h:9, from ./include/linux/preempt.h:79, from ./include/linux/spinlock.h:56, from ./include/linux/mmzone.h:8, from ./include/linux/gfp.h:7, from ./include/linux/slab.h:16, from ./include/linux/resource_ext.h:11, from ./include/linux/acpi.h:13, from drivers/pnp/pnpacpi/rsparser.c:11: In function 'fortify_memcpy_chk', inlined from 'pnpacpi_parse_allocated_vendor' at drivers/pnp/pnpacpi/rsparser.c:158:3, inlined from 'pnpacpi_allocated_resource' at drivers/pnp/pnpacpi/rsparser.c:249:3: ./include/linux/fortify-string.h:588:25: warning: call to '__read_overflow2_field' declared with attribute warning: detected read beyond size of field (2nd parameter); maybe use struct_group()? [-Wattribute-warning] 588 | __read_overflow2_field(q_size_field, size); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
According to the comments in include/linux/fortify-string.h, 'memcpy()', 'memmove()' and 'memset()' must not be used beyond individual struct members to ensure that the compiler can enforce protection against buffer overflows, and, IIUC, this also applies to partial copies from the particular member ('vendor->byte_data' in this case). So it should be better (and safer) to do both copies at once (and 'byte_data' of 'struct acpi_resource_vendor_typed' seems to be a good candidate for '__counted_by(byte_length)' as well).
Signed-off-by: Dmitry Antipov dmantipov@yandex.ru Reviewed-by: Kees Cook keescook@chromium.org Signed-off-by: Rafael J. Wysocki rafael.j.wysocki@intel.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/pnp/pnpacpi/rsparser.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/drivers/pnp/pnpacpi/rsparser.c b/drivers/pnp/pnpacpi/rsparser.c index 4f05f610391b..c02ce0834c2c 100644 --- a/drivers/pnp/pnpacpi/rsparser.c +++ b/drivers/pnp/pnpacpi/rsparser.c @@ -151,13 +151,13 @@ static int vendor_resource_matches(struct pnp_dev *dev, static void pnpacpi_parse_allocated_vendor(struct pnp_dev *dev, struct acpi_resource_vendor_typed *vendor) { - if (vendor_resource_matches(dev, vendor, &hp_ccsr_uuid, 16)) { - u64 start, length; + struct { u64 start, length; } range;
- memcpy(&start, vendor->byte_data, sizeof(start)); - memcpy(&length, vendor->byte_data + 8, sizeof(length)); - - pnp_add_mem_resource(dev, start, start + length - 1, 0); + if (vendor_resource_matches(dev, vendor, &hp_ccsr_uuid, + sizeof(range))) { + memcpy(&range, vendor->byte_data, sizeof(range)); + pnp_add_mem_resource(dev, range.start, range.start + + range.length - 1, 0); } }
From: Prarit Bhargava prarit@redhat.com
[ Upstream commit 72d9b9747e78979510e9aafdd32eb99c7aa30dd1 ]
The gcc plugin -fanalyzer [1] tries to detect various patterns of incorrect behaviour. The tool reports:
drivers/acpi/acpi_extlog.c: In function ‘extlog_exit’: drivers/acpi/acpi_extlog.c:307:12: warning: check of ‘extlog_l1_addr’ for NULL after already dereferencing it [-Wanalyzer-deref-before-check] | | 306 | ((struct extlog_l1_head *)extlog_l1_addr)->flags &= ~FLAG_OS_OPTIN; | | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^~~~~~~ | | | | | (1) pointer ‘extlog_l1_addr’ is dereferenced here | 307 | if (extlog_l1_addr) | | ~ | | | | | (2) pointer ‘extlog_l1_addr’ is checked for NULL here but it was already dereferenced at (1) |
Fix the NULL pointer dereference check in extlog_exit().
Link: https://gcc.gnu.org/onlinedocs/gcc-10.1.0/gcc/Static-Analyzer-Options.html # [1]
Signed-off-by: Prarit Bhargava prarit@redhat.com Signed-off-by: Rafael J. Wysocki rafael.j.wysocki@intel.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/acpi/acpi_extlog.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/drivers/acpi/acpi_extlog.c b/drivers/acpi/acpi_extlog.c index e120a96e1eae..193147769146 100644 --- a/drivers/acpi/acpi_extlog.c +++ b/drivers/acpi/acpi_extlog.c @@ -303,9 +303,10 @@ static int __init extlog_init(void) static void __exit extlog_exit(void) { mce_unregister_decode_chain(&extlog_mce_dec); - ((struct extlog_l1_head *)extlog_l1_addr)->flags &= ~FLAG_OS_OPTIN; - if (extlog_l1_addr) + if (extlog_l1_addr) { + ((struct extlog_l1_head *)extlog_l1_addr)->flags &= ~FLAG_OS_OPTIN; acpi_os_unmap_iomem(extlog_l1_addr, l1_size); + } if (elog_addr) acpi_os_unmap_iomem(elog_addr, elog_size); release_mem_region(elog_base, elog_size);
From: Thomas Weißschuh linux@weissschuh.net
[ Upstream commit bdeeeaba83682225a7bf5f100fe8652a59590d33 ]
qemu for LoongArch does not work properly with direct kernel boot. The kernel will panic during initialization and hang without any output.
When booting in EFI mode everything work correctly.
While users most likely don't have the LoongArch EFI binary installed at least an explicit error about 'file not found' is better than a hanging test without output that can never succeed.
Link: https://lore.kernel.org/loongarch/1738d60a-df3a-4102-b1da-d16a29b6e06a@t-8ch... Signed-off-by: Thomas Weißschuh linux@weissschuh.net Acked-by: Willy Tarreau w@1wt.eu Link: https://lore.kernel.org/r/20231031-nolibc-out-of-tree-v1-1-47c92f73590a@weis... Signed-off-by: Sasha Levin sashal@kernel.org --- tools/testing/selftests/nolibc/Makefile | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-)
diff --git a/tools/testing/selftests/nolibc/Makefile b/tools/testing/selftests/nolibc/Makefile index a0fc07253baf..eb258ae1d948 100644 --- a/tools/testing/selftests/nolibc/Makefile +++ b/tools/testing/selftests/nolibc/Makefile @@ -88,6 +88,13 @@ QEMU_ARCH_s390 = s390x QEMU_ARCH_loongarch = loongarch64 QEMU_ARCH = $(QEMU_ARCH_$(XARCH))
+QEMU_BIOS_DIR = /usr/share/edk2/ +QEMU_BIOS_loongarch = $(QEMU_BIOS_DIR)/loongarch64/OVMF_CODE.fd + +ifneq ($(QEMU_BIOS_$(XARCH)),) +QEMU_ARGS_BIOS = -bios $(QEMU_BIOS_$(XARCH)) +endif + # QEMU_ARGS : some arch-specific args to pass to qemu QEMU_ARGS_i386 = -M pc -append "console=ttyS0,9600 i8042.noaux panic=-1 $(TEST:%=NOLIBC_TEST=%)" QEMU_ARGS_x86_64 = -M pc -append "console=ttyS0,9600 i8042.noaux panic=-1 $(TEST:%=NOLIBC_TEST=%)" @@ -101,7 +108,7 @@ QEMU_ARGS_ppc64le = -M powernv -append "console=hvc0 panic=-1 $(TEST:%=NOLIBC QEMU_ARGS_riscv = -M virt -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)" QEMU_ARGS_s390 = -M s390-ccw-virtio -m 1G -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)" QEMU_ARGS_loongarch = -M virt -append "console=ttyS0,115200 panic=-1 $(TEST:%=NOLIBC_TEST=%)" -QEMU_ARGS = $(QEMU_ARGS_$(XARCH)) $(QEMU_ARGS_EXTRA) +QEMU_ARGS = $(QEMU_ARGS_$(XARCH)) $(QEMU_ARGS_BIOS) $(QEMU_ARGS_EXTRA)
# OUTPUT is only set when run from the main makefile, otherwise # it defaults to this nolibc directory.
From: Thomas Weißschuh linux@weissschuh.net
[ Upstream commit 07f679b50252dc9e3d0c19aca5801f82c230c527 ]
Center-align all possible status reports. Before OK and FAIL were center-aligned in relation to each other but SKIPPED and FAILED would be left-aligned.
Before:
7 environ_addr = <0x7fffef3e7c50> [OK] 8 environ_envp = <0x7fffef3e7c58> [FAIL] 9 environ_auxv [SKIPPED] 10 environ_total [SKIPPED] 11 environ_HOME = <0x7fffef3e99bd> [OK] 12 auxv_addr [SKIPPED] 13 auxv_AT_UID = 1000 [OK]
After:
7 environ_addr = <0x7ffff13b00a0> [OK] 8 environ_envp = <0x7ffff13b00a8> [FAIL] 9 environ_auxv [SKIPPED] 10 environ_total [SKIPPED] 11 environ_HOME = <0x7ffff13b19bd> [OK] 12 auxv_addr [SKIPPED] 13 auxv_AT_UID = 1000 [OK]
Signed-off-by: Thomas Weißschuh linux@weissschuh.net Signed-off-by: Sasha Levin sashal@kernel.org --- tools/testing/selftests/nolibc/nolibc-test.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c index 2f10541e6f38..e173014f6b66 100644 --- a/tools/testing/selftests/nolibc/nolibc-test.c +++ b/tools/testing/selftests/nolibc/nolibc-test.c @@ -150,11 +150,11 @@ static void result(int llen, enum RESULT r) const char *msg;
if (r == OK) - msg = " [OK]"; + msg = " [OK]"; else if (r == SKIPPED) msg = "[SKIPPED]"; else - msg = "[FAIL]"; + msg = " [FAIL]";
if (llen < 64) putcharn(' ', 64 - llen);
From: Yuntao Wang ytcoode@gmail.com
[ Upstream commit e3f577830ce216b0ca21d4750cbbd64cfc21efff ]
The for loop does not iterate over the last element of the node_to_pxm_map array. This could lead to a conflict between the final fake_pxm value and the existing pxm values. That is, the final fake_pxm value can not be guaranteed to be an unused pxm value.
While at it, fix up white space in slit_valid().
Signed-off-by: Yuntao Wang ytcoode@gmail.com [ rjw: Changelog edits ] Signed-off-by: Rafael J. Wysocki rafael.j.wysocki@intel.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/acpi/numa/srat.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/acpi/numa/srat.c b/drivers/acpi/numa/srat.c index 12f330b0eac0..b57de78fbf14 100644 --- a/drivers/acpi/numa/srat.c +++ b/drivers/acpi/numa/srat.c @@ -183,7 +183,7 @@ static int __init slit_valid(struct acpi_table_slit *slit) int i, j; int d = slit->locality_count; for (i = 0; i < d; i++) { - for (j = 0; j < d; j++) { + for (j = 0; j < d; j++) { u8 val = slit->entry[d*i + j]; if (i == j) { if (val != LOCAL_DISTANCE) @@ -532,7 +532,7 @@ int __init acpi_numa_init(void) */
/* fake_pxm is the next unused PXM value after SRAT parsing */ - for (i = 0, fake_pxm = -1; i < MAX_NUMNODES - 1; i++) { + for (i = 0, fake_pxm = -1; i < MAX_NUMNODES; i++) { if (node_to_pxm_map[i] > fake_pxm) fake_pxm = node_to_pxm_map[i]; }
From: Rae Moar rmoar@google.com
[ Upstream commit 8ae27bc7fff4ef467a7964821a6cedb34a05d3b2 ]
Add parsing of attributes as diagnostic data. Fixes issue with test plan being parsed incorrectly as diagnostic data when located after suite-level attributes.
Note that if there does not exist a test plan line, the diagnostic lines between the suite header and the first result will be saved in the suite log rather than the first test case log.
Signed-off-by: Rae Moar rmoar@google.com Reviewed-by: David Gow davidgow@google.com Signed-off-by: Shuah Khan skhan@linuxfoundation.org Signed-off-by: Sasha Levin sashal@kernel.org --- tools/testing/kunit/kunit_parser.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/tools/testing/kunit/kunit_parser.py b/tools/testing/kunit/kunit_parser.py index 79d8832c862a..ce34be15c929 100644 --- a/tools/testing/kunit/kunit_parser.py +++ b/tools/testing/kunit/kunit_parser.py @@ -450,7 +450,7 @@ def parse_diagnostic(lines: LineStream) -> List[str]: Log of diagnostic lines """ log = [] # type: List[str] - non_diagnostic_lines = [TEST_RESULT, TEST_HEADER, KTAP_START, TAP_START] + non_diagnostic_lines = [TEST_RESULT, TEST_HEADER, KTAP_START, TAP_START, TEST_PLAN] while lines and not any(re.match(lines.peek()) for re in non_diagnostic_lines): log.append(lines.pop()) @@ -726,6 +726,7 @@ def parse_test(lines: LineStream, expected_num: int, log: List[str], is_subtest: # test plan test.name = "main" ktap_line = parse_ktap_header(lines, test) + test.log.extend(parse_diagnostic(lines)) parse_test_plan(lines, test) parent_test = True else: @@ -737,6 +738,7 @@ def parse_test(lines: LineStream, expected_num: int, log: List[str], is_subtest: if parent_test: # If KTAP version line and/or subtest header is found, attempt # to parse test plan and print test header + test.log.extend(parse_diagnostic(lines)) parse_test_plan(lines, test) print_test_header(test) expected_count = test.expected_count
From: Michal Wajdeczko michal.wajdeczko@intel.com
[ Upstream commit 342fb9789267ee3908959bfa136b82e88e2ce918 ]
If we run parameterized test that uses test->priv to prepare some custom data, then value of test->priv will leak to the next param iteration and may be unexpected. This could be easily seen if we promote example_priv_test to parameterized test as then only first test iteration will be successful:
$ ./tools/testing/kunit/kunit.py run \ --kunitconfig ./lib/kunit/.kunitconfig *.example_priv*
[ ] Starting KUnit Kernel (1/1)... [ ] ============================================================ [ ] =================== example (1 subtest) ==================== [ ] ==================== example_priv_test ==================== [ ] [PASSED] example value 3 [ ] # example_priv_test: initializing [ ] # example_priv_test: ASSERTION FAILED at lib/kunit/kunit-example-test.c:230 [ ] Expected test->priv == ((void *)0), but [ ] test->priv == 0000000060dfe290 [ ] ((void *)0) == 0000000000000000 [ ] # example_priv_test: cleaning up [ ] [FAILED] example value 2 [ ] # example_priv_test: initializing [ ] # example_priv_test: ASSERTION FAILED at lib/kunit/kunit-example-test.c:230 [ ] Expected test->priv == ((void *)0), but [ ] test->priv == 0000000060dfe290 [ ] ((void *)0) == 0000000000000000 [ ] # example_priv_test: cleaning up [ ] [FAILED] example value 1 [ ] # example_priv_test: initializing [ ] # example_priv_test: ASSERTION FAILED at lib/kunit/kunit-example-test.c:230 [ ] Expected test->priv == ((void *)0), but [ ] test->priv == 0000000060dfe290 [ ] ((void *)0) == 0000000000000000 [ ] # example_priv_test: cleaning up [ ] [FAILED] example value 0 [ ] # example_priv_test: initializing [ ] # example_priv_test: cleaning up [ ] # example_priv_test: pass:1 fail:3 skip:0 total:4 [ ] ================ [FAILED] example_priv_test ================ [ ] # example: initializing suite [ ] # module: kunit_example_test [ ] # example: exiting suite [ ] # Totals: pass:1 fail:3 skip:0 total:4 [ ] ===================== [FAILED] example =====================
Fix that by resetting test->priv after each param iteration, in similar way what we did for the test->status.
Signed-off-by: Michal Wajdeczko michal.wajdeczko@intel.com Cc: David Gow davidgow@google.com Cc: Rae Moar rmoar@google.com Reviewed-by: David Gow davidgow@google.com Signed-off-by: Shuah Khan skhan@linuxfoundation.org Signed-off-by: Sasha Levin sashal@kernel.org --- lib/kunit/test.c | 1 + 1 file changed, 1 insertion(+)
diff --git a/lib/kunit/test.c b/lib/kunit/test.c index 7aceb07a1af9..1cdc405daa30 100644 --- a/lib/kunit/test.c +++ b/lib/kunit/test.c @@ -660,6 +660,7 @@ int kunit_run_tests(struct kunit_suite *suite) test.param_index++; test.status = KUNIT_SUCCESS; test.status_comment[0] = '\0'; + test.priv = NULL; } }
From: Mukesh Ojha quic_mojha@quicinc.com
[ Upstream commit aed5ed595960c6d301dcd4ed31aeaa7a8054c0c6 ]
There is a chance if a frequent switch of the governor done in a loop result in timer list corruption where timer cancel being done from two place one from cancel_delayed_work_sync() and followed by expire_timers() can be seen from the traces[1].
while true do echo "simple_ondemand" > /sys/class/devfreq/1d84000.ufshc/governor echo "performance" > /sys/class/devfreq/1d84000.ufshc/governor done
It looks to be issue with devfreq driver where device_monitor_[start/stop] need to synchronized so that delayed work should get corrupted while it is either being queued or running or being cancelled.
Let's use polling flag and devfreq lock to synchronize the queueing the timer instance twice and work data being corrupted.
[1] ... .. <idle>-0 [003] 9436.209662: timer_cancel timer=0xffffff80444f0428 <idle>-0 [003] 9436.209664: timer_expire_entry timer=0xffffff80444f0428 now=0x10022da1c function=__typeid__ZTSFvP10timer_listE_global_addr baseclk=0x10022da1c <idle>-0 [003] 9436.209718: timer_expire_exit timer=0xffffff80444f0428 kworker/u16:6-14217 [003] 9436.209863: timer_start timer=0xffffff80444f0428 function=__typeid__ZTSFvP10timer_listE_global_addr expires=0x10022da2b now=0x10022da1c flags=182452227 vendor.xxxyyy.ha-1593 [004] 9436.209888: timer_cancel timer=0xffffff80444f0428 vendor.xxxyyy.ha-1593 [004] 9436.216390: timer_init timer=0xffffff80444f0428 vendor.xxxyyy.ha-1593 [004] 9436.216392: timer_start timer=0xffffff80444f0428 function=__typeid__ZTSFvP10timer_listE_global_addr expires=0x10022da2c now=0x10022da1d flags=186646532 vendor.xxxyyy.ha-1593 [005] 9436.220992: timer_cancel timer=0xffffff80444f0428 xxxyyyTraceManag-7795 [004] 9436.261641: timer_cancel timer=0xffffff80444f0428
[2]
9436.261653][ C4] Unable to handle kernel paging request at virtual address dead00000000012a [ 9436.261664][ C4] Mem abort info: [ 9436.261666][ C4] ESR = 0x96000044 [ 9436.261669][ C4] EC = 0x25: DABT (current EL), IL = 32 bits [ 9436.261671][ C4] SET = 0, FnV = 0 [ 9436.261673][ C4] EA = 0, S1PTW = 0 [ 9436.261675][ C4] Data abort info: [ 9436.261677][ C4] ISV = 0, ISS = 0x00000044 [ 9436.261680][ C4] CM = 0, WnR = 1 [ 9436.261682][ C4] [dead00000000012a] address between user and kernel address ranges [ 9436.261685][ C4] Internal error: Oops: 96000044 [#1] PREEMPT SMP [ 9436.261701][ C4] Skip md ftrace buffer dump for: 0x3a982d0 ...
[ 9436.262138][ C4] CPU: 4 PID: 7795 Comm: TraceManag Tainted: G S W O 5.10.149-android12-9-o-g17f915d29d0c #1 [ 9436.262141][ C4] Hardware name: Qualcomm Technologies, Inc. (DT) [ 9436.262144][ C4] pstate: 22400085 (nzCv daIf +PAN -UAO +TCO BTYPE=--) [ 9436.262161][ C4] pc : expire_timers+0x9c/0x438 [ 9436.262164][ C4] lr : expire_timers+0x2a4/0x438 [ 9436.262168][ C4] sp : ffffffc010023dd0 [ 9436.262171][ C4] x29: ffffffc010023df0 x28: ffffffd0636fdc18 [ 9436.262178][ C4] x27: ffffffd063569dd0 x26: ffffffd063536008 [ 9436.262182][ C4] x25: 0000000000000001 x24: ffffff88f7c69280 [ 9436.262185][ C4] x23: 00000000000000e0 x22: dead000000000122 [ 9436.262188][ C4] x21: 000000010022da29 x20: ffffff8af72b4e80 [ 9436.262191][ C4] x19: ffffffc010023e50 x18: ffffffc010025038 [ 9436.262195][ C4] x17: 0000000000000240 x16: 0000000000000201 [ 9436.262199][ C4] x15: ffffffffffffffff x14: ffffff889f3c3100 [ 9436.262203][ C4] x13: ffffff889f3c3100 x12: 00000000049f56b8 [ 9436.262207][ C4] x11: 00000000049f56b8 x10: 00000000ffffffff [ 9436.262212][ C4] x9 : ffffffc010023e50 x8 : dead000000000122 [ 9436.262216][ C4] x7 : ffffffffffffffff x6 : ffffffc0100239d8 [ 9436.262220][ C4] x5 : 0000000000000000 x4 : 0000000000000101 [ 9436.262223][ C4] x3 : 0000000000000080 x2 : ffffff889edc155c [ 9436.262227][ C4] x1 : ffffff8001005200 x0 : ffffff80444f0428 [ 9436.262232][ C4] Call trace: [ 9436.262236][ C4] expire_timers+0x9c/0x438 [ 9436.262240][ C4] __run_timers+0x1f0/0x330 [ 9436.262245][ C4] run_timer_softirq+0x28/0x58 [ 9436.262255][ C4] efi_header_end+0x168/0x5ec [ 9436.262265][ C4] __irq_exit_rcu+0x108/0x124 [ 9436.262274][ C4] __handle_domain_irq+0x118/0x1e4 [ 9436.262282][ C4] gic_handle_irq.30369+0x6c/0x2bc [ 9436.262286][ C4] el0_irq_naked+0x60/0x6c
Link: https://lore.kernel.org/all/1700860318-4025-1-git-send-email-quic_mojha@quic... Reported-by: Joyyoung Huang huangzaiyang@oppo.com Acked-by: MyungJoo Ham myungjoo.ham@samsung.com Signed-off-by: Mukesh Ojha quic_mojha@quicinc.com Signed-off-by: Chanwoo Choi cw00.choi@samsung.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/devfreq/devfreq.c | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-)
diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c index b3a68d5833bd..cb1c24721a37 100644 --- a/drivers/devfreq/devfreq.c +++ b/drivers/devfreq/devfreq.c @@ -461,10 +461,14 @@ static void devfreq_monitor(struct work_struct *work) if (err) dev_err(&devfreq->dev, "dvfs failed with (%d) error\n", err);
+ if (devfreq->stop_polling) + goto out; + queue_delayed_work(devfreq_wq, &devfreq->work, msecs_to_jiffies(devfreq->profile->polling_ms)); - mutex_unlock(&devfreq->lock);
+out: + mutex_unlock(&devfreq->lock); trace_devfreq_monitor(devfreq); }
@@ -483,6 +487,10 @@ void devfreq_monitor_start(struct devfreq *devfreq) if (IS_SUPPORTED_FLAG(devfreq->governor->flags, IRQ_DRIVEN)) return;
+ mutex_lock(&devfreq->lock); + if (delayed_work_pending(&devfreq->work)) + goto out; + switch (devfreq->profile->timer) { case DEVFREQ_TIMER_DEFERRABLE: INIT_DEFERRABLE_WORK(&devfreq->work, devfreq_monitor); @@ -491,12 +499,16 @@ void devfreq_monitor_start(struct devfreq *devfreq) INIT_DELAYED_WORK(&devfreq->work, devfreq_monitor); break; default: - return; + goto out; }
if (devfreq->profile->polling_ms) queue_delayed_work(devfreq_wq, &devfreq->work, msecs_to_jiffies(devfreq->profile->polling_ms)); + +out: + devfreq->stop_polling = false; + mutex_unlock(&devfreq->lock); } EXPORT_SYMBOL(devfreq_monitor_start);
@@ -513,6 +525,14 @@ void devfreq_monitor_stop(struct devfreq *devfreq) if (IS_SUPPORTED_FLAG(devfreq->governor->flags, IRQ_DRIVEN)) return;
+ mutex_lock(&devfreq->lock); + if (devfreq->stop_polling) { + mutex_unlock(&devfreq->lock); + return; + } + + devfreq->stop_polling = true; + mutex_unlock(&devfreq->lock); cancel_delayed_work_sync(&devfreq->work); } EXPORT_SYMBOL(devfreq_monitor_stop);
From: Armin Wolf W_Armin@gmx.de
[ Upstream commit 22574e17626391ad969af9a13aaa58a1b37ad384 ]
When removing the ACPI notify/address space handlers, the WMI devices are still active and might still depend on ACPI EC access or WMI events. Fix this by removing the ACPI handlers after all WMI devices associated with an ACPI device have been removed.
Reviewed-by: Hans de Goede hdegoede@redhat.com Signed-off-by: Armin Wolf W_Armin@gmx.de Link: https://lore.kernel.org/r/20231218192420.305411-3-W_Armin@gmx.de Signed-off-by: Hans de Goede hdegoede@redhat.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/platform/x86/wmi.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-)
diff --git a/drivers/platform/x86/wmi.c b/drivers/platform/x86/wmi.c index 5dd22258cb3b..d400e61d6801 100644 --- a/drivers/platform/x86/wmi.c +++ b/drivers/platform/x86/wmi.c @@ -1494,13 +1494,12 @@ static void acpi_wmi_remove(struct platform_device *device) struct acpi_device *acpi_device = ACPI_COMPANION(&device->dev); struct device *wmi_bus_device = dev_get_drvdata(&device->dev);
- acpi_remove_notify_handler(acpi_device->handle, ACPI_ALL_NOTIFY, - acpi_wmi_notify_handler); - acpi_remove_address_space_handler(acpi_device->handle, - ACPI_ADR_SPACE_EC, &acpi_wmi_ec_space_handler); - device_for_each_child_reverse(wmi_bus_device, NULL, wmi_remove_device); device_unregister(wmi_bus_device); + + acpi_remove_notify_handler(acpi_device->handle, ACPI_ALL_NOTIFY, acpi_wmi_notify_handler); + acpi_remove_address_space_handler(acpi_device->handle, ACPI_ADR_SPACE_EC, + &acpi_wmi_ec_space_handler); }
static int acpi_wmi_probe(struct platform_device *device)
Am 16.01.24 um 01:12 schrieb Sasha Levin:
From: Armin Wolf W_Armin@gmx.de
[ Upstream commit 22574e17626391ad969af9a13aaa58a1b37ad384 ]
When removing the ACPI notify/address space handlers, the WMI devices are still active and might still depend on ACPI EC access or WMI events. Fix this by removing the ACPI handlers after all WMI devices associated with an ACPI device have been removed.
Hello,
i would advise against including this patch in the next stable kernels.
The WMI ACPI notify handler is still using list_for_each_entry() which is not safe when the WMI devices are removed before the ACPI notify handler.
This issue existed inside the WMI code for a long time, but this patch might trigger it. Since no users reported any issues regarding ACPI errors during WMI removal, i would drop this patch.
Thanks, Armin Wolf
Reviewed-by: Hans de Goede hdegoede@redhat.com Signed-off-by: Armin Wolf W_Armin@gmx.de Link: https://lore.kernel.org/r/20231218192420.305411-3-W_Armin@gmx.de Signed-off-by: Hans de Goede hdegoede@redhat.com Signed-off-by: Sasha Levin sashal@kernel.org
drivers/platform/x86/wmi.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-)
diff --git a/drivers/platform/x86/wmi.c b/drivers/platform/x86/wmi.c index 5dd22258cb3b..d400e61d6801 100644 --- a/drivers/platform/x86/wmi.c +++ b/drivers/platform/x86/wmi.c @@ -1494,13 +1494,12 @@ static void acpi_wmi_remove(struct platform_device *device) struct acpi_device *acpi_device = ACPI_COMPANION(&device->dev); struct device *wmi_bus_device = dev_get_drvdata(&device->dev);
- acpi_remove_notify_handler(acpi_device->handle, ACPI_ALL_NOTIFY,
acpi_wmi_notify_handler);
- acpi_remove_address_space_handler(acpi_device->handle,
ACPI_ADR_SPACE_EC, &acpi_wmi_ec_space_handler);
- device_for_each_child_reverse(wmi_bus_device, NULL, wmi_remove_device); device_unregister(wmi_bus_device);
acpi_remove_notify_handler(acpi_device->handle, ACPI_ALL_NOTIFY, acpi_wmi_notify_handler);
acpi_remove_address_space_handler(acpi_device->handle, ACPI_ADR_SPACE_EC,
&acpi_wmi_ec_space_handler);
}
static int acpi_wmi_probe(struct platform_device *device)
On Tue, Jan 16, 2024 at 12:53:46PM +0100, Armin Wolf wrote:
Am 16.01.24 um 01:12 schrieb Sasha Levin:
From: Armin Wolf W_Armin@gmx.de
[ Upstream commit 22574e17626391ad969af9a13aaa58a1b37ad384 ]
When removing the ACPI notify/address space handlers, the WMI devices are still active and might still depend on ACPI EC access or WMI events. Fix this by removing the ACPI handlers after all WMI devices associated with an ACPI device have been removed.
Hello,
i would advise against including this patch in the next stable kernels.
The WMI ACPI notify handler is still using list_for_each_entry() which is not safe when the WMI devices are removed before the ACPI notify handler.
This issue existed inside the WMI code for a long time, but this patch might trigger it. Since no users reported any issues regarding ACPI errors during WMI removal, i would drop this patch.
Ack, I'll drop it.
From: Shuai Xue xueshuai@linux.alibaba.com
[ Upstream commit a70297d2213253853e95f5b49651f924990c6d3b ]
There are two major types of uncorrected recoverable (UCR) errors :
- Synchronous error: The error is detected and raised at the point of the consumption in the execution flow, e.g. when a CPU tries to access a poisoned cache line. The CPU will take a synchronous error exception such as Synchronous External Abort (SEA) on Arm64 and Machine Check Exception (MCE) on X86. OS requires to take action (for example, offline failure page/kill failure thread) to recover this uncorrectable error.
- Asynchronous error: The error is detected out of processor execution context, e.g. when an error is detected by a background scrubber. Some data in the memory are corrupted. But the data have not been consumed. OS is optional to take action to recover this uncorrectable error.
When APEI firmware first is enabled, a platform may describe one error source for the handling of synchronous errors (e.g. MCE or SEA notification ), or for handling asynchronous errors (e.g. SCI or External Interrupt notification). In other words, we can distinguish synchronous errors by APEI notification. For synchronous errors, kernel will kill the current process which accessing the poisoned page by sending SIGBUS with BUS_MCEERR_AR. In addition, for asynchronous errors, kernel will notify the process who owns the poisoned page by sending SIGBUS with BUS_MCEERR_AO in early kill mode. However, the GHES driver always sets mf_flags to 0 so that all synchronous errors are handled as asynchronous errors in memory failure.
To this end, set memory failure flags as MF_ACTION_REQUIRED on synchronous events.
Signed-off-by: Shuai Xue xueshuai@linux.alibaba.com Tested-by: Ma Wupeng mawupeng1@huawei.com Reviewed-by: Kefeng Wang wangkefeng.wang@huawei.com Reviewed-by: Xiaofei Tan tanxiaofei@huawei.com Reviewed-by: Baolin Wang baolin.wang@linux.alibaba.com Reviewed-by: James Morse james.morse@arm.com Signed-off-by: Rafael J. Wysocki rafael.j.wysocki@intel.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/acpi/apei/ghes.c | 29 +++++++++++++++++++++++------ 1 file changed, 23 insertions(+), 6 deletions(-)
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index 63ad0541db38..ab2a82cb1b0b 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -101,6 +101,20 @@ static inline bool is_hest_type_generic_v2(struct ghes *ghes) return ghes->generic->header.type == ACPI_HEST_TYPE_GENERIC_ERROR_V2; }
+/* + * A platform may describe one error source for the handling of synchronous + * errors (e.g. MCE or SEA), or for handling asynchronous errors (e.g. SCI + * or External Interrupt). On x86, the HEST notifications are always + * asynchronous, so only SEA on ARM is delivered as a synchronous + * notification. + */ +static inline bool is_hest_sync_notify(struct ghes *ghes) +{ + u8 notify_type = ghes->generic->notify.type; + + return notify_type == ACPI_HEST_NOTIFY_SEA; +} + /* * This driver isn't really modular, however for the time being, * continuing to use module_param is the easiest way to remain @@ -489,7 +503,7 @@ static bool ghes_do_memory_failure(u64 physical_addr, int flags) }
static bool ghes_handle_memory_failure(struct acpi_hest_generic_data *gdata, - int sev) + int sev, bool sync) { int flags = -1; int sec_sev = ghes_severity(gdata->error_severity); @@ -503,7 +517,7 @@ static bool ghes_handle_memory_failure(struct acpi_hest_generic_data *gdata, (gdata->flags & CPER_SEC_ERROR_THRESHOLD_EXCEEDED)) flags = MF_SOFT_OFFLINE; if (sev == GHES_SEV_RECOVERABLE && sec_sev == GHES_SEV_RECOVERABLE) - flags = 0; + flags = sync ? MF_ACTION_REQUIRED : 0;
if (flags != -1) return ghes_do_memory_failure(mem_err->physical_addr, flags); @@ -511,9 +525,11 @@ static bool ghes_handle_memory_failure(struct acpi_hest_generic_data *gdata, return false; }
-static bool ghes_handle_arm_hw_error(struct acpi_hest_generic_data *gdata, int sev) +static bool ghes_handle_arm_hw_error(struct acpi_hest_generic_data *gdata, + int sev, bool sync) { struct cper_sec_proc_arm *err = acpi_hest_get_payload(gdata); + int flags = sync ? MF_ACTION_REQUIRED : 0; bool queued = false; int sec_sev, i; char *p; @@ -538,7 +554,7 @@ static bool ghes_handle_arm_hw_error(struct acpi_hest_generic_data *gdata, int s * and don't filter out 'corrected' error here. */ if (is_cache && has_pa) { - queued = ghes_do_memory_failure(err_info->physical_fault_addr, 0); + queued = ghes_do_memory_failure(err_info->physical_fault_addr, flags); p += err_info->length; continue; } @@ -666,6 +682,7 @@ static bool ghes_do_proc(struct ghes *ghes, const guid_t *fru_id = &guid_null; char *fru_text = ""; bool queued = false; + bool sync = is_hest_sync_notify(ghes);
sev = ghes_severity(estatus->error_severity); apei_estatus_for_each_section(estatus, gdata) { @@ -683,13 +700,13 @@ static bool ghes_do_proc(struct ghes *ghes, atomic_notifier_call_chain(&ghes_report_chain, sev, mem_err);
arch_apei_report_mem_error(sev, mem_err); - queued = ghes_handle_memory_failure(gdata, sev); + queued = ghes_handle_memory_failure(gdata, sev, sync); } else if (guid_equal(sec_type, &CPER_SEC_PCIE)) { ghes_handle_aer(gdata); } else if (guid_equal(sec_type, &CPER_SEC_PROC_ARM)) { - queued = ghes_handle_arm_hw_error(gdata, sev); + queued = ghes_handle_arm_hw_error(gdata, sev, sync); } else { void *err = acpi_hest_get_payload(gdata);
From: Viresh Kumar viresh.kumar@linaro.org
[ Upstream commit ba367479c7ad0b870461024cd5ae7a1ea6e1e3db ]
By mistake, dev_pm_opp_find_level_floor() used the level parameter as unsigned long instead of unsigned int. Fix it.
Signed-off-by: Viresh Kumar viresh.kumar@linaro.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/opp/core.c | 9 +++++++-- include/linux/pm_opp.h | 4 ++-- 2 files changed, 9 insertions(+), 4 deletions(-)
diff --git a/drivers/opp/core.c b/drivers/opp/core.c index 84f345c69ea5..630639432eea 100644 --- a/drivers/opp/core.c +++ b/drivers/opp/core.c @@ -832,9 +832,14 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_find_level_ceil); * use. */ struct dev_pm_opp *dev_pm_opp_find_level_floor(struct device *dev, - unsigned long *level) + unsigned int *level) { - return _find_key_floor(dev, level, 0, true, _read_level, NULL); + unsigned long temp = *level; + struct dev_pm_opp *opp; + + opp = _find_key_floor(dev, &temp, 0, true, _read_level, NULL); + *level = temp; + return opp; } EXPORT_SYMBOL_GPL(dev_pm_opp_find_level_floor);
diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index ccd97bcef269..18a102174c4f 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -157,7 +157,7 @@ struct dev_pm_opp *dev_pm_opp_find_level_ceil(struct device *dev, unsigned int *level);
struct dev_pm_opp *dev_pm_opp_find_level_floor(struct device *dev, - unsigned long *level); + unsigned int *level);
struct dev_pm_opp *dev_pm_opp_find_bw_ceil(struct device *dev, unsigned int *bw, int index); @@ -324,7 +324,7 @@ static inline struct dev_pm_opp *dev_pm_opp_find_level_ceil(struct device *dev, }
static inline struct dev_pm_opp *dev_pm_opp_find_level_floor(struct device *dev, - unsigned long *level) + unsigned int *level) { return ERR_PTR(-EOPNOTSUPP); }
From: "Rafael J. Wysocki" rafael.j.wysocki@intel.com
[ Upstream commit 4e814173a8c4f432fd068b1c796f0416328c9d99 ]
There are 3 synchronization issues with thermal zone suspend-resume during system-wide transitions:
1. The resume code runs in a PM notifier which is invoked after user space has been thawed, so it can run concurrently with user space which can trigger a thermal zone device removal. If that happens, the thermal zone resume code may use a stale pointer to the next list element and crash, because it does not hold thermal_list_lock while walking thermal_tz_list.
2. The thermal zone resume code calls thermal_zone_device_init() outside the zone lock, so user space or an update triggered by the platform firmware may see an inconsistent state of a thermal zone leading to unexpected behavior.
3. Clearing the in_suspend global variable in thermal_pm_notify() allows __thermal_zone_device_update() to continue for all thermal zones and it may as well run before the thermal_tz_list walk (or at any point during the list walk for that matter) and attempt to operate on a thermal zone that has not been resumed yet. It may also race destructively with thermal_zone_device_init().
To address these issues, add thermal_list_lock locking to thermal_pm_notify(), especially arount the thermal_tz_list, make it call thermal_zone_device_init() back-to-back with __thermal_zone_device_update() under the zone lock and replace in_suspend with per-zone bool "suspend" indicators set and unset under the given zone's lock.
Link: https://lore.kernel.org/linux-pm/20231218162348.69101-1-bo.ye@mediatek.com/ Reported-by: Bo Ye bo.ye@mediatek.com Signed-off-by: Rafael J. Wysocki rafael.j.wysocki@intel.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/thermal/thermal_core.c | 30 +++++++++++++++++++++++------- include/linux/thermal.h | 2 ++ 2 files changed, 25 insertions(+), 7 deletions(-)
diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c index 9c17d35ccbbd..fe3bcfba4d67 100644 --- a/drivers/thermal/thermal_core.c +++ b/drivers/thermal/thermal_core.c @@ -37,8 +37,6 @@ static LIST_HEAD(thermal_governor_list); static DEFINE_MUTEX(thermal_list_lock); static DEFINE_MUTEX(thermal_governor_lock);
-static atomic_t in_suspend; - static struct thermal_governor *def_governor;
/* @@ -405,7 +403,7 @@ void __thermal_zone_device_update(struct thermal_zone_device *tz, { const struct thermal_trip *trip;
- if (atomic_read(&in_suspend)) + if (tz->suspended) return;
if (WARN_ONCE(!tz->ops->get_temp, @@ -1515,17 +1513,35 @@ static int thermal_pm_notify(struct notifier_block *nb, case PM_HIBERNATION_PREPARE: case PM_RESTORE_PREPARE: case PM_SUSPEND_PREPARE: - atomic_set(&in_suspend, 1); + mutex_lock(&thermal_list_lock); + + list_for_each_entry(tz, &thermal_tz_list, node) { + mutex_lock(&tz->lock); + + tz->suspended = true; + + mutex_unlock(&tz->lock); + } + + mutex_unlock(&thermal_list_lock); break; case PM_POST_HIBERNATION: case PM_POST_RESTORE: case PM_POST_SUSPEND: - atomic_set(&in_suspend, 0); + mutex_lock(&thermal_list_lock); + list_for_each_entry(tz, &thermal_tz_list, node) { + mutex_lock(&tz->lock); + + tz->suspended = false; + thermal_zone_device_init(tz); - thermal_zone_device_update(tz, - THERMAL_EVENT_UNSPECIFIED); + __thermal_zone_device_update(tz, THERMAL_EVENT_UNSPECIFIED); + + mutex_unlock(&tz->lock); } + + mutex_unlock(&thermal_list_lock); break; default: break; diff --git a/include/linux/thermal.h b/include/linux/thermal.h index cee814d5d1ac..1da1739d75d9 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -149,6 +149,7 @@ struct thermal_cooling_device { * @node: node in thermal_tz_list (in thermal_core.c) * @poll_queue: delayed work for polling * @notify_event: Last notification event + * @suspended: thermal zone suspend indicator */ struct thermal_zone_device { int id; @@ -181,6 +182,7 @@ struct thermal_zone_device { struct list_head node; struct delayed_work poll_queue; enum thermal_notify_event notify_event; + bool suspended; };
/**
linux-stable-mirror@lists.linaro.org