The patch below does not apply to the 6.12-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.12.y
git checkout FETCH_HEAD
git cherry-pick -x d0b8fec8ae50525b57139393d0bb1f446e82ff7e
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025120214-musky-conjure-b8b9@gregkh' --subject-prefix 'PATCH 6.12.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From d0b8fec8ae50525b57139393d0bb1f446e82ff7e Mon Sep 17 00:00:00 2001
From: "Bastien Curutchet (Schneider Electric)" <bastien.curutchet(a)bootlin.com>
Date: Thu, 20 Nov 2025 10:12:04 +0100
Subject: [PATCH] net: dsa: microchip: Fix symetry in
ksz_ptp_msg_irq_{setup/free}()
The IRQ numbers created through irq_create_mapping() are only assigned
to ptpmsg_irq[n].num at the end of the IRQ setup. So if an error occurs
between their creation and their assignment (for instance during the
request_threaded_irq() step), we enter the error path and fail to
release the newly created virtual IRQs because they aren't yet assigned
to ptpmsg_irq[n].num.
Move the mapping creation to ksz_ptp_msg_irq_setup() to ensure symetry
with what's released by ksz_ptp_msg_irq_free().
In the error path, move the irq_dispose_mapping to the out_ptp_msg label
so it will be called only on created IRQs.
Cc: stable(a)vger.kernel.org
Fixes: cc13ab18b201 ("net: dsa: microchip: ptp: enable interrupt for timestamping")
Reviewed-by: Andrew Lunn <andrew(a)lunn.ch>
Signed-off-by: Bastien Curutchet (Schneider Electric) <bastien.curutchet(a)bootlin.com>
Link: https://patch.msgid.link/20251120-ksz-fix-v6-5-891f80ae7f8f@bootlin.com
Signed-off-by: Paolo Abeni <pabeni(a)redhat.com>
diff --git a/drivers/net/dsa/microchip/ksz_ptp.c b/drivers/net/dsa/microchip/ksz_ptp.c
index c8bfbe5e2157..997e4a76d0a6 100644
--- a/drivers/net/dsa/microchip/ksz_ptp.c
+++ b/drivers/net/dsa/microchip/ksz_ptp.c
@@ -1093,19 +1093,19 @@ static int ksz_ptp_msg_irq_setup(struct ksz_port *port, u8 n)
static const char * const name[] = {"pdresp-msg", "xdreq-msg",
"sync-msg"};
const struct ksz_dev_ops *ops = port->ksz_dev->dev_ops;
+ struct ksz_irq *ptpirq = &port->ptpirq;
struct ksz_ptp_irq *ptpmsg_irq;
ptpmsg_irq = &port->ptpmsg_irq[n];
+ ptpmsg_irq->num = irq_create_mapping(ptpirq->domain, n);
+ if (!ptpmsg_irq->num)
+ return -EINVAL;
ptpmsg_irq->port = port;
ptpmsg_irq->ts_reg = ops->get_port_addr(port->num, ts_reg[n]);
strscpy(ptpmsg_irq->name, name[n]);
- ptpmsg_irq->num = irq_find_mapping(port->ptpirq.domain, n);
- if (ptpmsg_irq->num < 0)
- return ptpmsg_irq->num;
-
return request_threaded_irq(ptpmsg_irq->num, NULL,
ksz_ptp_msg_thread_fn, IRQF_ONESHOT,
ptpmsg_irq->name, ptpmsg_irq);
@@ -1135,9 +1135,6 @@ int ksz_ptp_irq_setup(struct dsa_switch *ds, u8 p)
if (!ptpirq->domain)
return -ENOMEM;
- for (irq = 0; irq < ptpirq->nirqs; irq++)
- irq_create_mapping(ptpirq->domain, irq);
-
ptpirq->irq_num = irq_find_mapping(port->pirq.domain, PORT_SRC_PTP_INT);
if (!ptpirq->irq_num) {
ret = -EINVAL;
@@ -1159,12 +1156,11 @@ int ksz_ptp_irq_setup(struct dsa_switch *ds, u8 p)
out_ptp_msg:
free_irq(ptpirq->irq_num, ptpirq);
- while (irq--)
+ while (irq--) {
free_irq(port->ptpmsg_irq[irq].num, &port->ptpmsg_irq[irq]);
-out:
- for (irq = 0; irq < ptpirq->nirqs; irq++)
irq_dispose_mapping(port->ptpmsg_irq[irq].num);
-
+ }
+out:
irq_domain_remove(ptpirq->domain);
return ret;
When the second-stage kernel is booted via kexec with a limiting command
line such as "mem=<size>", the physical range that contains the carried
over IMA measurement list may fall outside the truncated RAM leading to
a kernel panic.
BUG: unable to handle page fault for address: ffff97793ff47000
RIP: ima_restore_measurement_list+0xdc/0x45a
#PF: error_code(0x0000) – not-present page
Other architectures already validate the range with page_is_ram(), as
done in commit: cbf9c4b9617b ("of: check previous kernel's
ima-kexec-buffer against memory bounds") do a similar check on x86.
Cc: stable(a)vger.kernel.org
Fixes: b69a2afd5afc ("x86/kexec: Carry forward IMA measurement log on kexec")
Reported-by: Paul Webb <paul.x.webb(a)oracle.com>
Signed-off-by: Harshit Mogalapalli <harshit.m.mogalapalli(a)oracle.com>
---
Have tested the kexec for x86 kernel with IMA_KEXEC enabled and the
above patch works good. Paul initially reported this on 6.12 kernel but
I was able to reproduce this on 6.18, so I tried replicating how this
was fixed in drivers/of/kexec.c
---
arch/x86/kernel/setup.c | 14 ++++++++++++++
1 file changed, 14 insertions(+)
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 1b2edd07a3e1..fcef197d180e 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -439,9 +439,23 @@ int __init ima_free_kexec_buffer(void)
int __init ima_get_kexec_buffer(void **addr, size_t *size)
{
+ unsigned long start_pfn, end_pfn;
+
if (!ima_kexec_buffer_size)
return -ENOENT;
+ /*
+ * Calculate the PFNs for the buffer and ensure
+ * they are with in addressable memory.
+ */
+ start_pfn = PFN_DOWN(ima_kexec_buffer_phys);
+ end_pfn = PFN_DOWN(ima_kexec_buffer_phys + ima_kexec_buffer_size - 1);
+ if (!pfn_range_is_mapped(start_pfn, end_pfn)) {
+ pr_warn("IMA buffer at 0x%llx, size = 0x%zx beyond memory\n",
+ ima_kexec_buffer_phys, ima_kexec_buffer_size);
+ return -EINVAL;
+ }
+
*addr = __va(ima_kexec_buffer_phys);
*size = ima_kexec_buffer_size;
--
2.50.1
The local variable 'sensitivity' was never clamped to 0 or
POWERSAVE_BIAS_MAX because the return value of clamp() was not used. Fix
this by assigning the clamped value back to 'sensitivity'.
Cc: stable(a)vger.kernel.org
Fixes: 9c5320c8ea8b ("cpufreq: AMD "frequency sensitivity feedback" powersave bias for ondemand governor")
Signed-off-by: Thorsten Blum <thorsten.blum(a)linux.dev>
---
drivers/cpufreq/amd_freq_sensitivity.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/cpufreq/amd_freq_sensitivity.c b/drivers/cpufreq/amd_freq_sensitivity.c
index 13fed4b9e02b..713ccf24c97d 100644
--- a/drivers/cpufreq/amd_freq_sensitivity.c
+++ b/drivers/cpufreq/amd_freq_sensitivity.c
@@ -76,7 +76,7 @@ static unsigned int amd_powersave_bias_target(struct cpufreq_policy *policy,
sensitivity = POWERSAVE_BIAS_MAX -
(POWERSAVE_BIAS_MAX * (d_reference - d_actual) / d_reference);
- clamp(sensitivity, 0, POWERSAVE_BIAS_MAX);
+ sensitivity = clamp(sensitivity, 0, POWERSAVE_BIAS_MAX);
/* this workload is not CPU bound, so choose a lower freq */
if (sensitivity < od_tuners->powersave_bias) {
--
Thorsten Blum <thorsten.blum(a)linux.dev>
GPG: 1D60 735E 8AEF 3BE4 73B6 9D84 7336 78FD 8DFE EAD4
The local variable 'curr_energy' was never clamped to
PAC_193X_MIN_POWER_ACC or PAC_193X_MAX_POWER_ACC because the return
value of clamp() was not used. Fix this by assigning the clamped value
back to 'curr_energy'.
Cc: stable(a)vger.kernel.org
Fixes: 0fb528c8255b ("iio: adc: adding support for PAC193x")
Signed-off-by: Thorsten Blum <thorsten.blum(a)linux.dev>
---
drivers/iio/adc/pac1934.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/drivers/iio/adc/pac1934.c b/drivers/iio/adc/pac1934.c
index 48df16509260..256488d3936b 100644
--- a/drivers/iio/adc/pac1934.c
+++ b/drivers/iio/adc/pac1934.c
@@ -665,7 +665,8 @@ static int pac1934_reg_snapshot(struct pac1934_chip_info *info,
/* add the power_acc field */
curr_energy += inc;
- clamp(curr_energy, PAC_193X_MIN_POWER_ACC, PAC_193X_MAX_POWER_ACC);
+ curr_energy = clamp(curr_energy, PAC_193X_MIN_POWER_ACC,
+ PAC_193X_MAX_POWER_ACC);
reg_data->energy_sec_acc[cnt] = curr_energy;
}
--
Thorsten Blum <thorsten.blum(a)linux.dev>
GPG: 1D60 735E 8AEF 3BE4 73B6 9D84 7336 78FD 8DFE EAD4
The local variable 'val' was never clamped to -75000 or 180000 because
the return value of clamp_val() was not used. Fix this by assigning the
clamped value back to 'val', and use clamp() instead of clamp_val().
Cc: stable(a)vger.kernel.org
Fixes: a557a92e6881 ("net: phy: marvell-88q2xxx: add support for temperature sensor")
Signed-off-by: Thorsten Blum <thorsten.blum(a)linux.dev>
---
drivers/net/phy/marvell-88q2xxx.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/net/phy/marvell-88q2xxx.c b/drivers/net/phy/marvell-88q2xxx.c
index f3d83b04c953..201dee1a1698 100644
--- a/drivers/net/phy/marvell-88q2xxx.c
+++ b/drivers/net/phy/marvell-88q2xxx.c
@@ -698,7 +698,7 @@ static int mv88q2xxx_hwmon_write(struct device *dev,
switch (attr) {
case hwmon_temp_max:
- clamp_val(val, -75000, 180000);
+ val = clamp(val, -75000, 180000);
val = (val / 1000) + 75;
val = FIELD_PREP(MDIO_MMD_PCS_MV_TEMP_SENSOR3_INT_THRESH_MASK,
val);
--
Thorsten Blum <thorsten.blum(a)linux.dev>
GPG: 1D60 735E 8AEF 3BE4 73B6 9D84 7336 78FD 8DFE EAD4
From: Doug Berger <opendmb(a)gmail.com>
[ Upstream commit 382748c05e58a9f1935f5a653c352422375566ea ]
Commit 16b269436b72 ("sched/deadline: Modify cpudl::free_cpus
to reflect rd->online") introduced the cpudl_set/clear_freecpu
functions to allow the cpu_dl::free_cpus mask to be manipulated
by the deadline scheduler class rq_on/offline callbacks so the
mask would also reflect this state.
Commit 9659e1eeee28 ("sched/deadline: Remove cpu_active_mask
from cpudl_find()") removed the check of the cpu_active_mask to
save some processing on the premise that the cpudl::free_cpus
mask already reflected the runqueue online state.
Unfortunately, there are cases where it is possible for the
cpudl_clear function to set the free_cpus bit for a CPU when the
deadline runqueue is offline. When this occurs while a CPU is
connected to the default root domain the flag may retain the bad
state after the CPU has been unplugged. Later, a different CPU
that is transitioning through the default root domain may push a
deadline task to the powered down CPU when cpudl_find sees its
free_cpus bit is set. If this happens the task will not have the
opportunity to run.
One example is outlined here:
https://lore.kernel.org/lkml/20250110233010.2339521-1-opendmb@gmail.com
Another occurs when the last deadline task is migrated from a
CPU that has an offlined runqueue. The dequeue_task member of
the deadline scheduler class will eventually call cpudl_clear
and set the free_cpus bit for the CPU.
This commit modifies the cpudl_clear function to be aware of the
online state of the deadline runqueue so that the free_cpus mask
can be updated appropriately.
It is no longer necessary to manage the mask outside of the
cpudl_set/clear functions so the cpudl_set/clear_freecpu
functions are removed. In addition, since the free_cpus mask is
now only updated under the cpudl lock the code was changed to
use the non-atomic __cpumask functions.
Signed-off-by: Doug Berger <opendmb(a)gmail.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz(a)infradead.org>
Signed-off-by: Florian Fainelli <florian.fainelli(a)broadcom.com>
---
kernel/sched/cpudeadline.c | 34 +++++++++-------------------------
kernel/sched/cpudeadline.h | 4 +---
kernel/sched/deadline.c | 8 ++++----
3 files changed, 14 insertions(+), 32 deletions(-)
diff --git a/kernel/sched/cpudeadline.c b/kernel/sched/cpudeadline.c
index 8cb06c8c7eb1..fd28ba4e1a28 100644
--- a/kernel/sched/cpudeadline.c
+++ b/kernel/sched/cpudeadline.c
@@ -166,12 +166,13 @@ int cpudl_find(struct cpudl *cp, struct task_struct *p,
* cpudl_clear - remove a CPU from the cpudl max-heap
* @cp: the cpudl max-heap context
* @cpu: the target CPU
+ * @online: the online state of the deadline runqueue
*
* Notes: assumes cpu_rq(cpu)->lock is locked
*
* Returns: (void)
*/
-void cpudl_clear(struct cpudl *cp, int cpu)
+void cpudl_clear(struct cpudl *cp, int cpu, bool online)
{
int old_idx, new_cpu;
unsigned long flags;
@@ -184,7 +185,7 @@ void cpudl_clear(struct cpudl *cp, int cpu)
if (old_idx == IDX_INVALID) {
/*
* Nothing to remove if old_idx was invalid.
- * This could happen if a rq_offline_dl is
+ * This could happen if rq_online_dl or rq_offline_dl is
* called for a CPU without -dl tasks running.
*/
} else {
@@ -195,9 +196,12 @@ void cpudl_clear(struct cpudl *cp, int cpu)
cp->elements[new_cpu].idx = old_idx;
cp->elements[cpu].idx = IDX_INVALID;
cpudl_heapify(cp, old_idx);
-
- cpumask_set_cpu(cpu, cp->free_cpus);
}
+ if (likely(online))
+ __cpumask_set_cpu(cpu, cp->free_cpus);
+ else
+ __cpumask_clear_cpu(cpu, cp->free_cpus);
+
raw_spin_unlock_irqrestore(&cp->lock, flags);
}
@@ -228,7 +232,7 @@ void cpudl_set(struct cpudl *cp, int cpu, u64 dl)
cp->elements[new_idx].cpu = cpu;
cp->elements[cpu].idx = new_idx;
cpudl_heapify_up(cp, new_idx);
- cpumask_clear_cpu(cpu, cp->free_cpus);
+ __cpumask_clear_cpu(cpu, cp->free_cpus);
} else {
cp->elements[old_idx].dl = dl;
cpudl_heapify(cp, old_idx);
@@ -237,26 +241,6 @@ void cpudl_set(struct cpudl *cp, int cpu, u64 dl)
raw_spin_unlock_irqrestore(&cp->lock, flags);
}
-/*
- * cpudl_set_freecpu - Set the cpudl.free_cpus
- * @cp: the cpudl max-heap context
- * @cpu: rd attached CPU
- */
-void cpudl_set_freecpu(struct cpudl *cp, int cpu)
-{
- cpumask_set_cpu(cpu, cp->free_cpus);
-}
-
-/*
- * cpudl_clear_freecpu - Clear the cpudl.free_cpus
- * @cp: the cpudl max-heap context
- * @cpu: rd attached CPU
- */
-void cpudl_clear_freecpu(struct cpudl *cp, int cpu)
-{
- cpumask_clear_cpu(cpu, cp->free_cpus);
-}
-
/*
* cpudl_init - initialize the cpudl structure
* @cp: the cpudl max-heap context
diff --git a/kernel/sched/cpudeadline.h b/kernel/sched/cpudeadline.h
index 0adeda93b5fb..ecff718d94ae 100644
--- a/kernel/sched/cpudeadline.h
+++ b/kernel/sched/cpudeadline.h
@@ -18,9 +18,7 @@ struct cpudl {
#ifdef CONFIG_SMP
int cpudl_find(struct cpudl *cp, struct task_struct *p, struct cpumask *later_mask);
void cpudl_set(struct cpudl *cp, int cpu, u64 dl);
-void cpudl_clear(struct cpudl *cp, int cpu);
+void cpudl_clear(struct cpudl *cp, int cpu, bool online);
int cpudl_init(struct cpudl *cp);
-void cpudl_set_freecpu(struct cpudl *cp, int cpu);
-void cpudl_clear_freecpu(struct cpudl *cp, int cpu);
void cpudl_cleanup(struct cpudl *cp);
#endif /* CONFIG_SMP */
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 6548bd90c5c3..85e4ef476686 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -1414,7 +1414,7 @@ static void dec_dl_deadline(struct dl_rq *dl_rq, u64 deadline)
if (!dl_rq->dl_nr_running) {
dl_rq->earliest_dl.curr = 0;
dl_rq->earliest_dl.next = 0;
- cpudl_clear(&rq->rd->cpudl, rq->cpu);
+ cpudl_clear(&rq->rd->cpudl, rq->cpu, rq->online);
} else {
struct rb_node *leftmost = dl_rq->root.rb_leftmost;
struct sched_dl_entity *entry;
@@ -2349,9 +2349,10 @@ static void rq_online_dl(struct rq *rq)
if (rq->dl.overloaded)
dl_set_overload(rq);
- cpudl_set_freecpu(&rq->rd->cpudl, rq->cpu);
if (rq->dl.dl_nr_running > 0)
cpudl_set(&rq->rd->cpudl, rq->cpu, rq->dl.earliest_dl.curr);
+ else
+ cpudl_clear(&rq->rd->cpudl, rq->cpu, true);
}
/* Assumes rq->lock is held */
@@ -2360,8 +2361,7 @@ static void rq_offline_dl(struct rq *rq)
if (rq->dl.overloaded)
dl_clear_overload(rq);
- cpudl_clear(&rq->rd->cpudl, rq->cpu);
- cpudl_clear_freecpu(&rq->rd->cpudl, rq->cpu);
+ cpudl_clear(&rq->rd->cpudl, rq->cpu, false);
}
void __init init_sched_dl_class(void)
--
2.34.1
Hi,
This Linux kernel patch series introduces support for error recovery for
passthrough PCI devices on System Z (s390x).
Background
----------
For PCI devices on s390x an operating system receives platform specific
error events from firmware rather than through AER.Today for
passthrough/userspace devices, we don't attempt any error recovery and
ignore any error events for the devices. The passthrough/userspace devices
are managed by the vfio-pci driver. The driver does register error handling
callbacks (error_detected), and on an error trigger an eventfd to
userspace. But we need a mechanism to notify userspace
(QEMU/guest/userspace drivers) about the error event.
Proposal
--------
We can expose this error information (currently only the PCI Error Code)
via a device feature. Userspace can then obtain the error information
via VFIO_DEVICE_FEATURE ioctl and take appropriate actions such as driving
a device reset.
This is how a typical flow for passthrough devices to a VM would work:
For passthrough devices to a VM, the driver bound to the device on the host
is vfio-pci. vfio-pci driver does support the error_detected() callback
(vfio_pci_core_aer_err_detected()), and on an PCI error s390x recovery
code on the host will call the vfio-pci error_detected() callback. The
vfio-pci error_detected() callback will notify userspace/QEMU via an
eventfd, and return PCI_ERS_RESULT_CAN_RECOVER. At this point the s390x
error recovery on the host will skip any further action(see patch 6) and
let userspace drive the error recovery.
Once userspace/QEMU is notified, it then injects this error into the VM
so device drivers in the VM can take recovery actions. For example for a
passthrough NVMe device, the VM's OS NVMe driver will access the device.
At this point the VM's NVMe driver's error_detected() will drive the
recovery by returning PCI_ERS_RESULT_NEED_RESET, and the s390x error
recovery in the VM's OS will try to do a reset. Resets are privileged
operations and so the VM will need intervention from QEMU to perform the
reset. QEMU will invoke the VFIO_DEVICE_RESET ioctl to now notify the
host that the VM is requesting a reset of the device. The vfio-pci driver
on the host will then perform the reset on the device to recover it.
Thanks
Farhan
ChangeLog
---------
v5 series https://lore.kernel.org/all/20251113183502.2388-1-alifm@linux.ibm.com/
v5 -> v6
- Rebase on 6.18 + Lukas's PCI: Universal error recoverability of
devices series (https://lore.kernel.org/all/cover.1763483367.git.lukas@wunner.de/)
- Re-work config space accessibility check to pci_dev_save_and_disable() (patch 3).
This avoids saving the config space, in the reset path, if the device's config space is
corrupted or inaccessible.
v4 series https://lore.kernel.org/all/20250924171628.826-1-alifm@linux.ibm.com/
v4 -> v5
- Rebase on 6.18-rc5
- Move bug fixes to the beginning of the series (patch 1 and 2). These patches
were posted as a separate fixes series
https://lore.kernel.org/all/a14936ac-47d6-461b-816f-0fd66f869b0f@linux.ibm.…
- Add matching pci_put_dev() for pci_get_slot() (patch 6).
v3 series https://lore.kernel.org/all/20250911183307.1910-1-alifm@linux.ibm.com/
v3 -> v4
- Remove warn messages for each PCI capability not restored (patch 1)
- Check PCI_COMMAND and PCI_STATUS register for error value instead of device id
(patch 1)
- Fix kernel crash in patch 3
- Added reviewed by tags
- Address comments from Niklas's (patches 4, 5, 7)
- Fix compilation error non s390x system (patch 8)
- Explicitly align struct vfio_device_feature_zpci_err (patch 8)
v2 series https://lore.kernel.org/all/20250825171226.1602-1-alifm@linux.ibm.com/
v2 -> v3
- Patch 1 avoids saving any config space state if the device is in error
(suggested by Alex)
- Patch 2 adds additional check only for FLR reset to try other function
reset method (suggested by Alex).
- Patch 3 fixes a bug in s390 for resetting PCI devices with multiple
functions. Creates a new flag pci_slot to allow per function slot.
- Patch 4 fixes a bug in s390 for resource to bus address translation.
- Rebase on 6.17-rc5
v1 series https://lore.kernel.org/all/20250813170821.1115-1-alifm@linux.ibm.com/
v1 - > v2
- Patches 1 and 2 adds some additional checks for FLR/PM reset to
try other function reset method (suggested by Alex).
- Patch 3 fixes a bug in s390 for resetting PCI devices with multiple
functions.
- Patch 7 adds a new device feature for zPCI devices for the VFIO_DEVICE_FEATURE
ioctl. The ioctl is used by userspace to retriece any PCI error
information for the device (suggested by Alex).
- Patch 8 adds a reset_done() callback for the vfio-pci driver, to
restore the state of the device after a reset.
- Patch 9 removes the pcie check for triggering VFIO_PCI_ERR_IRQ_INDEX.
Farhan Ali (9):
PCI: Allow per function PCI slots
s390/pci: Add architecture specific resource/bus address translation
PCI: Avoid saving config space state if inaccessible
PCI: Add additional checks for flr reset
s390/pci: Update the logic for detecting passthrough device
s390/pci: Store PCI error information for passthrough devices
vfio-pci/zdev: Add a device feature for error information
vfio: Add a reset_done callback for vfio-pci driver
vfio: Remove the pcie check for VFIO_PCI_ERR_IRQ_INDEX
arch/s390/include/asm/pci.h | 29 ++++++++
arch/s390/pci/pci.c | 75 +++++++++++++++++++++
arch/s390/pci/pci_event.c | 107 +++++++++++++++++-------------
drivers/pci/host-bridge.c | 4 +-
drivers/pci/pci.c | 19 +++++-
drivers/pci/slot.c | 25 ++++++-
drivers/vfio/pci/vfio_pci_core.c | 20 ++++--
drivers/vfio/pci/vfio_pci_intrs.c | 3 +-
drivers/vfio/pci/vfio_pci_priv.h | 9 +++
drivers/vfio/pci/vfio_pci_zdev.c | 45 ++++++++++++-
include/linux/pci.h | 1 +
include/uapi/linux/vfio.h | 15 +++++
12 files changed, 291 insertions(+), 61 deletions(-)
--
2.43.0
The macro FAN_FROM_REG evaluates its arguments multiple times. When used
in lockless contexts involving shared driver data, this leads to
Time-of-Check to Time-of-Use (TOCTOU) race conditions, potentially
causing divide-by-zero errors.
Convert the macro to a static function. This guarantees that arguments
are evaluated only once (pass-by-value), preventing the race
conditions.
Additionally, in store_fan_div, move the calculation of the minimum
limit inside the update lock. This ensures that the read-modify-write
sequence operates on consistent data.
Adhere to the principle of minimal changes by only converting macros
that evaluate arguments multiple times and are used in lockless
contexts.
Link: https://lore.kernel.org/all/CALbr=LYJ_ehtp53HXEVkSpYoub+XYSTU8Rg=o1xxMJ8=5z…
Fixes: 9873964d6eb2 ("[PATCH] HWMON: w83791d: New hardware monitoring driver for the Winbond W83791D")
Cc: stable(a)vger.kernel.org
Signed-off-by: Gui-Dong Han <hanguidong02(a)gmail.com>
---
Based on the discussion in the link, I will submit a series of patches to
address TOCTOU issues in the hwmon subsystem by converting macros to
functions or adjusting locking where appropriate.
---
drivers/hwmon/w83791d.c | 17 +++++++++++------
1 file changed, 11 insertions(+), 6 deletions(-)
diff --git a/drivers/hwmon/w83791d.c b/drivers/hwmon/w83791d.c
index ace854b370a0..996e36951f9d 100644
--- a/drivers/hwmon/w83791d.c
+++ b/drivers/hwmon/w83791d.c
@@ -218,9 +218,14 @@ static u8 fan_to_reg(long rpm, int div)
return clamp_val((1350000 + rpm * div / 2) / (rpm * div), 1, 254);
}
-#define FAN_FROM_REG(val, div) ((val) == 0 ? -1 : \
- ((val) == 255 ? 0 : \
- 1350000 / ((val) * (div))))
+static int fan_from_reg(int val, int div)
+{
+ if (val == 0)
+ return -1;
+ if (val == 255)
+ return 0;
+ return 1350000 / (val * div);
+}
/* for temp1 which is 8-bit resolution, LSB = 1 degree Celsius */
#define TEMP1_FROM_REG(val) ((val) * 1000)
@@ -521,7 +526,7 @@ static ssize_t show_##reg(struct device *dev, struct device_attribute *attr, \
struct w83791d_data *data = w83791d_update_device(dev); \
int nr = sensor_attr->index; \
return sprintf(buf, "%d\n", \
- FAN_FROM_REG(data->reg[nr], DIV_FROM_REG(data->fan_div[nr]))); \
+ fan_from_reg(data->reg[nr], DIV_FROM_REG(data->fan_div[nr]))); \
}
show_fan_reg(fan);
@@ -585,10 +590,10 @@ static ssize_t store_fan_div(struct device *dev, struct device_attribute *attr,
if (err)
return err;
+ mutex_lock(&data->update_lock);
/* Save fan_min */
- min = FAN_FROM_REG(data->fan_min[nr], DIV_FROM_REG(data->fan_div[nr]));
+ min = fan_from_reg(data->fan_min[nr], DIV_FROM_REG(data->fan_div[nr]));
- mutex_lock(&data->update_lock);
data->fan_div[nr] = div_to_reg(nr, val);
switch (nr) {
--
2.43.0