[ Upstream commit ed93c6f68a3be06e4e0c331c6e751f462dee3932 ]
When checking for a supported IRQ number, the following test is used:
/* only irqs 2, 3, 4, 5, 6, 7, 10, 11, 12, 14, and 15 are valid */
if ((1 << it->options[1]) & 0xdcfc) {
However, `it->options[i]` is an unchecked `int` value from userspace, so
the shift amount could be negative or out of bounds. Fix the test by
requiring `it->options[1]` to be within bounds before proceeding with
the original test.
Reported-by: syzbot+c52293513298e0fd9a94(a)syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=c52293513298e0fd9a94
Fixes: 729988507680 ("staging: comedi: das16m1: tidy up the irq support in das16m1_attach()")
Tested-by: syzbot+c52293513298e0fd9a94(a)syzkaller.appspotmail.com
Suggested-by: "Enju, Kohei" <enjuk(a)amazon.co.jp>
Cc: stable(a)vger.kernel.org # 5.13+
Signed-off-by: Ian Abbott <abbotti(a)mev.co.uk>
Link: https://lore.kernel.org/r/20250707130908.70758-1-abbotti@mev.co.uk
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
drivers/staging/comedi/drivers/das16m1.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/drivers/staging/comedi/drivers/das16m1.c b/drivers/staging/comedi/drivers/das16m1.c
index 75f3dbbe97ac..0d54387a1c26 100644
--- a/drivers/staging/comedi/drivers/das16m1.c
+++ b/drivers/staging/comedi/drivers/das16m1.c
@@ -523,7 +523,8 @@ static int das16m1_attach(struct comedi_device *dev,
devpriv->extra_iobase = dev->iobase + DAS16M1_8255_IOBASE;
/* only irqs 2, 3, 4, 5, 6, 7, 10, 11, 12, 14, and 15 are valid */
- if ((1 << it->options[1]) & 0xdcfc) {
+ if (it->options[1] >= 2 && it->options[1] <= 15 &&
+ (1 << it->options[1]) & 0xdcfc) {
ret = request_irq(it->options[1], das16m1_interrupt, 0,
dev->board_name, dev);
if (ret == 0)
--
2.47.2
[ Upstream commit 70f2b28b5243df557f51c054c20058ae207baaac ]
When checking for a supported IRQ number, the following test is used:
/* IRQs 2,3,5,6,7, 10,11,15 are valid for "enhanced" mode */
if ((1 << it->options[1]) & 0x8cec) {
However, `it->options[i]` is an unchecked `int` value from userspace, so
the shift amount could be negative or out of bounds. Fix the test by
requiring `it->options[1]` to be within bounds before proceeding with
the original test. Valid `it->options[1]` values that select the IRQ
will be in the range [1,15]. The value 0 explicitly disables the use of
interrupts.
Fixes: 79e5e6addbb1 ("staging: comedi: das6402: rewrite broken driver")
Cc: stable(a)vger.kernel.org # 5.13+
Signed-off-by: Ian Abbott <abbotti(a)mev.co.uk>
Link: https://lore.kernel.org/r/20250707135737.77448-1-abbotti@mev.co.uk
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
drivers/staging/comedi/drivers/das6402.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/drivers/staging/comedi/drivers/das6402.c b/drivers/staging/comedi/drivers/das6402.c
index 96f4107b8054..927d4b832ecc 100644
--- a/drivers/staging/comedi/drivers/das6402.c
+++ b/drivers/staging/comedi/drivers/das6402.c
@@ -569,7 +569,8 @@ static int das6402_attach(struct comedi_device *dev,
das6402_reset(dev);
/* IRQs 2,3,5,6,7, 10,11,15 are valid for "enhanced" mode */
- if ((1 << it->options[1]) & 0x8cec) {
+ if (it->options[1] > 0 && it->options[1] < 16 &&
+ (1 << it->options[1]) & 0x8cec) {
ret = request_irq(it->options[1], das6402_interrupt, 0,
dev->board_name, dev);
if (ret == 0) {
--
2.47.2
[ Upstream commit 46d8c744136ce2454aa4c35c138cc06817f92b8e ]
Some Comedi subdevice instruction handlers are known to access
instruction data elements beyond the first `insn->n` elements in some
cases. The `do_insn_ioctl()` and `do_insnlist_ioctl()` functions
allocate at least `MIN_SAMPLES` (16) data elements to deal with this,
but they do not initialize all of that. For Comedi instruction codes
that write to the subdevice, the first `insn->n` data elements are
copied from user-space, but the remaining elements are left
uninitialized. That could be a problem if the subdevice instruction
handler reads the uninitialized data. Ensure that the first
`MIN_SAMPLES` elements are initialized before calling these instruction
handlers, filling the uncopied elements with 0. For
`do_insnlist_ioctl()`, the same data buffer elements are used for
handling a list of instructions, so ensure the first `MIN_SAMPLES`
elements are initialized for each instruction that writes to the
subdevice.
Fixes: ed9eccbe8970 ("Staging: add comedi core")
Cc: stable(a)vger.kernel.org # 5.13+
Signed-off-by: Ian Abbott <abbotti(a)mev.co.uk>
Link: https://lore.kernel.org/r/20250707161439.88385-1-abbotti@mev.co.uk
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
drivers/staging/comedi/comedi_fops.c | 14 ++++++++++++--
1 file changed, 12 insertions(+), 2 deletions(-)
diff --git a/drivers/staging/comedi/comedi_fops.c b/drivers/staging/comedi/comedi_fops.c
index 5aa6a84d1fa6..96d68cc8f449 100644
--- a/drivers/staging/comedi/comedi_fops.c
+++ b/drivers/staging/comedi/comedi_fops.c
@@ -1551,21 +1551,27 @@ static int do_insnlist_ioctl(struct comedi_device *dev,
}
for (i = 0; i < n_insns; ++i) {
+ unsigned int n = insns[i].n;
+
if (insns[i].insn & INSN_MASK_WRITE) {
if (copy_from_user(data, insns[i].data,
- insns[i].n * sizeof(unsigned int))) {
+ n * sizeof(unsigned int))) {
dev_dbg(dev->class_dev,
"copy_from_user failed\n");
ret = -EFAULT;
goto error;
}
+ if (n < MIN_SAMPLES) {
+ memset(&data[n], 0, (MIN_SAMPLES - n) *
+ sizeof(unsigned int));
+ }
}
ret = parse_insn(dev, insns + i, data, file);
if (ret < 0)
goto error;
if (insns[i].insn & INSN_MASK_READ) {
if (copy_to_user(insns[i].data, data,
- insns[i].n * sizeof(unsigned int))) {
+ n * sizeof(unsigned int))) {
dev_dbg(dev->class_dev,
"copy_to_user failed\n");
ret = -EFAULT;
@@ -1638,6 +1644,10 @@ static int do_insn_ioctl(struct comedi_device *dev,
ret = -EFAULT;
goto error;
}
+ if (insn->n < MIN_SAMPLES) {
+ memset(&data[insn->n], 0,
+ (MIN_SAMPLES - insn->n) * sizeof(unsigned int));
+ }
}
ret = parse_insn(dev, insn, data, file);
if (ret < 0)
--
2.47.2
[ Upstream commit 08ae4b20f5e82101d77326ecab9089e110f224cc ]
The handling of the `COMEDI_INSNLIST` ioctl allocates a kernel buffer to
hold the array of `struct comedi_insn`, getting the length from the
`n_insns` member of the `struct comedi_insnlist` supplied by the user.
The allocation will fail with a WARNING and a stack dump if it is too
large.
Avoid that by failing with an `-EINVAL` error if the supplied `n_insns`
value is unreasonable.
Define the limit on the `n_insns` value in the `MAX_INSNS` macro. Set
this to the same value as `MAX_SAMPLES` (65536), which is the maximum
allowed sum of the values of the member `n` in the array of `struct
comedi_insn`, and sensible comedi instructions will have an `n` of at
least 1.
Reported-by: syzbot+d6995b62e5ac7d79557a(a)syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=d6995b62e5ac7d79557a
Fixes: ed9eccbe8970 ("Staging: add comedi core")
Tested-by: Ian Abbott <abbotti(a)mev.co.uk>
Cc: stable(a)vger.kernel.org # 5.13+
Signed-off-by: Ian Abbott <abbotti(a)mev.co.uk>
Link: https://lore.kernel.org/r/20250704120405.83028-1-abbotti@mev.co.uk
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
drivers/staging/comedi/comedi_fops.c | 16 ++++++++++++++++
1 file changed, 16 insertions(+)
diff --git a/drivers/staging/comedi/comedi_fops.c b/drivers/staging/comedi/comedi_fops.c
index 8f896e6208a8..5aa6a84d1fa6 100644
--- a/drivers/staging/comedi/comedi_fops.c
+++ b/drivers/staging/comedi/comedi_fops.c
@@ -1584,6 +1584,16 @@ static int do_insnlist_ioctl(struct comedi_device *dev,
return i;
}
+#define MAX_INSNS MAX_SAMPLES
+static int check_insnlist_len(struct comedi_device *dev, unsigned int n_insns)
+{
+ if (n_insns > MAX_INSNS) {
+ dev_dbg(dev->class_dev, "insnlist length too large\n");
+ return -EINVAL;
+ }
+ return 0;
+}
+
/*
* COMEDI_INSN ioctl
* synchronous instruction
@@ -2234,6 +2244,9 @@ static long comedi_unlocked_ioctl(struct file *file, unsigned int cmd,
rc = -EFAULT;
break;
}
+ rc = check_insnlist_len(dev, insnlist.n_insns);
+ if (rc)
+ break;
insns = kcalloc(insnlist.n_insns, sizeof(*insns), GFP_KERNEL);
if (!insns) {
rc = -ENOMEM;
@@ -3085,6 +3098,9 @@ static int compat_insnlist(struct file *file, unsigned long arg)
if (copy_from_user(&insnlist32, compat_ptr(arg), sizeof(insnlist32)))
return -EFAULT;
+ rc = check_insnlist_len(dev, insnlist32.n_insns);
+ if (rc)
+ return rc;
insns = kcalloc(insnlist32.n_insns, sizeof(*insns), GFP_KERNEL);
if (!insns)
return -ENOMEM;
--
2.47.2
[ Upstream commit 1b98304c09a0192598d0767f1eb8c83d7e793091 ]
In `waveform_common_attach()`, the two timers `&devpriv->ai_timer` and
`&devpriv->ao_timer` are initialized after the allocation of the device
private data by `comedi_alloc_devpriv()` and the subdevices by
`comedi_alloc_subdevices()`. The function may return with an error
between those function calls. In that case, `waveform_detach()` will be
called by the Comedi core to clean up. The check that
`waveform_detach()` uses to decide whether to delete the timers is
incorrect. It only checks that the device private data was allocated,
but that does not guarantee that the timers were initialized. It also
needs to check that the subdevices were allocated. Fix it.
Fixes: 73e0e4dfed4c ("staging: comedi: comedi_test: fix timer lock-up")
Cc: stable(a)vger.kernel.org # 6.15+
Signed-off-by: Ian Abbott <abbotti(a)mev.co.uk>
Link: https://lore.kernel.org/r/20250708130627.21743-1-abbotti@mev.co.uk
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
[ changed timer_delete_sync() to del_timer_sync() ]
Signed-off-by: Ian Abbott <abbotti(a)mev.co.uk>
---
drivers/staging/comedi/drivers/comedi_test.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/staging/comedi/drivers/comedi_test.c b/drivers/staging/comedi/drivers/comedi_test.c
index bea9a3adf08c..f5199474c0e9 100644
--- a/drivers/staging/comedi/drivers/comedi_test.c
+++ b/drivers/staging/comedi/drivers/comedi_test.c
@@ -790,7 +790,7 @@ static void waveform_detach(struct comedi_device *dev)
{
struct waveform_private *devpriv = dev->private;
- if (devpriv) {
+ if (devpriv && dev->n_subdevices) {
del_timer_sync(&devpriv->ai_timer);
del_timer_sync(&devpriv->ao_timer);
}
--
2.47.2
[ Upstream commit b14b076ce593f72585412fc7fd3747e03a5e3632 ]
When checking for a supported IRQ number, the following test is used:
if ((1 << it->options[1]) & board->irq_bits) {
However, `it->options[i]` is an unchecked `int` value from userspace, so
the shift amount could be negative or out of bounds. Fix the test by
requiring `it->options[1]` to be within bounds before proceeding with
the original test. Valid `it->options[1]` values that select the IRQ
will be in the range [1,15]. The value 0 explicitly disables the use of
interrupts.
Reported-by: syzbot+32de323b0addb9e114ff(a)syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=32de323b0addb9e114ff
Fixes: fcdb427bc7cf ("Staging: comedi: add pcl821 driver")
Cc: stable(a)vger.kernel.org # 5.13+
Signed-off-by: Ian Abbott <abbotti(a)mev.co.uk>
Link: https://lore.kernel.org/r/20250707133429.73202-1-abbotti@mev.co.uk
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
drivers/staging/comedi/drivers/pcl812.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/drivers/staging/comedi/drivers/pcl812.c b/drivers/staging/comedi/drivers/pcl812.c
index aefc1b849cf7..98112c79e2d7 100644
--- a/drivers/staging/comedi/drivers/pcl812.c
+++ b/drivers/staging/comedi/drivers/pcl812.c
@@ -1151,7 +1151,8 @@ static int pcl812_attach(struct comedi_device *dev, struct comedi_devconfig *it)
if (!dev->pacer)
return -ENOMEM;
- if ((1 << it->options[1]) & board->irq_bits) {
+ if (it->options[1] > 0 && it->options[1] < 16 &&
+ (1 << it->options[1]) & board->irq_bits) {
ret = request_irq(it->options[1], pcl812_interrupt, 0,
dev->board_name, dev);
if (ret == 0)
--
2.47.2
Commit 21b688dabecb ("net: phy: micrel: Cable Diag feature for lan8814
phy") introduced cable_test support for the LAN8814 that reuses parts of
the KSZ886x logic and introduced the cable_diag_reg and pair_mask
parameters to account for differences between those chips.
However, it did not update the ksz8081_type struct, so those members are
now 0, causing no pairs to be tested in ksz886x_cable_test_get_status
and ksz886x_cable_test_wait_for_completion to poll the wrong register
for the affected PHYs (Basic Control/Reset, which is 0 in normal
operation) and exit immediately.
Fix this by setting both struct members accordingly.
Fixes: 21b688dabecb ("net: phy: micrel: Cable Diag feature for lan8814 phy")
Cc: stable(a)vger.kernel.org
Signed-off-by: Florian Larysch <fl(a)n621.de>
---
drivers/net/phy/micrel.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c
index 64aa03aed770..50c6a4e8cfa1 100644
--- a/drivers/net/phy/micrel.c
+++ b/drivers/net/phy/micrel.c
@@ -472,6 +472,8 @@ static const struct kszphy_type ksz8051_type = {
static const struct kszphy_type ksz8081_type = {
.led_mode_reg = MII_KSZPHY_CTRL_2,
+ .cable_diag_reg = KSZ8081_LMD,
+ .pair_mask = KSZPHY_WIRE_PAIR_MASK,
.has_broadcast_disable = true,
.has_nand_tree_disable = true,
.has_rmii_ref_clk_sel = true,
--
2.50.1
From: Edip Hazuri <edip(a)medip.dev>
The mute led on this laptop is using ALC245 but requires a quirk to work
This patch enables the existing quirk for the device.
Tested on Victus 16-r1xxx Laptop. The LED behaviour works
as intended.
v2:
- adapt the HD-audio code changes and rebase on for-next branch of tiwai/sound.git
- link to v1: https://lore.kernel.org/linux-sound/20250724210756.61453-2-edip@medip.dev/
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Edip Hazuri <edip(a)medip.dev>
---
sound/hda/codecs/realtek/alc269.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/sound/hda/codecs/realtek/alc269.c b/sound/hda/codecs/realtek/alc269.c
index 05019fa73..33ef08d25 100644
--- a/sound/hda/codecs/realtek/alc269.c
+++ b/sound/hda/codecs/realtek/alc269.c
@@ -6580,6 +6580,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x103c, 0x8c91, "HP EliteBook 660", ALC236_FIXUP_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8c96, "HP", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF),
SND_PCI_QUIRK(0x103c, 0x8c97, "HP ZBook", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF),
+ SND_PCI_QUIRK(0x103c, 0x8c99, "HP Victus 16-r1xxx (MB 8C99)", ALC245_FIXUP_HP_MUTE_LED_COEFBIT),
SND_PCI_QUIRK(0x103c, 0x8c9c, "HP Victus 16-s1xxx (MB 8C9C)", ALC245_FIXUP_HP_MUTE_LED_COEFBIT),
SND_PCI_QUIRK(0x103c, 0x8ca1, "HP ZBook Power", ALC236_FIXUP_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8ca2, "HP ZBook Power", ALC236_FIXUP_HP_GPIO_LED),
--
2.50.1
From: Edip Hazuri <edip(a)medip.dev>
The mute led on this laptop is using ALC245 but requires a quirk to work
This patch enables the existing quirk for the device.
Tested on Victus 16-r1xxx Laptop. The LED behaviour works
as intended.
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Edip Hazuri <edip(a)medip.dev>
---
sound/pci/hda/patch_realtek.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 2627e2f49..9656e6ebb 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -10874,6 +10874,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x103c, 0x8c91, "HP EliteBook 660", ALC236_FIXUP_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8c96, "HP", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF),
SND_PCI_QUIRK(0x103c, 0x8c97, "HP ZBook", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF),
+ SND_PCI_QUIRK(0x103c, 0x8c99, "HP Victus 16-r1xxx (MB 8C99)", ALC245_FIXUP_HP_MUTE_LED_COEFBIT),
SND_PCI_QUIRK(0x103c, 0x8c9c, "HP Victus 16-s1xxx (MB 8C9C)", ALC245_FIXUP_HP_MUTE_LED_COEFBIT),
SND_PCI_QUIRK(0x103c, 0x8ca1, "HP ZBook Power", ALC236_FIXUP_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8ca2, "HP ZBook Power", ALC236_FIXUP_HP_GPIO_LED),
--
2.50.1
The `insn_rw_emulate_bits()` function is used as a default handler for
`INSN_READ` instructions for subdevices that have a handler for
`INSN_BITS` but not for `INSN_READ`. Similarly, it is used as a default
handler for `INSN_WRITE` instructions for subdevices that have a handler
for `INSN_BITS` but not for `INSN_WRITE`. It works by emulating the
`INSN_READ` or `INSN_WRITE` instruction handling with a constructed
`INSN_BITS` instruction. However, `INSN_READ` and `INSN_WRITE`
instructions are supposed to be able read or write multiple samples,
indicated by the `insn->n` value, but `insn_rw_emulate_bits()` currently
only handles a single sample. For `INSN_READ`, the comedi core will
copy `insn->n` samples back to user-space. (That triggered KASAN
kernel-infoleak errors when `insn->n` was greater than 1, but that is
being fixed more generally elsewhere in the comedi core.)
Make `insn_rw_emulate_bits()` either handle `insn->n` samples, or return
an error, to conform to the general expectation for `INSN_READ` and
`INSN_WRITE` handlers.
Fixes: ed9eccbe8970 ("Staging: add comedi core")
Cc: <stable(a)vger.kernel.org> # 5.13+
Signed-off-by: Ian Abbott <abbotti(a)mev.co.uk>
---
For 5.4.y and 5.10.y, this patch conflicts with submitted patches for
upstream commit e9cb26291d00 ("comedi: Fix use of uninitialized data in
insn_rw_emulate_bits()").
---
drivers/comedi/drivers.c | 23 ++++++++++++-----------
1 file changed, 12 insertions(+), 11 deletions(-)
diff --git a/drivers/comedi/drivers.c b/drivers/comedi/drivers.c
index f1dc854928c1..c9ebaadc5e82 100644
--- a/drivers/comedi/drivers.c
+++ b/drivers/comedi/drivers.c
@@ -620,11 +620,9 @@ static int insn_rw_emulate_bits(struct comedi_device *dev,
unsigned int chan = CR_CHAN(insn->chanspec);
unsigned int base_chan = (chan < 32) ? 0 : chan;
unsigned int _data[2];
+ unsigned int i;
int ret;
- if (insn->n == 0)
- return 0;
-
memset(_data, 0, sizeof(_data));
memset(&_insn, 0, sizeof(_insn));
_insn.insn = INSN_BITS;
@@ -635,18 +633,21 @@ static int insn_rw_emulate_bits(struct comedi_device *dev,
if (insn->insn == INSN_WRITE) {
if (!(s->subdev_flags & SDF_WRITABLE))
return -EINVAL;
- _data[0] = 1U << (chan - base_chan); /* mask */
- _data[1] = data[0] ? (1U << (chan - base_chan)) : 0; /* bits */
+ _data[0] = 1U << (chan - base_chan); /* mask */
}
+ for (i = 0; i < insn->n; i++) {
+ if (insn->insn == INSN_WRITE)
+ _data[1] = data[i] ? _data[0] : 0; /* bits */
- ret = s->insn_bits(dev, s, &_insn, _data);
- if (ret < 0)
- return ret;
+ ret = s->insn_bits(dev, s, &_insn, _data);
+ if (ret < 0)
+ return ret;
- if (insn->insn == INSN_READ)
- data[0] = (_data[1] >> (chan - base_chan)) & 1;
+ if (insn->insn == INSN_READ)
+ data[i] = (_data[1] >> (chan - base_chan)) & 1;
+ }
- return 1;
+ return insn->n;
}
static int __comedi_device_postconfig_async(struct comedi_device *dev,
--
2.47.2
During fuzz testing, the following issue was discovered:
BUG: KMSAN: uninit-value in __dma_map_sg_attrs+0x217/0x310
__dma_map_sg_attrs+0x217/0x310
dma_map_sg_attrs+0x4a/0x70
ata_qc_issue+0x9f8/0x1420
__ata_scsi_queuecmd+0x1657/0x1740
ata_scsi_queuecmd+0x79a/0x920
scsi_queue_rq+0x4472/0x4f40
blk_mq_dispatch_rq_list+0x1cca/0x3ee0
__blk_mq_sched_dispatch_requests+0x458/0x630
blk_mq_sched_dispatch_requests+0x15b/0x340
__blk_mq_run_hw_queue+0xe5/0x250
__blk_mq_delay_run_hw_queue+0x138/0x780
blk_mq_run_hw_queue+0x4bb/0x7e0
blk_mq_sched_insert_request+0x2a7/0x4c0
blk_execute_rq+0x497/0x8a0
sg_io+0xbe0/0xe20
scsi_ioctl+0x2b36/0x3c60
sr_block_ioctl+0x319/0x440
blkdev_ioctl+0x80f/0xd70
__se_sys_ioctl+0x219/0x420
__x64_sys_ioctl+0x93/0xe0
x64_sys_call+0x1d6c/0x3ad0
do_syscall_64+0x4c/0xa0
entry_SYSCALL_64_after_hwframe+0x6e/0xd8
Uninit was created at:
__alloc_pages+0x5c0/0xc80
alloc_pages+0xe0e/0x1050
blk_rq_map_user_iov+0x2b77/0x6100
blk_rq_map_user_io+0x2fa/0x4d0
sg_io+0xad6/0xe20
scsi_ioctl+0x2b36/0x3c60
sr_block_ioctl+0x319/0x440
blkdev_ioctl+0x80f/0xd70
__se_sys_ioctl+0x219/0x420
__x64_sys_ioctl+0x93/0xe0
x64_sys_call+0x1d6c/0x3ad0
do_syscall_64+0x4c/0xa0
entry_SYSCALL_64_after_hwframe+0x6e/0xd8
Bytes 14-15 of 16 are uninitialized
Memory access of size 16 starts at ffff88800cbdb000
When processing the last unaligned element of the scatterlist,
it is supplemented with missing bytes in the amount of pad_len.
These bytes remain uninitialized, which leads to a problem.
Add zeroing pad_len bytes of padding by pad_offset offset before
increasing its length. This ensures that the DMA does not receive
uninitialized data and eliminates the KMSAN warning.
In this case, the pages are not located in highmem, but in the
general case they might be, so kmap_local_page() is used for mapping.
Found by Linux Verification Center (linuxtesting.org) with Syzkaller.
Fixes: 40b01b9bbdf5 ("block: update bio according to DMA alignment padding")
Co-developed-by: Boris Tonofa <b.tonofa(a)ideco.ru>
Signed-off-by: Boris Tonofa <b.tonofa(a)ideco.ru>
Signed-off-by: Petr Vaganov <p.vaganov(a)ideco.ru>
---
drivers/scsi/scsi_lib.c | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 144c72f0737a..d287e24b6013 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -1153,6 +1153,11 @@ blk_status_t scsi_alloc_sgtables(struct scsi_cmnd *cmd)
if (blk_rq_bytes(rq) & rq->q->limits.dma_pad_mask) {
unsigned int pad_len =
(rq->q->limits.dma_pad_mask & ~blk_rq_bytes(rq)) + 1;
+ unsigned int pad_offset = last_sg->offset + last_sg->length;
+ void *vaddr = kmap_local_page(sg_page(last_sg));
+
+ memset(vaddr + pad_offset, 0, pad_len);
+ kunmap_local(vaddr);
last_sg->length += pad_len;
cmd->extra_len += pad_len;
--
2.50.1
The hwprobe vDSO data for some keys, like MISALIGNED_VECTOR_PERF,
is determined by an asynchronous kthread. This can create a race
condition where the kthread finishes after the vDSO data has
already been populated, causing userspace to read stale values.
To fix this, a completion-based framework is introduced to robustly
synchronize the async probes with the vDSO data population. The
waiting function, init_hwprobe_vdso_data(), now blocks on
wait_for_completion() until all probes signal they are done.
Furthermore, to prevent this potential blocking from impacting boot
performance, the initialization is deferred to late_initcall. This
is safe as the data is only required by userspace (which starts
much later) and moves the synchronization delay off the critical
boot path.
Reported-by: Tsukasa OI <research_trasio(a)irq.a4lg.com>
Closes: https://lore.kernel.org/linux-riscv/760d637b-b13b-4518-b6bf-883d55d44e7f@ir…
Fixes: e7c9d66e313b ("RISC-V: Report vector unaligned access speed hwprobe")
Cc: Palmer Dabbelt <palmer(a)dabbelt.com>
Cc: Alexandre Ghiti <alexghiti(a)rivosinc.com>
Cc: Olof Johansson <olof(a)lixom.net>
Cc: stable(a)vger.kernel.org
Signed-off-by: Jingwei Wang <wangjingwei(a)iscas.ac.cn>
---
Changes in v5:
- Reworked the synchronization logic to a robust "sentinel-count"
pattern based on feedback from Alexandre.
- Fixed a "multiple definition" linker error for nommu builds by changing
the header-file stub functions to `static inline`, as pointed out by Olof.
- Updated the commit message to better explain the rationale for moving
the vDSO initialization to `late_initcall`.
Changes in v4:
- Reworked the synchronization mechanism based on feedback from Palmer
and Alexandre.
- Instead of a post-hoc refresh, this version introduces a robust
completion-based framework using an atomic counter to ensure async
probes are finished before populating the vDSO.
- Moved the vdso data initialization to a late_initcall to avoid
impacting boot time.
Changes in v3:
- Retained existing blank line.
Changes in v2:
- Addressed feedback from Yixun's regarding #ifdef CONFIG_MMU usage.
- Updated commit message to provide a high-level summary.
- Added Fixes tag for commit e7c9d66e313b.
v1: https://lore.kernel.org/linux-riscv/20250521052754.185231-1-wangjingwei@isc…
arch/riscv/include/asm/hwprobe.h | 8 +++++++-
arch/riscv/kernel/sys_hwprobe.c | 20 +++++++++++++++++++-
arch/riscv/kernel/unaligned_access_speed.c | 9 +++++++--
3 files changed, 33 insertions(+), 4 deletions(-)
diff --git a/arch/riscv/include/asm/hwprobe.h b/arch/riscv/include/asm/hwprobe.h
index 7fe0a379474ae2c6..3b2888126e659ea1 100644
--- a/arch/riscv/include/asm/hwprobe.h
+++ b/arch/riscv/include/asm/hwprobe.h
@@ -40,5 +40,11 @@ static inline bool riscv_hwprobe_pair_cmp(struct riscv_hwprobe *pair,
return pair->value == other_pair->value;
}
-
+#ifdef CONFIG_MMU
+void riscv_hwprobe_register_async_probe(void);
+void riscv_hwprobe_complete_async_probe(void);
+#else
+static inline void riscv_hwprobe_register_async_probe(void) {}
+static inline void riscv_hwprobe_complete_async_probe(void) {}
+#endif
#endif
diff --git a/arch/riscv/kernel/sys_hwprobe.c b/arch/riscv/kernel/sys_hwprobe.c
index 0b170e18a2beba57..ee02aeb03e7bd3d8 100644
--- a/arch/riscv/kernel/sys_hwprobe.c
+++ b/arch/riscv/kernel/sys_hwprobe.c
@@ -5,6 +5,8 @@
* more details.
*/
#include <linux/syscalls.h>
+#include <linux/completion.h>
+#include <linux/atomic.h>
#include <asm/cacheflush.h>
#include <asm/cpufeature.h>
#include <asm/hwprobe.h>
@@ -467,6 +469,20 @@ static int do_riscv_hwprobe(struct riscv_hwprobe __user *pairs,
#ifdef CONFIG_MMU
+static DECLARE_COMPLETION(boot_probes_done);
+static atomic_t pending_boot_probes = ATOMIC_INIT(1);
+
+void riscv_hwprobe_register_async_probe(void)
+{
+ atomic_inc(&pending_boot_probes);
+}
+
+void riscv_hwprobe_complete_async_probe(void)
+{
+ if (atomic_dec_and_test(&pending_boot_probes))
+ complete(&boot_probes_done);
+}
+
static int __init init_hwprobe_vdso_data(void)
{
struct vdso_arch_data *avd = vdso_k_arch_data;
@@ -474,6 +490,8 @@ static int __init init_hwprobe_vdso_data(void)
struct riscv_hwprobe pair;
int key;
+ if (unlikely(!atomic_dec_and_test(&pending_boot_probes)))
+ wait_for_completion(&boot_probes_done);
/*
* Initialize vDSO data with the answers for the "all CPUs" case, to
* save a syscall in the common case.
@@ -504,7 +522,7 @@ static int __init init_hwprobe_vdso_data(void)
return 0;
}
-arch_initcall_sync(init_hwprobe_vdso_data);
+late_initcall(init_hwprobe_vdso_data);
#endif /* CONFIG_MMU */
diff --git a/arch/riscv/kernel/unaligned_access_speed.c b/arch/riscv/kernel/unaligned_access_speed.c
index ae2068425fbcd207..4b8ad2673b0f7470 100644
--- a/arch/riscv/kernel/unaligned_access_speed.c
+++ b/arch/riscv/kernel/unaligned_access_speed.c
@@ -379,6 +379,7 @@ static void check_vector_unaligned_access(struct work_struct *work __always_unus
static int __init vec_check_unaligned_access_speed_all_cpus(void *unused __always_unused)
{
schedule_on_each_cpu(check_vector_unaligned_access);
+ riscv_hwprobe_complete_async_probe();
return 0;
}
@@ -473,8 +474,12 @@ static int __init check_unaligned_access_all_cpus(void)
per_cpu(vector_misaligned_access, cpu) = unaligned_vector_speed_param;
} else if (!check_vector_unaligned_access_emulated_all_cpus() &&
IS_ENABLED(CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS)) {
- kthread_run(vec_check_unaligned_access_speed_all_cpus,
- NULL, "vec_check_unaligned_access_speed_all_cpus");
+ riscv_hwprobe_register_async_probe();
+ if (IS_ERR(kthread_run(vec_check_unaligned_access_speed_all_cpus,
+ NULL, "vec_check_unaligned_access_speed_all_cpus"))) {
+ pr_warn("Failed to create vec_unalign_check kthread\n");
+ riscv_hwprobe_complete_async_probe();
+ }
}
/*
--
2.50.0
syzbot reports a KMSAN kernel-infoleak in `do_insn_ioctl()`. A kernel
buffer is allocated to hold `insn->n` samples (each of which is an
`unsigned int`). For some instruction types, `insn->n` samples are
copied back to user-space, unless an error code is being returned. The
problem is that not all the instruction handlers that need to return
data to userspace fill in the whole `insn->n` samples, so that there is
an information leak. There is a similar syzbot report for
`do_insnlist_ioctl()`, although it does not have a reproducer for it at
the time of writing.
One culprit is `insn_rw_emulate_bits()` which is used as the handler for
`INSN_READ` or `INSN_WRITE` instructions for subdevices that do not have
a specific handler for that instruction, but do have an `INSN_BITS`
handler. For `INSN_READ` it only fills in at most 1 sample, so if
`insn->n` is greater than 1, the remaining `insn->n - 1` samples copied
to userspace will be uninitialized kernel data.
Another culprit is `vm80xx_ai_insn_read()` in the "vm80xx" driver. It
never returns an error, even if it fails to fill the buffer.
Fix it in `do_insn_ioctl()` and `do_insnlist_ioctl()` by making sure
that uninitialized parts of the allocated buffer are zeroed before
handling each instruction.
Thanks to Arnaud Lecomte for their fix to `do_insn_ioctl()`. That fix
replaced the call to `kmalloc_array()` with `kcalloc()`, but it is not
always necessary to clear the whole buffer.
Fixes: ed9eccbe8970 ("Staging: add comedi core")
Reported-by: syzbot+a5e45f768aab5892da5d(a)syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=a5e45f768aab5892da5d
Reported-by: syzbot+fb4362a104d45ab09cf9(a)syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=fb4362a104d45ab09cf9
Cc: <stable(a)vger.kernel.org> #5.13+
Cc: Arnaud Lecomte <contact(a)arnaud-lcm.com>
Signed-off-by: Ian Abbott <abbotti(a)mev.co.uk>
---
Cherry picks to fix this for 5.4.y and 5.10.y not yet available.
---
drivers/comedi/comedi_fops.c | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/drivers/comedi/comedi_fops.c b/drivers/comedi/comedi_fops.c
index 23b7178522ae..7e2f2b1a1c36 100644
--- a/drivers/comedi/comedi_fops.c
+++ b/drivers/comedi/comedi_fops.c
@@ -1587,6 +1587,9 @@ static int do_insnlist_ioctl(struct comedi_device *dev,
memset(&data[n], 0, (MIN_SAMPLES - n) *
sizeof(unsigned int));
}
+ } else {
+ memset(data, 0, max_t(unsigned int, n, MIN_SAMPLES) *
+ sizeof(unsigned int));
}
ret = parse_insn(dev, insns + i, data, file);
if (ret < 0)
@@ -1670,6 +1673,8 @@ static int do_insn_ioctl(struct comedi_device *dev,
memset(&data[insn->n], 0,
(MIN_SAMPLES - insn->n) * sizeof(unsigned int));
}
+ } else {
+ memset(data, 0, n_data * sizeof(unsigned int));
}
ret = parse_insn(dev, insn, data, file);
if (ret < 0)
--
2.47.2
Dear Stable Kernel Team and Maintainers,
I am writing to request a backport of the following commit from the
mainline kernel to the 5.15.y stable branch:
Commit: x86/ioremap: Use is_ioremap_addr() in iounmap()
ID: 50c6dbdfd16e312382842198a7919341ad480e05
Author: Max Ramanouski
Merged in: Linux 6.11-rc1 (approximately August 2024)
This commit fixes a bug in the iounmap() function for x86
architectures in kernel versions 5.x. Specifically, the original code
uses a check against high_memory:
if ((void __force *)addr <= high_memory)
return;
This can lead to memory leaks on certain x86 servers where ioremap()
returns addresses that are not guaranteed to be greater than
high_memory, causing the function to return early without properly
unmapping the memory.
The fix replaces this with is_ioremap_addr(), making the check more reliable:
if (WARN_ON_ONCE(!is_ioremap_addr((void __force *)addr)))
return;
I have checked the 5.15.y branch logs and did not find this backport.
This issue affects production environments, particularly on customer
machines where we cannot easily deploy custom kernels. Backporting
this to 5.15.y (and possibly other LTS branches like 5.10.y if
applicable) would help resolve the memory leak without requiring users
to upgrade to 6.x series.
Do you have plans to backport this commit? If not, could you please
consider it for inclusion in the stable releases?
Thank you for your time and efforts in maintaining the stable kernels.
Best regards,
xin.sun
From: Nianyao Tang <tangnianyao(a)huawei.com>
[ upstream commit e8cde32f111f7f5681a7bad3ec747e9e697569a9 ]
Enable ECBHB bits in ID_AA64MMFR1 register as per ARM DDI 0487K.a
specification.
When guest OS read ID_AA64MMFR1_EL1, kvm emulate this reg using
ftr_id_aa64mmfr1 and always return ID_AA64MMFR1_EL1.ECBHB=0 to guest.
It results in guest syscall jump to tramp ventry, which is not needed
in implementation with ID_AA64MMFR1_EL1.ECBHB=1.
Let's make the guest syscall process the same as the host.
This fixes performance regressions introduced by commit 4117975672c4
("arm64: errata: Add newer ARM cores to the spectre_bhb_loop_affected()
lists") for guests running on neoverse v2 hardware, which supports
ECBHB.
Signed-off-by: Nianyao Tang <tangnianyao(a)huawei.com>
Link: https://lore.kernel.org/r/20240611122049.2758600-1-tangnianyao@huawei.com
Signed-off-by: Catalin Marinas <catalin.marinas(a)arm.com>
Signed-off-by: Patrick Roy <roypat(a)amazon.co.uk>
---
arch/arm64/kernel/cpufeature.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index b6d381f743f3..2ce9ef9d924a 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -364,6 +364,7 @@ static const struct arm64_ftr_bits ftr_id_aa64mmfr0[] = {
};
static const struct arm64_ftr_bits ftr_id_aa64mmfr1[] = {
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_ECBHB_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_TIDCP1_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_AFP_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_HCX_SHIFT, 4, 0),
--
2.50.1
Buffer bouncing is needed only when memory exists above the lowmem region,
i.e., when max_low_pfn < max_pfn. The previous check (max_low_pfn >=
max_pfn) was inverted and prevented bouncing when it could actually be
required.
Note that bouncing depends on CONFIG_HIGHMEM, which is typically enabled
on 32-bit ARM where not all memory is permanently mapped into the kernel’s
lowmem region.
Fixes: 9bb33f24abbd0 ("block: refactor the bounce buffering code")
Cc: stable(a)vger.kernel.org
Signed-off-by: Hardeep Sharma <quic_hardshar(a)quicinc.com>
---
block/blk.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/block/blk.h b/block/blk.h
index 67915b04b3c1..f8a1d64be5a2 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -383,7 +383,7 @@ static inline bool blk_queue_may_bounce(struct request_queue *q)
{
return IS_ENABLED(CONFIG_BOUNCE) &&
q->limits.bounce == BLK_BOUNCE_HIGH &&
- max_low_pfn >= max_pfn;
+ max_low_pfn < max_pfn;
}
static inline struct bio *blk_queue_bounce(struct bio *bio,
--
2.25.1
From: Kairui Song <kasong(a)tencent.com>
The current swap-in code assumes that, when a swap entry in shmem mapping
is order 0, its cached folios (if present) must be order 0 too, which
turns out not always correct.
The problem is shmem_split_large_entry is called before verifying the
folio will eventually be swapped in, one possible race is:
CPU1 CPU2
shmem_swapin_folio
/* swap in of order > 0 swap entry S1 */
folio = swap_cache_get_folio
/* folio = NULL */
order = xa_get_order
/* order > 0 */
folio = shmem_swap_alloc_folio
/* mTHP alloc failure, folio = NULL */
<... Interrupted ...>
shmem_swapin_folio
/* S1 is swapped in */
shmem_writeout
/* S1 is swapped out, folio cached */
shmem_split_large_entry(..., S1)
/* S1 is split, but the folio covering it has order > 0 now */
Now any following swapin of S1 will hang: `xa_get_order` returns 0, and
folio lookup will return a folio with order > 0. The
`xa_get_order(&mapping->i_pages, index) != folio_order(folio)` will always
return false causing swap-in to return -EEXIST.
And this looks fragile. So fix this up by allowing seeing a larger folio
in swap cache, and check the whole shmem mapping range covered by the
swapin have the right swap value upon inserting the folio. And drop the
redundant tree walks before the insertion.
This will actually improve performance, as it avoids two redundant Xarray
tree walks in the hot path, and the only side effect is that in the
failure path, shmem may redundantly reallocate a few folios causing
temporary slight memory pressure.
And worth noting, it may seems the order and value check before inserting
might help reducing the lock contention, which is not true. The swap
cache layer ensures raced swapin will either see a swap cache folio or
failed to do a swapin (we have SWAP_HAS_CACHE bit even if swap cache is
bypassed), so holding the folio lock and checking the folio flag is
already good enough for avoiding the lock contention. The chance that a
folio passes the swap entry value check but the shmem mapping slot has
changed should be very low.
Fixes: 809bc86517cc ("mm: shmem: support large folio swap out")
Signed-off-by: Kairui Song <kasong(a)tencent.com>
Reviewed-by: Kemeng Shi <shikemeng(a)huaweicloud.com>
Reviewed-by: Baolin Wang <baolin.wang(a)linux.alibaba.com>
Tested-by: Baolin Wang <baolin.wang(a)linux.alibaba.com>
Cc: <stable(a)vger.kernel.org>
---
mm/shmem.c | 30 +++++++++++++++++++++---------
1 file changed, 21 insertions(+), 9 deletions(-)
diff --git a/mm/shmem.c b/mm/shmem.c
index 334b7b4a61a0..e3c9a1365ff4 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -884,7 +884,9 @@ static int shmem_add_to_page_cache(struct folio *folio,
pgoff_t index, void *expected, gfp_t gfp)
{
XA_STATE_ORDER(xas, &mapping->i_pages, index, folio_order(folio));
- long nr = folio_nr_pages(folio);
+ unsigned long nr = folio_nr_pages(folio);
+ swp_entry_t iter, swap;
+ void *entry;
VM_BUG_ON_FOLIO(index != round_down(index, nr), folio);
VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
@@ -896,14 +898,24 @@ static int shmem_add_to_page_cache(struct folio *folio,
gfp &= GFP_RECLAIM_MASK;
folio_throttle_swaprate(folio, gfp);
+ swap = iter = radix_to_swp_entry(expected);
do {
xas_lock_irq(&xas);
- if (expected != xas_find_conflict(&xas)) {
- xas_set_err(&xas, -EEXIST);
- goto unlock;
+ xas_for_each_conflict(&xas, entry) {
+ /*
+ * The range must either be empty, or filled with
+ * expected swap entries. Shmem swap entries are never
+ * partially freed without split of both entry and
+ * folio, so there shouldn't be any holes.
+ */
+ if (!expected || entry != swp_to_radix_entry(iter)) {
+ xas_set_err(&xas, -EEXIST);
+ goto unlock;
+ }
+ iter.val += 1 << xas_get_order(&xas);
}
- if (expected && xas_find_conflict(&xas)) {
+ if (expected && iter.val - nr != swap.val) {
xas_set_err(&xas, -EEXIST);
goto unlock;
}
@@ -2323,7 +2335,7 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index,
error = -ENOMEM;
goto failed;
}
- } else if (order != folio_order(folio)) {
+ } else if (order > folio_order(folio)) {
/*
* Swap readahead may swap in order 0 folios into swapcache
* asynchronously, while the shmem mapping can still stores
@@ -2348,15 +2360,15 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index,
swap = swp_entry(swp_type(swap), swp_offset(swap) + offset);
}
+ } else if (order < folio_order(folio)) {
+ swap.val = round_down(swap.val, 1 << folio_order(folio));
}
alloced:
/* We have to do this with folio locked to prevent races */
folio_lock(folio);
if ((!skip_swapcache && !folio_test_swapcache(folio)) ||
- folio->swap.val != swap.val ||
- !shmem_confirm_swap(mapping, index, swap) ||
- xa_get_order(&mapping->i_pages, index) != folio_order(folio)) {
+ folio->swap.val != swap.val) {
error = -EEXIST;
goto unlock;
}
--
2.50.0
The quilt patch titled
Subject: mm: swap: fix potential buffer overflow in setup_clusters()
has been removed from the -mm tree. Its filename was
mm-swap-fix-potensial-buffer-overflow-in-setup_clusters.patch
This patch was dropped because it was merged into the mm-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Kemeng Shi <shikemeng(a)huaweicloud.com>
Subject: mm: swap: fix potential buffer overflow in setup_clusters()
Date: Thu, 22 May 2025 20:25:53 +0800
In setup_swap_map(), we only ensure badpages are in range (0, last_page].
As maxpages might be < last_page, setup_clusters() will encounter a buffer
overflow when a badpage is >= maxpages.
Only call inc_cluster_info_page() for badpage which is < maxpages to fix
the issue.
Link: https://lkml.kernel.org/r/20250522122554.12209-4-shikemeng@huaweicloud.com
Fixes: b843786b0bd0 ("mm: swapfile: fix SSD detection with swapfile on btrfs")
Signed-off-by: Kemeng Shi <shikemeng(a)huaweicloud.com>
Reviewed-by: Baoquan He <bhe(a)redhat.com>
Cc: Johannes Weiner <hannes(a)cmpxchg.org>
Cc: Kairui Song <kasong(a)tencent.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/swapfile.c | 10 +++++++---
1 file changed, 7 insertions(+), 3 deletions(-)
--- a/mm/swapfile.c~mm-swap-fix-potensial-buffer-overflow-in-setup_clusters
+++ a/mm/swapfile.c
@@ -3208,9 +3208,13 @@ static struct swap_cluster_info *setup_c
* and the EOF part of the last cluster.
*/
inc_cluster_info_page(si, cluster_info, 0);
- for (i = 0; i < swap_header->info.nr_badpages; i++)
- inc_cluster_info_page(si, cluster_info,
- swap_header->info.badpages[i]);
+ for (i = 0; i < swap_header->info.nr_badpages; i++) {
+ unsigned int page_nr = swap_header->info.badpages[i];
+
+ if (page_nr >= maxpages)
+ continue;
+ inc_cluster_info_page(si, cluster_info, page_nr);
+ }
for (i = maxpages; i < round_up(maxpages, SWAPFILE_CLUSTER); i++)
inc_cluster_info_page(si, cluster_info, i);
_
Patches currently in -mm which might be from shikemeng(a)huaweicloud.com are
The quilt patch titled
Subject: mm: swap: correctly use maxpages in swapon syscall to avoid potential deadloop
has been removed from the -mm tree. Its filename was
mm-swap-correctly-use-maxpages-in-swapon-syscall-to-avoid-potensial-deadloop.patch
This patch was dropped because it was merged into the mm-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Kemeng Shi <shikemeng(a)huaweicloud.com>
Subject: mm: swap: correctly use maxpages in swapon syscall to avoid potential deadloop
Date: Thu, 22 May 2025 20:25:52 +0800
We use maxpages from read_swap_header() to initialize swap_info_struct,
however the maxpages might be reduced in setup_swap_extents() and the
si->max is assigned with the reduced maxpages from the
setup_swap_extents().
Obviously, this could lead to memory waste as we allocated memory based on
larger maxpages, besides, this could lead to a potential deadloop as
following:
1) When calling setup_clusters() with larger maxpages, unavailable
pages within range [si->max, larger maxpages) are not accounted with
inc_cluster_info_page(). As a result, these pages are assumed
available but can not be allocated. The cluster contains these pages
can be moved to frag_clusters list after it's all available pages were
allocated.
2) When the cluster mentioned in 1) is the only cluster in
frag_clusters list, cluster_alloc_swap_entry() assume order 0
allocation will never failed and will enter a deadloop by keep trying
to allocate page from the only cluster in frag_clusters which contains
no actually available page.
Call setup_swap_extents() to get the final maxpages before
swap_info_struct initialization to fix the issue.
After this change, span will include badblocks and will become large
value which I think is correct value:
In summary, there are two kinds of swapfile_activate operations.
1. Filesystem style: Treat all blocks logical continuity and find
usable physical extents in logical range. In this way, si->pages will
be actual usable physical blocks and span will be "1 + highest_block -
lowest_block".
2. Block device style: Treat all blocks physically continue and only
one single extent is added. In this way, si->pages will be si->max and
span will be "si->pages - 1". Actually, si->pages and si->max is only
used in block device style and span value is set with si->pages. As a
result, span value in block device style will become a larger value as
you mentioned.
I think larger value is correct based on:
1. Span value in filesystem style is "1 + highest_block -
lowest_block" which is the range cover all possible phisical blocks
including the badblocks.
2. For block device style, si->pages is the actual usable block number
and is already in pr_info. The original span value before this patch
is also refer to usable block number which is redundant in pr_info.
[shikemeng(a)huaweicloud.com: ensure si->pages == si->max - 1 after setup_swap_extents()]
Link: https://lkml.kernel.org/r/20250522122554.12209-3-shikemeng@huaweicloud.com
Link: https://lkml.kernel.org/r/20250718065139.61989-1-shikemeng@huaweicloud.com
Link: https://lkml.kernel.org/r/20250522122554.12209-3-shikemeng@huaweicloud.com
Fixes: 661383c6111a ("mm: swap: relaim the cached parts that got scanned")
Signed-off-by: Kemeng Shi <shikemeng(a)huaweicloud.com>
Reviewed-by: Baoquan He <bhe(a)redhat.com>
Cc: Johannes Weiner <hannes(a)cmpxchg.org>
Cc: Kairui Song <kasong(a)tencent.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/swapfile.c | 53 +++++++++++++++++++++++-------------------------
1 file changed, 26 insertions(+), 27 deletions(-)
--- a/mm/swapfile.c~mm-swap-correctly-use-maxpages-in-swapon-syscall-to-avoid-potensial-deadloop
+++ a/mm/swapfile.c
@@ -3141,43 +3141,30 @@ static unsigned long read_swap_header(st
return maxpages;
}
-static int setup_swap_map_and_extents(struct swap_info_struct *si,
- union swap_header *swap_header,
- unsigned char *swap_map,
- unsigned long maxpages,
- sector_t *span)
+static int setup_swap_map(struct swap_info_struct *si,
+ union swap_header *swap_header,
+ unsigned char *swap_map,
+ unsigned long maxpages)
{
- unsigned int nr_good_pages;
unsigned long i;
- int nr_extents;
-
- nr_good_pages = maxpages - 1; /* omit header page */
+ swap_map[0] = SWAP_MAP_BAD; /* omit header page */
for (i = 0; i < swap_header->info.nr_badpages; i++) {
unsigned int page_nr = swap_header->info.badpages[i];
if (page_nr == 0 || page_nr > swap_header->info.last_page)
return -EINVAL;
if (page_nr < maxpages) {
swap_map[page_nr] = SWAP_MAP_BAD;
- nr_good_pages--;
+ si->pages--;
}
}
- if (nr_good_pages) {
- swap_map[0] = SWAP_MAP_BAD;
- si->max = maxpages;
- si->pages = nr_good_pages;
- nr_extents = setup_swap_extents(si, span);
- if (nr_extents < 0)
- return nr_extents;
- nr_good_pages = si->pages;
- }
- if (!nr_good_pages) {
+ if (!si->pages) {
pr_warn("Empty swap-file\n");
return -EINVAL;
}
- return nr_extents;
+ return 0;
}
#define SWAP_CLUSTER_INFO_COLS \
@@ -3217,7 +3204,7 @@ static struct swap_cluster_info *setup_c
* Mark unusable pages as unavailable. The clusters aren't
* marked free yet, so no list operations are involved yet.
*
- * See setup_swap_map_and_extents(): header page, bad pages,
+ * See setup_swap_map(): header page, bad pages,
* and the EOF part of the last cluster.
*/
inc_cluster_info_page(si, cluster_info, 0);
@@ -3363,6 +3350,21 @@ SYSCALL_DEFINE2(swapon, const char __use
goto bad_swap_unlock_inode;
}
+ si->max = maxpages;
+ si->pages = maxpages - 1;
+ nr_extents = setup_swap_extents(si, &span);
+ if (nr_extents < 0) {
+ error = nr_extents;
+ goto bad_swap_unlock_inode;
+ }
+ if (si->pages != si->max - 1) {
+ pr_err("swap:%u != (max:%u - 1)\n", si->pages, si->max);
+ error = -EINVAL;
+ goto bad_swap_unlock_inode;
+ }
+
+ maxpages = si->max;
+
/* OK, set up the swap map and apply the bad block list */
swap_map = vzalloc(maxpages);
if (!swap_map) {
@@ -3374,12 +3376,9 @@ SYSCALL_DEFINE2(swapon, const char __use
if (error)
goto bad_swap_unlock_inode;
- nr_extents = setup_swap_map_and_extents(si, swap_header, swap_map,
- maxpages, &span);
- if (unlikely(nr_extents < 0)) {
- error = nr_extents;
+ error = setup_swap_map(si, swap_header, swap_map, maxpages);
+ if (error)
goto bad_swap_unlock_inode;
- }
/*
* Use kvmalloc_array instead of bitmap_zalloc as the allocation order might
_
Patches currently in -mm which might be from shikemeng(a)huaweicloud.com are