May 2025 - Linux-stable-mirror

[PATCH v3] iio: common: st_sensors: Fix use of uninitialize device structs

by Maud Spierings via B4 Relay

From: Maud Spierings <maudspierings(a)gocontroll.com> Throughout the various probe functions &indio_dev->dev is used before it is initialized. This caused a kernel panic in st_sensors_power_enable() when the call to devm_regulator_bulk_get_enable() fails and then calls dev_err_probe() with the uninitialized device. This seems to only cause a panic with dev_err_probe(), dev_err(), dev_warn() and dev_info() don't seem to cause a panic, but are fixed as well. The issue is reported and traced here: [1] [1]: https://lore.kernel.org/all/AM7P189MB100986A83D2F28AF3FFAF976E39EA@AM7P189M… Cc: stable(a)vger.kernel.org Signed-off-by: Maud Spierings <maudspierings(a)gocontroll.com> --- When I search for general &indio_dev->dev usage, I see quite a lot more hits, but I am not sure if there are issues with those too. This issue has existed for a long time it seems and therefore it is nearly impossible to find a proper fixes tag. I would love to see it at least backported to 6.12 as that is where I encountered it, and I believe the patch should apply without conflicts. --- Changes in v3: - Added the stable cc to the commit message - Move the link to the original issue to the commit message - Fix function notation in the commit message - Move some more of the dev_*() calls to one line - Link to v2: https://lore.kernel.org/r/20250522-st_iio_fix-v2-1-07a32655a996@gocontroll.… Changes in v2: - Added SoB in commit message - Link to v1: https://lore.kernel.org/r/20250522-st_iio_fix-v1-1-d689b35f1612@gocontroll.… --- drivers/iio/accel/st_accel_core.c | 10 +++--- drivers/iio/common/st_sensors/st_sensors_core.c | 36 ++++++++++------------ drivers/iio/common/st_sensors/st_sensors_trigger.c | 20 ++++++------ 3 files changed, 31 insertions(+), 35 deletions(-) diff --git a/drivers/iio/accel/st_accel_core.c b/drivers/iio/accel/st_accel_core.c index 99cb661fabb2d9cc1943fa8d0a6f3becb71126e6..a7961c610ed203d039bbf298c8883031a578fb0b 100644 --- a/drivers/iio/accel/st_accel_core.c +++ b/drivers/iio/accel/st_accel_core.c @@ -1353,6 +1353,7 @@ static int apply_acpi_orientation(struct iio_dev *indio_dev) union acpi_object *ont; union acpi_object *elements; acpi_status status; + struct device *parent = indio_dev->dev.parent; int ret = -EINVAL; unsigned int val; int i, j; @@ -1371,7 +1372,7 @@ static int apply_acpi_orientation(struct iio_dev *indio_dev) }; - adev = ACPI_COMPANION(indio_dev->dev.parent); + adev = ACPI_COMPANION(parent); if (!adev) return -ENXIO; @@ -1380,8 +1381,7 @@ static int apply_acpi_orientation(struct iio_dev *indio_dev) if (status == AE_NOT_FOUND) { return -ENXIO; } else if (ACPI_FAILURE(status)) { - dev_warn(&indio_dev->dev, "failed to execute _ONT: %d\n", - status); + dev_warn(parent, "failed to execute _ONT: %d\n", status); return status; } @@ -1457,12 +1457,12 @@ static int apply_acpi_orientation(struct iio_dev *indio_dev) } ret = 0; - dev_info(&indio_dev->dev, "computed mount matrix from ACPI\n"); + dev_info(parent, "computed mount matrix from ACPI\n"); out: kfree(buffer.pointer); if (ret) - dev_dbg(&indio_dev->dev, + dev_dbg(parent, "failed to apply ACPI orientation data: %d\n", ret); return ret; diff --git a/drivers/iio/common/st_sensors/st_sensors_core.c b/drivers/iio/common/st_sensors/st_sensors_core.c index 8ce1dccfea4f5aaff45d3d40f6542323dd1f0b09..dac593be56958fd0be92e13f628350fcfd0f040d 100644 --- a/drivers/iio/common/st_sensors/st_sensors_core.c +++ b/drivers/iio/common/st_sensors/st_sensors_core.c @@ -154,7 +154,7 @@ static int st_sensors_set_fullscale(struct iio_dev *indio_dev, unsigned int fs) return err; st_accel_set_fullscale_error: - dev_err(&indio_dev->dev, "failed to set new fullscale.\n"); + dev_err(indio_dev->dev.parent, "failed to set new fullscale.\n"); return err; } @@ -231,8 +231,7 @@ int st_sensors_power_enable(struct iio_dev *indio_dev) ARRAY_SIZE(regulator_names), regulator_names); if (err) - return dev_err_probe(&indio_dev->dev, err, - "unable to enable supplies\n"); + return dev_err_probe(parent, err, "unable to enable supplies\n"); return 0; } @@ -241,13 +240,14 @@ EXPORT_SYMBOL_NS(st_sensors_power_enable, "IIO_ST_SENSORS"); static int st_sensors_set_drdy_int_pin(struct iio_dev *indio_dev, struct st_sensors_platform_data *pdata) { + struct device *parent = indio_dev->dev.parent; struct st_sensor_data *sdata = iio_priv(indio_dev); /* Sensor does not support interrupts */ if (!sdata->sensor_settings->drdy_irq.int1.addr && !sdata->sensor_settings->drdy_irq.int2.addr) { if (pdata->drdy_int_pin) - dev_info(&indio_dev->dev, + dev_info(parent, "DRDY on pin INT%d specified, but sensor does not support interrupts\n", pdata->drdy_int_pin); return 0; @@ -256,29 +256,27 @@ static int st_sensors_set_drdy_int_pin(struct iio_dev *indio_dev, switch (pdata->drdy_int_pin) { case 1: if (!sdata->sensor_settings->drdy_irq.int1.mask) { - dev_err(&indio_dev->dev, - "DRDY on INT1 not available.\n"); + dev_err(parent, "DRDY on INT1 not available.\n"); return -EINVAL; } sdata->drdy_int_pin = 1; break; case 2: if (!sdata->sensor_settings->drdy_irq.int2.mask) { - dev_err(&indio_dev->dev, - "DRDY on INT2 not available.\n"); + dev_err(parent, "DRDY on INT2 not available.\n"); return -EINVAL; } sdata->drdy_int_pin = 2; break; default: - dev_err(&indio_dev->dev, "DRDY on pdata not valid.\n"); + dev_err(parent, "DRDY on pdata not valid.\n"); return -EINVAL; } if (pdata->open_drain) { if (!sdata->sensor_settings->drdy_irq.int1.addr_od && !sdata->sensor_settings->drdy_irq.int2.addr_od) - dev_err(&indio_dev->dev, + dev_err(parent, "open drain requested but unsupported.\n"); else sdata->int_pin_open_drain = true; @@ -336,6 +334,7 @@ EXPORT_SYMBOL_NS(st_sensors_dev_name_probe, "IIO_ST_SENSORS"); int st_sensors_init_sensor(struct iio_dev *indio_dev, struct st_sensors_platform_data *pdata) { + struct device *parent = indio_dev->dev.parent; struct st_sensor_data *sdata = iio_priv(indio_dev); struct st_sensors_platform_data *of_pdata; int err = 0; @@ -343,7 +342,7 @@ int st_sensors_init_sensor(struct iio_dev *indio_dev, mutex_init(&sdata->odr_lock); /* If OF/DT pdata exists, it will take precedence of anything else */ - of_pdata = st_sensors_dev_probe(indio_dev->dev.parent, pdata); + of_pdata = st_sensors_dev_probe(parent, pdata); if (IS_ERR(of_pdata)) return PTR_ERR(of_pdata); if (of_pdata) @@ -370,7 +369,7 @@ int st_sensors_init_sensor(struct iio_dev *indio_dev, if (err < 0) return err; } else - dev_info(&indio_dev->dev, "Full-scale not possible\n"); + dev_info(parent, "Full-scale not possible\n"); err = st_sensors_set_odr(indio_dev, sdata->odr); if (err < 0) @@ -405,7 +404,7 @@ int st_sensors_init_sensor(struct iio_dev *indio_dev, mask = sdata->sensor_settings->drdy_irq.int2.mask_od; } - dev_info(&indio_dev->dev, + dev_info(parent, "set interrupt line to open drain mode on pin %d\n", sdata->drdy_int_pin); err = st_sensors_write_data_with_mask(indio_dev, addr, @@ -593,21 +592,20 @@ EXPORT_SYMBOL_NS(st_sensors_get_settings_index, "IIO_ST_SENSORS"); int st_sensors_verify_id(struct iio_dev *indio_dev) { struct st_sensor_data *sdata = iio_priv(indio_dev); + struct device *parent = indio_dev->dev.parent; int wai, err; if (sdata->sensor_settings->wai_addr) { err = regmap_read(sdata->regmap, sdata->sensor_settings->wai_addr, &wai); if (err < 0) { - dev_err(&indio_dev->dev, - "failed to read Who-Am-I register.\n"); - return err; + return dev_err_probe(parent, err, + "failed to read Who-Am-I register.\n"); } if (sdata->sensor_settings->wai != wai) { - dev_warn(&indio_dev->dev, - "%s: WhoAmI mismatch (0x%x).\n", - indio_dev->name, wai); + dev_warn(parent, "%s: WhoAmI mismatch (0x%x).\n", + indio_dev->name, wai); } } diff --git a/drivers/iio/common/st_sensors/st_sensors_trigger.c b/drivers/iio/common/st_sensors/st_sensors_trigger.c index 9d4bf822a15dfcdd6c2835f6b9d7698cd3cb0b08..8a8ab688d7980f6dd43c660f90a0eba32c38388b 100644 --- a/drivers/iio/common/st_sensors/st_sensors_trigger.c +++ b/drivers/iio/common/st_sensors/st_sensors_trigger.c @@ -127,7 +127,7 @@ int st_sensors_allocate_trigger(struct iio_dev *indio_dev, sdata->trig = devm_iio_trigger_alloc(parent, "%s-trigger", indio_dev->name); if (sdata->trig == NULL) { - dev_err(&indio_dev->dev, "failed to allocate iio trigger.\n"); + dev_err(parent, "failed to allocate iio trigger.\n"); return -ENOMEM; } @@ -143,7 +143,7 @@ int st_sensors_allocate_trigger(struct iio_dev *indio_dev, case IRQF_TRIGGER_FALLING: case IRQF_TRIGGER_LOW: if (!sdata->sensor_settings->drdy_irq.addr_ihl) { - dev_err(&indio_dev->dev, + dev_err(parent, "falling/low specified for IRQ but hardware supports only rising/high: will request rising/high\n"); if (irq_trig == IRQF_TRIGGER_FALLING) irq_trig = IRQF_TRIGGER_RISING; @@ -156,21 +156,19 @@ int st_sensors_allocate_trigger(struct iio_dev *indio_dev, sdata->sensor_settings->drdy_irq.mask_ihl, 1); if (err < 0) return err; - dev_info(&indio_dev->dev, + dev_info(parent, "interrupts on the falling edge or active low level\n"); } break; case IRQF_TRIGGER_RISING: - dev_info(&indio_dev->dev, - "interrupts on the rising edge\n"); + dev_info(parent, "interrupts on the rising edge\n"); break; case IRQF_TRIGGER_HIGH: - dev_info(&indio_dev->dev, - "interrupts active high level\n"); + dev_info(parent, "interrupts active high level\n"); break; default: /* This is the most preferred mode, if possible */ - dev_err(&indio_dev->dev, + dev_err(parent, "unsupported IRQ trigger specified (%lx), enforce rising edge\n", irq_trig); irq_trig = IRQF_TRIGGER_RISING; } @@ -179,7 +177,7 @@ int st_sensors_allocate_trigger(struct iio_dev *indio_dev, if (irq_trig == IRQF_TRIGGER_FALLING || irq_trig == IRQF_TRIGGER_RISING) { if (!sdata->sensor_settings->drdy_irq.stat_drdy.addr) { - dev_err(&indio_dev->dev, + dev_err(parent, "edge IRQ not supported w/o stat register.\n"); return -EOPNOTSUPP; } @@ -214,13 +212,13 @@ int st_sensors_allocate_trigger(struct iio_dev *indio_dev, sdata->trig->name, sdata->trig); if (err) { - dev_err(&indio_dev->dev, "failed to request trigger IRQ.\n"); + dev_err(parent, "failed to request trigger IRQ.\n"); return err; } err = devm_iio_trigger_register(parent, sdata->trig); if (err < 0) { - dev_err(&indio_dev->dev, "failed to register iio trigger.\n"); + dev_err(parent, "failed to register iio trigger.\n"); return err; } indio_dev->trig = iio_trigger_get(sdata->trig); --- base-commit: 7bac2c97af4078d7a627500c9bcdd5b033f97718 change-id: 20250522-st_iio_fix-1c58fdd4d420 Best regards, -- Maud Spierings <maudspierings(a)gocontroll.com>

1 week, 4 days

2
1
0 0

[PATCH v3] net/mlx5_core: Add error handling inmlx5_query_nic_vport_qkey_viol_cntr()

by Wentao Liang

The function mlx5_query_nic_vport_qkey_viol_cntr() calls the function mlx5_query_nic_vport_context() but does not check its return value. This could lead to undefined behavior if the query fails. A proper implementation can be found in mlx5_nic_vport_query_local_lb(). Add error handling for mlx5_query_nic_vport_context(). If it fails, free the out buffer via kvfree() and return error code. Fixes: 9efa75254593 ("net/mlx5_core: Introduce access functions to query vport RoCE fields") Cc: stable(a)vger.kernel.org # v4.5 Target: net Signed-off-by: Wentao Liang <vulab(a)iscas.ac.cn> --- v3: Explicitly mention target branch. Change improper code. v2: Remove redundant reassignment. Fix RCT. drivers/net/ethernet/mellanox/mlx5/core/vport.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c index 66e44905c1f0..e4b86633d2fe 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c @@ -522,19 +522,22 @@ int mlx5_query_nic_vport_qkey_viol_cntr(struct mlx5_core_dev *mdev, { u32 *out; int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out); + int err; out = kvzalloc(outlen, GFP_KERNEL); if (!out) return -ENOMEM; - mlx5_query_nic_vport_context(mdev, 0, out); + err = mlx5_query_nic_vport_context(mdev, 0, out); + if (err) + goto out; *qkey_viol_cntr = MLX5_GET(query_nic_vport_context_out, out, nic_vport_context.qkey_violation_counter); - +out: kvfree(out); - return 0; + return err; } EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_qkey_viol_cntr); -- 2.42.0.windows.2

1 week, 4 days

4
3
0 0

[PATCH] regulator: max14577: Add error check for max14577_read_reg()

by Wentao Liang

The function max14577_reg_get_current_limit() calls the function max14577_read_reg(), but does not check its return value. A proper implementation can be found in max14577_get_online(). Add a error check for the max14577_read_reg() and return error code if the function fails. Fixes: b0902bbeb768 ("regulator: max14577: Add regulator driver for Maxim 14577") Cc: stable(a)vger.kernel.org # v3.14 Signed-off-by: Wentao Liang <vulab(a)iscas.ac.cn> --- drivers/regulator/max14577-regulator.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/regulator/max14577-regulator.c b/drivers/regulator/max14577-regulator.c index 5e7171b9065a..41fd15adfd1f 100644 --- a/drivers/regulator/max14577-regulator.c +++ b/drivers/regulator/max14577-regulator.c @@ -40,11 +40,14 @@ static int max14577_reg_get_current_limit(struct regulator_dev *rdev) struct max14577 *max14577 = rdev_get_drvdata(rdev); const struct maxim_charger_current *limits = &maxim_charger_currents[max14577->dev_type]; + int ret; if (rdev_get_id(rdev) != MAX14577_CHARGER) return -EINVAL; - max14577_read_reg(rmap, MAX14577_CHG_REG_CHG_CTRL4, &reg_data); + ret = max14577_read_reg(rmap, MAX14577_CHG_REG_CHG_CTRL4, &reg_data); + if (ret < 0) + return ret; if ((reg_data & CHGCTRL4_MBCICHWRCL_MASK) == 0) return limits->min; -- 2.42.0.windows.2

1 week, 4 days

2
1
0 0

[PATCH] firmware: cs_dsp: Fix OOB memory read access in KUnit test (wmfw info)

by Jaroslav Kysela

KASAN reported out of bounds access - cs_dsp_mock_wmfw_add_info(), because the source string length was rounded up to the allocation size. Cc: Simon Trimmer <simont(a)opensource.cirrus.com> Cc: Charles Keepax <ckeepax(a)opensource.cirrus.com> Cc: Richard Fitzgerald <rf(a)opensource.cirrus.com> Cc: patches(a)opensource.cirrus.com Cc: stable(a)vger.kernel.org Signed-off-by: Jaroslav Kysela <perex(a)perex.cz> --- drivers/firmware/cirrus/test/cs_dsp_mock_wmfw.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/firmware/cirrus/test/cs_dsp_mock_wmfw.c b/drivers/firmware/cirrus/test/cs_dsp_mock_wmfw.c index 5a3ac03ac37f..4fa74550dafd 100644 --- a/drivers/firmware/cirrus/test/cs_dsp_mock_wmfw.c +++ b/drivers/firmware/cirrus/test/cs_dsp_mock_wmfw.c @@ -133,10 +133,11 @@ void cs_dsp_mock_wmfw_add_info(struct cs_dsp_mock_wmfw_builder *builder, if (info_len % 4) { /* Create a padded string with length a multiple of 4 */ + size_t copy_len = info_len; info_len = round_up(info_len, 4); tmp = kunit_kzalloc(builder->test_priv->test, info_len, GFP_KERNEL); KUNIT_ASSERT_NOT_ERR_OR_NULL(builder->test_priv->test, tmp); - memcpy(tmp, info, info_len); + memcpy(tmp, info, copy_len); info = tmp; } -- 2.49.0

1 week, 4 days

3
2
0 0

[PATCH] firmware: cs_dsp: Fix OOB memory read access in KUnit test (ctl cache)

by Jaroslav Kysela

KASAN reported out of bounds access - cs_dsp_ctl_cache_init_multiple_offsets(). The code uses mock_coeff_template.length_bytes (4 bytes) for register value allocations. But later, this length is set to 8 bytes which causes test code failures. As fix, just remove the lenght override, keeping the original value 4 for all operations. Cc: Simon Trimmer <simont(a)opensource.cirrus.com> Cc: Charles Keepax <ckeepax(a)opensource.cirrus.com> Cc: Richard Fitzgerald <rf(a)opensource.cirrus.com> Cc: patches(a)opensource.cirrus.com Cc: stable(a)vger.kernel.org Signed-off-by: Jaroslav Kysela <perex(a)perex.cz> --- drivers/firmware/cirrus/test/cs_dsp_test_control_cache.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/firmware/cirrus/test/cs_dsp_test_control_cache.c b/drivers/firmware/cirrus/test/cs_dsp_test_control_cache.c index 83386cc978e3..ebca3a4ab0f1 100644 --- a/drivers/firmware/cirrus/test/cs_dsp_test_control_cache.c +++ b/drivers/firmware/cirrus/test/cs_dsp_test_control_cache.c @@ -776,7 +776,6 @@ static void cs_dsp_ctl_cache_init_multiple_offsets(struct kunit *test) "dummyalg", NULL); /* Create controls identical except for offset */ - def.length_bytes = 8; def.offset_dsp_words = 0; def.shortname = "CtlA"; cs_dsp_mock_wmfw_add_coeff_desc(local->wmfw_builder, &def); -- 2.49.0

1 week, 4 days

3
2
0 0

[PATCH v2] ext4: inline: convert when mmap is called, not when page is written

by Thadeu Lima de Souza Cascardo

inline data handling has a race between writing and writing to a memory map. When ext4_page_mkwrite is called, it calls ext4_convert_inline_data, which destroys the inline data, but if block allocation fails, restores the inline data. In that process, we could have: CPU1 CPU2 destroy_inline_data write_begin (does not see inline data) restory_inline_data write_end (sees inline data) This leads to bugs like the one below, as write_begin did not prepare for the case of inline data, which is expected by the write_end side of it. ------------[ cut here ]------------ kernel BUG at fs/ext4/inline.c:235! Oops: invalid opcode: 0000 [#1] PREEMPT SMP KASAN NOPTI CPU: 1 UID: 0 PID: 5838 Comm: syz-executor110 Not tainted 6.13.0-rc3-syzkaller-00209-g499551201b5f #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 09/13/2024 RIP: 0010:ext4_write_inline_data fs/ext4/inline.c:235 [inline] RIP: 0010:ext4_write_inline_data_end+0xdc7/0xdd0 fs/ext4/inline.c:774 Code: 47 1d 8c e8 4b 3a 91 ff 90 0f 0b e8 63 7a 47 ff 48 8b 7c 24 10 48 c7 c6 e0 47 1d 8c e8 32 3a 91 ff 90 0f 0b e8 4a 7a 47 ff 90 <0f> 0b 0f 1f 80 00 00 00 00 90 90 90 90 90 90 90 90 90 90 90 90 90 RSP: 0018:ffffc900031c7320 EFLAGS: 00010293 RAX: ffffffff8257f9a6 RBX: 000000000000005a RCX: ffff888012968000 RDX: 0000000000000000 RSI: 000000000000005a RDI: 000000000000005b RBP: ffffc900031c7448 R08: ffffffff8257ef87 R09: 1ffff11006806070 R10: dffffc0000000000 R11: ffffed1006806071 R12: 000000000000005a R13: dffffc0000000000 R14: ffff888076b65bd8 R15: 000000000000005b FS: 00007f5c6bacf6c0(0000) GS:ffff8880b8700000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000020000a00 CR3: 0000000073fb6000 CR4: 0000000000350ef0 Call Trace: <TASK> generic_perform_write+0x6f8/0x990 mm/filemap.c:4070 ext4_buffered_write_iter+0xc5/0x350 fs/ext4/file.c:299 ext4_file_write_iter+0x892/0x1c50 iter_file_splice_write+0xbfc/0x1510 fs/splice.c:743 do_splice_from fs/splice.c:941 [inline] direct_splice_actor+0x11d/0x220 fs/splice.c:1164 splice_direct_to_actor+0x588/0xc80 fs/splice.c:1108 do_splice_direct_actor fs/splice.c:1207 [inline] do_splice_direct+0x289/0x3e0 fs/splice.c:1233 do_sendfile+0x564/0x8a0 fs/read_write.c:1363 __do_sys_sendfile64 fs/read_write.c:1424 [inline] __se_sys_sendfile64+0x17c/0x1e0 fs/read_write.c:1410 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xf3/0x230 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f RIP: 0033:0x7f5c6bb18d09 Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 b1 18 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b0 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007f5c6bacf218 EFLAGS: 00000246 ORIG_RAX: 0000000000000028 RAX: ffffffffffffffda RBX: 00007f5c6bba0708 RCX: 00007f5c6bb18d09 RDX: 0000000000000000 RSI: 0000000000000005 RDI: 0000000000000004 RBP: 00007f5c6bba0700 R08: 0000000000000000 R09: 0000000000000000 R10: 000080001d00c0d0 R11: 0000000000000246 R12: 00007f5c6bb6d620 R13: 00007f5c6bb6d0c0 R14: 0031656c69662f2e R15: 8088e3ad122bc192 </TASK> Modules linked in: ---[ end trace 0000000000000000 ]--- This happens because ext4_page_mkwrite is not protected by the inode_lock. The xattr semaphore is not sufficient to protect inline data handling in a sane way, so we need to rely on the inode_lock. Adding the inode_lock to ext4_page_mkwrite is not an option, otherwise lock-ordering problems with mmap_lock may arise. The conversion inside ext4_page_mkwrite was introduced at commit 7b4cc9787fe3 ("ext4: evict inline data when writing to memory map"). This fixes a documented bug in the commit message, which suggests some alternative fixes. Convert inline data when mmap is called, instead of doing it only when the mmapped page is written to. Using the inode_lock there does not lead to lock-ordering issues. The drawback is that inline conversion will happen when the file is mmapped, even though the page will not be written to. Fixes: 7b4cc9787fe3 ("ext4: evict inline data when writing to memory map") Reported-by: syzbot+0c89d865531d053abb2d(a)syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=0c89d865531d053abb2d Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo(a)igalia.com> Cc: stable(a)vger.kernel.org --- Changes in v2: - Convert inline data at mmap time, avoiding data loss. - Link to v1: https://lore.kernel.org/r/20250519-ext4_inline_page_mkwrite-v1-1-865d9a62b5… --- fs/ext4/file.c | 6 ++++++ fs/ext4/inode.c | 4 ---- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/fs/ext4/file.c b/fs/ext4/file.c index beb078ee4811d6092e362e37307e7d87e5276cbc..f2380471df5d99500e49fdc639fa3e56143c328f 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -819,6 +819,12 @@ static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma) if (!daxdev_mapping_supported(vma, dax_dev)) return -EOPNOTSUPP; + inode_lock(inode); + ret = ext4_convert_inline_data(inode); + inode_unlock(inode); + if (ret) + return ret; + file_accessed(file); if (IS_DAX(file_inode(file))) { vma->vm_ops = &ext4_dax_vm_ops; diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 94c7d2d828a64e42ded09c82497ed7617071aa19..895ecda786194b29d32c9c49785d56a1a84e2096 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -6222,10 +6222,6 @@ vm_fault_t ext4_page_mkwrite(struct vm_fault *vmf) filemap_invalidate_lock_shared(mapping); - err = ext4_convert_inline_data(inode); - if (err) - goto out_ret; - /* * On data journalling we skip straight to the transaction handle: * there's no delalloc; page truncated will be checked later; the --- base-commit: 4a95bc121ccdaee04c4d72f84dbfa6b880a514b6 change-id: 20250519-ext4_inline_page_mkwrite-c42ca1f02295 Best regards, -- Thadeu Lima de Souza Cascardo <cascardo(a)igalia.com>

1 week, 4 days

2
1
0 0

[PATCH v4] fs: minix: Fix handling of corrupted directories

by Andrey Kriulin

If the directory is corrupted and the number of nlinks is less than 2 (valid nlinks have at least 2), then when the directory is deleted, the minix_rmdir will try to reduce the nlinks(unsigned int) to a negative value. Make nlinks validity check for directories. Found by Linux Verification Center (linuxtesting.org) with Syzkaller. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Cc: stable(a)vger.kernel.org Signed-off-by: Andrey Kriulin <kitotavrik.s(a)gmail.com> --- v4: Add nlinks check for parent dirictory to minix_rmdir per Jan Kara <jack(a)suse.cz> request. v3: Move nlinks validaty check to minix_rmdir and minix_rename per Jan Kara <jack(a)suse.cz> request. v2: Move nlinks validaty check to V[12]_minix_iget() per Jan Kara <jack(a)suse.cz> request. Change return error code to EUCLEAN. Don't block directory in r/o mode per Al Viro <viro(a)zeniv.linux.org.uk> request. fs/minix/namei.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/fs/minix/namei.c b/fs/minix/namei.c index 8938536d8d3c..ab86fd16e548 100644 --- a/fs/minix/namei.c +++ b/fs/minix/namei.c @@ -161,8 +161,12 @@ static int minix_unlink(struct inode * dir, struct dentry *dentry) static int minix_rmdir(struct inode * dir, struct dentry *dentry) { struct inode * inode = d_inode(dentry); - int err = -ENOTEMPTY; + int err = -EUCLEAN; + if (inode->i_nlink < 2 || dir->i_nlink <= 2) + return err; + + err = -ENOTEMPTY; if (minix_empty_dir(inode)) { err = minix_unlink(dir, dentry); if (!err) { @@ -235,6 +239,10 @@ static int minix_rename(struct mnt_idmap *idmap, mark_inode_dirty(old_inode); if (dir_de) { + if (old_dir->i_nlink <= 2) { + err = -EUCLEAN; + goto out_dir; + } err = minix_set_link(dir_de, dir_folio, new_dir); if (!err) inode_dec_link_count(old_dir); -- 2.47.2

1 week, 4 days

2
1
0 0

FAILED: patch "[PATCH] highmem: add folio_test_partial_kmap()" failed to apply to 6.6-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 6.6-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y git checkout FETCH_HEAD git cherry-pick -x 97dfbbd135cb5e4426f37ca53a8fa87eaaa4e376 # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025052622-segment-presuming-e4c6@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^.. Possible dependencies: thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 97dfbbd135cb5e4426f37ca53a8fa87eaaa4e376 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" <willy(a)infradead.org> Date: Wed, 14 May 2025 18:06:02 +0100 Subject: [PATCH] highmem: add folio_test_partial_kmap() In commit c749d9b7ebbc ("iov_iter: fix copy_page_from_iter_atomic() if KMAP_LOCAL_FORCE_MAP"), Hugh correctly noted that if KMAP_LOCAL_FORCE_MAP is enabled, we must limit ourselves to PAGE_SIZE bytes per call to kmap_local(). The same problem exists in memcpy_from_folio(), memcpy_to_folio(), folio_zero_tail(), folio_fill_tail() and memcpy_from_file_folio(), so add folio_test_partial_kmap() to do this more succinctly. Link: https://lkml.kernel.org/r/20250514170607.3000994-2-willy@infradead.org Fixes: 00cdf76012ab ("mm: add memcpy_from_file_folio()") Signed-off-by: Matthew Wilcox (Oracle) <willy(a)infradead.org> Cc: Al Viro <viro(a)zeniv.linux.org.uk> Cc: Hugh Dickins <hughd(a)google.com> Cc: <stable(a)vger.kernel.org> Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org> diff --git a/include/linux/highmem.h b/include/linux/highmem.h index 5c6bea81a90e..c698f8415675 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -461,7 +461,7 @@ static inline void memcpy_from_folio(char *to, struct folio *folio, const char *from = kmap_local_folio(folio, offset); size_t chunk = len; - if (folio_test_highmem(folio) && + if (folio_test_partial_kmap(folio) && chunk > PAGE_SIZE - offset_in_page(offset)) chunk = PAGE_SIZE - offset_in_page(offset); memcpy(to, from, chunk); @@ -489,7 +489,7 @@ static inline void memcpy_to_folio(struct folio *folio, size_t offset, char *to = kmap_local_folio(folio, offset); size_t chunk = len; - if (folio_test_highmem(folio) && + if (folio_test_partial_kmap(folio) && chunk > PAGE_SIZE - offset_in_page(offset)) chunk = PAGE_SIZE - offset_in_page(offset); memcpy(to, from, chunk); @@ -522,7 +522,7 @@ static inline __must_check void *folio_zero_tail(struct folio *folio, { size_t len = folio_size(folio) - offset; - if (folio_test_highmem(folio)) { + if (folio_test_partial_kmap(folio)) { size_t max = PAGE_SIZE - offset_in_page(offset); while (len > max) { @@ -560,7 +560,7 @@ static inline void folio_fill_tail(struct folio *folio, size_t offset, VM_BUG_ON(offset + len > folio_size(folio)); - if (folio_test_highmem(folio)) { + if (folio_test_partial_kmap(folio)) { size_t max = PAGE_SIZE - offset_in_page(offset); while (len > max) { @@ -597,7 +597,7 @@ static inline size_t memcpy_from_file_folio(char *to, struct folio *folio, size_t offset = offset_in_folio(folio, pos); char *from = kmap_local_folio(folio, offset); - if (folio_test_highmem(folio)) { + if (folio_test_partial_kmap(folio)) { offset = offset_in_page(offset); len = min_t(size_t, len, PAGE_SIZE - offset); } else diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index e6a21b62dcce..3b814ce08331 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -615,6 +615,13 @@ FOLIO_FLAG(dropbehind, FOLIO_HEAD_PAGE) PAGEFLAG_FALSE(HighMem, highmem) #endif +/* Does kmap_local_folio() only allow access to one page of the folio? */ +#ifdef CONFIG_DEBUG_KMAP_LOCAL_FORCE_MAP +#define folio_test_partial_kmap(f) true +#else +#define folio_test_partial_kmap(f) folio_test_highmem(f) +#endif + #ifdef CONFIG_SWAP static __always_inline bool folio_test_swapcache(const struct folio *folio) {

1 week, 4 days

2
1
0 0

FAILED: patch "[PATCH] mm: fix copy_vma() error handling for hugetlb mappings" failed to apply to 5.4-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 5.4-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y git checkout FETCH_HEAD git cherry-pick -x ee40c9920ac286c5bfe7c811e66ff899266d2582 # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025052601-directive-strep-6b85@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^.. Possible dependencies: thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From ee40c9920ac286c5bfe7c811e66ff899266d2582 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Ca=C3=B1uelo=20Navarro?= <rcn(a)igalia.com> Date: Fri, 23 May 2025 14:19:10 +0200 Subject: [PATCH] mm: fix copy_vma() error handling for hugetlb mappings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If, during a mremap() operation for a hugetlb-backed memory mapping, copy_vma() fails after the source vma has been duplicated and opened (ie. vma_link() fails), the error is handled by closing the new vma. This updates the hugetlbfs reservation counter of the reservation map which at this point is referenced by both the source vma and the new copy. As a result, once the new vma has been freed and copy_vma() returns, the reservation counter for the source vma will be incorrect. This patch addresses this corner case by clearing the hugetlb private page reservation reference for the new vma and decrementing the reference before closing the vma, so that vma_close() won't update the reservation counter. This is also what copy_vma_and_data() does with the source vma if copy_vma() succeeds, so a helper function has been added to do the fixup in both functions. The issue was reported by a private syzbot instance and can be reproduced using the C reproducer in [1]. It's also a possible duplicate of public syzbot report [2]. The WARNING report is: ============================================================ page_counter underflow: -1024 nr_pages=1024 WARNING: CPU: 0 PID: 3287 at mm/page_counter.c:61 page_counter_cancel+0xf6/0x120 Modules linked in: CPU: 0 UID: 0 PID: 3287 Comm: repro__WARNING_ Not tainted 6.15.0-rc7+ #54 NONE Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.16.3-2-gc13ff2cd-prebuilt.qemu.org 04/01/2014 RIP: 0010:page_counter_cancel+0xf6/0x120 Code: ff 5b 41 5e 41 5f 5d c3 cc cc cc cc e8 f3 4f 8f ff c6 05 64 01 27 06 01 48 c7 c7 60 15 f8 85 48 89 de 4c 89 fa e8 2a a7 51 ff <0f> 0b e9 66 ff ff ff 44 89 f9 80 e1 07 38 c1 7c 9d 4c 81 RSP: 0018:ffffc900025df6a0 EFLAGS: 00010246 RAX: 2edfc409ebb44e00 RBX: fffffffffffffc00 RCX: ffff8880155f0000 RDX: 0000000000000000 RSI: 0000000000000001 RDI: 0000000000000000 RBP: dffffc0000000000 R08: ffffffff81c4a23c R09: 1ffff1100330482a R10: dffffc0000000000 R11: ffffed100330482b R12: 0000000000000000 R13: ffff888058a882c0 R14: ffff888058a882c0 R15: 0000000000000400 FS: 0000000000000000(0000) GS:ffff88808fc53000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000004b33e0 CR3: 00000000076d6000 CR4: 00000000000006f0 Call Trace: <TASK> page_counter_uncharge+0x33/0x80 hugetlb_cgroup_uncharge_counter+0xcb/0x120 hugetlb_vm_op_close+0x579/0x960 ? __pfx_hugetlb_vm_op_close+0x10/0x10 remove_vma+0x88/0x130 exit_mmap+0x71e/0xe00 ? __pfx_exit_mmap+0x10/0x10 ? __mutex_unlock_slowpath+0x22e/0x7f0 ? __pfx_exit_aio+0x10/0x10 ? __up_read+0x256/0x690 ? uprobe_clear_state+0x274/0x290 ? mm_update_next_owner+0xa9/0x810 __mmput+0xc9/0x370 exit_mm+0x203/0x2f0 ? __pfx_exit_mm+0x10/0x10 ? taskstats_exit+0x32b/0xa60 do_exit+0x921/0x2740 ? do_raw_spin_lock+0x155/0x3b0 ? __pfx_do_exit+0x10/0x10 ? __pfx_do_raw_spin_lock+0x10/0x10 ? _raw_spin_lock_irq+0xc5/0x100 do_group_exit+0x20c/0x2c0 get_signal+0x168c/0x1720 ? __pfx_get_signal+0x10/0x10 ? schedule+0x165/0x360 arch_do_signal_or_restart+0x8e/0x7d0 ? __pfx_arch_do_signal_or_restart+0x10/0x10 ? __pfx___se_sys_futex+0x10/0x10 syscall_exit_to_user_mode+0xb8/0x2c0 do_syscall_64+0x75/0x120 entry_SYSCALL_64_after_hwframe+0x76/0x7e RIP: 0033:0x422dcd Code: Unable to access opcode bytes at 0x422da3. RSP: 002b:00007ff266cdb208 EFLAGS: 00000246 ORIG_RAX: 00000000000000ca RAX: 0000000000000001 RBX: 00007ff266cdbcdc RCX: 0000000000422dcd RDX: 00000000000f4240 RSI: 0000000000000081 RDI: 00000000004c7bec RBP: 00007ff266cdb220 R08: 203a6362696c6720 R09: 203a6362696c6720 R10: 0000200000c00000 R11: 0000000000000246 R12: ffffffffffffffd0 R13: 0000000000000002 R14: 00007ffe1cb5f520 R15: 00007ff266cbb000 </TASK> ============================================================ Link: https://lkml.kernel.org/r/20250523-warning_in_page_counter_cancel-v2-1-b6df… Link: https://people.igalia.com/rcn/kernel_logs/20250422__WARNING_in_page_counter… [1] Link: https://lore.kernel.org/all/67000a50.050a0220.49194.048d.GAE@google.com/ [2] Signed-off-by: Ricardo Cañuelo Navarro <rcn(a)igalia.com> Suggested-by: Lorenzo Stoakes <lorenzo.stoakes(a)oracle.com> Reviewed-by: Liam R. Howlett <Liam.Howlett(a)oracle.com> Cc: Florent Revest <revest(a)google.com> Cc: Jann Horn <jannh(a)google.com> Cc: Oscar Salvador <osalvador(a)suse.de> Cc: Vlastimil Babka <vbabka(a)suse.cz> Cc: <stable(a)vger.kernel.org> Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org> diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 8f3ac832ee7f..4861a7e304bb 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -275,6 +275,7 @@ long hugetlb_change_protection(struct vm_area_struct *vma, bool is_hugetlb_entry_migration(pte_t pte); bool is_hugetlb_entry_hwpoisoned(pte_t pte); void hugetlb_unshare_all_pmds(struct vm_area_struct *vma); +void fixup_hugetlb_reservations(struct vm_area_struct *vma); #else /* !CONFIG_HUGETLB_PAGE */ @@ -468,6 +469,10 @@ static inline vm_fault_t hugetlb_fault(struct mm_struct *mm, static inline void hugetlb_unshare_all_pmds(struct vm_area_struct *vma) { } +static inline void fixup_hugetlb_reservations(struct vm_area_struct *vma) +{ +} + #endif /* !CONFIG_HUGETLB_PAGE */ #ifndef pgd_write diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 55e6796b24d0..6a3cf7935c14 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -1250,7 +1250,7 @@ void hugetlb_dup_vma_private(struct vm_area_struct *vma) /* * Reset and decrement one ref on hugepage private reservation. * Called with mm->mmap_lock writer semaphore held. - * This function should be only used by move_vma() and operate on + * This function should be only used by mremap and operate on * same sized vma. It should never come here with last ref on the * reservation. */ @@ -7939,3 +7939,17 @@ void hugetlb_unshare_all_pmds(struct vm_area_struct *vma) hugetlb_unshare_pmds(vma, ALIGN(vma->vm_start, PUD_SIZE), ALIGN_DOWN(vma->vm_end, PUD_SIZE)); } + +/* + * For hugetlb, mremap() is an odd edge case - while the VMA copying is + * performed, we permit both the old and new VMAs to reference the same + * reservation. + * + * We fix this up after the operation succeeds, or if a newly allocated VMA + * is closed as a result of a failure to allocate memory. + */ +void fixup_hugetlb_reservations(struct vm_area_struct *vma) +{ + if (is_vm_hugetlb_page(vma)) + clear_vma_resv_huge_pages(vma); +} diff --git a/mm/mremap.c b/mm/mremap.c index 7db9da609c84..0d4948b720e2 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -1188,8 +1188,7 @@ static int copy_vma_and_data(struct vma_remap_struct *vrm, mremap_userfaultfd_prep(new_vma, vrm->uf); } - if (is_vm_hugetlb_page(vma)) - clear_vma_resv_huge_pages(vma); + fixup_hugetlb_reservations(vma); /* Tell pfnmap has moved from this vma */ if (unlikely(vma->vm_flags & VM_PFNMAP)) diff --git a/mm/vma.c b/mm/vma.c index 839d12f02c88..a468d4c29c0c 100644 --- a/mm/vma.c +++ b/mm/vma.c @@ -1834,6 +1834,7 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap, return new_vma; out_vma_link: + fixup_hugetlb_reservations(new_vma); vma_close(new_vma); if (new_vma->vm_file)

1 week, 4 days

3
2
0 0

[PATCH] ext4: inline: do not convert when writing to memory map

by Thadeu Lima de Souza Cascardo

inline data handling has a race between writing and writing to a memory map. When ext4_page_mkwrite is called, it calls ext4_convert_inline_data, which destroys the inline data, but if block allocation fails, restores the inline data. In that process, we could have: CPU1 CPU2 destroy_inline_data write_begin (does not see inline data) restory_inline_data write_end (sees inline data) This leads to bugs like the one below, as write_begin did not prepare for the case of inline data, which is expected by the write_end side of it. ------------[ cut here ]------------ kernel BUG at fs/ext4/inline.c:235! Oops: invalid opcode: 0000 [#1] PREEMPT SMP KASAN NOPTI CPU: 1 UID: 0 PID: 5838 Comm: syz-executor110 Not tainted 6.13.0-rc3-syzkaller-00209-g499551201b5f #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 09/13/2024 RIP: 0010:ext4_write_inline_data fs/ext4/inline.c:235 [inline] RIP: 0010:ext4_write_inline_data_end+0xdc7/0xdd0 fs/ext4/inline.c:774 Code: 47 1d 8c e8 4b 3a 91 ff 90 0f 0b e8 63 7a 47 ff 48 8b 7c 24 10 48 c7 c6 e0 47 1d 8c e8 32 3a 91 ff 90 0f 0b e8 4a 7a 47 ff 90 <0f> 0b 0f 1f 80 00 00 00 00 90 90 90 90 90 90 90 90 90 90 90 90 90 RSP: 0018:ffffc900031c7320 EFLAGS: 00010293 RAX: ffffffff8257f9a6 RBX: 000000000000005a RCX: ffff888012968000 RDX: 0000000000000000 RSI: 000000000000005a RDI: 000000000000005b RBP: ffffc900031c7448 R08: ffffffff8257ef87 R09: 1ffff11006806070 R10: dffffc0000000000 R11: ffffed1006806071 R12: 000000000000005a R13: dffffc0000000000 R14: ffff888076b65bd8 R15: 000000000000005b FS: 00007f5c6bacf6c0(0000) GS:ffff8880b8700000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000020000a00 CR3: 0000000073fb6000 CR4: 0000000000350ef0 Call Trace: <TASK> generic_perform_write+0x6f8/0x990 mm/filemap.c:4070 ext4_buffered_write_iter+0xc5/0x350 fs/ext4/file.c:299 ext4_file_write_iter+0x892/0x1c50 iter_file_splice_write+0xbfc/0x1510 fs/splice.c:743 do_splice_from fs/splice.c:941 [inline] direct_splice_actor+0x11d/0x220 fs/splice.c:1164 splice_direct_to_actor+0x588/0xc80 fs/splice.c:1108 do_splice_direct_actor fs/splice.c:1207 [inline] do_splice_direct+0x289/0x3e0 fs/splice.c:1233 do_sendfile+0x564/0x8a0 fs/read_write.c:1363 __do_sys_sendfile64 fs/read_write.c:1424 [inline] __se_sys_sendfile64+0x17c/0x1e0 fs/read_write.c:1410 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xf3/0x230 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f RIP: 0033:0x7f5c6bb18d09 Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 b1 18 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b0 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007f5c6bacf218 EFLAGS: 00000246 ORIG_RAX: 0000000000000028 RAX: ffffffffffffffda RBX: 00007f5c6bba0708 RCX: 00007f5c6bb18d09 RDX: 0000000000000000 RSI: 0000000000000005 RDI: 0000000000000004 RBP: 00007f5c6bba0700 R08: 0000000000000000 R09: 0000000000000000 R10: 000080001d00c0d0 R11: 0000000000000246 R12: 00007f5c6bb6d620 R13: 00007f5c6bb6d0c0 R14: 0031656c69662f2e R15: 8088e3ad122bc192 </TASK> Modules linked in: ---[ end trace 0000000000000000 ]--- This happens because ext4_page_mkwrite is not protected by the inode_lock. The xattr semaphore is not sufficient to protect inline data handling in a sane way, so we need to rely on the inode_lock. Adding the inode_lock to ext4_page_mkwrite is not an option, otherwise lock-ordering problems with mmap_lock may arise. The conversion inside ext4_page_mkwrite was introduced at commit 7b4cc9787fe3 ("ext4: evict inline data when writing to memory map"). This fixes a documented bug in the commit message, which suggests some alternatives fixes. The alternative of keeping the data inline is left as an exercise for the reader. Instead, block allocation still happens, just as it does right now. But removal of the inline data is only done when pages are written back. Fixes: 7b4cc9787fe3 ("ext4: evict inline data when writing to memory map") Reported-by: syzbot+0c89d865531d053abb2d(a)syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=0c89d865531d053abb2d Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo(a)igalia.com> Cc: stable(a)vger.kernel.org --- fs/ext4/inode.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 94c7d2d828a64e42ded09c82497ed7617071aa19..38653d5c8b32ede2b130285ab13ef1e96b1ba783 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -2620,8 +2620,7 @@ static int ext4_do_writepages(struct mpage_da_data *mpd) ret = PTR_ERR(handle); goto out_writepages; } - BUG_ON(ext4_test_inode_state(inode, - EXT4_STATE_MAY_INLINE_DATA)); + ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA); ext4_destroy_inline_data(handle, inode); ext4_journal_stop(handle); } @@ -6222,10 +6221,6 @@ vm_fault_t ext4_page_mkwrite(struct vm_fault *vmf) filemap_invalidate_lock_shared(mapping); - err = ext4_convert_inline_data(inode); - if (err) - goto out_ret; - /* * On data journalling we skip straight to the transaction handle: * there's no delalloc; page truncated will be checked later; the --- base-commit: a5806cd506af5a7c19bcd596e4708b5c464bfd21 change-id: 20250519-ext4_inline_page_mkwrite-c42ca1f02295 Best regards, -- Thadeu Lima de Souza Cascardo <cascardo(a)igalia.com>

1 week, 4 days

3
4
0 0

2025

2024

2023

2022

2021

2020

2019

2018

2017

Linux-stable-mirror May 2025