Commit 16c07342b542 ("gpiolib: acpi: Program debounce when finding GPIO")
adds a gpio_set_debounce_timeout() call to acpi_find_gpio() and makes
acpi_find_gpio() fail if this fails.
But gpio_set_debounce_timeout() failing is a somewhat normal occurrence,
since not all debounce values are supported on all GPIO/pinctrl chips.
Making this an error for example break getting the card-detect GPIO for
the micro-sd slot found on many Bay Trail tablets, breaking support for
the micro-sd slot on these tablets.
acpi_request_own_gpiod() already treats gpio_set_debounce_timeout()
failures as non-fatal, just warning about them.
Add a acpi_gpio_set_debounce_timeout() helper which wraps
gpio_set_debounce_timeout() and warns on failures and replace both existing
gpio_set_debounce_timeout() calls with the helper.
Since the helper only warns on failures this fixes the card-detect issue.
Fixes: 16c07342b542 ("gpiolib: acpi: Program debounce when finding GPIO")
Cc: stable(a)vger.kernel.org
Cc: Mario Limonciello <superm1(a)kernel.org>
Signed-off-by: Hans de Goede <hansg(a)kernel.org>
---
drivers/gpio/gpiolib-acpi-core.c | 27 +++++++++++++++------------
1 file changed, 15 insertions(+), 12 deletions(-)
diff --git a/drivers/gpio/gpiolib-acpi-core.c b/drivers/gpio/gpiolib-acpi-core.c
index 284e762d92c4..67c4c38afb86 100644
--- a/drivers/gpio/gpiolib-acpi-core.c
+++ b/drivers/gpio/gpiolib-acpi-core.c
@@ -291,6 +291,19 @@ acpi_gpio_to_gpiod_flags(const struct acpi_resource_gpio *agpio, int polarity)
return GPIOD_ASIS;
}
+static void acpi_gpio_set_debounce_timeout(struct gpio_desc *desc,
+ unsigned int acpi_debounce)
+{
+ int ret;
+
+ /* ACPI uses hundredths of milliseconds units */
+ acpi_debounce *= 10;
+ ret = gpio_set_debounce_timeout(desc, acpi_debounce);
+ if (ret)
+ gpiod_warn(desc, "Failed to set debounce-timeout %u: %d\n",
+ acpi_debounce, ret);
+}
+
static struct gpio_desc *acpi_request_own_gpiod(struct gpio_chip *chip,
struct acpi_resource_gpio *agpio,
unsigned int index,
@@ -300,18 +313,12 @@ static struct gpio_desc *acpi_request_own_gpiod(struct gpio_chip *chip,
enum gpiod_flags flags = acpi_gpio_to_gpiod_flags(agpio, polarity);
unsigned int pin = agpio->pin_table[index];
struct gpio_desc *desc;
- int ret;
desc = gpiochip_request_own_desc(chip, pin, label, polarity, flags);
if (IS_ERR(desc))
return desc;
- /* ACPI uses hundredths of milliseconds units */
- ret = gpio_set_debounce_timeout(desc, agpio->debounce_timeout * 10);
- if (ret)
- dev_warn(chip->parent,
- "Failed to set debounce-timeout for pin 0x%04X, err %d\n",
- pin, ret);
+ acpi_gpio_set_debounce_timeout(desc, agpio->debounce_timeout);
return desc;
}
@@ -944,7 +951,6 @@ struct gpio_desc *acpi_find_gpio(struct fwnode_handle *fwnode,
bool can_fallback = acpi_can_fallback_to_crs(adev, con_id);
struct acpi_gpio_info info = {};
struct gpio_desc *desc;
- int ret;
desc = __acpi_find_gpio(fwnode, con_id, idx, can_fallback, &info);
if (IS_ERR(desc))
@@ -959,10 +965,7 @@ struct gpio_desc *acpi_find_gpio(struct fwnode_handle *fwnode,
acpi_gpio_update_gpiod_flags(dflags, &info);
acpi_gpio_update_gpiod_lookup_flags(lookupflags, &info);
- /* ACPI uses hundredths of milliseconds units */
- ret = gpio_set_debounce_timeout(desc, info.debounce * 10);
- if (ret)
- return ERR_PTR(ret);
+ acpi_gpio_set_debounce_timeout(desc, info.debounce);
return desc;
}
--
2.51.0
When a software-node gets added to a device which already has another
fwnode as primary node it will become the secondary fwnode for that
device.
Currently if a software-node with GPIO properties ends up as the secondary
fwnode then gpiod_find_by_fwnode() will fail to find the GPIOs.
Add a new gpiod_fwnode_lookup() helper which falls back to calling
gpiod_find_by_fwnode() with the secondary fwnode if the GPIO was not
found in the primary fwnode.
Fixes: e7f9ff5dc90c ("gpiolib: add support for software nodes")
Cc: stable(a)vger.kernel.org
Cc: Dmitry Torokhov <dmitry.torokhov(a)gmail.com>
Signed-off-by: Hans de Goede <hansg(a)kernel.org>
---
Changes in v2:
- Add a new gpiod_fwnode_lookup() helper instead of putting the secondary
fwnode check inside gpiod_find_by_fwnode()
---
drivers/gpio/gpiolib.c | 21 +++++++++++++++++++--
1 file changed, 19 insertions(+), 2 deletions(-)
diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index 0d2b470a252e..74d54513730a 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -4604,6 +4604,23 @@ static struct gpio_desc *gpiod_find_by_fwnode(struct fwnode_handle *fwnode,
return desc;
}
+static struct gpio_desc *gpiod_fwnode_lookup(struct fwnode_handle *fwnode,
+ struct device *consumer,
+ const char *con_id,
+ unsigned int idx,
+ enum gpiod_flags *flags,
+ unsigned long *lookupflags)
+{
+ struct gpio_desc *desc;
+
+ desc = gpiod_find_by_fwnode(fwnode, consumer, con_id, idx, flags, lookupflags);
+ if (gpiod_not_found(desc) && !IS_ERR_OR_NULL(fwnode))
+ desc = gpiod_find_by_fwnode(fwnode->secondary, consumer, con_id,
+ idx, flags, lookupflags);
+
+ return desc;
+}
+
struct gpio_desc *gpiod_find_and_request(struct device *consumer,
struct fwnode_handle *fwnode,
const char *con_id,
@@ -4622,8 +4639,8 @@ struct gpio_desc *gpiod_find_and_request(struct device *consumer,
int ret = 0;
scoped_guard(srcu, &gpio_devices_srcu) {
- desc = gpiod_find_by_fwnode(fwnode, consumer, con_id, idx,
- &flags, &lookupflags);
+ desc = gpiod_fwnode_lookup(fwnode, consumer, con_id, idx,
+ &flags, &lookupflags);
if (gpiod_not_found(desc) && platform_lookup_allowed) {
/*
* Either we are not using DT or ACPI, or their lookup
--
2.51.0
ep_events_available() checks for available events by looking at ep->rdllist
and ep->ovflist. However, this is done without a lock, therefore the
returned value is not reliable. Because it is possible that both checks on
ep->rdllist and ep->ovflist are false while ep_start_scan() or
ep_done_scan() is being executed on other CPUs, despite events are
available.
This bug can be observed by:
1. Create an eventpoll with at least one ready level-triggered event
2. Create multiple threads who do epoll_wait() with zero timeout. The
threads do not consume the events, therefore all epoll_wait() should
return at least one event.
If one thread is executing ep_events_available() while another thread is
executing ep_start_scan() or ep_done_scan(), epoll_wait() may wrongly
return no event for the former thread.
This reproducer is implemented as TEST(epoll65) in
tools/testing/selftests/filesystems/epoll/epoll_wakeup_test.c
Fix it by skipping ep_events_available(), just call ep_try_send_events()
directly.
epoll_sendevents() (io_uring) suffers the same problem, fix that as well.
There is still ep_busy_loop() who uses ep_events_available() without lock,
but it is probably okay (?) for busy-polling.
Fixes: c5a282e9635e ("fs/epoll: reduce the scope of wq lock in epoll_wait()")
Fixes: e59d3c64cba6 ("epoll: eliminate unnecessary lock for zero timeout")
Fixes: ae3a4f1fdc2c ("eventpoll: add epoll_sendevents() helper")
Signed-off-by: Nam Cao <namcao(a)linutronix.de>
Cc: stable(a)vger.kernel.org
---
fs/eventpoll.c | 16 ++--------------
1 file changed, 2 insertions(+), 14 deletions(-)
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 0fbf5dfedb24..541481eafc20 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -2022,7 +2022,7 @@ static int ep_schedule_timeout(ktime_t *to)
static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
int maxevents, struct timespec64 *timeout)
{
- int res, eavail, timed_out = 0;
+ int res, eavail = 1, timed_out = 0;
u64 slack = 0;
wait_queue_entry_t wait;
ktime_t expires, *to = NULL;
@@ -2041,16 +2041,6 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
timed_out = 1;
}
- /*
- * This call is racy: We may or may not see events that are being added
- * to the ready list under the lock (e.g., in IRQ callbacks). For cases
- * with a non-zero timeout, this thread will check the ready list under
- * lock and will add to the wait queue. For cases with a zero
- * timeout, the user by definition should not care and will have to
- * recheck again.
- */
- eavail = ep_events_available(ep);
-
while (1) {
if (eavail) {
res = ep_try_send_events(ep, events, maxevents);
@@ -2496,9 +2486,7 @@ int epoll_sendevents(struct file *file, struct epoll_event __user *events,
* Racy call, but that's ok - it should get retried based on
* poll readiness anyway.
*/
- if (ep_events_available(ep))
- return ep_try_send_events(ep, events, maxevents);
- return 0;
+ return ep_try_send_events(ep, events, maxevents);
}
/*
--
2.39.5
The mei_ace driver contains a device reference count leak in
mei_ace_setup_dev_link() where device_find_child_by_name() increases
the reference count of the found device but this reference is not
properly decreased in the success path. Add put_device() in
mei_ace_setup_dev_link() and delete put_device() in mei_ace_remove(),
which ensures that the reference count of the device is correctly
managed regardless of whether the probe is successful or fails.
Found by code review.
Cc: stable(a)vger.kernel.org
Fixes: 78876f71b3e9 ("media: pci: intel: ivsc: Add ACE submodule")
Signed-off-by: Ma Ke <make24(a)iscas.ac.cn>
---
drivers/media/pci/intel/ivsc/mei_ace.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/media/pci/intel/ivsc/mei_ace.c b/drivers/media/pci/intel/ivsc/mei_ace.c
index 98310b8511b1..261b30788118 100644
--- a/drivers/media/pci/intel/ivsc/mei_ace.c
+++ b/drivers/media/pci/intel/ivsc/mei_ace.c
@@ -421,6 +421,7 @@ static int mei_ace_setup_dev_link(struct mei_ace *ace)
}
ace->csi_dev = csi_dev;
+ put_device(csi_dev);
return 0;
@@ -522,7 +523,6 @@ static void mei_ace_remove(struct mei_cl_device *cldev)
cancel_work_sync(&ace->work);
device_link_del(ace->csi_link);
- put_device(ace->csi_dev);
pm_runtime_disable(&cldev->dev);
pm_runtime_set_suspended(&cldev->dev);
--
2.17.1
The callback return value is ignored in damon_sysfs_damon_call(), which
means that it is not possible to detect invalid user input when writing
commands such as 'commit' to /sys/kernel/mm/damon/admin/kdamonds/<K>/state.
Fix it.
Signed-off-by: Akinobu Mita <akinobu.mita(a)gmail.com>
Fixes: f64539dcdb87 ("mm/damon/sysfs: use damon_call() for update_schemes_stats")
Cc: <stable(a)vger.kernel.org> # v6.14.x
Reviewed-by: SeongJae Park <sj(a)kernel.org>
---
mm/damon/sysfs.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/mm/damon/sysfs.c b/mm/damon/sysfs.c
index fe4e73d0ebbb..3ffe3a77b5db 100644
--- a/mm/damon/sysfs.c
+++ b/mm/damon/sysfs.c
@@ -1627,12 +1627,14 @@ static int damon_sysfs_damon_call(int (*fn)(void *data),
struct damon_sysfs_kdamond *kdamond)
{
struct damon_call_control call_control = {};
+ int err;
if (!kdamond->damon_ctx)
return -EINVAL;
call_control.fn = fn;
call_control.data = kdamond;
- return damon_call(kdamond->damon_ctx, &call_control);
+ err = damon_call(kdamond->damon_ctx, &call_control);
+ return err ? err : call_control.return_code;
}
struct damon_sysfs_schemes_walk_data {
--
2.43.0
Once of_device_register() failed, we should call put_device() to
decrement reference count for cleanup. Or it could cause memory leak.
So fix this by calling put_device(), then the name can be freed in
kobject_cleanup().
Calling path: of_device_register() -> of_device_add() -> device_add().
As comment of device_add() says, 'if device_add() succeeds, you should
call device_del() when you want to get rid of it. If device_add() has
not succeeded, use only put_device() to drop the reference count'.
Found by code review.
Cc: stable(a)vger.kernel.org
Fixes: cf44bbc26cf1 ("[SPARC]: Beginnings of generic of_device framework.")
Signed-off-by: Ma Ke <make24(a)iscas.ac.cn>
---
Changes in v3:
- also fixed the same problem in arch/sparc/kernel/of_device_32.c as suggestions.
Changes in v2:
- retained kfree() manually due to the lack of a release callback function.
---
arch/sparc/kernel/of_device_32.c | 1 +
arch/sparc/kernel/of_device_64.c | 1 +
2 files changed, 2 insertions(+)
diff --git a/arch/sparc/kernel/of_device_32.c b/arch/sparc/kernel/of_device_32.c
index 06012e68bdca..284a4cafa432 100644
--- a/arch/sparc/kernel/of_device_32.c
+++ b/arch/sparc/kernel/of_device_32.c
@@ -387,6 +387,7 @@ static struct platform_device * __init scan_one_device(struct device_node *dp,
if (of_device_register(op)) {
printk("%pOF: Could not register of device.\n", dp);
+ put_device(&op->dev);
kfree(op);
op = NULL;
}
diff --git a/arch/sparc/kernel/of_device_64.c b/arch/sparc/kernel/of_device_64.c
index f98c2901f335..f53092b07b9e 100644
--- a/arch/sparc/kernel/of_device_64.c
+++ b/arch/sparc/kernel/of_device_64.c
@@ -677,6 +677,7 @@ static struct platform_device * __init scan_one_device(struct device_node *dp,
if (of_device_register(op)) {
printk("%pOF: Could not register of device.\n", dp);
+ put_device(&op->dev);
kfree(op);
op = NULL;
}
--
2.17.1
It seems like everywhere in this file, when the request is not
bidirectionala, req->src is mapped with DMA_TO_DEVICE and req->dst is
mapped with DMA_FROM_DEVICE.
Fixes: 62f58b1637b7 ("crypto: aspeed - add HACE crypto driver")
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Thomas Fourier <fourier.thomas(a)gmail.com>
---
v1->v2:
- fix confusion between dst and src in commit message
drivers/crypto/aspeed/aspeed-hace-crypto.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/crypto/aspeed/aspeed-hace-crypto.c b/drivers/crypto/aspeed/aspeed-hace-crypto.c
index a72dfebc53ff..fa201dae1f81 100644
--- a/drivers/crypto/aspeed/aspeed-hace-crypto.c
+++ b/drivers/crypto/aspeed/aspeed-hace-crypto.c
@@ -346,7 +346,7 @@ static int aspeed_sk_start_sg(struct aspeed_hace_dev *hace_dev)
} else {
dma_unmap_sg(hace_dev->dev, req->dst, rctx->dst_nents,
- DMA_TO_DEVICE);
+ DMA_FROM_DEVICE);
dma_unmap_sg(hace_dev->dev, req->src, rctx->src_nents,
DMA_TO_DEVICE);
}
--
2.43.0
This series includes a total of 27 patches, to align minmax.h of
v5.15.y with v6.17-rc6.
The set consists of 24 commits that directly update minmax.h:
1) 92d23c6e9415 ("overflow, tracing: Define the is_signed_type() macro
once")
2) 5efcecd9a3b1 ("minmax: sanity check constant bounds when clamping")
3) 2122e2a4efc2 ("minmax: clamp more efficiently by avoiding extra
comparison")
4) f9bff0e31881 ("minmax: add in_range() macro")
5) c952c748c7a9 ("minmax: Introduce {min,max}_array()")
6) 5e57418a2031 ("minmax: deduplicate __unconst_integer_typeof()")
7) f6e9d38f8eb0 ("minmax: fix header inclusions")
8) d03eba99f5bf ("minmax: allow min()/max()/clamp() if the arguments
have the same signedness.")
9) f4b84b2ff851 ("minmax: fix indentation of __cmp_once() and
__clamp_once()")
10) 4ead534fba42 ("minmax: allow comparisons of 'int' against 'unsigned
char/short'")
11) 867046cc7027 ("minmax: relax check to allow comparison between
unsigned arguments and signed constants")
12) 3a7e02c040b1 ("minmax: avoid overly complicated constant
expressions in VM code")
14) 017fa3e89187 ("minmax: simplify and clarify min_t()/max_t()
implementation")
15) 1a251f52cfdc ("minmax: make generic MIN() and MAX() macros
available everywhere")
18) dc1c8034e31b ("minmax: simplify min()/max()/clamp()
implementation")
19) 22f546873149 ("minmax: improve macro expansion and type
checking")
20) 21b136cc63d2 ("minmax: fix up min3() and max3() too")
21) 71ee9b16251e ("minmax.h: add whitespace around operators and after
commas")
22) 10666e992048 ("minmax.h: update some comments")
23) b280bb27a9f7 ("minmax.h: reduce the #define expansion of min(),
max() and clamp()")
24) a5743f32baec ("minmax.h: use BUILD_BUG_ON_MSG() for the lo < hi
test in clamp()")
25) c3939872ee4a ("minmax.h: move all the clamp() definitions after the
min/max() ones")
26) 495bba17cdf9 ("minmax.h: simplify the variants of clamp()")
27) 2b97aaf74ed5 ("minmax.h: remove some #defines that are only
expanded once")
2 prerequisite commits that adjust users of MIN and MAX macros (to
prevent compilation issues):
13) 4477b39c32fd ("minmax: add a few more MIN_T/MAX_T users")
17) cb04e8b1d2f2 ("minmax: don't use max() in situations that want a C
constant expression")
1 additional commit introduced to resolve a build failures during the
backport:
16) lib: zstd: drop local MIN/MAX macros in favor of generic ones
The primary motivation is to bring in commit (8).
In mainline, this change allows min()/max()/clamp() to accept mixed
argument types when both share the same signedness.
Backported patches to v5.10.y that use such forms trigger compiler
warnings, which in turn cause build failures when -Werror is enabled.
Originaly I aligned 5.10.y to 5.15.y, but David Laight commented that I
need to pick up the later changes (from Linus) as well.
Andy Shevchenko (2):
minmax: deduplicate __unconst_integer_typeof()
minmax: fix header inclusions
Bart Van Assche (1):
overflow, tracing: Define the is_signed_type() macro once
David Laight (11):
minmax: allow min()/max()/clamp() if the arguments have the same
signedness.
minmax: fix indentation of __cmp_once() and __clamp_once()
minmax: allow comparisons of 'int' against 'unsigned char/short'
minmax: relax check to allow comparison between unsigned arguments and
signed constants
minmax.h: add whitespace around operators and after commas
minmax.h: update some comments
minmax.h: reduce the #define expansion of min(), max() and clamp()
minmax.h: use BUILD_BUG_ON_MSG() for the lo < hi test in clamp()
minmax.h: move all the clamp() definitions after the min/max() ones
minmax.h: simplify the variants of clamp()
minmax.h: remove some #defines that are only expanded once
Eliav Farber (1):
lib: zstd: drop local MIN/MAX macros in favor of generic ones
Herve Codina (1):
minmax: Introduce {min,max}_array()
Jason A. Donenfeld (2):
minmax: sanity check constant bounds when clamping
minmax: clamp more efficiently by avoiding extra comparison
Linus Torvalds (8):
minmax: avoid overly complicated constant expressions in VM code
minmax: add a few more MIN_T/MAX_T users
minmax: simplify and clarify min_t()/max_t() implementation
minmax: make generic MIN() and MAX() macros available everywhere
minmax: don't use max() in situations that want a C constant
expression
minmax: simplify min()/max()/clamp() implementation
minmax: improve macro expansion and type checking
minmax: fix up min3() and max3() too
Matthew Wilcox (Oracle) (1):
minmax: add in_range() macro
arch/arm/mm/pageattr.c | 6 +-
arch/um/drivers/mconsole_user.c | 2 +
arch/x86/mm/pgtable.c | 2 +-
drivers/edac/sb_edac.c | 4 +-
drivers/edac/skx_common.h | 1 -
.../drm/amd/display/modules/hdcp/hdcp_ddc.c | 2 +
.../drm/amd/pm/powerplay/hwmgr/ppevvmath.h | 14 +-
.../drm/arm/display/include/malidp_utils.h | 2 +-
.../display/komeda/komeda_pipeline_state.c | 24 +-
drivers/gpu/drm/drm_color_mgmt.c | 2 +-
drivers/gpu/drm/msm/adreno/a6xx_gmu.c | 6 -
drivers/gpu/drm/radeon/evergreen_cs.c | 2 +
drivers/hwmon/adt7475.c | 24 +-
drivers/input/touchscreen/cyttsp4_core.c | 2 +-
drivers/md/dm-integrity.c | 2 +-
drivers/media/dvb-frontends/stv0367_priv.h | 3 +
.../net/ethernet/chelsio/cxgb3/cxgb3_main.c | 18 +-
.../net/ethernet/stmicro/stmmac/stmmac_main.c | 2 +-
drivers/net/fjes/fjes_main.c | 4 +-
drivers/nfc/pn544/i2c.c | 2 -
drivers/platform/x86/sony-laptop.c | 1 -
drivers/scsi/isci/init.c | 6 +-
.../pci/hive_isp_css_include/math_support.h | 5 -
fs/btrfs/misc.h | 2 -
fs/btrfs/tree-checker.c | 2 +-
fs/ext2/balloc.c | 2 -
fs/ext4/ext4.h | 2 -
fs/ufs/util.h | 6 -
include/linux/compiler.h | 15 +
include/linux/minmax.h | 267 ++++++++++++++----
include/linux/overflow.h | 1 -
include/linux/trace_events.h | 2 -
kernel/trace/preemptirq_delay_test.c | 2 -
lib/btree.c | 1 -
lib/decompress_unlzma.c | 2 +
lib/logic_pio.c | 3 -
lib/vsprintf.c | 2 +-
lib/zstd/zstd_internal.h | 2 -
mm/zsmalloc.c | 1 -
net/ipv4/proc.c | 2 +-
net/ipv6/proc.c | 2 +-
net/netfilter/nf_nat_core.c | 6 +-
net/tipc/core.h | 2 +-
net/tipc/link.c | 10 +-
44 files changed, 306 insertions(+), 164 deletions(-)
--
2.47.3
In of_unittest_pci_node_verify(), when the add parameter is false,
device_find_any_child() obtains a reference to a child device. This
function implicitly calls get_device() to increment the device's
reference count before returning the pointer. However, the caller
fails to properly release this reference by calling put_device(),
leading to a device reference count leak.
As the comment of device_find_any_child states: "NOTE: you will need
to drop the reference with put_device() after use".
Cc: stable(a)vger.kernel.org
Fixes: 26409dd04589 ("of: unittest: Add pci_dt_testdrv pci driver")
Signed-off-by: Ma Ke <make24(a)iscas.ac.cn>
---
drivers/of/unittest.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/drivers/of/unittest.c b/drivers/of/unittest.c
index e3503ec20f6c..d225e73781fe 100644
--- a/drivers/of/unittest.c
+++ b/drivers/of/unittest.c
@@ -4271,7 +4271,7 @@ static struct platform_driver unittest_pci_driver = {
static int of_unittest_pci_node_verify(struct pci_dev *pdev, bool add)
{
struct device_node *pnp, *np = NULL;
- struct device *child_dev;
+ struct device *child_dev = NULL;
char *path = NULL;
const __be32 *reg;
int rc = 0;
@@ -4306,6 +4306,8 @@ static int of_unittest_pci_node_verify(struct pci_dev *pdev, bool add)
kfree(path);
if (np)
of_node_put(np);
+ if (child_dev)
+ put_device(child_dev);
return rc;
}
--
2.17.1
Greetings!!
We are a 24+ yr old high tech Web Development firm with presence of over
18+ yrs in Mauritius; partners of RV Tec hAdvisora Ltd and headquartered in
India
We have catered to over 7000 customers. You may visit
https://www.mirackle.com for more information about our company. We create
designs that help businesses and individuals attract and engage readers. We
work with all the latest technologies.
We are Authorized Google Workspace Reseller Partner for Asia Pacific region
including Mauritius.
Our Services: Domain Registrations, Web hosting, Google Workspace, Mobile
Responsive Website Designing, Wordpress Websites, Mobile Apps, Web Apps,
E-commerce websites, Google Ads, SEO, Catalogue design & affiliated services
We create beautiful designs. Our brief website portfolio:
http://www.mirackle.com/portfolio.html
Note: We are also looking for tie-ups with IT/Web design cos. who would
want to outsource work for high end Websites/Mobile APP requirements etc.
We have a team of highly skilled php coders who can cater to any complex
requirement.
Get in touch with to get the best prices & offers
India Whatsapp: +91 9323272846 / 9323551195; Mauritius WharsApp: +230 5758
5497; Email: business(a)mirackle.com ; Web: http://www.mirackle.com
Regards,
Nishith Patel
Make sure we return the right pud value and not a value that could
have been overwritten in between by a different core.
Fixes: c3cc2a4a3a23 ("riscv: Add support for PUD THP")
Cc: stable(a)vger.kernel.org
Signed-off-by: Alexandre Ghiti <alexghiti(a)rivosinc.com>
---
Note that this will conflict with
https://lore.kernel.org/linux-riscv/20250625063753.77511-1-ajd@linux.ibm.co…
if applied after 6.17.
---
arch/riscv/include/asm/pgtable.h | 11 +++++++++++
1 file changed, 11 insertions(+)
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index 91697fbf1f9013005800f713797e4b6b1fc8d312..e69346307e78608dd98d8b7a77b7063c333448ee 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -942,6 +942,17 @@ static inline int pudp_test_and_clear_young(struct vm_area_struct *vma,
return ptep_test_and_clear_young(vma, address, (pte_t *)pudp);
}
+#define __HAVE_ARCH_PUDP_HUGE_GET_AND_CLEAR
+static inline pud_t pudp_huge_get_and_clear(struct mm_struct *mm,
+ unsigned long address, pud_t *pudp)
+{
+ pud_t pud = __pud(atomic_long_xchg((atomic_long_t *)pudp, 0));
+
+ page_table_check_pud_clear(mm, pud);
+
+ return pud;
+}
+
static inline int pud_young(pud_t pud)
{
return pte_young(pud_pte(pud));
---
base-commit: 62950c35a515743739e3d863eac25c20a5bd1613
change-id: 20250814-dev-alex-thp_pud_xchg-8153c313d946
Best regards,
--
Alexandre Ghiti <alexghiti(a)rivosinc.com>
commit 96939cec994070aa5df852c10fad5fc303a97ea3 upstream.
When a SYN containing the 'C' flag (deny join id0) was received, this
piece of information was not propagated to the path-manager.
Even if this flag is mainly set on the server side, a client can also
tell the server it cannot try to establish new subflows to the client's
initial IP address and port. The server's PM should then record such
info when received, and before sending events about the new connection.
Fixes: df377be38725 ("mptcp: add deny_join_id0 in mptcp_options_received")
Reviewed-by: Mat Martineau <martineau(a)kernel.org>
Signed-off-by: Matthieu Baerts (NGI0) <matttbe(a)kernel.org>
Link: https://patch.msgid.link/20250912-net-mptcp-pm-uspace-deny_join_id0-v1-1-40…
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
[ Conflicts in subflow.c, because of differences in the context, e.g.
introduced by commit 3a236aef280e ("mptcp: refactor passive socket
initialization"), which is not in this version. The same lines --
using 'mptcp_sk(new_msk)' instead of 'owner' -- can still be added
approximately at the same place, before calling
mptcp_pm_new_connection(). ]
Signed-off-by: Matthieu Baerts (NGI0) <matttbe(a)kernel.org>
---
net/mptcp/subflow.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 6bc36132d490..f67d8c98d58a 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -758,6 +758,9 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
*/
WRITE_ONCE(mptcp_sk(new_msk)->first, child);
+ if (mp_opt.deny_join_id0)
+ WRITE_ONCE(mptcp_sk(new_msk)->pm.remote_deny_join_id0, true);
+
/* new mpc subflow takes ownership of the newly
* created mptcp socket
*/
--
2.51.0
Currently this is hidden behind perfmon_capable() since this is
technically an info leak, given that this is a system wide metric.
However the granularity reported here is always PAGE_SIZE aligned, which
matches what the core kernel is already willing to expose to userspace
if querying how many free RAM pages there are on the system, and that
doesn't need any special privileges. In addition other drm drivers seem
happy to expose this.
The motivation here if with oneAPI where they want to use the system
wide 'used' reporting here, so not the per-client fdinfo stats. This has
also come up with some perf overlay applications wanting this
information.
Fixes: 1105ac15d2a1 ("drm/xe/uapi: restrict system wide accounting")
Signed-off-by: Matthew Auld <matthew.auld(a)intel.com>
Cc: Thomas Hellström <thomas.hellstrom(a)linux.intel.com>
Cc: Joshua Santosh <joshua.santosh.ranjan(a)intel.com>
Cc: José Roberto de Souza <jose.souza(a)intel.com>
Cc: Matthew Brost <matthew.brost(a)intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi(a)intel.com>
Cc: <stable(a)vger.kernel.org> # v6.8+
---
drivers/gpu/drm/xe/xe_query.c | 15 ++++++---------
1 file changed, 6 insertions(+), 9 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c
index e1b603aba61b..2e9ff33ed2fe 100644
--- a/drivers/gpu/drm/xe/xe_query.c
+++ b/drivers/gpu/drm/xe/xe_query.c
@@ -276,8 +276,7 @@ static int query_mem_regions(struct xe_device *xe,
mem_regions->mem_regions[0].instance = 0;
mem_regions->mem_regions[0].min_page_size = PAGE_SIZE;
mem_regions->mem_regions[0].total_size = man->size << PAGE_SHIFT;
- if (perfmon_capable())
- mem_regions->mem_regions[0].used = ttm_resource_manager_usage(man);
+ mem_regions->mem_regions[0].used = ttm_resource_manager_usage(man);
mem_regions->num_mem_regions = 1;
for (i = XE_PL_VRAM0; i <= XE_PL_VRAM1; ++i) {
@@ -293,13 +292,11 @@ static int query_mem_regions(struct xe_device *xe,
mem_regions->mem_regions[mem_regions->num_mem_regions].total_size =
man->size;
- if (perfmon_capable()) {
- xe_ttm_vram_get_used(man,
- &mem_regions->mem_regions
- [mem_regions->num_mem_regions].used,
- &mem_regions->mem_regions
- [mem_regions->num_mem_regions].cpu_visible_used);
- }
+ xe_ttm_vram_get_used(man,
+ &mem_regions->mem_regions
+ [mem_regions->num_mem_regions].used,
+ &mem_regions->mem_regions
+ [mem_regions->num_mem_regions].cpu_visible_used);
mem_regions->mem_regions[mem_regions->num_mem_regions].cpu_visible_size =
xe_ttm_vram_get_cpu_visible_size(man);
--
2.51.0
commit 2293c57484ae64c9a3c847c8807db8c26a3a4d41 upstream.
During the connection establishment, a peer can tell the other one that
it cannot establish new subflows to the initial IP address and port by
setting the 'C' flag [1]. Doing so makes sense when the sender is behind
a strict NAT, operating behind a legacy Layer 4 load balancer, or using
anycast IP address for example.
When this 'C' flag is set, the path-managers must then not try to
establish new subflows to the other peer's initial IP address and port.
The in-kernel PM has access to this info, but the userspace PM didn't.
The RFC8684 [1] is strict about that:
(...) therefore the receiver MUST NOT try to open any additional
subflows toward this address and port.
So it is important to tell the userspace about that as it is responsible
for the respect of this flag.
When a new connection is created and established, the Netlink events
now contain the existing but not currently used 'flags' attribute. When
MPTCP_PM_EV_FLAG_DENY_JOIN_ID0 is set, it means no other subflows
to the initial IP address and port -- info that are also part of the
event -- can be established.
Link: https://datatracker.ietf.org/doc/html/rfc8684#section-3.1-20.6 [1]
Fixes: 702c2f646d42 ("mptcp: netlink: allow userspace-driven subflow establishment")
Reported-by: Marek Majkowski <marek(a)cloudflare.com>
Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/532
Reviewed-by: Mat Martineau <martineau(a)kernel.org>
Signed-off-by: Matthieu Baerts (NGI0) <matttbe(a)kernel.org>
Link: https://patch.msgid.link/20250912-net-mptcp-pm-uspace-deny_join_id0-v1-2-40…
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
[ Conflicts in mptcp_pm.yaml, because the indentation has been modified
in commit ec362192aa9e ("netlink: specs: fix up indentation errors"),
which is not in this version. Applying the same modifications, but at
a different level. ]
Signed-off-by: Matthieu Baerts (NGI0) <matttbe(a)kernel.org>
---
Documentation/netlink/specs/mptcp_pm.yaml | 4 ++--
include/uapi/linux/mptcp.h | 2 ++
include/uapi/linux/mptcp_pm.h | 4 ++--
net/mptcp/pm_netlink.c | 7 +++++++
4 files changed, 13 insertions(+), 4 deletions(-)
diff --git a/Documentation/netlink/specs/mptcp_pm.yaml b/Documentation/netlink/specs/mptcp_pm.yaml
index 7e295bad8b29..a670a9bbe01b 100644
--- a/Documentation/netlink/specs/mptcp_pm.yaml
+++ b/Documentation/netlink/specs/mptcp_pm.yaml
@@ -28,13 +28,13 @@ definitions:
traffic-patterns it can take a long time until the
MPTCP_EVENT_ESTABLISHED is sent.
Attributes: token, family, saddr4 | saddr6, daddr4 | daddr6, sport,
- dport, server-side.
+ dport, server-side, [flags].
-
name: established
doc: >-
A MPTCP connection is established (can start new subflows).
Attributes: token, family, saddr4 | saddr6, daddr4 | daddr6, sport,
- dport, server-side.
+ dport, server-side, [flags].
-
name: closed
doc: >-
diff --git a/include/uapi/linux/mptcp.h b/include/uapi/linux/mptcp.h
index 67d015df8893..5fd5b4cf75ca 100644
--- a/include/uapi/linux/mptcp.h
+++ b/include/uapi/linux/mptcp.h
@@ -31,6 +31,8 @@
#define MPTCP_INFO_FLAG_FALLBACK _BITUL(0)
#define MPTCP_INFO_FLAG_REMOTE_KEY_RECEIVED _BITUL(1)
+#define MPTCP_PM_EV_FLAG_DENY_JOIN_ID0 _BITUL(0)
+
#define MPTCP_PM_ADDR_FLAG_SIGNAL (1 << 0)
#define MPTCP_PM_ADDR_FLAG_SUBFLOW (1 << 1)
#define MPTCP_PM_ADDR_FLAG_BACKUP (1 << 2)
diff --git a/include/uapi/linux/mptcp_pm.h b/include/uapi/linux/mptcp_pm.h
index 6ac84b2f636c..7359d34da446 100644
--- a/include/uapi/linux/mptcp_pm.h
+++ b/include/uapi/linux/mptcp_pm.h
@@ -16,10 +16,10 @@
* good time to allocate memory and send ADD_ADDR if needed. Depending on the
* traffic-patterns it can take a long time until the MPTCP_EVENT_ESTABLISHED
* is sent. Attributes: token, family, saddr4 | saddr6, daddr4 | daddr6,
- * sport, dport, server-side.
+ * sport, dport, server-side, [flags].
* @MPTCP_EVENT_ESTABLISHED: A MPTCP connection is established (can start new
* subflows). Attributes: token, family, saddr4 | saddr6, daddr4 | daddr6,
- * sport, dport, server-side.
+ * sport, dport, server-side, [flags].
* @MPTCP_EVENT_CLOSED: A MPTCP connection has stopped. Attribute: token.
* @MPTCP_EVENT_ANNOUNCED: A new address has been announced by the peer.
* Attributes: token, rem_id, family, daddr4 | daddr6 [, dport].
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index b763729b85e0..463c2e7956d5 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -2211,6 +2211,7 @@ static int mptcp_event_created(struct sk_buff *skb,
const struct sock *ssk)
{
int err = nla_put_u32(skb, MPTCP_ATTR_TOKEN, READ_ONCE(msk->token));
+ u16 flags = 0;
if (err)
return err;
@@ -2218,6 +2219,12 @@ static int mptcp_event_created(struct sk_buff *skb,
if (nla_put_u8(skb, MPTCP_ATTR_SERVER_SIDE, READ_ONCE(msk->pm.server_side)))
return -EMSGSIZE;
+ if (READ_ONCE(msk->pm.remote_deny_join_id0))
+ flags |= MPTCP_PM_EV_FLAG_DENY_JOIN_ID0;
+
+ if (flags && nla_put_u16(skb, MPTCP_ATTR_FLAGS, flags))
+ return -EMSGSIZE;
+
return mptcp_event_add_subflow(skb, ssk);
}
--
2.51.0
commit 2293c57484ae64c9a3c847c8807db8c26a3a4d41 upstream.
During the connection establishment, a peer can tell the other one that
it cannot establish new subflows to the initial IP address and port by
setting the 'C' flag [1]. Doing so makes sense when the sender is behind
a strict NAT, operating behind a legacy Layer 4 load balancer, or using
anycast IP address for example.
When this 'C' flag is set, the path-managers must then not try to
establish new subflows to the other peer's initial IP address and port.
The in-kernel PM has access to this info, but the userspace PM didn't.
The RFC8684 [1] is strict about that:
(...) therefore the receiver MUST NOT try to open any additional
subflows toward this address and port.
So it is important to tell the userspace about that as it is responsible
for the respect of this flag.
When a new connection is created and established, the Netlink events
now contain the existing but not currently used 'flags' attribute. When
MPTCP_PM_EV_FLAG_DENY_JOIN_ID0 is set, it means no other subflows
to the initial IP address and port -- info that are also part of the
event -- can be established.
Link: https://datatracker.ietf.org/doc/html/rfc8684#section-3.1-20.6 [1]
Fixes: 702c2f646d42 ("mptcp: netlink: allow userspace-driven subflow establishment")
Reported-by: Marek Majkowski <marek(a)cloudflare.com>
Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/532
Reviewed-by: Mat Martineau <martineau(a)kernel.org>
Signed-off-by: Matthieu Baerts (NGI0) <matttbe(a)kernel.org>
Link: https://patch.msgid.link/20250912-net-mptcp-pm-uspace-deny_join_id0-v1-2-40…
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
[ Conflicts in mptcp_pm.yaml, because the indentation has been modified
in commit ec362192aa9e ("netlink: specs: fix up indentation errors"),
which is not in this version. Applying the same modifications, but at
a different level. ]
Signed-off-by: Matthieu Baerts (NGI0) <matttbe(a)kernel.org>
---
Documentation/netlink/specs/mptcp_pm.yaml | 4 ++--
include/uapi/linux/mptcp.h | 2 ++
include/uapi/linux/mptcp_pm.h | 4 ++--
net/mptcp/pm_netlink.c | 7 +++++++
4 files changed, 13 insertions(+), 4 deletions(-)
diff --git a/Documentation/netlink/specs/mptcp_pm.yaml b/Documentation/netlink/specs/mptcp_pm.yaml
index ecfe5ee33de2..c77f32cfcae9 100644
--- a/Documentation/netlink/specs/mptcp_pm.yaml
+++ b/Documentation/netlink/specs/mptcp_pm.yaml
@@ -28,13 +28,13 @@ definitions:
traffic-patterns it can take a long time until the
MPTCP_EVENT_ESTABLISHED is sent.
Attributes: token, family, saddr4 | saddr6, daddr4 | daddr6, sport,
- dport, server-side.
+ dport, server-side, [flags].
-
name: established
doc: >-
A MPTCP connection is established (can start new subflows).
Attributes: token, family, saddr4 | saddr6, daddr4 | daddr6, sport,
- dport, server-side.
+ dport, server-side, [flags].
-
name: closed
doc: >-
diff --git a/include/uapi/linux/mptcp.h b/include/uapi/linux/mptcp.h
index 67d015df8893..5fd5b4cf75ca 100644
--- a/include/uapi/linux/mptcp.h
+++ b/include/uapi/linux/mptcp.h
@@ -31,6 +31,8 @@
#define MPTCP_INFO_FLAG_FALLBACK _BITUL(0)
#define MPTCP_INFO_FLAG_REMOTE_KEY_RECEIVED _BITUL(1)
+#define MPTCP_PM_EV_FLAG_DENY_JOIN_ID0 _BITUL(0)
+
#define MPTCP_PM_ADDR_FLAG_SIGNAL (1 << 0)
#define MPTCP_PM_ADDR_FLAG_SUBFLOW (1 << 1)
#define MPTCP_PM_ADDR_FLAG_BACKUP (1 << 2)
diff --git a/include/uapi/linux/mptcp_pm.h b/include/uapi/linux/mptcp_pm.h
index 6ac84b2f636c..7359d34da446 100644
--- a/include/uapi/linux/mptcp_pm.h
+++ b/include/uapi/linux/mptcp_pm.h
@@ -16,10 +16,10 @@
* good time to allocate memory and send ADD_ADDR if needed. Depending on the
* traffic-patterns it can take a long time until the MPTCP_EVENT_ESTABLISHED
* is sent. Attributes: token, family, saddr4 | saddr6, daddr4 | daddr6,
- * sport, dport, server-side.
+ * sport, dport, server-side, [flags].
* @MPTCP_EVENT_ESTABLISHED: A MPTCP connection is established (can start new
* subflows). Attributes: token, family, saddr4 | saddr6, daddr4 | daddr6,
- * sport, dport, server-side.
+ * sport, dport, server-side, [flags].
* @MPTCP_EVENT_CLOSED: A MPTCP connection has stopped. Attribute: token.
* @MPTCP_EVENT_ANNOUNCED: A new address has been announced by the peer.
* Attributes: token, rem_id, family, daddr4 | daddr6 [, dport].
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index 50aaf259959a..ce7d42d3bd00 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -408,6 +408,7 @@ static int mptcp_event_created(struct sk_buff *skb,
const struct sock *ssk)
{
int err = nla_put_u32(skb, MPTCP_ATTR_TOKEN, READ_ONCE(msk->token));
+ u16 flags = 0;
if (err)
return err;
@@ -415,6 +416,12 @@ static int mptcp_event_created(struct sk_buff *skb,
if (nla_put_u8(skb, MPTCP_ATTR_SERVER_SIDE, READ_ONCE(msk->pm.server_side)))
return -EMSGSIZE;
+ if (READ_ONCE(msk->pm.remote_deny_join_id0))
+ flags |= MPTCP_PM_EV_FLAG_DENY_JOIN_ID0;
+
+ if (flags && nla_put_u16(skb, MPTCP_ATTR_FLAGS, flags))
+ return -EMSGSIZE;
+
return mptcp_event_add_subflow(skb, ssk);
}
--
2.51.0
Hi Sasha,
Thank you for maintaining the stable versions with Greg!
If I remember well, you run some scripts on your side to maintain the
queue/* branches in the linux-stable-rc Git tree [1], is that correct?
These branches have not been updated for a bit more than 3 weeks. Is it
normal?
Personally, I find them useful. But if it is just me, I can work without
them.
[1]
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git/…
Cheers,
Matt
--
Sponsored by the NGI0 Core fund.
The patch titled
Subject: kmsan: Fix out-of-bounds access to shadow memory
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
kmsan-fix-out-of-bounds-access-to-shadow-memory.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Eric Biggers <ebiggers(a)kernel.org>
Subject: kmsan: Fix out-of-bounds access to shadow memory
Date: Thu, 11 Sep 2025 12:58:58 -0700
Running sha224_kunit on a KMSAN-enabled kernel results in a crash in
kmsan_internal_set_shadow_origin():
BUG: unable to handle page fault for address: ffffbc3840291000
#PF: supervisor read access in kernel mode
#PF: error_code(0x0000) - not-present page
PGD 1810067 P4D 1810067 PUD 192d067 PMD 3c17067 PTE 0
Oops: 0000 [#1] SMP NOPTI
CPU: 0 UID: 0 PID: 81 Comm: kunit_try_catch Tainted: G N 6.17.0-rc3 #10 PREEMPT(voluntary)
Tainted: [N]=TEST
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.17.0-0-gb52ca86e094d-prebuilt.qemu.org 04/01/2014
RIP: 0010:kmsan_internal_set_shadow_origin+0x91/0x100
[...]
Call Trace:
<TASK>
__msan_memset+0xee/0x1a0
sha224_final+0x9e/0x350
test_hash_buffer_overruns+0x46f/0x5f0
? kmsan_get_shadow_origin_ptr+0x46/0xa0
? __pfx_test_hash_buffer_overruns+0x10/0x10
kunit_try_run_case+0x198/0xa00
This occurs when memset() is called on a buffer that is not 4-byte aligned
and extends to the end of a guard page, i.e. the next page is unmapped.
The bug is that the loop at the end of kmsan_internal_set_shadow_origin()
accesses the wrong shadow memory bytes when the address is not 4-byte
aligned. Since each 4 bytes are associated with an origin, it rounds the
address and size so that it can access all the origins that contain the
buffer. However, when it checks the corresponding shadow bytes for a
particular origin, it incorrectly uses the original unrounded shadow
address. This results in reads from shadow memory beyond the end of the
buffer's shadow memory, which crashes when that memory is not mapped.
To fix this, correctly align the shadow address before accessing the 4
shadow bytes corresponding to each origin.
Link: https://lkml.kernel.org/r/20250911195858.394235-1-ebiggers@kernel.org
Fixes: 2ef3cec44c60 ("kmsan: do not wipe out origin when doing partial unpoisoning")
Signed-off-by: Eric Biggers <ebiggers(a)kernel.org>
Tested-by: Alexander Potapenko <glider(a)google.com>
Reviewed-by: Alexander Potapenko <glider(a)google.com>
Cc: Dmitriy Vyukov <dvyukov(a)google.com>
Cc: Marco Elver <elver(a)google.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/kmsan/core.c | 10 +++++++---
mm/kmsan/kmsan_test.c | 16 ++++++++++++++++
2 files changed, 23 insertions(+), 3 deletions(-)
--- a/mm/kmsan/core.c~kmsan-fix-out-of-bounds-access-to-shadow-memory
+++ a/mm/kmsan/core.c
@@ -195,7 +195,8 @@ void kmsan_internal_set_shadow_origin(vo
u32 origin, bool checked)
{
u64 address = (u64)addr;
- u32 *shadow_start, *origin_start;
+ void *shadow_start;
+ u32 *aligned_shadow, *origin_start;
size_t pad = 0;
KMSAN_WARN_ON(!kmsan_metadata_is_contiguous(addr, size));
@@ -214,9 +215,12 @@ void kmsan_internal_set_shadow_origin(vo
}
__memset(shadow_start, b, size);
- if (!IS_ALIGNED(address, KMSAN_ORIGIN_SIZE)) {
+ if (IS_ALIGNED(address, KMSAN_ORIGIN_SIZE)) {
+ aligned_shadow = shadow_start;
+ } else {
pad = address % KMSAN_ORIGIN_SIZE;
address -= pad;
+ aligned_shadow = shadow_start - pad;
size += pad;
}
size = ALIGN(size, KMSAN_ORIGIN_SIZE);
@@ -230,7 +234,7 @@ void kmsan_internal_set_shadow_origin(vo
* corresponding shadow slot is zero.
*/
for (int i = 0; i < size / KMSAN_ORIGIN_SIZE; i++) {
- if (origin || !shadow_start[i])
+ if (origin || !aligned_shadow[i])
origin_start[i] = origin;
}
}
--- a/mm/kmsan/kmsan_test.c~kmsan-fix-out-of-bounds-access-to-shadow-memory
+++ a/mm/kmsan/kmsan_test.c
@@ -556,6 +556,21 @@ DEFINE_TEST_MEMSETXX(16)
DEFINE_TEST_MEMSETXX(32)
DEFINE_TEST_MEMSETXX(64)
+/* Test case: ensure that KMSAN does not access shadow memory out of bounds. */
+static void test_memset_on_guarded_buffer(struct kunit *test)
+{
+ void *buf = vmalloc(PAGE_SIZE);
+
+ kunit_info(test,
+ "memset() on ends of guarded buffer should not crash\n");
+
+ for (size_t size = 0; size <= 128; size++) {
+ memset(buf, 0xff, size);
+ memset(buf + PAGE_SIZE - size, 0xff, size);
+ }
+ vfree(buf);
+}
+
static noinline void fibonacci(int *array, int size, int start)
{
if (start < 2 || (start == size))
@@ -677,6 +692,7 @@ static struct kunit_case kmsan_test_case
KUNIT_CASE(test_memset16),
KUNIT_CASE(test_memset32),
KUNIT_CASE(test_memset64),
+ KUNIT_CASE(test_memset_on_guarded_buffer),
KUNIT_CASE(test_long_origin_chain),
KUNIT_CASE(test_stackdepot_roundtrip),
KUNIT_CASE(test_unpoison_memory),
_
Patches currently in -mm which might be from ebiggers(a)kernel.org are
kmsan-fix-out-of-bounds-access-to-shadow-memory.patch
The patch titled
Subject: Squashfs: fix uninit-value in squashfs_get_parent
has been added to the -mm mm-nonmm-unstable branch. Its filename is
squashfs-fix-uninit-value-in-squashfs_get_parent.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-nonmm-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Phillip Lougher <phillip(a)squashfs.org.uk>
Subject: Squashfs: fix uninit-value in squashfs_get_parent
Date: Fri, 19 Sep 2025 00:33:08 +0100
Syzkaller reports a "KMSAN: uninit-value in squashfs_get_parent" bug.
This is caused by open_by_handle_at() being called with a file handle
containing an invalid parent inode number. In particular the inode number
is that of a symbolic link, rather than a directory.
Squashfs_get_parent() gets called with that symbolic link inode, and
accesses the parent member field.
unsigned int parent_ino = squashfs_i(inode)->parent;
Because non-directory inodes in Squashfs do not have a parent value, this
is uninitialised, and this causes an uninitialised value access.
The fix is to initialise parent with the invalid inode 0, which will cause
an EINVAL error to be returned.
Regular inodes used to share the parent field with the block_list_start
field. This is removed in this commit to enable the parent field to
contain the invalid inode number 0.
Link: https://lkml.kernel.org/r/20250918233308.293861-1-phillip@squashfs.org.uk
Fixes: 122601408d20 ("Squashfs: export operations")
Signed-off-by: Phillip Lougher <phillip(a)squashfs.org.uk>
Reported-by: syzbot+157bdef5cf596ad0da2c(a)syzkaller.appspotmail.com
Closes: https://lore.kernel.org/all/68cc2431.050a0220.139b6.0001.GAE@google.com/
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
fs/squashfs/inode.c | 7 +++++++
fs/squashfs/squashfs_fs_i.h | 2 +-
2 files changed, 8 insertions(+), 1 deletion(-)
--- a/fs/squashfs/inode.c~squashfs-fix-uninit-value-in-squashfs_get_parent
+++ a/fs/squashfs/inode.c
@@ -169,6 +169,7 @@ int squashfs_read_inode(struct inode *in
squashfs_i(inode)->start = le32_to_cpu(sqsh_ino->start_block);
squashfs_i(inode)->block_list_start = block;
squashfs_i(inode)->offset = offset;
+ squashfs_i(inode)->parent = 0;
inode->i_data.a_ops = &squashfs_aops;
TRACE("File inode %x:%x, start_block %llx, block_list_start "
@@ -216,6 +217,7 @@ int squashfs_read_inode(struct inode *in
squashfs_i(inode)->start = le64_to_cpu(sqsh_ino->start_block);
squashfs_i(inode)->block_list_start = block;
squashfs_i(inode)->offset = offset;
+ squashfs_i(inode)->parent = 0;
inode->i_data.a_ops = &squashfs_aops;
TRACE("File inode %x:%x, start_block %llx, block_list_start "
@@ -296,6 +298,7 @@ int squashfs_read_inode(struct inode *in
inode->i_mode |= S_IFLNK;
squashfs_i(inode)->start = block;
squashfs_i(inode)->offset = offset;
+ squashfs_i(inode)->parent = 0;
if (type == SQUASHFS_LSYMLINK_TYPE) {
__le32 xattr;
@@ -333,6 +336,7 @@ int squashfs_read_inode(struct inode *in
set_nlink(inode, le32_to_cpu(sqsh_ino->nlink));
rdev = le32_to_cpu(sqsh_ino->rdev);
init_special_inode(inode, inode->i_mode, new_decode_dev(rdev));
+ squashfs_i(inode)->parent = 0;
TRACE("Device inode %x:%x, rdev %x\n",
SQUASHFS_INODE_BLK(ino), offset, rdev);
@@ -357,6 +361,7 @@ int squashfs_read_inode(struct inode *in
set_nlink(inode, le32_to_cpu(sqsh_ino->nlink));
rdev = le32_to_cpu(sqsh_ino->rdev);
init_special_inode(inode, inode->i_mode, new_decode_dev(rdev));
+ squashfs_i(inode)->parent = 0;
TRACE("Device inode %x:%x, rdev %x\n",
SQUASHFS_INODE_BLK(ino), offset, rdev);
@@ -377,6 +382,7 @@ int squashfs_read_inode(struct inode *in
inode->i_mode |= S_IFSOCK;
set_nlink(inode, le32_to_cpu(sqsh_ino->nlink));
init_special_inode(inode, inode->i_mode, 0);
+ squashfs_i(inode)->parent = 0;
break;
}
case SQUASHFS_LFIFO_TYPE:
@@ -396,6 +402,7 @@ int squashfs_read_inode(struct inode *in
inode->i_op = &squashfs_inode_ops;
set_nlink(inode, le32_to_cpu(sqsh_ino->nlink));
init_special_inode(inode, inode->i_mode, 0);
+ squashfs_i(inode)->parent = 0;
break;
}
default:
--- a/fs/squashfs/squashfs_fs_i.h~squashfs-fix-uninit-value-in-squashfs_get_parent
+++ a/fs/squashfs/squashfs_fs_i.h
@@ -16,6 +16,7 @@ struct squashfs_inode_info {
u64 xattr;
unsigned int xattr_size;
int xattr_count;
+ int parent;
union {
struct {
u64 fragment_block;
@@ -27,7 +28,6 @@ struct squashfs_inode_info {
u64 dir_idx_start;
int dir_idx_offset;
int dir_idx_cnt;
- int parent;
};
};
struct inode vfs_inode;
_
Patches currently in -mm which might be from phillip(a)squashfs.org.uk are
squashfs-fix-uninit-value-in-squashfs_get_parent.patch
The patch titled
Subject: fs/proc/task_mmu: check cur_buf for NULL
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
fs-proc-task_mmu-check-cur_buf-for-null.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Jakub Acs <acsjakub(a)amazon.de>
Subject: fs/proc/task_mmu: check cur_buf for NULL
Date: Fri, 19 Sep 2025 14:21:04 +0000
When the PAGEMAP_SCAN ioctl is invoked with vec_len = 0 reaches
pagemap_scan_backout_range(), kernel panics with null-ptr-deref:
[ 44.936808] Oops: general protection fault, probably for non-canonical address 0xdffffc0000000000: 0000 [#1] SMP DEBUG_PAGEALLOC KASAN NOPTI
[ 44.937797] KASAN: null-ptr-deref in range [0x0000000000000000-0x0000000000000007]
[ 44.938391] CPU: 1 UID: 0 PID: 2480 Comm: reproducer Not tainted 6.17.0-rc6 #22 PREEMPT(none)
[ 44.939062] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.3-0-ga6ed6b701f0a-prebuilt.qemu.org 04/01/2014
[ 44.939935] RIP: 0010:pagemap_scan_thp_entry.isra.0+0x741/0xa80
<snip registers, unreliable trace>
[ 44.946828] Call Trace:
[ 44.947030] <TASK>
[ 44.949219] pagemap_scan_pmd_entry+0xec/0xfa0
[ 44.952593] walk_pmd_range.isra.0+0x302/0x910
[ 44.954069] walk_pud_range.isra.0+0x419/0x790
[ 44.954427] walk_p4d_range+0x41e/0x620
[ 44.954743] walk_pgd_range+0x31e/0x630
[ 44.955057] __walk_page_range+0x160/0x670
[ 44.956883] walk_page_range_mm+0x408/0x980
[ 44.958677] walk_page_range+0x66/0x90
[ 44.958984] do_pagemap_scan+0x28d/0x9c0
[ 44.961833] do_pagemap_cmd+0x59/0x80
[ 44.962484] __x64_sys_ioctl+0x18d/0x210
[ 44.962804] do_syscall_64+0x5b/0x290
[ 44.963111] entry_SYSCALL_64_after_hwframe+0x76/0x7e
vec_len = 0 in pagemap_scan_init_bounce_buffer() means no buffers are
allocated and p->vec_buf remains set to NULL.
This breaks an assumption made later in pagemap_scan_backout_range(), that
page_region is always allocated for p->vec_buf_index.
Fix it by explicitly checking cur_buf for NULL before dereferencing.
Other sites that might run into same deref-issue are already (directly or
transitively) protected by checking p->vec_buf.
Note:
From PAGEMAP_SCAN man page, it seems vec_len = 0 is valid when no output
is requested and it's only the side effects caller is interested in, hence
it passes check in pagemap_scan_get_args().
This issue was found by syzkaller.
Link: https://lkml.kernel.org/r/20250919142106.43527-1-acsjakub@amazon.de
Fixes: 52526ca7fdb9 ("fs/proc/task_mmu: implement IOCTL to get and optionally clear info about PTEs")
Signed-off-by: Jakub Acs <acsjakub(a)amazon.de>
Cc: David Hildenbrand <david(a)redhat.com>
Cc: Vlastimil Babka <vbabka(a)suse.cz>
Cc: Lorenzo Stoakes <lorenzo.stoakes(a)oracle.com>
Cc: Jinjiang Tu <tujinjiang(a)huawei.com>
Cc: Suren Baghdasaryan <surenb(a)google.com>
Cc: Penglei Jiang <superman.xpt(a)gmail.com>
Cc: Mark Brown <broonie(a)kernel.org>
Cc: Baolin Wang <baolin.wang(a)linux.alibaba.com>
Cc: Ryan Roberts <ryan.roberts(a)arm.com>
Cc: Andrei Vagin <avagin(a)gmail.com>
Cc: "Micha�� Miros��aw" <mirq-linux(a)rere.qmqm.pl>
Cc: Stephen Rothwell <sfr(a)canb.auug.org.au>
Cc: Muhammad Usama Anjum <usama.anjum(a)collabora.com>
Cc: Alexey Dobriyan <adobriyan(a)gmail.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
fs/proc/task_mmu.c | 3 +++
1 file changed, 3 insertions(+)
--- a/fs/proc/task_mmu.c~fs-proc-task_mmu-check-cur_buf-for-null
+++ a/fs/proc/task_mmu.c
@@ -2417,6 +2417,9 @@ static void pagemap_scan_backout_range(s
{
struct page_region *cur_buf = &p->vec_buf[p->vec_buf_index];
+ if (!cur_buf)
+ return;
+
if (cur_buf->start != addr)
cur_buf->end = addr;
else
_
Patches currently in -mm which might be from acsjakub(a)amazon.de are
fs-proc-task_mmu-check-cur_buf-for-null.patch
Bug-report: https://lore.kernel.org/all/915c0e00-b92d-4e37-9d4b-0f6a4580da97@oracle.com/
Summary: While backporting commit: 7c62c442b6eb ("x86/vmscape: Enumerate
VMSCAPE bug") to 6.12.y --> VULNBL_AMD(0x1a, SRSO | VMSCAPE) was added
even when 6.12.y doesn't have commit: 877818802c3e ("x86/bugs: Add
SRSO_USER_KERNEL_NO support").
Boris Ostrovsky suggested backporting three commits to 6.12.y:
1. commit: 877818802c3e ("x86/bugs: Add SRSO_USER_KERNEL_NO support")
2. commit: 8442df2b49ed ("x86/bugs: KVM: Add support for SRSO_MSR_FIX")
and its fix
3. commit: e3417ab75ab2 ("KVM: SVM: Set/clear SRSO's BP_SPEC_REDUCE on 0
<=> 1 VM count transitions") -- Maybe optional
Which changes current mitigation status on turin for 6.12.48 from Safe
RET to Reduced Speculation, leaving it with Safe RET liely causes heavy
performance regressions.
This three patches together change mitigation status from Safe RET to
Reduced Speculation
Tested on Turin:
[ 3.188134] Speculative Return Stack Overflow: Mitigation: Reduced Speculation
Backports:
1. Patch 1 had minor conflict as VMSCAPE commit added VULNBL_AMD(0x1a,
SRSO | VMSCAPE), and resolution is to skip that line.
2. Patch 2 and 3 are clean cherry-picks, 3 is a fix for 2.
Note: I verified if this problem is also on other stable trees like (6.6
--> 5.10, no they don't have this backport problem)
Thanks,
Harshit
Borislav Petkov (1):
x86/bugs: KVM: Add support for SRSO_MSR_FIX
Borislav Petkov (AMD) (1):
x86/bugs: Add SRSO_USER_KERNEL_NO support
Sean Christopherson (1):
KVM: SVM: Set/clear SRSO's BP_SPEC_REDUCE on 0 <=> 1 VM count
transitions
Documentation/admin-guide/hw-vuln/srso.rst | 13 +++++
arch/x86/include/asm/cpufeatures.h | 5 ++
arch/x86/include/asm/msr-index.h | 1 +
arch/x86/kernel/cpu/bugs.c | 28 ++++++++--
arch/x86/kvm/svm/svm.c | 65 ++++++++++++++++++++++
arch/x86/kvm/svm/svm.h | 2 +
arch/x86/lib/msr.c | 2 +
7 files changed, 112 insertions(+), 4 deletions(-)
--
2.50.1
The following commit has been merged into the x86/cpu branch of tip:
Commit-ID: 32278c677947ae2f042c9535674a7fff9a245dd3
Gitweb: https://git.kernel.org/tip/32278c677947ae2f042c9535674a7fff9a245dd3
Author: Sean Christopherson <seanjc(a)google.com>
AuthorDate: Fri, 08 Aug 2025 10:23:56 -07:00
Committer: Borislav Petkov (AMD) <bp(a)alien8.de>
CommitterDate: Fri, 19 Sep 2025 20:21:12 +02:00
x86/umip: Check that the instruction opcode is at least two bytes
When checking for a potential UMIP violation on #GP, verify the decoder found
at least two opcode bytes to avoid false positives when the kernel encounters
an unknown instruction that starts with 0f. Because the array of opcode.bytes
is zero-initialized by insn_init(), peeking at bytes[1] will misinterpret
garbage as a potential SLDT or STR instruction, and can incorrectly trigger
emulation.
E.g. if a VPALIGNR instruction
62 83 c5 05 0f 08 ff vpalignr xmm17{k5},xmm23,XMMWORD PTR [r8],0xff
hits a #GP, the kernel emulates it as STR and squashes the #GP (and corrupts
the userspace code stream).
Arguably the check should look for exactly two bytes, but no three byte
opcodes use '0f 00 xx' or '0f 01 xx' as an escape, i.e. it should be
impossible to get a false positive if the first two opcode bytes match '0f 00'
or '0f 01'. Go with a more conservative check with respect to the existing
code to minimize the chances of breaking userspace, e.g. due to decoder
weirdness.
Analyzed by Nick Bray <ncbray(a)google.com>.
Fixes: 1e5db223696a ("x86/umip: Add emulation code for UMIP instructions")
Reported-by: Dan Snyder <dansnyder(a)google.com>
Signed-off-by: Sean Christopherson <seanjc(a)google.com>
Signed-off-by: Borislav Petkov (AMD) <bp(a)alien8.de>
Acked-by: Peter Zijlstra (Intel) <peterz(a)infradead.org>
Cc: stable(a)vger.kernel.org
---
arch/x86/kernel/umip.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/arch/x86/kernel/umip.c b/arch/x86/kernel/umip.c
index 5a4b213..406ac01 100644
--- a/arch/x86/kernel/umip.c
+++ b/arch/x86/kernel/umip.c
@@ -156,8 +156,8 @@ static int identify_insn(struct insn *insn)
if (!insn->modrm.nbytes)
return -EINVAL;
- /* All the instructions of interest start with 0x0f. */
- if (insn->opcode.bytes[0] != 0xf)
+ /* The instructions of interest have 2-byte opcodes: 0F 00 or 0F 01. */
+ if (insn->opcode.nbytes < 2 || insn->opcode.bytes[0] != 0xf)
return -EINVAL;
if (insn->opcode.bytes[1] == 0x1) {
The following commit has been merged into the x86/cpu branch of tip:
Commit-ID: 27b1fd62012dfe9d3eb8ecde344d7aa673695ecf
Gitweb: https://git.kernel.org/tip/27b1fd62012dfe9d3eb8ecde344d7aa673695ecf
Author: Sean Christopherson <seanjc(a)google.com>
AuthorDate: Fri, 08 Aug 2025 10:23:57 -07:00
Committer: Borislav Petkov (AMD) <bp(a)alien8.de>
CommitterDate: Fri, 19 Sep 2025 21:34:48 +02:00
x86/umip: Fix decoding of register forms of 0F 01 (SGDT and SIDT aliases)
Filter out the register forms of 0F 01 when determining whether or not to
emulate in response to a potential UMIP violation #GP, as SGDT and SIDT only
accept memory operands. The register variants of 0F 01 are used to encode
instructions for things like VMX and SGX, i.e. not checking the Mod field
would cause the kernel to incorrectly emulate on #GP, e.g. due to a CPL
violation on VMLAUNCH.
Fixes: 1e5db223696a ("x86/umip: Add emulation code for UMIP instructions")
Signed-off-by: Sean Christopherson <seanjc(a)google.com>
Signed-off-by: Borislav Petkov (AMD) <bp(a)alien8.de>
Acked-by: Peter Zijlstra (Intel) <peterz(a)infradead.org>
Cc: stable(a)vger.kernel.org
---
arch/x86/kernel/umip.c | 11 +++++++++++
1 file changed, 11 insertions(+)
diff --git a/arch/x86/kernel/umip.c b/arch/x86/kernel/umip.c
index 406ac01..d432f38 100644
--- a/arch/x86/kernel/umip.c
+++ b/arch/x86/kernel/umip.c
@@ -163,8 +163,19 @@ static int identify_insn(struct insn *insn)
if (insn->opcode.bytes[1] == 0x1) {
switch (X86_MODRM_REG(insn->modrm.value)) {
case 0:
+ /* The reg form of 0F 01 /0 encodes VMX instructions. */
+ if (X86_MODRM_MOD(insn->modrm.value) == 3)
+ return -EINVAL;
+
return UMIP_INST_SGDT;
case 1:
+ /*
+ * The reg form of 0F 01 /1 encodes MONITOR/MWAIT,
+ * STAC/CLAC, and ENCLS.
+ */
+ if (X86_MODRM_MOD(insn->modrm.value) == 3)
+ return -EINVAL;
+
return UMIP_INST_SIDT;
case 4:
return UMIP_INST_SMSW;
From: HariKrishna Sagala <hariconscious(a)gmail.com>
Syzbot reported an uninit-value bug on at kmalloc_reserve for
commit 320475fbd590 ("Merge tag 'mtd/fixes-for-6.17-rc6' of
git://git.kernel.org/pub/scm/linux/kernel/git/mtd/linux")'
Syzbot KMSAN reported use of uninitialized memory originating from functions
"kmalloc_reserve()", where memory allocated via "kmem_cache_alloc_node()" or
"kmalloc_node_track_caller()" was not explicitly initialized.
This can lead to undefined behavior when the allocated buffer
is later accessed.
Fix this by requesting the initialized memory using the gfp flag
appended with the option "__GFP_ZERO".
Reported-by: syzbot+9a4fbb77c9d4aacd3388(a)syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=9a4fbb77c9d4aacd3388
Fixes: 915d975b2ffa ("net: deal with integer overflows in
kmalloc_reserve()")
Tested-by: syzbot+9a4fbb77c9d4aacd3388(a)syzkaller.appspotmail.com
Signed-off-by: HariKrishna Sagala <hariconscious(a)gmail.com>
---
net/core/skbuff.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index ee0274417948..2308ebf99bbd 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -573,6 +573,7 @@ static void *kmalloc_reserve(unsigned int *size, gfp_t flags, int node,
void *obj;
obj_size = SKB_HEAD_ALIGN(*size);
+ flags |= __GFP_ZERO;
if (obj_size <= SKB_SMALL_HEAD_CACHE_SIZE &&
!(flags & KMALLOC_NOT_NORMAL_BITS)) {
obj = kmem_cache_alloc_node(net_hotdata.skb_small_head_cache,
--
2.43.0
Przemek Kitszel says:
Improvements hardening PF-VF communication for i40e driver.
This patchset targets several issues that can cause undefined behavior
or be exploited in some other way.
---
IWL: https://lore.kernel.org/intel-wired-lan/20250813104552.61027-1-przemyslaw.k…
The following are changes since commit cbf658dd09419f1ef9de11b9604e950bdd5c170b:
Merge tag 'net-6.17-rc7' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
and are available in the git repository at:
git://git.kernel.org/pub/scm/linux/kernel/git/tnguy/net-queue 40GbE
Lukasz Czapnik (8):
i40e: add validation for ring_len param
i40e: fix idx validation in i40e_validate_queue_map
i40e: fix idx validation in config queues msg
i40e: fix input validation logic for action_meta
i40e: fix validation of VF state in get resources
i40e: add max boundary check for VF filters
i40e: add mask to apply valid bits for itr_idx
i40e: improve VF MAC filters accounting
drivers/net/ethernet/intel/i40e/i40e.h | 3 +-
drivers/net/ethernet/intel/i40e/i40e_main.c | 26 ++++-
.../ethernet/intel/i40e/i40e_virtchnl_pf.c | 110 ++++++++++--------
.../ethernet/intel/i40e/i40e_virtchnl_pf.h | 3 +-
4 files changed, 90 insertions(+), 52 deletions(-)
--
2.47.1
From: HariKrishna Sagala <hariconscious(a)gmail.com>
Syzbot reported an uninit-value bug on at kmalloc_reserve for
commit 320475fbd590 ("Merge tag 'mtd/fixes-for-6.17-rc6' of
git://git.kernel.org/pub/scm/linux/kernel/git/mtd/linux")'
Syzbot KMSAN reported use of uninitialized memory originating from functions
"kmalloc_reserve()", where memory allocated via "kmem_cache_alloc_node()" or
"kmalloc_node_track_caller()" was not explicitly initialized.
This can lead to undefined behavior when the allocated buffer
is later accessed.
Fix this by requesting the initialized memory using the gfp flag
appended with the option "__GFP_ZERO".
Reported-by: syzbot+9a4fbb77c9d4aacd3388(a)syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=9a4fbb77c9d4aacd3388
Fixes: 915d975b2ffa ("net: deal with integer overflows in
kmalloc_reserve()")
Tested-by: syzbot+9a4fbb77c9d4aacd3388(a)syzkaller.appspotmail.com
Cc: <stable(a)vger.kernel.org> # 6.16
Signed-off-by: HariKrishna Sagala <hariconscious(a)gmail.com>
---
RESEND:
- added Cc stable as suggested from kernel test robot
net/core/skbuff.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index ee0274417948..2308ebf99bbd 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -573,6 +573,7 @@ static void *kmalloc_reserve(unsigned int *size, gfp_t flags, int node,
void *obj;
obj_size = SKB_HEAD_ALIGN(*size);
+ flags |= __GFP_ZERO;
if (obj_size <= SKB_SMALL_HEAD_CACHE_SIZE &&
!(flags & KMALLOC_NOT_NORMAL_BITS)) {
obj = kmem_cache_alloc_node(net_hotdata.skb_small_head_cache,
--
2.43.0
Once of_device_register() failed, we should call put_device() to
decrement reference count for cleanup. Or it could cause memory leak.
So fix this by calling put_device(), then the name can be freed in
kobject_cleanup().
Calling path: of_device_register() -> of_device_add() -> device_add().
As comment of device_add() says, 'if device_add() succeeds, you should
call device_del() when you want to get rid of it. If device_add() has
not succeeded, use only put_device() to drop the reference count'.
Found by code review.
Cc: stable(a)vger.kernel.org
Fixes: cf44bbc26cf1 ("[SPARC]: Beginnings of generic of_device framework.")
Signed-off-by: Ma Ke <make24(a)iscas.ac.cn>
---
Changes in v2:
- retained kfree() manually due to the lack of a release callback function.
---
arch/sparc/kernel/of_device_64.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/arch/sparc/kernel/of_device_64.c b/arch/sparc/kernel/of_device_64.c
index f98c2901f335..f53092b07b9e 100644
--- a/arch/sparc/kernel/of_device_64.c
+++ b/arch/sparc/kernel/of_device_64.c
@@ -677,6 +677,7 @@ static struct platform_device * __init scan_one_device(struct device_node *dp,
if (of_device_register(op)) {
printk("%pOF: Could not register of device.\n", dp);
+ put_device(&op->dev);
kfree(op);
op = NULL;
}
--
2.25.1
Hi stable maintainers,
While skimming over stable backports for VMSCAPE commits, I found
something unusual.
This is regarding the 6.12.y commit: 7c62c442b6eb ("x86/vmscape:
Enumerate VMSCAPE bug")
commit 7c62c442b6eb95d21bc4c5afc12fee721646ebe2
Author: Pawan Gupta <pawan.kumar.gupta(a)linux.intel.com>
Date: Thu Aug 14 10:20:42 2025 -0700
x86/vmscape: Enumerate VMSCAPE bug
Commit a508cec6e5215a3fbc7e73ae86a5c5602187934d upstream.
The VMSCAPE vulnerability may allow a guest to cause Branch Target
Injection (BTI) in userspace hypervisors.
Kernels (both host and guest) have existing defenses against direct BTI
attacks from guests. There are also inter-process BTI mitigations which
prevent processes from attacking each other. However, the threat in
this
case is to a userspace hypervisor within the same process as the
attacker.
Userspace hypervisors have access to their own sensitive data like disk
encryption keys and also typically have access to all guest data. This
means guest userspace may use the hypervisor as a confused deputy
to attack
sensitive guest kernel data. There are no existing mitigations for
these
attacks.
Introduce X86_BUG_VMSCAPE for this vulnerability and set it on affected
Intel and AMD CPUs.
Signed-off-by: Pawan Gupta <pawan.kumar.gupta(a)linux.intel.com>
Signed-off-by: Dave Hansen <dave.hansen(a)linux.intel.com>
Reviewed-by: Borislav Petkov (AMD) <bp(a)alien8.de>
Signed-off-by: Borislav Petkov (AMD) <bp(a)alien8.de>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
So the problem in this commit is this part of the backport:
in file: arch/x86/kernel/cpu/common.c
VULNBL_AMD(0x15, RETBLEED),
VULNBL_AMD(0x16, RETBLEED),
- VULNBL_AMD(0x17, RETBLEED | SMT_RSB | SRSO),
- VULNBL_HYGON(0x18, RETBLEED | SMT_RSB | SRSO),
- VULNBL_AMD(0x19, SRSO | TSA),
+ VULNBL_AMD(0x17, RETBLEED | SMT_RSB | SRSO | VMSCAPE),
+ VULNBL_HYGON(0x18, RETBLEED | SMT_RSB | SRSO | VMSCAPE),
+ VULNBL_AMD(0x19, SRSO | TSA | VMSCAPE),
+ VULNBL_AMD(0x1a, SRSO | VMSCAPE),
+
{}
Notice the part where VULNBL_AMD(0x1a, SRSO | VMSCAPE) is added, 6.12.y
doesn't have commit: 877818802c3e ("x86/bugs: Add SRSO_USER_KERNEL_NO
support") so I think we shouldn't be adding VULNBL_AMD(0x1a, SRSO |
VMSCAPE) directly.
Boris Ostrovsky suggested me to verify this on a Turin machine as this
could cause a very big performance regression : and stated if SRSO
mitigation status is Safe RET we are likely in a problem, and we are in
that situation.
# lscpu | grep -E "CPU family"
CPU family: 26
Notes: CPU ID 26 -> 0x1a
And Turin machine reports the SRSO mitigation status as "Safe RET"
# uname -r
6.12.48-master.20250917.el8.rc1.x86_64
# cat /sys/devices/system/cpu/vulnerabilities/spec_rstack_overflow
Mitigation: Safe RET
Boris Ostrovsky suggested backporting three commits to 6.12.y:
1. commit: 877818802c3e ("x86/bugs: Add SRSO_USER_KERNEL_NO support")
2. commit: 8442df2b49ed ("x86/bugs: KVM: Add support for SRSO_MSR_FIX")
and its fix
3. commit: e3417ab75ab2 ("KVM: SVM: Set/clear SRSO's BP_SPEC_REDUCE on 0
<=> 1 VM count transitions") -- Maybe optional
After backporting these three:
# uname -r
6.12.48-master.20250919.el8.dev.x86_64 // Note this this is kernel with
patches above three applied.
# dmesg | grep -C 2 Reduce
[ 3.186135] Speculative Store Bypass: Mitigation: Speculative Store
Bypass disabled via prctl
[ 3.187135] Speculative Return Stack Overflow: Reducing speculation to
address VM/HV SRSO attack vector.
[ 3.188134] Speculative Return Stack Overflow: Mitigation: Reduced
Speculation
[ 3.189135] VMSCAPE: Mitigation: IBPB before exit to userspace
[ 3.191139] x86/fpu: Supporting XSAVE feature 0x001: 'x87 floating point
registers'
# cat /sys/devices/system/cpu/vulnerabilities/spec_rstack_overflow
Mitigation: Reduced Speculation
I can send my backports to stable if this looks good. Thoughts ?
Thanks,
Harshit
The iput() function is a dangerous one - if the reference counter goes
to zero, the function may block for a long time due to:
- inode_wait_for_writeback() waits until writeback on this inode
completes
- the filesystem-specific "evict_inode" callback can do similar
things; e.g. all netfs-based filesystems will call
netfs_wait_for_outstanding_io() which is similar to
inode_wait_for_writeback()
Therefore, callers must carefully evaluate the context they're in and
check whether invoking iput() is a good idea at all.
Most of the time, this is not a problem because the dcache holds
references to all inodes, and the dcache is usually the one to release
the last reference. But this assumption is fragile. For example,
under (memcg) memory pressure, the dcache shrinker is more likely to
release inode references, moving the inode eviction to contexts where
that was extremely unlikely to occur.
Our production servers "found" at least two deadlock bugs in the Ceph
filesystem that were caused by this iput() behavior:
1. Writeback may lead to iput() calls in Ceph (e.g. from
ceph_put_wrbuffer_cap_refs()) which deadlocks in
inode_wait_for_writeback(). Waiting for writeback completion from
within writeback will obviously never be able to make any progress.
This leads to blocked kworkers like this:
INFO: task kworker/u777:6:1270802 blocked for more than 122 seconds.
Not tainted 6.16.7-i1-es #773
task:kworker/u777:6 state:D stack:0 pid:1270802 tgid:1270802 ppid:2
task_flags:0x4208060 flags:0x00004000
Workqueue: writeback wb_workfn (flush-ceph-3)
Call Trace:
<TASK>
__schedule+0x4ea/0x17d0
schedule+0x1c/0xc0
inode_wait_for_writeback+0x71/0xb0
evict+0xcf/0x200
ceph_put_wrbuffer_cap_refs+0xdd/0x220
ceph_invalidate_folio+0x97/0xc0
ceph_writepages_start+0x127b/0x14d0
do_writepages+0xba/0x150
__writeback_single_inode+0x34/0x290
writeback_sb_inodes+0x203/0x470
__writeback_inodes_wb+0x4c/0xe0
wb_writeback+0x189/0x2b0
wb_workfn+0x30b/0x3d0
process_one_work+0x143/0x2b0
worker_thread+0x30a/0x450
2. In the Ceph messenger thread (net/ceph/messenger*.c), any iput()
call may invoke ceph_evict_inode() which will deadlock in
netfs_wait_for_outstanding_io(); since this blocks the messenger
thread, completions from the Ceph servers will not ever be received
and handled.
It looks like these deadlock bugs have been in the Ceph filesystem
code since forever (therefore no "Fixes" tag in this patch). There
may be various ways to solve this:
- make iput() asynchronous and defer the actual eviction like fput()
(may add overhead)
- make iput() only asynchronous if I_SYNC is set (doesn't solve random
things happening inside the "evict_inode" callback)
- add iput_deferred() to make this asynchronous behavior/overhead
optional and explicit
- refactor Ceph to avoid iput() calls from within writeback and
messenger (if that is even possible)
- add a Ceph-specific workaround
After advice from Mateusz Guzik, I decided to do the latter. The
implementation is simple because it piggybacks on the existing
work_struct for ceph_queue_inode_work() - ceph_inode_work() calls
iput() at the end which means we can donate the last reference to it.
Since Ceph has a few iput() callers in a loop, it seemed simple enough
to pass this counter and use atomic_sub() instead of atomic_dec().
This patch adds ceph_iput_n_async() and converts lots of iput() calls
to it - at least those that may come through writeback and the
messenger.
Signed-off-by: Max Kellermann <max.kellermann(a)ionos.com>
Cc: Mateusz Guzik <mjguzik(a)gmail.com>
Cc: stable(a)vger.kernel.org
---
fs/ceph/addr.c | 2 +-
fs/ceph/caps.c | 21 ++++++++++-----------
fs/ceph/dir.c | 2 +-
fs/ceph/inode.c | 42 ++++++++++++++++++++++++++++++++++++++++++
fs/ceph/mds_client.c | 32 ++++++++++++++++----------------
fs/ceph/quota.c | 4 ++--
fs/ceph/snap.c | 10 +++++-----
fs/ceph/super.h | 7 +++++++
8 files changed, 84 insertions(+), 36 deletions(-)
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 322ed268f14a..fc497c91530e 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -265,7 +265,7 @@ static void finish_netfs_read(struct ceph_osd_request *req)
subreq->error = err;
trace_netfs_sreq(subreq, netfs_sreq_trace_io_progress);
netfs_read_subreq_terminated(subreq);
- iput(req->r_inode);
+ ceph_iput_async(req->r_inode);
ceph_dec_osd_stopping_blocker(fsc->mdsc);
}
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index b1a8ff612c41..bd88b5287a2b 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -1771,7 +1771,7 @@ void ceph_flush_snaps(struct ceph_inode_info *ci,
spin_unlock(&mdsc->snap_flush_lock);
if (need_put)
- iput(inode);
+ ceph_iput_async(inode);
}
/*
@@ -3318,8 +3318,8 @@ static void __ceph_put_cap_refs(struct ceph_inode_info *ci, int had,
}
if (wake)
wake_up_all(&ci->i_cap_wq);
- while (put-- > 0)
- iput(inode);
+ if (put > 0)
+ ceph_iput_n_async(inode, put);
}
void ceph_put_cap_refs(struct ceph_inode_info *ci, int had)
@@ -3418,9 +3418,8 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
}
if (complete_capsnap)
wake_up_all(&ci->i_cap_wq);
- while (put-- > 0) {
- iput(inode);
- }
+ if (put > 0)
+ ceph_iput_n_async(inode, put);
}
/*
@@ -3917,7 +3916,7 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid,
if (wake_mdsc)
wake_up_all(&mdsc->cap_flushing_wq);
if (drop)
- iput(inode);
+ ceph_iput_async(inode);
}
void __ceph_remove_capsnap(struct inode *inode, struct ceph_cap_snap *capsnap,
@@ -4008,7 +4007,7 @@ static void handle_cap_flushsnap_ack(struct inode *inode, u64 flush_tid,
wake_up_all(&ci->i_cap_wq);
if (wake_mdsc)
wake_up_all(&mdsc->cap_flushing_wq);
- iput(inode);
+ ceph_iput_async(inode);
}
}
@@ -4557,7 +4556,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
done:
mutex_unlock(&session->s_mutex);
done_unlocked:
- iput(inode);
+ ceph_iput_async(inode);
out:
ceph_dec_mds_stopping_blocker(mdsc);
@@ -4636,7 +4635,7 @@ unsigned long ceph_check_delayed_caps(struct ceph_mds_client *mdsc)
doutc(cl, "on %p %llx.%llx\n", inode,
ceph_vinop(inode));
ceph_check_caps(ci, 0);
- iput(inode);
+ ceph_iput_async(inode);
spin_lock(&mdsc->cap_delay_lock);
}
@@ -4675,7 +4674,7 @@ static void flush_dirty_session_caps(struct ceph_mds_session *s)
spin_unlock(&mdsc->cap_dirty_lock);
ceph_wait_on_async_create(inode);
ceph_check_caps(ci, CHECK_CAPS_FLUSH);
- iput(inode);
+ ceph_iput_async(inode);
spin_lock(&mdsc->cap_dirty_lock);
}
spin_unlock(&mdsc->cap_dirty_lock);
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 32973c62c1a2..ec73ed52a227 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -1290,7 +1290,7 @@ static void ceph_async_unlink_cb(struct ceph_mds_client *mdsc,
ceph_mdsc_free_path_info(&path_info);
}
out:
- iput(req->r_old_inode);
+ ceph_iput_async(req->r_old_inode);
ceph_mdsc_release_dir_caps(req);
}
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index f67025465de0..385d5261632d 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -2191,6 +2191,48 @@ void ceph_queue_inode_work(struct inode *inode, int work_bit)
}
}
+/**
+ * Queue an asynchronous iput() call in a worker thread. Use this
+ * instead of iput() in contexts where evicting the inode is unsafe.
+ * For example, inode eviction may cause deadlocks in
+ * inode_wait_for_writeback() (when called from within writeback) or
+ * in netfs_wait_for_outstanding_io() (when called from within the
+ * Ceph messenger).
+ *
+ * @n: how many references to put
+ */
+void ceph_iput_n_async(struct inode *inode, int n)
+{
+ if (unlikely(!inode))
+ return;
+
+ if (likely(atomic_sub_return(n, &inode->i_count) > 0))
+ /* somebody else is holding another reference -
+ * nothing left to do for us
+ */
+ return;
+
+ doutc(ceph_inode_to_fs_client(inode)->client, "%p %llx.%llx\n", inode, ceph_vinop(inode));
+
+ /* the reference counter is now 0, i.e. nobody else is holding
+ * a reference to this inode; restore it to 1 and donate it to
+ * ceph_inode_work() which will call iput() at the end
+ */
+ atomic_set(&inode->i_count, 1);
+
+ /* simply queue a ceph_inode_work() without setting
+ * i_work_mask bit; other than putting the reference, there is
+ * nothing to do
+ */
+ WARN_ON_ONCE(!queue_work(ceph_inode_to_fs_client(inode)->inode_wq,
+ &ceph_inode(inode)->i_work));
+
+ /* note: queue_work() cannot fail; it i_work were already
+ * queued, then it would be holding another reference, but no
+ * such reference exists
+ */
+}
+
static void ceph_do_invalidate_pages(struct inode *inode)
{
struct ceph_client *cl = ceph_inode_to_client(inode);
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 3bc72b47fe4d..d7fce1ad8073 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -1097,14 +1097,14 @@ void ceph_mdsc_release_request(struct kref *kref)
ceph_msg_put(req->r_reply);
if (req->r_inode) {
ceph_put_cap_refs(ceph_inode(req->r_inode), CEPH_CAP_PIN);
- iput(req->r_inode);
+ ceph_iput_async(req->r_inode);
}
if (req->r_parent) {
ceph_put_cap_refs(ceph_inode(req->r_parent), CEPH_CAP_PIN);
- iput(req->r_parent);
+ ceph_iput_async(req->r_parent);
}
- iput(req->r_target_inode);
- iput(req->r_new_inode);
+ ceph_iput_async(req->r_target_inode);
+ ceph_iput_async(req->r_new_inode);
if (req->r_dentry)
dput(req->r_dentry);
if (req->r_old_dentry)
@@ -1118,7 +1118,7 @@ void ceph_mdsc_release_request(struct kref *kref)
*/
ceph_put_cap_refs(ceph_inode(req->r_old_dentry_dir),
CEPH_CAP_PIN);
- iput(req->r_old_dentry_dir);
+ ceph_iput_async(req->r_old_dentry_dir);
}
kfree(req->r_path1);
kfree(req->r_path2);
@@ -1240,7 +1240,7 @@ static void __unregister_request(struct ceph_mds_client *mdsc,
}
if (req->r_unsafe_dir) {
- iput(req->r_unsafe_dir);
+ ceph_iput_async(req->r_unsafe_dir);
req->r_unsafe_dir = NULL;
}
@@ -1413,7 +1413,7 @@ static int __choose_mds(struct ceph_mds_client *mdsc,
cap = rb_entry(rb_first(&ci->i_caps), struct ceph_cap, ci_node);
if (!cap) {
spin_unlock(&ci->i_ceph_lock);
- iput(inode);
+ ceph_iput_async(inode);
goto random;
}
mds = cap->session->s_mds;
@@ -1422,7 +1422,7 @@ static int __choose_mds(struct ceph_mds_client *mdsc,
cap == ci->i_auth_cap ? "auth " : "", cap);
spin_unlock(&ci->i_ceph_lock);
out:
- iput(inode);
+ ceph_iput_async(inode);
return mds;
random:
@@ -1841,7 +1841,7 @@ int ceph_iterate_session_caps(struct ceph_mds_session *session,
spin_unlock(&session->s_cap_lock);
if (last_inode) {
- iput(last_inode);
+ ceph_iput_async(last_inode);
last_inode = NULL;
}
if (old_cap) {
@@ -1874,7 +1874,7 @@ int ceph_iterate_session_caps(struct ceph_mds_session *session,
session->s_cap_iterator = NULL;
spin_unlock(&session->s_cap_lock);
- iput(last_inode);
+ ceph_iput_async(last_inode);
if (old_cap)
ceph_put_cap(session->s_mdsc, old_cap);
@@ -1903,8 +1903,8 @@ static int remove_session_caps_cb(struct inode *inode, int mds, void *arg)
wake_up_all(&ci->i_cap_wq);
if (invalidate)
ceph_queue_invalidate(inode);
- while (iputs--)
- iput(inode);
+ if (iputs > 0)
+ ceph_iput_n_async(inode, iputs);
return 0;
}
@@ -1944,7 +1944,7 @@ static void remove_session_caps(struct ceph_mds_session *session)
spin_unlock(&session->s_cap_lock);
inode = ceph_find_inode(sb, vino);
- iput(inode);
+ ceph_iput_async(inode);
spin_lock(&session->s_cap_lock);
}
@@ -2512,7 +2512,7 @@ static void ceph_cap_unlink_work(struct work_struct *work)
doutc(cl, "on %p %llx.%llx\n", inode,
ceph_vinop(inode));
ceph_check_caps(ci, CHECK_CAPS_FLUSH);
- iput(inode);
+ ceph_iput_async(inode);
spin_lock(&mdsc->cap_delay_lock);
}
}
@@ -3933,7 +3933,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
!req->r_reply_info.has_create_ino) {
/* This should never happen on an async create */
WARN_ON_ONCE(req->r_deleg_ino);
- iput(in);
+ ceph_iput_async(in);
in = NULL;
}
@@ -5313,7 +5313,7 @@ static void handle_lease(struct ceph_mds_client *mdsc,
out:
mutex_unlock(&session->s_mutex);
- iput(inode);
+ ceph_iput_async(inode);
ceph_dec_mds_stopping_blocker(mdsc);
return;
diff --git a/fs/ceph/quota.c b/fs/ceph/quota.c
index d90eda19bcc4..bba00f8926e6 100644
--- a/fs/ceph/quota.c
+++ b/fs/ceph/quota.c
@@ -76,7 +76,7 @@ void ceph_handle_quota(struct ceph_mds_client *mdsc,
le64_to_cpu(h->max_files));
spin_unlock(&ci->i_ceph_lock);
- iput(inode);
+ ceph_iput_async(inode);
out:
ceph_dec_mds_stopping_blocker(mdsc);
}
@@ -190,7 +190,7 @@ void ceph_cleanup_quotarealms_inodes(struct ceph_mds_client *mdsc)
node = rb_first(&mdsc->quotarealms_inodes);
qri = rb_entry(node, struct ceph_quotarealm_inode, node);
rb_erase(node, &mdsc->quotarealms_inodes);
- iput(qri->inode);
+ ceph_iput_async(qri->inode);
kfree(qri);
}
mutex_unlock(&mdsc->quotarealms_inodes_mutex);
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index c65f2b202b2b..19f097e79b3c 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -735,7 +735,7 @@ static void queue_realm_cap_snaps(struct ceph_mds_client *mdsc,
if (!inode)
continue;
spin_unlock(&realm->inodes_with_caps_lock);
- iput(lastinode);
+ ceph_iput_async(lastinode);
lastinode = inode;
/*
@@ -762,7 +762,7 @@ static void queue_realm_cap_snaps(struct ceph_mds_client *mdsc,
spin_lock(&realm->inodes_with_caps_lock);
}
spin_unlock(&realm->inodes_with_caps_lock);
- iput(lastinode);
+ ceph_iput_async(lastinode);
if (capsnap)
kmem_cache_free(ceph_cap_snap_cachep, capsnap);
@@ -955,7 +955,7 @@ static void flush_snaps(struct ceph_mds_client *mdsc)
ihold(inode);
spin_unlock(&mdsc->snap_flush_lock);
ceph_flush_snaps(ci, &session);
- iput(inode);
+ ceph_iput_async(inode);
spin_lock(&mdsc->snap_flush_lock);
}
spin_unlock(&mdsc->snap_flush_lock);
@@ -1116,12 +1116,12 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc,
ceph_get_snap_realm(mdsc, realm);
ceph_change_snap_realm(inode, realm);
spin_unlock(&ci->i_ceph_lock);
- iput(inode);
+ ceph_iput_async(inode);
continue;
skip_inode:
spin_unlock(&ci->i_ceph_lock);
- iput(inode);
+ ceph_iput_async(inode);
}
/* we may have taken some of the old realm's children. */
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index cf176aab0f82..15c09b6c94aa 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -1085,6 +1085,13 @@ static inline void ceph_queue_flush_snaps(struct inode *inode)
ceph_queue_inode_work(inode, CEPH_I_WORK_FLUSH_SNAPS);
}
+void ceph_iput_n_async(struct inode *inode, int n);
+
+static inline void ceph_iput_async(struct inode *inode)
+{
+ ceph_iput_n_async(inode, 1);
+}
+
extern int ceph_try_to_choose_auth_mds(struct inode *inode, int mask);
extern int __ceph_do_getattr(struct inode *inode, struct page *locked_page,
int mask, bool force);
--
2.47.3
From: Kan Liang <kan.liang(a)linux.intel.com>
[ Upstream commit b0823d5fbacb1c551d793cbfe7af24e0d1fa45ed ]
The perf_fuzzer found a hard-lockup crash on a RaptorLake machine:
Oops: general protection fault, maybe for address 0xffff89aeceab400: 0000
CPU: 23 UID: 0 PID: 0 Comm: swapper/23
Tainted: [W]=WARN
Hardware name: Dell Inc. Precision 9660/0VJ762
RIP: 0010:native_read_pmc+0x7/0x40
Code: cc e8 8d a9 01 00 48 89 03 5b cd cc cc cc cc 0f 1f ...
RSP: 000:fffb03100273de8 EFLAGS: 00010046
....
Call Trace:
<TASK>
icl_update_topdown_event+0x165/0x190
? ktime_get+0x38/0xd0
intel_pmu_read_event+0xf9/0x210
__perf_event_read+0xf9/0x210
CPUs 16-23 are E-core CPUs that don't support the perf metrics feature.
The icl_update_topdown_event() should not be invoked on these CPUs.
It's a regression of commit:
f9bdf1f95339 ("perf/x86/intel: Avoid disable PMU if !cpuc->enabled in sample read")
The bug introduced by that commit is that the is_topdown_event() function
is mistakenly used to replace the is_topdown_count() call to check if the
topdown functions for the perf metrics feature should be invoked.
Fix it.
Fixes: f9bdf1f95339 ("perf/x86/intel: Avoid disable PMU if !cpuc->enabled in sample read")
Closes: https://lore.kernel.org/lkml/352f0709-f026-cd45-e60c-60dfd97f73f3@maine.edu/
Reported-by: Vince Weaver <vincent.weaver(a)maine.edu>
Signed-off-by: Kan Liang <kan.liang(a)linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz(a)infradead.org>
Signed-off-by: Ingo Molnar <mingo(a)kernel.org>
Tested-by: Vince Weaver <vincent.weaver(a)maine.edu>
Cc: stable(a)vger.kernel.org # v6.15+
Link: https://lore.kernel.org/r/20250612143818.2889040-1-kan.liang@linux.intel.com
[ omitted PEBS check ]
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Signed-off-by: Angel Adetula <angeladetula(a)google.com>
---
arch/x86/events/intel/core.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 5e43d390f7a3..36d8404f406d 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -2793,7 +2793,7 @@ static void intel_pmu_read_event(struct perf_event *event)
if (pmu_enabled)
intel_pmu_disable_all();
- if (is_topdown_event(event))
+ if (is_topdown_count(event))
static_call(intel_pmu_update_topdown_event)(event);
else
intel_pmu_drain_pebs_buffer();
--
2.51.0.470.ga7dc726c21-goog
When PAGEMAP_SCAN ioctl invoked with vec_len = 0 reaches
pagemap_scan_backout_range(), kernel panics with null-ptr-deref:
[ 44.936808] Oops: general protection fault, probably for non-canonical address 0xdffffc0000000000: 0000 [#1] SMP DEBUG_PAGEALLOC KASAN NOPTI
[ 44.937797] KASAN: null-ptr-deref in range [0x0000000000000000-0x0000000000000007]
[ 44.938391] CPU: 1 UID: 0 PID: 2480 Comm: reproducer Not tainted 6.17.0-rc6 #22 PREEMPT(none)
[ 44.939062] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.3-0-ga6ed6b701f0a-prebuilt.qemu.org 04/01/2014
[ 44.939935] RIP: 0010:pagemap_scan_thp_entry.isra.0+0x741/0xa80
<snip registers, unreliable trace>
[ 44.946828] Call Trace:
[ 44.947030] <TASK>
[ 44.949219] pagemap_scan_pmd_entry+0xec/0xfa0
[ 44.952593] walk_pmd_range.isra.0+0x302/0x910
[ 44.954069] walk_pud_range.isra.0+0x419/0x790
[ 44.954427] walk_p4d_range+0x41e/0x620
[ 44.954743] walk_pgd_range+0x31e/0x630
[ 44.955057] __walk_page_range+0x160/0x670
[ 44.956883] walk_page_range_mm+0x408/0x980
[ 44.958677] walk_page_range+0x66/0x90
[ 44.958984] do_pagemap_scan+0x28d/0x9c0
[ 44.961833] do_pagemap_cmd+0x59/0x80
[ 44.962484] __x64_sys_ioctl+0x18d/0x210
[ 44.962804] do_syscall_64+0x5b/0x290
[ 44.963111] entry_SYSCALL_64_after_hwframe+0x76/0x7e
vec_len = 0 in pagemap_scan_init_bounce_buffer() means no buffers are
allocated and p->vec_buf remains set to NULL.
This breaks an assumption made later in pagemap_scan_backout_range(),
that page_region is always allocated for p->vec_buf_index.
Fix it by explicitly checking cur_buf for NULL before dereferencing.
Other sites that might run into same deref-issue are already (directly
or transitively) protected by checking p->vec_buf.
Note:
From PAGEMAP_SCAN man page, it seems vec_len = 0 is valid when no output
is requested and it's only the side effects caller is interested in,
hence it passes check in pagemap_scan_get_args().
This issue was found by syzkaller.
Fixes: 52526ca7fdb9 ("fs/proc/task_mmu: implement IOCTL to get and optionally clear info about PTEs")
Cc: Andrew Morton <akpm(a)linux-foundation.org>
Cc: David Hildenbrand <david(a)redhat.com>
Cc: Vlastimil Babka <vbabka(a)suse.cz>
Cc: Lorenzo Stoakes <lorenzo.stoakes(a)oracle.com>
Cc: Jinjiang Tu <tujinjiang(a)huawei.com>
Cc: Suren Baghdasaryan <surenb(a)google.com>
Cc: Penglei Jiang <superman.xpt(a)gmail.com>
Cc: Mark Brown <broonie(a)kernel.org>
Cc: Baolin Wang <baolin.wang(a)linux.alibaba.com>
Cc: Ryan Roberts <ryan.roberts(a)arm.com>
Cc: Andrei Vagin <avagin(a)gmail.com>
Cc: "Michał Mirosław" <mirq-linux(a)rere.qmqm.pl>
Cc: Stephen Rothwell <sfr(a)canb.auug.org.au>
Cc: Muhammad Usama Anjum <usama.anjum(a)collabora.com>
linux-kernel(a)vger.kernel.org
linux-fsdevel(a)vger.kernel.org
Cc: stable(a)vger.kernel.org
Signed-off-by: Jakub Acs <acsjakub(a)amazon.de>
---
fs/proc/task_mmu.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 29cca0e6d0ff..8c10a8135e74 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -2417,6 +2417,9 @@ static void pagemap_scan_backout_range(struct pagemap_scan_private *p,
{
struct page_region *cur_buf = &p->vec_buf[p->vec_buf_index];
+ if (!cur_buf)
+ return;
+
if (cur_buf->start != addr)
cur_buf->end = addr;
else
--
2.47.3
Amazon Web Services Development Center Germany GmbH
Tamara-Danz-Str. 13
10243 Berlin
Geschaeftsfuehrung: Christian Schlaeger
Eingetragen am Amtsgericht Charlottenburg unter HRB 257764 B
Sitz: Berlin
Ust-ID: DE 365 538 597
According to documentation, the DP PHY on x1e80100 has another clock
called ref.
The current X Elite devices supported upstream work fine without this
clock, because the boot firmware leaves this clock enabled. But we should
not rely on that. Also, when it comes to power management, this clock
needs to be also disabled on suspend. So even though this change breaks
the ABI, it is needed in order to make we disable this clock on runtime
PM, when that is going to be enabled in the driver.
So rework the driver to allow different number of clocks, fix the
dt-bindings schema and add the clock to the DT node as well.
Signed-off-by: Abel Vesa <abel.vesa(a)linaro.org>
---
Changes in v3:
- Use dev_err_probe() on clocks parsing failure.
- Explain why the ABI break is necessary.
- Drop the extra 'clk' suffix from the clock name. So ref instead of
refclk.
- Link to v2: https://lore.kernel.org/r/20250903-phy-qcom-edp-add-missing-refclk-v2-0-d88…
Changes in v2:
- Fix schema by adding the minItems, as suggested by Krzysztof.
- Use devm_clk_bulk_get_all, as suggested by Konrad.
- Rephrase the commit messages to reflect the flexible number of clocks.
- Link to v1: https://lore.kernel.org/r/20250730-phy-qcom-edp-add-missing-refclk-v1-0-6f7…
---
Abel Vesa (3):
dt-bindings: phy: qcom-edp: Add missing clock for X Elite
phy: qcom: edp: Make the number of clocks flexible
arm64: dts: qcom: Add missing TCSR ref clock to the DP PHYs
.../devicetree/bindings/phy/qcom,edp-phy.yaml | 28 +++++++++++++++++++++-
arch/arm64/boot/dts/qcom/x1e80100.dtsi | 12 ++++++----
drivers/phy/qualcomm/phy-qcom-edp.c | 16 ++++++-------
3 files changed, 43 insertions(+), 13 deletions(-)
---
base-commit: 65dd046ef55861190ecde44c6d9fcde54b9fb77d
change-id: 20250730-phy-qcom-edp-add-missing-refclk-5ab82828f8e7
Best regards,
--
Abel Vesa <abel.vesa(a)linaro.org>
When a first MPTCP connection gets successfully established after a
blackhole period, 'active_disable_times' was supposed to be reset when
this connection was done via any non-loopback interfaces.
Unfortunately, the opposite condition was checked: only reset when the
connection was established via a loopback interface. Fixing this by
simply looking at the opposite.
This is similar to what is done with TCP FastOpen, see
tcp_fastopen_active_disable_ofo_check().
This patch is a follow-up of a previous discussion linked to commit
893c49a78d9f ("mptcp: Use __sk_dst_get() and dst_dev_rcu() in
mptcp_active_enable()."), see [1].
Fixes: 27069e7cb3d1 ("mptcp: disable active MPTCP in case of blackhole")
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/4209a283-8822-47bd-95b7-87e96d9b7ea3@kernel.org [1]
Signed-off-by: Matthieu Baerts (NGI0) <matttbe(a)kernel.org>
---
Cc: Kuniyuki Iwashima <kuniyu(a)google.com>
Note: sending this fix to net-next, similar to commits 108a86c71c93
("mptcp: Call dst_release() in mptcp_active_enable().") and 893c49a78d9f
("mptcp: Use __sk_dst_get() and dst_dev_rcu() in mptcp_active_enable().").
Also to avoid conflicts, and because we are close to the merge windows.
---
net/mptcp/ctrl.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/net/mptcp/ctrl.c b/net/mptcp/ctrl.c
index e8ffa62ec183f3cd8156e3969ac4a7d0213a990b..d96130e49942e2fb878cd1897ad43c1d420fb233 100644
--- a/net/mptcp/ctrl.c
+++ b/net/mptcp/ctrl.c
@@ -507,7 +507,7 @@ void mptcp_active_enable(struct sock *sk)
rcu_read_lock();
dst = __sk_dst_get(sk);
dev = dst ? dst_dev_rcu(dst) : NULL;
- if (dev && (dev->flags & IFF_LOOPBACK))
+ if (!(dev && (dev->flags & IFF_LOOPBACK)))
atomic_set(&pernet->active_disable_times, 0);
rcu_read_unlock();
}
---
base-commit: b127e355f1af1e4a635ed8f78cb0d11c916613cf
change-id: 20250918-net-next-mptcp-blackhole-reset-loopback-d82c518e409f
Best regards,
--
Matthieu Baerts (NGI0) <matttbe(a)kernel.org>
This patch fixes a crash in icl_update_topdown_event().
This fix has already been applied to the 'linux-6.1.y', 'linux-6.6.y', and
'linux-6.15.y' stable trees. This submission is to request application to the
'linux-6.12.y' stable tree, as it appears to be still missing there.
This should also fix kernel bug CVE-2025-38322.
Kan Liang (1):
perf/x86/intel: Fix crash in icl_update_topdown_event()
arch/x86/events/intel/core.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
base-commit: f6cf124428f51e3ef07a8e54c743873face9d2b2
--
2.51.0.470.ga7dc726c21-goog
The patch below does not apply to the 6.16-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.16.y
git checkout FETCH_HEAD
git cherry-pick -x a1b51534b532dd4f0499907865553ee9251bebc3
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025091753-raider-wake-9e9d@gregkh' --subject-prefix 'PATCH 6.16.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From a1b51534b532dd4f0499907865553ee9251bebc3 Mon Sep 17 00:00:00 2001
From: Alex Elder <elder(a)riscstar.com>
Date: Tue, 12 Aug 2025 22:13:35 -0500
Subject: [PATCH] dt-bindings: serial: 8250: allow "main" and "uart" as clock
names
There are two compatible strings defined in "8250.yaml" that require
two clocks to be specified, along with their names:
- "spacemit,k1-uart", used in "spacemit/k1.dtsi"
- "nxp,lpc1850-uart", used in "lpc/lpc18xx.dtsi"
When only one clock is used, the name is not required. However there
are two places that do specify a name:
- In "mediatek/mt7623.dtsi", the clock for the "mediatek,mtk-btif"
compatible serial device is named "main"
- In "qca/ar9132.dtsi", the clock for the "ns8250" compatible
serial device is named "uart"
In commit d2db0d7815444 ("dt-bindings: serial: 8250: allow clock
'uartclk' and 'reg' for nxp,lpc1850-uart"), Frank Li added the
restriction that two named clocks be used for the NXP platform
mentioned above.
Change that logic, so that an additional condition for (only) the
SpacemiT platform similarly restricts the two clocks to have the
names "core" and "bus".
Finally, add "main" and "uart" as allowed names when a single clock is
specified.
Fixes: 2c0594f9f0629 ("dt-bindings: serial: 8250: support an optional second clock")
Cc: stable <stable(a)kernel.org>
Reported-by: kernel test robot <lkp(a)intel.com>
Closes: https://lore.kernel.org/oe-kbuild-all/202507160314.wrC51lXX-lkp@intel.com/
Signed-off-by: Alex Elder <elder(a)riscstar.com>
Acked-by: Conor Dooley <conor.dooley(a)microchip.com>
Link: https://lore.kernel.org/r/20250813031338.2328392-1-elder@riscstar.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/Documentation/devicetree/bindings/serial/8250.yaml b/Documentation/devicetree/bindings/serial/8250.yaml
index f59c0b37e8eb..b243afa69a1a 100644
--- a/Documentation/devicetree/bindings/serial/8250.yaml
+++ b/Documentation/devicetree/bindings/serial/8250.yaml
@@ -59,7 +59,12 @@ allOf:
items:
- const: uartclk
- const: reg
- else:
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: spacemit,k1-uart
+ then:
properties:
clock-names:
items:
@@ -183,6 +188,9 @@ properties:
minItems: 1
maxItems: 2
oneOf:
+ - enum:
+ - main
+ - uart
- items:
- const: core
- const: bus
In register_shm_helper(), fix incorrect error handling for a call to
iov_iter_extract_pages(). A case is missing for when
iov_iter_extract_pages() only got some pages and return a number larger
than 0, but not the requested amount.
This fixes a possible NULL pointer dereference following a bad input from
ioctl(TEE_IOC_SHM_REGISTER) where parts of the buffer isn't mapped.
Cc: stable(a)vger.kernel.org
Reported-by: Masami Ichikawa <masami256(a)gmail.com>
Closes: https://lore.kernel.org/op-tee/CACOXgS-Bo2W72Nj1_44c7bntyNYOavnTjJAvUbEiQfq…
Fixes: 7bdee4157591 ("tee: Use iov_iter to better support shared buffer registration")
Signed-off-by: Jens Wiklander <jens.wiklander(a)linaro.org>
---
drivers/tee/tee_shm.c | 11 ++++++++++-
1 file changed, 10 insertions(+), 1 deletion(-)
diff --git a/drivers/tee/tee_shm.c b/drivers/tee/tee_shm.c
index daf6e5cfd59a..6ed7d030f4ed 100644
--- a/drivers/tee/tee_shm.c
+++ b/drivers/tee/tee_shm.c
@@ -316,7 +316,16 @@ register_shm_helper(struct tee_context *ctx, struct iov_iter *iter, u32 flags,
len = iov_iter_extract_pages(iter, &shm->pages, LONG_MAX, num_pages, 0,
&off);
- if (unlikely(len <= 0)) {
+ if (DIV_ROUND_UP(len + off, PAGE_SIZE) != num_pages) {
+ if (len > 0) {
+ /*
+ * If we only got a few pages, update to release
+ * the correct amount below.
+ */
+ shm->num_pages = len / PAGE_SIZE;
+ ret = ERR_PTR(-ENOMEM);
+ goto err_put_shm_pages;
+ }
ret = len ? ERR_PTR(len) : ERR_PTR(-ENOMEM);
goto err_free_shm_pages;
}
--
2.43.0
This series backports seven commits from v5.15.y that update minmax.h
and related code:
- ed6e37e30826 ("tracing: Define the is_signed_type() macro once")
- 998f03984e25 ("minmax: sanity check constant bounds when clamping")
- d470787b25e6 ("minmax: clamp more efficiently by avoiding extra
comparison")
- 1c2ee5bc9f11 ("minmax: fix header inclusions")
- d53b5d862acd ("minmax: allow min()/max()/clamp() if the arguments
have the same signedness.")
- 7ed91c5560df ("minmax: allow comparisons of 'int' against 'unsigned
char/short'")
- 22f7794ef5a3 ("minmax: relax check to allow comparison between
unsigned arguments and signed constants")
The main motivation is commit d53b5d862acd, which removes the strict
type check in min()/max() when both arguments have the same signedness.
Without this, kernel 5.10 builds can emit warnings that become build
failures when -Werror is used.
Additionally, commit ed6e37e30826 from tracing is required as a
dependency; without it, compilation fails.
Andy Shevchenko (1):
minmax: fix header inclusions
Bart Van Assche (1):
tracing: Define the is_signed_type() macro once
David Laight (3):
minmax: allow min()/max()/clamp() if the arguments have the same
signedness.
minmax: allow comparisons of 'int' against 'unsigned char/short'
minmax: relax check to allow comparison between unsigned arguments and
signed constants
Jason A. Donenfeld (2):
minmax: sanity check constant bounds when clamping
minmax: clamp more efficiently by avoiding extra comparison
include/linux/compiler.h | 6 +++
include/linux/minmax.h | 89 ++++++++++++++++++++++++++----------
include/linux/overflow.h | 1 -
include/linux/trace_events.h | 2 -
4 files changed, 70 insertions(+), 28 deletions(-)
--
2.47.3
There are some AA deadlock issues in kmemleak, similar to the situation
reported by Breno [1]. The deadlock path is as follows:
mem_pool_alloc()
-> raw_spin_lock_irqsave(&kmemleak_lock, flags);
-> pr_warn()
-> netconsole subsystem
-> netpoll
-> __alloc_skb
-> __create_object
-> raw_spin_lock_irqsave(&kmemleak_lock, flags);
To solve this problem, switch to printk_safe mode before printing warning
message, this will redirect all printk()-s to a special per-CPU buffer,
which will be flushed later from a safe context (irq work), and this
deadlock problem can be avoided. The proper API to use should be
printk_deferred_enter()/printk_deferred_exit() [2]. Another way is to
place the warn print after kmemleak is released.
[1]
https://lore.kernel.org/all/20250731-kmemleak_lock-v1-1-728fd470198f@debian…
[2]
https://lore.kernel.org/all/5ca375cd-4a20-4807-b897-68b289626550@redhat.com/
====================
Signed-off-by: Gu Bowen <gubowen5(a)huawei.com>
---
mm/kmemleak.c | 27 ++++++++++++++++++++-------
1 file changed, 20 insertions(+), 7 deletions(-)
diff --git a/mm/kmemleak.c b/mm/kmemleak.c
index 84265983f239..1ac56ceb29b6 100644
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
@@ -437,9 +437,15 @@ static struct kmemleak_object *__lookup_object(unsigned long ptr, int alias,
else if (untagged_objp == untagged_ptr || alias)
return object;
else {
+ /*
+ * Printk deferring due to the kmemleak_lock held.
+ * This is done to avoid deadlock.
+ */
+ printk_deferred_enter();
kmemleak_warn("Found object by alias at 0x%08lx\n",
ptr);
dump_object_info(object);
+ printk_deferred_exit();
break;
}
}
@@ -736,6 +742,11 @@ static int __link_object(struct kmemleak_object *object, unsigned long ptr,
else if (untagged_objp + parent->size <= untagged_ptr)
link = &parent->rb_node.rb_right;
else {
+ /*
+ * Printk deferring due to the kmemleak_lock held.
+ * This is done to avoid deadlock.
+ */
+ printk_deferred_enter();
kmemleak_stop("Cannot insert 0x%lx into the object search tree (overlaps existing)\n",
ptr);
/*
@@ -743,6 +754,7 @@ static int __link_object(struct kmemleak_object *object, unsigned long ptr,
* be freed while the kmemleak_lock is held.
*/
dump_object_info(parent);
+ printk_deferred_exit();
return -EEXIST;
}
}
@@ -856,13 +868,8 @@ static void delete_object_part(unsigned long ptr, size_t size,
raw_spin_lock_irqsave(&kmemleak_lock, flags);
object = __find_and_remove_object(ptr, 1, objflags);
- if (!object) {
-#ifdef DEBUG
- kmemleak_warn("Partially freeing unknown object at 0x%08lx (size %zu)\n",
- ptr, size);
-#endif
+ if (!object)
goto unlock;
- }
/*
* Create one or two objects that may result from the memory block
@@ -882,8 +889,14 @@ static void delete_object_part(unsigned long ptr, size_t size,
unlock:
raw_spin_unlock_irqrestore(&kmemleak_lock, flags);
- if (object)
+ if (object) {
__delete_object(object);
+ } else {
+#ifdef DEBUG
+ kmemleak_warn("Partially freeing unknown object at 0x%08lx (size %zu)\n",
+ ptr, size);
+#endif
+ }
out:
if (object_l)
--
2.43.0
Hi Stable,
Please provide a quote for your products:
Include:
1.Pricing (per unit)
2.Delivery cost & timeline
3.Quote expiry date
Deadline: September
Thanks!
Kamal Prasad
Albinayah Trading
Hi,
We’re offering verified business contact data for the upcoming Fruit Attraction 2025 (FA), tailored for effective outreach before and after the event.
Place: Madrid, Spain
Date:SEP 30 - OCT 02, 2025
Contact Overview:
1,01,351 Attendees
2,179 Exhibiting Companies
6,537 Verified Exhibitor Contacts
Total: 107,885 Business Contacts
Each entry includes: Name, Job Title, Company, Website, Address, Phone, Official Email, LinkedIn Profile, and more.
Get your list in just 48 hours—100% GDPR-compliant Data.
If you'd like more details, just reply: “Send me pricing”
Best regards,
Juanita Garcia
Sr. Marketing Manager
To opt out reply “Not Interested.”
From: Alexander Sverdlin <alexander.sverdlin(a)siemens.com>
KCSAN reports:
BUG: KCSAN: data-race in do_raw_write_lock / do_raw_write_lock
write (marked) to 0xffff800009cf504c of 4 bytes by task 1102 on cpu 1:
do_raw_write_lock+0x120/0x204
_raw_write_lock_irq
do_exit
call_usermodehelper_exec_async
ret_from_fork
read to 0xffff800009cf504c of 4 bytes by task 1103 on cpu 0:
do_raw_write_lock+0x88/0x204
_raw_write_lock_irq
do_exit
call_usermodehelper_exec_async
ret_from_fork
value changed: 0xffffffff -> 0x00000001
Reported by Kernel Concurrency Sanitizer on:
CPU: 0 PID: 1103 Comm: kworker/u4:1 6.1.111
Commit 1a365e822372 ("locking/spinlock/debug: Fix various data races") has
adressed most of these races, but seems to be not consistent/not complete.
From do_raw_write_lock() only debug_write_lock_after() part has been
converted to WRITE_ONCE(), but not debug_write_lock_before() part.
Do it now.
Cc: stable(a)vger.kernel.org
Fixes: 1a365e822372 ("locking/spinlock/debug: Fix various data races")
Reported-by: Adrian Freihofer <adrian.freihofer(a)siemens.com>
Acked-by: Waiman Long <longman(a)redhat.com>
Signed-off-by: Alexander Sverdlin <alexander.sverdlin(a)siemens.com>
Reviewed-by: Paul E. McKenney <paulmck(a)kernel.org>
Signed-off-by: Boqun Feng <boqun.feng(a)gmail.com>
---
Notes:
SubmissionLink: https://lore.kernel.org/all/20250826102731.52507-1-alexander.sverdlin@sieme…
kernel/locking/spinlock_debug.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/kernel/locking/spinlock_debug.c b/kernel/locking/spinlock_debug.c
index 87b03d2e41db..2338b3adfb55 100644
--- a/kernel/locking/spinlock_debug.c
+++ b/kernel/locking/spinlock_debug.c
@@ -184,8 +184,8 @@ void do_raw_read_unlock(rwlock_t *lock)
static inline void debug_write_lock_before(rwlock_t *lock)
{
RWLOCK_BUG_ON(lock->magic != RWLOCK_MAGIC, lock, "bad magic");
- RWLOCK_BUG_ON(lock->owner == current, lock, "recursion");
- RWLOCK_BUG_ON(lock->owner_cpu == raw_smp_processor_id(),
+ RWLOCK_BUG_ON(READ_ONCE(lock->owner) == current, lock, "recursion");
+ RWLOCK_BUG_ON(READ_ONCE(lock->owner_cpu) == raw_smp_processor_id(),
lock, "cpu recursion");
}
--
2.51.0
Within two-step API update let's provide 2 new MBX operations:
1) request PF's link state (speed & up/down) - as legacy approach became
obsolete for new E610 adapter and link state data can't be correctly
provided - increasing API to 1.6
2) ask PF about supported features - for some time there is quite a mess in
negotiating API versions caused by too loose approach in adding new
specific (not supported by all of the drivers capable of linking with
ixgbevf) feature and corresponding API versions. Now list of supported
features is provided by MBX operation - increasing API to 1.7
Jedrzej Jagielski (4):
ixgbevf: fix getting link speed data for E610 devices
ixgbe: handle IXGBE_VF_GET_PF_LINK_STATE mailbox operation
ixgbevf: fix mailbox API compatibility by negotiating supported
features
ixgbe: handle IXGBE_VF_FEATURES_NEGOTIATE mbox cmd
drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h | 15 ++
.../net/ethernet/intel/ixgbe/ixgbe_sriov.c | 79 ++++++++
drivers/net/ethernet/intel/ixgbevf/defines.h | 1 +
drivers/net/ethernet/intel/ixgbevf/ipsec.c | 10 +
drivers/net/ethernet/intel/ixgbevf/ixgbevf.h | 7 +
.../net/ethernet/intel/ixgbevf/ixgbevf_main.c | 34 +++-
drivers/net/ethernet/intel/ixgbevf/mbx.h | 8 +
drivers/net/ethernet/intel/ixgbevf/vf.c | 182 +++++++++++++++---
drivers/net/ethernet/intel/ixgbevf/vf.h | 1 +
9 files changed, 304 insertions(+), 33 deletions(-)
--
2.31.1
Hi Luca Weiss and Tamura Dai,
On 9/12/25 02:24, Luca Weiss wrote:
> Hi Tamura,
>
> On Fri Sep 12, 2025 at 9:01 AM CEST, Tamura Dai wrote:
>> The bug is a typo in the compatible string for the touchscreen node.
>> According to Documentation/devicetree/bindings/input/touchscreen/edt-ft5x06.yaml,
>> the correct compatible is "focaltech,ft8719", but the device tree used
>> "focaltech,fts8719".
>
> +Joel
>
> I don't think this patch is really correct, in the sdm845-mainline fork
> there's a different commit which has some more changes to make the
> touchscreen work:
>
> https://gitlab.com/sdm845-mainline/linux/-/commit/2ca76ac2e046158814b043fd4…
Yes, this patch is not correct. My commit from the gitlab repo is the
correct one. But I personally don't have the shiftmq6 device to smoke
test before sending the patch. That's why I was hesitant to send it
upstream. I have now requested someone to confirm if the touchscreen
works with my gitlab commit. If if its all good, I will send the correct
patch later.
Regards,
Joel
Commit 67a873df0c41 ("vhost: basic in order support") pass the number
of used elem to vhost_net_rx_peek_head_len() to make sure it can
signal the used correctly before trying to do busy polling. But it
forgets to clear the count, this would cause the count run out of sync
with handle_rx() and break the busy polling.
Fixing this by passing the pointer of the count and clearing it after
the signaling the used.
Acked-by: Michael S. Tsirkin <mst(a)redhat.com>
Cc: stable(a)vger.kernel.org
Fixes: 67a873df0c41 ("vhost: basic in order support")
Signed-off-by: Jason Wang <jasowang(a)redhat.com>
---
drivers/vhost/net.c | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index c6508fe0d5c8..16e39f3ab956 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -1014,7 +1014,7 @@ static int peek_head_len(struct vhost_net_virtqueue *rvq, struct sock *sk)
}
static int vhost_net_rx_peek_head_len(struct vhost_net *net, struct sock *sk,
- bool *busyloop_intr, unsigned int count)
+ bool *busyloop_intr, unsigned int *count)
{
struct vhost_net_virtqueue *rnvq = &net->vqs[VHOST_NET_VQ_RX];
struct vhost_net_virtqueue *tnvq = &net->vqs[VHOST_NET_VQ_TX];
@@ -1024,7 +1024,8 @@ static int vhost_net_rx_peek_head_len(struct vhost_net *net, struct sock *sk,
if (!len && rvq->busyloop_timeout) {
/* Flush batched heads first */
- vhost_net_signal_used(rnvq, count);
+ vhost_net_signal_used(rnvq, *count);
+ *count = 0;
/* Both tx vq and rx socket were polled here */
vhost_net_busy_poll(net, rvq, tvq, busyloop_intr, true);
@@ -1180,7 +1181,7 @@ static void handle_rx(struct vhost_net *net)
do {
sock_len = vhost_net_rx_peek_head_len(net, sock->sk,
- &busyloop_intr, count);
+ &busyloop_intr, &count);
if (!sock_len)
break;
sock_len += sock_hlen;
--
2.34.1
The specification, Section 7.10, "Software Steps to Drain Page Requests &
Responses," requires software to submit an Invalidation Wait Descriptor
(inv_wait_dsc) with the Page-request Drain (PD=1) flag set, along with
the Invalidation Wait Completion Status Write flag (SW=1). It then waits
for the Invalidation Wait Descriptor's completion.
However, the PD field in the Invalidation Wait Descriptor is optional, as
stated in Section 6.5.2.9, "Invalidation Wait Descriptor":
"Page-request Drain (PD): Remapping hardware implementations reporting
Page-request draining as not supported (PDS = 0 in ECAP_REG) treat this
field as reserved."
This implies that if the IOMMU doesn't support the PDS capability, software
can't drain page requests and group responses as expected.
Do not enable PCI/PRI if the IOMMU doesn't support PDS.
Reported-by: Joel Granados <joel.granados(a)kernel.org>
Closes: https://lore.kernel.org/r/20250909-jag-pds-v1-1-ad8cba0e494e@kernel.org
Fixes: 66ac4db36f4c ("iommu/vt-d: Add page request draining support")
Cc: stable(a)vger.kernel.org
Signed-off-by: Lu Baolu <baolu.lu(a)linux.intel.com>
---
drivers/iommu/intel/iommu.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 9c3ab9d9f69a..92759a8f8330 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -3812,7 +3812,7 @@ static struct iommu_device *intel_iommu_probe_device(struct device *dev)
}
if (info->ats_supported && ecap_prs(iommu->ecap) &&
- pci_pri_supported(pdev))
+ ecap_pds(iommu->ecap) && pci_pri_supported(pdev))
info->pri_supported = 1;
}
}
--
2.43.0
In order to set the AMCR register, which configures the
memory-region split between ospi1 and ospi2, we need to
identify the ospi instance.
By using memory-region-names, it allows to identify the
ospi instance this memory-region belongs to.
Fixes: cad2492de91c ("arm64: dts: st: Add SPI NOR flash support on stm32mp257f-ev1 board")
Cc: stable(a)vger.kernel.org
Signed-off-by: Patrice Chotard <patrice.chotard(a)foss.st.com>
---
Changes in v3:
- Set again "Cc: <stable(a)vger.kernel.org>"
- Link to v2: https://lore.kernel.org/r/20250811-upstream_fix_dts_omm-v2-1-00ff55076bd5@f…
Changes in v2:
- Update commit message.
- Use correct memory-region-names value.
- Remove "Cc: <stable(a)vger.kernel.org>" tag as the fixed patch is not part of a LTS.
- Link to v1: https://lore.kernel.org/r/20250806-upstream_fix_dts_omm-v1-1-e68c15ed422d@f…
---
arch/arm64/boot/dts/st/stm32mp257f-ev1.dts | 1 +
1 file changed, 1 insertion(+)
diff --git a/arch/arm64/boot/dts/st/stm32mp257f-ev1.dts b/arch/arm64/boot/dts/st/stm32mp257f-ev1.dts
index 2f561ad4066544445e93db78557bc4be1c27095a..7bd8433c1b4344bb5d58193a5e6314f9ae89e0a4 100644
--- a/arch/arm64/boot/dts/st/stm32mp257f-ev1.dts
+++ b/arch/arm64/boot/dts/st/stm32mp257f-ev1.dts
@@ -197,6 +197,7 @@ &i2c8 {
&ommanager {
memory-region = <&mm_ospi1>;
+ memory-region-names = "ospi1";
pinctrl-0 = <&ospi_port1_clk_pins_a
&ospi_port1_io03_pins_a
&ospi_port1_cs0_pins_a>;
---
base-commit: 038d61fd642278bab63ee8ef722c50d10ab01e8f
change-id: 20250806-upstream_fix_dts_omm-c006b69042f1
Best regards,
--
Patrice Chotard <patrice.chotard(a)foss.st.com>
In the IOMMU Shared Virtual Addressing (SVA) context, the IOMMU hardware
shares and walks the CPU's page tables. The x86 architecture maps the
kernel's virtual address space into the upper portion of every process's
page table. Consequently, in an SVA context, the IOMMU hardware can walk
and cache kernel page table entries.
The Linux kernel currently lacks a notification mechanism for kernel page
table changes, specifically when page table pages are freed and reused.
The IOMMU driver is only notified of changes to user virtual address
mappings. This can cause the IOMMU's internal caches to retain stale
entries for kernel VA.
A Use-After-Free (UAF) and Write-After-Free (WAF) condition arises when
kernel page table pages are freed and later reallocated. The IOMMU could
misinterpret the new data as valid page table entries. The IOMMU might
then walk into attacker-controlled memory, leading to arbitrary physical
memory DMA access or privilege escalation. This is also a Write-After-Free
issue, as the IOMMU will potentially continue to write Accessed and Dirty
bits to the freed memory while attempting to walk the stale page tables.
Currently, SVA contexts are unprivileged and cannot access kernel
mappings. However, the IOMMU will still walk kernel-only page tables
all the way down to the leaf entries, where it realizes the mapping
is for the kernel and errors out. This means the IOMMU still caches
these intermediate page table entries, making the described vulnerability
a real concern.
To mitigate this, a new IOMMU interface is introduced to flush IOTLB
entries for the kernel address space. This interface is invoked from the
x86 architecture code that manages combined user and kernel page tables,
specifically before any kernel page table page is freed and reused.
This addresses the main issue with vfree() which is a common occurrence
and can be triggered by unprivileged users. While this resolves the
primary problem, it doesn't address some extremely rare case related to
memory unplug of memory that was present as reserved memory at boot,
which cannot be triggered by unprivileged users. The discussion can be
found at the link below.
Fixes: 26b25a2b98e4 ("iommu: Bind process address spaces to devices")
Cc: stable(a)vger.kernel.org
Suggested-by: Jann Horn <jannh(a)google.com>
Co-developed-by: Jason Gunthorpe <jgg(a)nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg(a)nvidia.com>
Signed-off-by: Lu Baolu <baolu.lu(a)linux.intel.com>
Reviewed-by: Jason Gunthorpe <jgg(a)nvidia.com>
Reviewed-by: Vasant Hegde <vasant.hegde(a)amd.com>
Reviewed-by: Kevin Tian <kevin.tian(a)intel.com>
Link: https://lore.kernel.org/linux-iommu/04983c62-3b1d-40d4-93ae-34ca04b827e5@in…
---
drivers/iommu/iommu-sva.c | 29 ++++++++++++++++++++++++++++-
include/linux/iommu.h | 4 ++++
mm/pgtable-generic.c | 2 ++
3 files changed, 34 insertions(+), 1 deletion(-)
diff --git a/drivers/iommu/iommu-sva.c b/drivers/iommu/iommu-sva.c
index 1a51cfd82808..d236aef80a8d 100644
--- a/drivers/iommu/iommu-sva.c
+++ b/drivers/iommu/iommu-sva.c
@@ -10,6 +10,8 @@
#include "iommu-priv.h"
static DEFINE_MUTEX(iommu_sva_lock);
+static bool iommu_sva_present;
+static LIST_HEAD(iommu_sva_mms);
static struct iommu_domain *iommu_sva_domain_alloc(struct device *dev,
struct mm_struct *mm);
@@ -42,6 +44,7 @@ static struct iommu_mm_data *iommu_alloc_mm_data(struct mm_struct *mm, struct de
return ERR_PTR(-ENOSPC);
}
iommu_mm->pasid = pasid;
+ iommu_mm->mm = mm;
INIT_LIST_HEAD(&iommu_mm->sva_domains);
/*
* Make sure the write to mm->iommu_mm is not reordered in front of
@@ -132,8 +135,13 @@ struct iommu_sva *iommu_sva_bind_device(struct device *dev, struct mm_struct *mm
if (ret)
goto out_free_domain;
domain->users = 1;
- list_add(&domain->next, &mm->iommu_mm->sva_domains);
+ if (list_empty(&iommu_mm->sva_domains)) {
+ if (list_empty(&iommu_sva_mms))
+ iommu_sva_present = true;
+ list_add(&iommu_mm->mm_list_elm, &iommu_sva_mms);
+ }
+ list_add(&domain->next, &iommu_mm->sva_domains);
out:
refcount_set(&handle->users, 1);
mutex_unlock(&iommu_sva_lock);
@@ -175,6 +183,13 @@ void iommu_sva_unbind_device(struct iommu_sva *handle)
list_del(&domain->next);
iommu_domain_free(domain);
}
+
+ if (list_empty(&iommu_mm->sva_domains)) {
+ list_del(&iommu_mm->mm_list_elm);
+ if (list_empty(&iommu_sva_mms))
+ iommu_sva_present = false;
+ }
+
mutex_unlock(&iommu_sva_lock);
kfree(handle);
}
@@ -312,3 +327,15 @@ static struct iommu_domain *iommu_sva_domain_alloc(struct device *dev,
return domain;
}
+
+void iommu_sva_invalidate_kva_range(unsigned long start, unsigned long end)
+{
+ struct iommu_mm_data *iommu_mm;
+
+ guard(mutex)(&iommu_sva_lock);
+ if (!iommu_sva_present)
+ return;
+
+ list_for_each_entry(iommu_mm, &iommu_sva_mms, mm_list_elm)
+ mmu_notifier_arch_invalidate_secondary_tlbs(iommu_mm->mm, start, end);
+}
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index c30d12e16473..66e4abb2df0d 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -1134,7 +1134,9 @@ struct iommu_sva {
struct iommu_mm_data {
u32 pasid;
+ struct mm_struct *mm;
struct list_head sva_domains;
+ struct list_head mm_list_elm;
};
int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode);
@@ -1615,6 +1617,7 @@ struct iommu_sva *iommu_sva_bind_device(struct device *dev,
struct mm_struct *mm);
void iommu_sva_unbind_device(struct iommu_sva *handle);
u32 iommu_sva_get_pasid(struct iommu_sva *handle);
+void iommu_sva_invalidate_kva_range(unsigned long start, unsigned long end);
#else
static inline struct iommu_sva *
iommu_sva_bind_device(struct device *dev, struct mm_struct *mm)
@@ -1639,6 +1642,7 @@ static inline u32 mm_get_enqcmd_pasid(struct mm_struct *mm)
}
static inline void mm_pasid_drop(struct mm_struct *mm) {}
+static inline void iommu_sva_invalidate_kva_range(unsigned long start, unsigned long end) {}
#endif /* CONFIG_IOMMU_SVA */
#ifdef CONFIG_IOMMU_IOPF
diff --git a/mm/pgtable-generic.c b/mm/pgtable-generic.c
index 0279399d4910..2717dc9afff0 100644
--- a/mm/pgtable-generic.c
+++ b/mm/pgtable-generic.c
@@ -13,6 +13,7 @@
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/mm_inline.h>
+#include <linux/iommu.h>
#include <asm/pgalloc.h>
#include <asm/tlb.h>
@@ -430,6 +431,7 @@ static void kernel_pgtable_work_func(struct work_struct *work)
list_splice_tail_init(&kernel_pgtable_work.list, &page_list);
spin_unlock(&kernel_pgtable_work.lock);
+ iommu_sva_invalidate_kva_range(PAGE_OFFSET, TLB_FLUSH_ALL);
list_for_each_entry_safe(pt, next, &page_list, pt_list)
__pagetable_free(pt);
}
--
2.43.0
When do_task() exhausts its iteration budget (!ret), it sets the state
to TASK_STATE_IDLE to reschedule, without a secondary check on the
current task->state. This can overwrite the TASK_STATE_DRAINING state
set by a concurrent call to rxe_cleanup_task() or rxe_disable_task().
While state changes are protected by a spinlock, both rxe_cleanup_task()
and rxe_disable_task() release the lock while waiting for the task to
finish draining in the while(!is_done(task)) loop. The race occurs if
do_task() hits its iteration limit and acquires the lock in this window.
The cleanup logic may then proceed while the task incorrectly
reschedules itself, leading to a potential use-after-free.
This bug was introduced during the migration from tasklets to workqueues,
where the special handling for the draining case was lost.
Fix this by restoring the original pre-migration behavior. If the state is
TASK_STATE_DRAINING when iterations are exhausted, set cont to 1 to
force a new loop iteration. This allows the task to finish its work, so
that a subsequent iteration can reach the switch statement and correctly
transition the state to TASK_STATE_DRAINED, stopping the task as intended.
Fixes: 9b4b7c1f9f54 ("RDMA/rxe: Add workqueue support for rxe tasks")
Cc: stable(a)vger.kernel.org
Reviewed-by: Zhu Yanjun <yanjun.zhu(a)linux.dev>
Signed-off-by: Gui-Dong Han <hanguidong02(a)gmail.com>
---
v2:
* Rewrite commit message for clarity. Thanks to Zhu Yanjun for the review.
---
drivers/infiniband/sw/rxe/rxe_task.c | 8 ++++++--
1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/drivers/infiniband/sw/rxe/rxe_task.c b/drivers/infiniband/sw/rxe/rxe_task.c
index 6f8f353e9583..f522820b950c 100644
--- a/drivers/infiniband/sw/rxe/rxe_task.c
+++ b/drivers/infiniband/sw/rxe/rxe_task.c
@@ -132,8 +132,12 @@ static void do_task(struct rxe_task *task)
* yield the cpu and reschedule the task
*/
if (!ret) {
- task->state = TASK_STATE_IDLE;
- resched = 1;
+ if (task->state != TASK_STATE_DRAINING) {
+ task->state = TASK_STATE_IDLE;
+ resched = 1;
+ } else {
+ cont = 1;
+ }
goto exit;
}
--
2.25.1
Hi Stable,
Please provide a quote for your products:
Include:
1.Pricing (per unit)
2.Delivery cost & timeline
3.Quote expiry date
Deadline: September
Thanks!
Kamal Prasad
Albinayah Trading
When do_task() exhausts its RXE_MAX_ITERATIONS budget, it unconditionally
sets the task state to TASK_STATE_IDLE to reschedule. This overwrites
the TASK_STATE_DRAINING state that may have been concurrently set by
rxe_cleanup_task() or rxe_disable_task().
This race condition breaks the cleanup and disable logic, which expects
the task to stop processing new work. The cleanup code may proceed while
do_task() reschedules itself, leading to a potential use-after-free.
This bug was introduced during the migration from tasklets to workqueues,
where the special handling for the draining case was lost.
Fix this by restoring the original behavior. If the state is
TASK_STATE_DRAINING when iterations are exhausted, continue the loop by
setting cont to 1. This allows new iterations to finish the remaining
work and reach the switch statement, which properly transitions the
state to TASK_STATE_DRAINED and stops the task as intended.
Fixes: 9b4b7c1f9f54 ("RDMA/rxe: Add workqueue support for rxe tasks")
Cc: stable(a)vger.kernel.org
Signed-off-by: Gui-Dong Han <hanguidong02(a)gmail.com>
---
drivers/infiniband/sw/rxe/rxe_task.c | 8 ++++++--
1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/drivers/infiniband/sw/rxe/rxe_task.c b/drivers/infiniband/sw/rxe/rxe_task.c
index 6f8f353e9583..f522820b950c 100644
--- a/drivers/infiniband/sw/rxe/rxe_task.c
+++ b/drivers/infiniband/sw/rxe/rxe_task.c
@@ -132,8 +132,12 @@ static void do_task(struct rxe_task *task)
* yield the cpu and reschedule the task
*/
if (!ret) {
- task->state = TASK_STATE_IDLE;
- resched = 1;
+ if (task->state != TASK_STATE_DRAINING) {
+ task->state = TASK_STATE_IDLE;
+ resched = 1;
+ } else {
+ cont = 1;
+ }
goto exit;
}
--
2.25.1
An untrusted device may return a NULL context pointer in the request
header. hptiop_iop_request_callback_itl() dereferences that pointer
unconditionally to write result fields and to invoke arg->done(), which
can cause a NULL pointer dereference.
Add a NULL check for the reconstructed context pointer. If it is NULL,
acknowledge the request by writing the tag to the outbound queue and
return early.
Fixes: ede1e6f8b432 ("[SCSI] hptiop: HighPoint RocketRAID 3xxx controller driver")
Cc: stable(a)vger.kernel.org
Signed-off-by: Guangshuo Li <lgs201920130244(a)gmail.com>
---
drivers/scsi/hptiop.c | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/drivers/scsi/hptiop.c b/drivers/scsi/hptiop.c
index 21f1d9871a33..2b29cd83ce5e 100644
--- a/drivers/scsi/hptiop.c
+++ b/drivers/scsi/hptiop.c
@@ -812,6 +812,11 @@ static void hptiop_iop_request_callback_itl(struct hptiop_hba *hba, u32 tag)
(readl(&req->context) |
((u64)readl(&req->context_hi32)<<32));
+ if (!arg) {
+ writel(tag, &hba->u.itl.iop->outbound_queue);
+ return;
+ }
+
if (readl(&req->result) == IOP_RESULT_SUCCESS) {
arg->result = HPT_IOCTL_RESULT_OK;
--
2.43.0
If ab->fw.m3_data points to data, then fw pointer remains null.
Further, if m3_mem is not allocated, then fw is dereferenced to be
passed to ath11k_err function.
Replace fw->size by m3_len.
Found by Linux Verification Center (linuxtesting.org) with SVACE.
Fixes: 7db88b962f06 ("wifi: ath11k: add firmware-2.bin support")
Cc: stable(a)vger.kernel.org
Signed-off-by: Matvey Kovalev <matvey.kovalev(a)ispras.ru>
---
drivers/net/wireless/ath/ath11k/qmi.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/net/wireless/ath/ath11k/qmi.c b/drivers/net/wireless/ath/ath11k/qmi.c
index 378ac96b861b7..1a42b4abe7168 100644
--- a/drivers/net/wireless/ath/ath11k/qmi.c
+++ b/drivers/net/wireless/ath/ath11k/qmi.c
@@ -2557,7 +2557,7 @@ static int ath11k_qmi_m3_load(struct ath11k_base *ab)
GFP_KERNEL);
if (!m3_mem->vaddr) {
ath11k_err(ab, "failed to allocate memory for M3 with size %zu\n",
- fw->size);
+ m3_len);
ret = -ENOMEM;
goto out;
}
--
2.43.0.windows.1
Our implementation for BAR2 (lmembar) resize works at the xe_vram layer
and only releases that BAR before resizing. That is not always
sufficient. If the parent bridge needs to move, the BAR0 also needs to
be released, otherwise the resize fails. This is the case of not having
enough space allocated from the beginning.
Also, there's a BAR0 in the upstream port of the pcie switch in BMG
preventing the resize to propagate to the bridge as previously discussed
in https://lore.kernel.org/intel-xe/20250721173057.867829-1-uwu@icenowy.me/
and https://lore.kernel.org/intel-xe/wqukxnxni2dbpdhri3cbvlrzsefgdanesgskzmxi5s…
I'm bringing that commit from Ilpo here so this can be tested with the
xe changes and propagate to stable. Note that the use of a pci fixup is
not ideal, but without intrusive changes on resource fitting it's
possibly the best alternative. I also have confirmation from HW folks
that the BAR in the upstream port has no production use.
I have more cleanups on top on the xe side, but those conflict with some
refactors Ilpo is working on as prep for the resource fitting, so I will
wait things settle to submit again.
I propose to take this through the drm tree.
With this I could resize the lmembar on some problematic hosts and after
doing an SBR, with one caveat: the audio device also prevents the BAR
from moving and it needs to be manually removed before resizing. With
the PCI refactors and BAR fitting logic that Ilpo is working on, it's
expected that it won't be needed for a long time.
Signed-off-by: Lucas De Marchi <lucas.demarchi(a)intel.com>
---
Ilpo Järvinen (1):
PCI: Release BAR0 of an integrated bridge to allow GPU BAR resize
Lucas De Marchi (1):
drm/xe: Move rebar to be done earlier
drivers/gpu/drm/xe/xe_pci.c | 2 ++
drivers/gpu/drm/xe/xe_vram.c | 34 ++++++++++++++++++++++++++--------
drivers/gpu/drm/xe/xe_vram.h | 1 +
drivers/pci/quirks.c | 23 +++++++++++++++++++++++
4 files changed, 52 insertions(+), 8 deletions(-)
base-commit: 8031d70dbb4201841897de480cec1f9750d4a5dc
change-id: 20250917-xe-pci-rebar-2-c0fe2f04c879
Lucas De Marchi
Hi Zhang, hi Jiri,
In Debian Staffan Melin reported that after an update containing the
commit 1a8953f4f774 ("HID: Add IGNORE quirk for SMARTLINKTECHNOLOGY"),
the input device with same idVendor and idProduct, the Jieli
Technology USB Composite Device, does not get recognized anymore.
The full Debian report is at: https://bugs.debian.org/1114557
The issue is not specific to the 6.12.y series and confirmed in 6.16.3
as well.
Staffan Melin did bisect the kernels between 6.12.38 (which was still
working) and 6.1.41 (which was not), confirming by bisection that the
offending commit is
1a8953f4f774 ("HID: Add IGNORE quirk for SMARTLINKTECHNOLOGY")
#regzbot introduced: 1a8953f4f774
#regzbot monitor: https://bugs.debian.org/1114557
So it looks that the quirk applied is unfortunately affecting
negatively as well Staffan Melin case.
Can you have a look?
Regards,
Salvatore
The SolidRun CN9130 SoC based boards have a variety of functional
problems, in particular
- SATA ports
- CN9132 CEX-7 eMMC
- CN9132 Clearfog PCI-E x2 / x4 ports
are not functional.
The SATA issue was recently introduced via changes to the
armada-cp11x.dtsi, wheras the eMMC and SPI problems were present in the
board dts from the very beginning.
This patch-set aims to resolve the problems after testing on Debian 13
release (Linux v6.12).
Signed-off-by: Josua Mayer <josua(a)solid-run.com>
---
Changes in v2:
- fixed mistakes in the original board device-trees that caused
functional issues with eMMC and pci.
- Link to v1: https://lore.kernel.org/r/20250911-cn913x-sr-fix-sata-v1-1-9e72238d0988@sol…
---
Josua Mayer (4):
arm64: dts: marvell: cn913x-solidrun: fix sata ports status
arm64: dts: marvell: cn9132-clearfog: disable eMMC high-speed modes
arm64: dts: marvell: cn9132-clearfog: fix multi-lane pci x2 and x4 ports
arm64: dts: marvell: cn9130-sr-som: add missing properties to emmc
arch/arm64/boot/dts/marvell/cn9130-cf.dtsi | 7 ++++---
arch/arm64/boot/dts/marvell/cn9130-sr-som.dtsi | 2 ++
arch/arm64/boot/dts/marvell/cn9131-cf-solidwan.dts | 6 ++++--
arch/arm64/boot/dts/marvell/cn9132-clearfog.dts | 22 ++++++++++++++++------
arch/arm64/boot/dts/marvell/cn9132-sr-cex7.dtsi | 8 ++++++++
5 files changed, 34 insertions(+), 11 deletions(-)
---
base-commit: 8f5ae30d69d7543eee0d70083daf4de8fe15d585
change-id: 20250911-cn913x-sr-fix-sata-5c737ebdb97f
Best regards,
--
Josua Mayer <josua(a)solid-run.com>
We sell 13,120 Ultra Targeted Emails and with these Emails you can
successfully sell your ebook, your training or any other digital product
today
https://vlykohda.mychariow.com/eng
Nous vendons 13 120 Emails Ultra Ciblés et grâce à ces Emails tu peux
vendre avec succès ton ebook, ta formation ou tout autre produit digital
dès aujourd’hui
https://vlykohda.mychariow.com/149
devm_kcalloc() may fail. ndtest_probe() allocates three DMA address
arrays (dcr_dma, label_dma, dimm_dma) and later unconditionally uses
them in ndtest_nvdimm_init(), which can lead to a NULL pointer
dereference on allocation failure.
Add NULL checks for all three allocations and return -ENOMEM if any
allocation fails.
Fixes: 9399ab61ad82 ("ndtest: Add dimms to the two buses")
Cc: stable(a)vger.kernel.org
Signed-off-by: Guangshuo Li <lgs201920130244(a)gmail.com>
---
tools/testing/nvdimm/test/ndtest.c | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/tools/testing/nvdimm/test/ndtest.c b/tools/testing/nvdimm/test/ndtest.c
index 68a064ce598c..516f304bb0b9 100644
--- a/tools/testing/nvdimm/test/ndtest.c
+++ b/tools/testing/nvdimm/test/ndtest.c
@@ -855,6 +855,11 @@ static int ndtest_probe(struct platform_device *pdev)
p->dimm_dma = devm_kcalloc(&p->pdev.dev, NUM_DCR,
sizeof(dma_addr_t), GFP_KERNEL);
+ if (!p->dcr_dma || !p->label_dma || !p->dimm_dma) {
+ pr_err("%s: failed to allocate DMA address arrays\n", __func__);
+ return -ENOMEM;
+ }
+
rc = ndtest_nvdimm_init(p);
if (rc)
goto err;
--
2.43.0
This patch series enables a future version of tune2fs to be able to
modify certain parts of the ext4 superblock without to write to the
block device.
The first patch fixes a potential buffer overrun caused by a
maliciously moified superblock. The second patch adds support for
32-bit uid and gid's which can have access to the reserved blocks pool.
The last patch adds the ioctl's which will be used by tune2fs.
Signed-off-by: Theodore Ts'o <tytso(a)mit.edu>
---
Changes in v2:
- fix bugs that were detected using sparse
- remove tune (unsafe) ability to clear certain compat faatures
- add the ability to set the encoding and encoding flags for case folding
- Link to v1: https://lore.kernel.org/r/20250908-tune2fs-v1-0-e3a6929f3355@mit.edu
---
Theodore Ts'o (3):
ext4: avoid potential buffer over-read in parse_apply_sb_mount_options()
ext4: add support for 32-bit default reserved uid and gid values
ext4: implemet new ioctls to set and get superblock parameters
fs/ext4/ext4.h | 16 +++-
fs/ext4/ioctl.c | 312 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
fs/ext4/super.c | 25 +++----
include/uapi/linux/ext4.h | 53 +++++++++++++
4 files changed, 382 insertions(+), 24 deletions(-)
---
base-commit: b320789d6883cc00ac78ce83bccbfe7ed58afcf0
change-id: 20250830-tune2fs-3376beb72403
Best regards,
--
Theodore Ts'o <tytso(a)mit.edu>
Hi,
I would like to request backporting 5326ab737a47 ("virtio_console: fix
order of fields cols and rows") to all LTS kernels.
I'm working on QEMU patches that add virtio console size support.
Without the fix, rows and columns will be swapped.
As far as I know, there are no device implementations that use the
wrong order and would by broken by the fix.
Note: A previous version [1] of the patch contained "Cc: stable" and
"Fixes:" tags, but they seem to have been accidentally left out from
the final version.
[1]: https://lore.kernel.org/all/20250320172654.624657-1-maxbr@linux.ibm.com/
Thanks,
Filip Hejsek
kcalloc_node() may fail. When the interrupter array allocation returns
NULL, subsequent code uses xhci->interrupters (e.g. in xhci_add_interrupter()
and in cleanup paths), leading to a potential NULL pointer dereference.
Check the allocation and bail out to the existing fail path to avoid
the NULL dereference.
Fixes: c99b38c412343 ("xhci: add support to allocate several interrupters")
Cc: stable(a)vger.kernel.org
Signed-off-by: Guangshuo Li <lgs201920130244(a)gmail.com>
---
drivers/usb/host/xhci-mem.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c
index d698095fc88d..da257856e864 100644
--- a/drivers/usb/host/xhci-mem.c
+++ b/drivers/usb/host/xhci-mem.c
@@ -2505,7 +2505,8 @@ int xhci_mem_init(struct xhci_hcd *xhci, gfp_t flags)
"Allocating primary event ring");
xhci->interrupters = kcalloc_node(xhci->max_interrupters, sizeof(*xhci->interrupters),
flags, dev_to_node(dev));
-
+ if (!xhci->interrupters)
+ goto fail;
ir = xhci_alloc_interrupter(xhci, 0, flags);
if (!ir)
goto fail;
--
2.43.0
The following changes since commit 76eeb9b8de9880ca38696b2fb56ac45ac0a25c6c:
Linux 6.17-rc5 (2025-09-07 14:22:57 -0700)
are available in the Git repository at:
https://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost.git tags/for_linus
for you to fetch changes up to 549db78d951726646ae9468e86c92cbd1fe73595:
virtio_config: clarify output parameters (2025-09-16 05:37:03 -0400)
----------------------------------------------------------------
virtio,vhost: last minute fixes
More small fixes. Most notably this reverts a virtio console
change since we made it without considering compatibility
sufficiently.
Signed-off-by: Michael S. Tsirkin <mst(a)redhat.com>
----------------------------------------------------------------
Alok Tiwari (1):
vhost-scsi: fix argument order in tport allocation error message
Alyssa Ross (1):
virtio_config: clarify output parameters
Ashwini Sahu (1):
uapi: vduse: fix typo in comment
Michael S. Tsirkin (1):
Revert "virtio_console: fix order of fields cols and rows"
Sean Christopherson (3):
vhost_task: Don't wake KVM x86's recovery thread if vhost task was killed
vhost_task: Allow caller to omit handle_sigkill() callback
KVM: x86/mmu: Don't register a sigkill callback for NX hugepage recovery tasks
zhang jiao (1):
vhost: vringh: Modify the return value check
arch/x86/kvm/mmu/mmu.c | 7 +-----
drivers/char/virtio_console.c | 2 +-
drivers/vhost/scsi.c | 2 +-
drivers/vhost/vhost.c | 2 +-
drivers/vhost/vringh.c | 7 +++---
include/linux/sched/vhost_task.h | 1 +
include/linux/virtio_config.h | 11 ++++----
include/uapi/linux/vduse.h | 2 +-
kernel/vhost_task.c | 54 ++++++++++++++++++++++++++++++++++++----
9 files changed, 65 insertions(+), 23 deletions(-)
Two kcalloc() allocations (descriptor table and context table) can fail
and are used unconditionally afterwards (ALIGN()/phys conversion and
dereferences), leading to potential NULL pointer dereference.
Check both allocations and bail out early; on the second failure, free
the first allocation to avoid a leak. Do not emit extra OOM logs.
Fixes: 73d739698017 ("sb1250-mac.c: De-typedef, de-volatile, de-etc...")
Fixes: c477f3348abb ("drivers/net/sb1250-mac.c: kmalloc + memset conversion to kcalloc")
Cc: stable(a)vger.kernel.org
Signed-off-by: Guangshuo Li <lgs201920130244(a)gmail.com>
---
drivers/net/ethernet/broadcom/sb1250-mac.c | 8 +++++++-
1 file changed, 7 insertions(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/broadcom/sb1250-mac.c b/drivers/net/ethernet/broadcom/sb1250-mac.c
index 30865fe03eeb..e16a49e22488 100644
--- a/drivers/net/ethernet/broadcom/sb1250-mac.c
+++ b/drivers/net/ethernet/broadcom/sb1250-mac.c
@@ -625,6 +625,8 @@ static void sbdma_initctx(struct sbmacdma *d, struct sbmac_softc *s, int chan,
d->sbdma_dscrtable_unaligned = kcalloc(d->sbdma_maxdescr + 1,
sizeof(*d->sbdma_dscrtable),
GFP_KERNEL);
+ if (!d->sbdma_dscrtable_unaligned)
+ return; /* avoid NULL deref in ALIGN/phys conversion */
/*
* The descriptor table must be aligned to at least 16 bytes or the
@@ -644,7 +646,11 @@ static void sbdma_initctx(struct sbmacdma *d, struct sbmac_softc *s, int chan,
d->sbdma_ctxtable = kcalloc(d->sbdma_maxdescr,
sizeof(*d->sbdma_ctxtable), GFP_KERNEL);
-
+ if (!d->sbdma_ctxtable) {
+ kfree(d->sbdma_dscrtable_unaligned);
+ d->sbdma_dscrtable_unaligned = NULL;
+ return;
+ }
#ifdef CONFIG_SBMAC_COALESCE
/*
* Setup Rx/Tx DMA coalescing defaults
--
2.43.0
Hi,
This series fixes the long standing issue with ACS in DT platforms. There are
two fixes in this series, both fixing independent issues on their own, but both
are needed to properly enable ACS on DT platforms (well, patch 1 is only needed
for Juno board, but that was a blocker for patch 2, more below...).
Issue(s) background
===================
Back in 2024, Xingang Wang first noted a failure in attaching the HiSilicon SEC
device to QEMU ARM64 pci-root-port device [1]. He then tracked down the issue to
ACS not being enabled for the QEMU Root Port device and he proposed a patch to
fix it [2].
Once the patch got applied, people reported PCIe issues with linux-next on the
ARM Juno Development boards, where they saw failure in enumerating the endpoint
devices [3][4]. So soon, the patch got dropped, but the actual issue with the
ARM Juno boards was left behind.
Fast forward to 2024, Pavan resubmitted the same fix [5] for his own usecase,
hoping that someone in the community would fix the issue with ARM Juno boards.
But the patch was rightly rejected, as a patch that was known to cause issues
should not be merged to the kernel. But again, no one investigated the Juno
issue and it was left behind again.
Now it ended up in my plate and I managed to track down the issue with the help
of Naresh who got access to the Juno boards in LKFT. The Juno issue is with the
PCIe switch from Microsemi/IDT, which triggers ACS Source Validation error on
Completions received for the Configuration Read Request from a device connected
to the downstream port that has not yet captured the PCIe bus number. As per the
PCIe spec r6.0 sec 2.2.6.2, "Functions must capture the Bus and Device Numbers
supplied with all Type 0 Configuration Write Requests completed by the Function
and supply these numbers in the Bus and Device Number fields of the Requester ID
for all Requests". So during the first Configuration Read Request issued by the
switch downstream port during enumeration (for reading Vendor ID), Bus and
Device numbers will be unknown to the device. So it responds to the Read Request
with Completion having Bus and Device number as 0. The switch interprets the
Completion as an ACS Source Validation error and drops the completion, leading
to the failure in detecting the endpoint device. Though the PCIe spec r6.0, sec
6.12.1.1, states that "Completions are never affected by ACS Source Validation".
This behavior is in violation of the spec.
This issue was already found and addressed with a quirk for a different device
from Microsemi with 'commit, aa667c6408d2 ("PCI: Workaround IDT switch ACS
Source Validation erratum")'. Apparently, this issue seems to be documented in
the erratum #36 of IDT 89H32H8G3-YC, which is not publicly available.
Solution for Juno issue
=======================
To fix this issue, I've extended the quirk to the Device ID of the switch
found in Juno R2 boards. I believe the same switch is also present in Juno R1
board as well.
With Patch 1, the Juno R2 boards can now detect the endpoints even with ACS
enabled for the Switch downstream ports. Finally, I added patch 2 that properly
enables ACS for all the PCI devices on DT platforms.
It should be noted that even without patch 2 which enables ACS for the Root
Port, the Juno boards were failing since 'commit, bcb81ac6ae3c ("iommu: Get
DT/ACPI parsing into the proper probe path")' as reported in LKFT [6]. I
believe, this commit made sure pci_request_acs() gets called before the
enumeration of the switch downstream ports. The LKFT team ended up disabling
ACS using cmdline param 'pci=config_acs=000000@pci:0:0'. So I added the above
mentioned commit as a Fixes tag for patch 1.
Also, to mitigate this issue, one could enumerate all the PCIe devices in
bootloader without enabling ACS (as also noted by Robin in the LKFT thread).
This will make sure that the endpoint device has a valid bus number when it
responds to the first Configuration Read Request from the switch downstream
port. So the ACS Source Validation error doesn't get triggered.
Solution for ACS issue
======================
To fix this issue, I've kept the patch from Xingang as is (with rewording of the
patch subject/description). This patch moves the pci_request_acs() call to
devm_of_pci_bridge_init(), which gets called during the host bridge
registration. This makes sure that the 'pci_acs_enable' flag set by
pci_request_acs() is getting set before the enumeration of the Root Port device.
So now, ACS will be enabled for all ACS capable devices of DT platforms.
[1] https://lore.kernel.org/all/038397a6-57e2-b6fc-6e1c-7c03b7be9d96@huawei.com
[2] https://lore.kernel.org/all/1621566204-37456-1-git-send-email-wangxingang5@…
[3] https://lore.kernel.org/all/01314d70-41e6-70f9-e496-84091948701a@samsung.com
[4] https://lore.kernel.org/all/CADYN=9JWU3CMLzMEcD5MSQGnaLyDRSKc5SofBFHUax6YuT…
[5] https://lore.kernel.org/linux-pci/20241107-pci_acs_fix-v1-1-185a2462a571@qu…
[6] https://lists.linaro.org/archives/list/lkft-triage@lists.linaro.org/message…
Signed-off-by: Manivannan Sadhasivam <manivannan.sadhasivam(a)oss.qualcomm.com>
---
Manivannan Sadhasivam (1):
PCI: Extend pci_idt_bus_quirk() for IDT switch with Device ID 0x8090
Xingang Wang (1):
iommu/of: Call pci_request_acs() before enumerating the Root Port device
drivers/iommu/of_iommu.c | 1 -
drivers/pci/of.c | 8 +++++++-
drivers/pci/probe.c | 2 +-
3 files changed, 8 insertions(+), 3 deletions(-)
---
base-commit: 8f5ae30d69d7543eee0d70083daf4de8fe15d585
change-id: 20250910-pci-acs-cb4fa3983a2c
Best regards,
--
Manivannan Sadhasivam <manivannan.sadhasivam(a)oss.qualcomm.com>
Our implementation for BAR2 (lmembar) resize works at the xe_vram layer
and only releases that BAR before resizing. That is not always
sufficient. If the parent bridge needs to move, the BAR0 also needs to
be released, otherwise the resize fails. This is the case of not having
enough space allocated from the beginning.
Also, there's a BAR0 in the upstream port of the pcie switch in BMG
preventing the resize to propagate to the bridge as previously discussed
at https://lore.kernel.org/intel-xe/20250721173057.867829-1-uwu@icenowy.me/
and https://lore.kernel.org/intel-xe/wqukxnxni2dbpdhri3cbvlrzsefgdanesgskzmxi5s…
I'm bringing that commit from Ilpo here so this can be tested with the
xe changes and go to stable (first 2 patches).
The third patch is just code move as all the logic is in a different
layer now. That could wait longer though as there are other refactors
coming through the PCI tree and that would conflict (see second link
above).
With this I could resize the lmembar on some problematic hosts and after
doing an SBR, with one caveat: the audio device also prevents the BAR
from moving and it needs to be manually removed before resizing. With
the PCI refactors and BAR fitting logic that Ilpo is working on, it's
expected that it won't be needed for a long time.
Signed-off-by: Lucas De Marchi <lucas.demarchi(a)intel.com>
---
Ilpo Järvinen (1):
PCI: Release BAR0 of an integrated bridge to allow GPU BAR resize
Lucas De Marchi (2):
drm/xe: Move rebar to be done earlier
drm/xe: Move rebar to its own file
drivers/gpu/drm/xe/Makefile | 1 +
drivers/gpu/drm/xe/xe_pci.c | 3 +
drivers/gpu/drm/xe/xe_pci_rebar.c | 125 ++++++++++++++++++++++++++++++++++++++
drivers/gpu/drm/xe/xe_pci_rebar.h | 13 ++++
drivers/gpu/drm/xe/xe_vram.c | 103 -------------------------------
drivers/pci/quirks.c | 20 ++++++
6 files changed, 162 insertions(+), 103 deletions(-)
base-commit: 95bc43e85f952ef4ebfff1406883e1e07a7daeda
change-id: 20250917-xe-pci-rebar-2-c0fe2f04c879
Lucas De Marchi
The VMA count limit check in do_mmap() and do_brk_flags() uses a
strict inequality (>), which allows a process's VMA count to exceed
the configured sysctl_max_map_count limit by one.
A process with mm->map_count == sysctl_max_map_count will incorrectly
pass this check and then exceed the limit upon allocation of a new VMA
when its map_count is incremented.
Other VMA allocation paths, such as split_vma(), already use the
correct, inclusive (>=) comparison.
Fix this bug by changing the comparison to be inclusive in do_mmap()
and do_brk_flags(), bringing them in line with the correct behavior
of other allocation paths.
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Cc: <stable(a)vger.kernel.org>
Cc: Andrew Morton <akpm(a)linux-foundation.org>
Cc: David Hildenbrand <david(a)redhat.com>
Cc: "Liam R. Howlett" <Liam.Howlett(a)oracle.com>
Cc: Lorenzo Stoakes <lorenzo.stoakes(a)oracle.com>
Cc: Mike Rapoport <rppt(a)kernel.org>
Cc: Minchan Kim <minchan(a)kernel.org>
Cc: Pedro Falcato <pfalcato(a)suse.de>
Signed-off-by: Kalesh Singh <kaleshsingh(a)google.com>
---
Chnages in v2:
- Fix mmap check, per Pedro
mm/mmap.c | 2 +-
mm/vma.c | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/mm/mmap.c b/mm/mmap.c
index 7306253cc3b5..e5370e7fcd8f 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -374,7 +374,7 @@ unsigned long do_mmap(struct file *file, unsigned long addr,
return -EOVERFLOW;
/* Too many mappings? */
- if (mm->map_count > sysctl_max_map_count)
+ if (mm->map_count >= sysctl_max_map_count)
return -ENOMEM;
/*
diff --git a/mm/vma.c b/mm/vma.c
index 3b12c7579831..033a388bc4b1 100644
--- a/mm/vma.c
+++ b/mm/vma.c
@@ -2772,7 +2772,7 @@ int do_brk_flags(struct vma_iterator *vmi, struct vm_area_struct *vma,
if (!may_expand_vm(mm, vm_flags, len >> PAGE_SHIFT))
return -ENOMEM;
- if (mm->map_count > sysctl_max_map_count)
+ if (mm->map_count >= sysctl_max_map_count)
return -ENOMEM;
if (security_vm_enough_memory_mm(mm, len >> PAGE_SHIFT))
--
2.51.0.384.g4c02a37b29-goog