There are some AA deadlock issues in kmemleak, similar to the situation
reported by Breno [1]. The deadlock path is as follows:
mem_pool_alloc()
-> raw_spin_lock_irqsave(&kmemleak_lock, flags);
-> pr_warn()
-> netconsole subsystem
-> netpoll
-> __alloc_skb
-> __create_object
-> raw_spin_lock_irqsave(&kmemleak_lock, flags);
To solve this problem, switch to printk_safe mode before printing warning
message, this will redirect all printk()-s to a special per-CPU buffer,
which will be flushed later from a safe context (irq work), and this
deadlock problem can be avoided. The proper API to use should be
printk_deferred_enter()/printk_deferred_exit() [2]. Another way is to
place the warn print after kmemleak is released.
[1]
https://lore.kernel.org/all/20250731-kmemleak_lock-v1-1-728fd470198f@debian…
[2]
https://lore.kernel.org/all/5ca375cd-4a20-4807-b897-68b289626550@redhat.com/
====================
Signed-off-by: Gu Bowen <gubowen5(a)huawei.com>
---
mm/kmemleak.c | 27 ++++++++++++++++++++-------
1 file changed, 20 insertions(+), 7 deletions(-)
diff --git a/mm/kmemleak.c b/mm/kmemleak.c
index 84265983f239..1ac56ceb29b6 100644
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
@@ -437,9 +437,15 @@ static struct kmemleak_object *__lookup_object(unsigned long ptr, int alias,
else if (untagged_objp == untagged_ptr || alias)
return object;
else {
+ /*
+ * Printk deferring due to the kmemleak_lock held.
+ * This is done to avoid deadlock.
+ */
+ printk_deferred_enter();
kmemleak_warn("Found object by alias at 0x%08lx\n",
ptr);
dump_object_info(object);
+ printk_deferred_exit();
break;
}
}
@@ -736,6 +742,11 @@ static int __link_object(struct kmemleak_object *object, unsigned long ptr,
else if (untagged_objp + parent->size <= untagged_ptr)
link = &parent->rb_node.rb_right;
else {
+ /*
+ * Printk deferring due to the kmemleak_lock held.
+ * This is done to avoid deadlock.
+ */
+ printk_deferred_enter();
kmemleak_stop("Cannot insert 0x%lx into the object search tree (overlaps existing)\n",
ptr);
/*
@@ -743,6 +754,7 @@ static int __link_object(struct kmemleak_object *object, unsigned long ptr,
* be freed while the kmemleak_lock is held.
*/
dump_object_info(parent);
+ printk_deferred_exit();
return -EEXIST;
}
}
@@ -856,13 +868,8 @@ static void delete_object_part(unsigned long ptr, size_t size,
raw_spin_lock_irqsave(&kmemleak_lock, flags);
object = __find_and_remove_object(ptr, 1, objflags);
- if (!object) {
-#ifdef DEBUG
- kmemleak_warn("Partially freeing unknown object at 0x%08lx (size %zu)\n",
- ptr, size);
-#endif
+ if (!object)
goto unlock;
- }
/*
* Create one or two objects that may result from the memory block
@@ -882,8 +889,14 @@ static void delete_object_part(unsigned long ptr, size_t size,
unlock:
raw_spin_unlock_irqrestore(&kmemleak_lock, flags);
- if (object)
+ if (object) {
__delete_object(object);
+ } else {
+#ifdef DEBUG
+ kmemleak_warn("Partially freeing unknown object at 0x%08lx (size %zu)\n",
+ ptr, size);
+#endif
+ }
out:
if (object_l)
--
2.43.0
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 55d225670def06b01af2e7a5e0446fbe946289e8
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025092935-hatred-salutary-8769@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 55d225670def06b01af2e7a5e0446fbe946289e8 Mon Sep 17 00:00:00 2001
From: Lukasz Czapnik <lukasz.czapnik(a)intel.com>
Date: Wed, 13 Aug 2025 12:45:11 +0200
Subject: [PATCH] i40e: add validation for ring_len param
The `ring_len` parameter provided by the virtual function (VF)
is assigned directly to the hardware memory context (HMC) without
any validation.
To address this, introduce an upper boundary check for both Tx and Rx
queue lengths. The maximum number of descriptors supported by the
hardware is 8k-32.
Additionally, enforce alignment constraints: Tx rings must be a multiple
of 8, and Rx rings must be a multiple of 32.
Fixes: 5c3c48ac6bf5 ("i40e: implement virtual device interface")
Cc: stable(a)vger.kernel.org
Signed-off-by: Lukasz Czapnik <lukasz.czapnik(a)intel.com>
Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov(a)intel.com>
Signed-off-by: Przemek Kitszel <przemyslaw.kitszel(a)intel.com>
Reviewed-by: Simon Horman <horms(a)kernel.org>
Tested-by: Rafal Romanowski <rafal.romanowski(a)intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen(a)intel.com>
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
index 9b8efdeafbcf..cb37b2ac56f1 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
@@ -653,6 +653,13 @@ static int i40e_config_vsi_tx_queue(struct i40e_vf *vf, u16 vsi_id,
/* only set the required fields */
tx_ctx.base = info->dma_ring_addr / 128;
+
+ /* ring_len has to be multiple of 8 */
+ if (!IS_ALIGNED(info->ring_len, 8) ||
+ info->ring_len > I40E_MAX_NUM_DESCRIPTORS_XL710) {
+ ret = -EINVAL;
+ goto error_context;
+ }
tx_ctx.qlen = info->ring_len;
tx_ctx.rdylist = le16_to_cpu(vsi->info.qs_handle[0]);
tx_ctx.rdylist_act = 0;
@@ -716,6 +723,13 @@ static int i40e_config_vsi_rx_queue(struct i40e_vf *vf, u16 vsi_id,
/* only set the required fields */
rx_ctx.base = info->dma_ring_addr / 128;
+
+ /* ring_len has to be multiple of 32 */
+ if (!IS_ALIGNED(info->ring_len, 32) ||
+ info->ring_len > I40E_MAX_NUM_DESCRIPTORS_XL710) {
+ ret = -EINVAL;
+ goto error_param;
+ }
rx_ctx.qlen = info->ring_len;
if (info->splithdr_enabled) {
The patch below does not apply to the 6.16-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.16.y
git checkout FETCH_HEAD
git cherry-pick -x 55ed11b181c43d81ce03b50209e4e7c4a14ba099
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025092952-wooing-result-72e9@gregkh' --subject-prefix 'PATCH 6.16.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 55ed11b181c43d81ce03b50209e4e7c4a14ba099 Mon Sep 17 00:00:00 2001
From: Andrea Righi <arighi(a)nvidia.com>
Date: Sat, 20 Sep 2025 15:26:21 +0200
Subject: [PATCH] sched_ext: idle: Handle migration-disabled tasks in BPF code
When scx_bpf_select_cpu_dfl()/and() kfuncs are invoked outside of
ops.select_cpu() we can't rely on @p->migration_disabled to determine if
migration is disabled for the task @p.
In fact, migration is always disabled for the current task while running
BPF code: __bpf_prog_enter() disables migration and __bpf_prog_exit()
re-enables it.
To handle this, when @p->migration_disabled == 1, check whether @p is
the current task. If so, migration was not disabled before entering the
callback, otherwise migration was disabled.
This ensures correct idle CPU selection in all cases. The behavior of
ops.select_cpu() remains unchanged, because this callback is never
invoked for the current task and migration-disabled tasks are always
excluded.
Example: without this change scx_bpf_select_cpu_and() called from
ops.enqueue() always returns -EBUSY; with this change applied, it
correctly returns idle CPUs.
Fixes: 06efc9fe0b8de ("sched_ext: idle: Handle migration-disabled tasks in idle selection")
Cc: stable(a)vger.kernel.org # v6.16+
Signed-off-by: Andrea Righi <arighi(a)nvidia.com>
Acked-by: Changwoo Min <changwoo(a)igalia.com>
Signed-off-by: Tejun Heo <tj(a)kernel.org>
diff --git a/kernel/sched/ext_idle.c b/kernel/sched/ext_idle.c
index 7174e1c1a392..537c6992bb63 100644
--- a/kernel/sched/ext_idle.c
+++ b/kernel/sched/ext_idle.c
@@ -856,6 +856,32 @@ static bool check_builtin_idle_enabled(void)
return false;
}
+/*
+ * Determine whether @p is a migration-disabled task in the context of BPF
+ * code.
+ *
+ * We can't simply check whether @p->migration_disabled is set in a
+ * sched_ext callback, because migration is always disabled for the current
+ * task while running BPF code.
+ *
+ * The prolog (__bpf_prog_enter) and epilog (__bpf_prog_exit) respectively
+ * disable and re-enable migration. For this reason, the current task
+ * inside a sched_ext callback is always a migration-disabled task.
+ *
+ * Therefore, when @p->migration_disabled == 1, check whether @p is the
+ * current task or not: if it is, then migration was not disabled before
+ * entering the callback, otherwise migration was disabled.
+ *
+ * Returns true if @p is migration-disabled, false otherwise.
+ */
+static bool is_bpf_migration_disabled(const struct task_struct *p)
+{
+ if (p->migration_disabled == 1)
+ return p != current;
+ else
+ return p->migration_disabled;
+}
+
static s32 select_cpu_from_kfunc(struct task_struct *p, s32 prev_cpu, u64 wake_flags,
const struct cpumask *allowed, u64 flags)
{
@@ -898,7 +924,7 @@ static s32 select_cpu_from_kfunc(struct task_struct *p, s32 prev_cpu, u64 wake_f
* selection optimizations and simply check whether the previously
* used CPU is idle and within the allowed cpumask.
*/
- if (p->nr_cpus_allowed == 1 || is_migration_disabled(p)) {
+ if (p->nr_cpus_allowed == 1 || is_bpf_migration_disabled(p)) {
if (cpumask_test_cpu(prev_cpu, allowed ?: p->cpus_ptr) &&
scx_idle_test_and_clear_cpu(prev_cpu))
cpu = prev_cpu;
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 55d225670def06b01af2e7a5e0446fbe946289e8
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025092934-spinning-happening-f92a@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 55d225670def06b01af2e7a5e0446fbe946289e8 Mon Sep 17 00:00:00 2001
From: Lukasz Czapnik <lukasz.czapnik(a)intel.com>
Date: Wed, 13 Aug 2025 12:45:11 +0200
Subject: [PATCH] i40e: add validation for ring_len param
The `ring_len` parameter provided by the virtual function (VF)
is assigned directly to the hardware memory context (HMC) without
any validation.
To address this, introduce an upper boundary check for both Tx and Rx
queue lengths. The maximum number of descriptors supported by the
hardware is 8k-32.
Additionally, enforce alignment constraints: Tx rings must be a multiple
of 8, and Rx rings must be a multiple of 32.
Fixes: 5c3c48ac6bf5 ("i40e: implement virtual device interface")
Cc: stable(a)vger.kernel.org
Signed-off-by: Lukasz Czapnik <lukasz.czapnik(a)intel.com>
Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov(a)intel.com>
Signed-off-by: Przemek Kitszel <przemyslaw.kitszel(a)intel.com>
Reviewed-by: Simon Horman <horms(a)kernel.org>
Tested-by: Rafal Romanowski <rafal.romanowski(a)intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen(a)intel.com>
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
index 9b8efdeafbcf..cb37b2ac56f1 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
@@ -653,6 +653,13 @@ static int i40e_config_vsi_tx_queue(struct i40e_vf *vf, u16 vsi_id,
/* only set the required fields */
tx_ctx.base = info->dma_ring_addr / 128;
+
+ /* ring_len has to be multiple of 8 */
+ if (!IS_ALIGNED(info->ring_len, 8) ||
+ info->ring_len > I40E_MAX_NUM_DESCRIPTORS_XL710) {
+ ret = -EINVAL;
+ goto error_context;
+ }
tx_ctx.qlen = info->ring_len;
tx_ctx.rdylist = le16_to_cpu(vsi->info.qs_handle[0]);
tx_ctx.rdylist_act = 0;
@@ -716,6 +723,13 @@ static int i40e_config_vsi_rx_queue(struct i40e_vf *vf, u16 vsi_id,
/* only set the required fields */
rx_ctx.base = info->dma_ring_addr / 128;
+
+ /* ring_len has to be multiple of 32 */
+ if (!IS_ALIGNED(info->ring_len, 32) ||
+ info->ring_len > I40E_MAX_NUM_DESCRIPTORS_XL710) {
+ ret = -EINVAL;
+ goto error_param;
+ }
rx_ctx.qlen = info->ring_len;
if (info->splithdr_enabled) {
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x 55d225670def06b01af2e7a5e0446fbe946289e8
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025092933-confess-manhole-6d70@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 55d225670def06b01af2e7a5e0446fbe946289e8 Mon Sep 17 00:00:00 2001
From: Lukasz Czapnik <lukasz.czapnik(a)intel.com>
Date: Wed, 13 Aug 2025 12:45:11 +0200
Subject: [PATCH] i40e: add validation for ring_len param
The `ring_len` parameter provided by the virtual function (VF)
is assigned directly to the hardware memory context (HMC) without
any validation.
To address this, introduce an upper boundary check for both Tx and Rx
queue lengths. The maximum number of descriptors supported by the
hardware is 8k-32.
Additionally, enforce alignment constraints: Tx rings must be a multiple
of 8, and Rx rings must be a multiple of 32.
Fixes: 5c3c48ac6bf5 ("i40e: implement virtual device interface")
Cc: stable(a)vger.kernel.org
Signed-off-by: Lukasz Czapnik <lukasz.czapnik(a)intel.com>
Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov(a)intel.com>
Signed-off-by: Przemek Kitszel <przemyslaw.kitszel(a)intel.com>
Reviewed-by: Simon Horman <horms(a)kernel.org>
Tested-by: Rafal Romanowski <rafal.romanowski(a)intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen(a)intel.com>
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
index 9b8efdeafbcf..cb37b2ac56f1 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
@@ -653,6 +653,13 @@ static int i40e_config_vsi_tx_queue(struct i40e_vf *vf, u16 vsi_id,
/* only set the required fields */
tx_ctx.base = info->dma_ring_addr / 128;
+
+ /* ring_len has to be multiple of 8 */
+ if (!IS_ALIGNED(info->ring_len, 8) ||
+ info->ring_len > I40E_MAX_NUM_DESCRIPTORS_XL710) {
+ ret = -EINVAL;
+ goto error_context;
+ }
tx_ctx.qlen = info->ring_len;
tx_ctx.rdylist = le16_to_cpu(vsi->info.qs_handle[0]);
tx_ctx.rdylist_act = 0;
@@ -716,6 +723,13 @@ static int i40e_config_vsi_rx_queue(struct i40e_vf *vf, u16 vsi_id,
/* only set the required fields */
rx_ctx.base = info->dma_ring_addr / 128;
+
+ /* ring_len has to be multiple of 32 */
+ if (!IS_ALIGNED(info->ring_len, 32) ||
+ info->ring_len > I40E_MAX_NUM_DESCRIPTORS_XL710) {
+ ret = -EINVAL;
+ goto error_param;
+ }
rx_ctx.qlen = info->ring_len;
if (info->splithdr_enabled) {
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x 456c32e3c4316654f95f9d49c12cbecfb77d5660
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025092944-overact-prowler-60f7@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 456c32e3c4316654f95f9d49c12cbecfb77d5660 Mon Sep 17 00:00:00 2001
From: "Masami Hiramatsu (Google)" <mhiramat(a)kernel.org>
Date: Fri, 19 Sep 2025 10:15:56 +0900
Subject: [PATCH] tracing: dynevent: Add a missing lockdown check on dynevent
Since dynamic_events interface on tracefs is compatible with
kprobe_events and uprobe_events, it should also check the lockdown
status and reject if it is set.
Link: https://lore.kernel.org/all/175824455687.45175.3734166065458520748.stgit@de…
Fixes: 17911ff38aa5 ("tracing: Add locked_down checks to the open calls of files created for tracefs")
Signed-off-by: Masami Hiramatsu (Google) <mhiramat(a)kernel.org>
Cc: stable(a)vger.kernel.org
diff --git a/kernel/trace/trace_dynevent.c b/kernel/trace/trace_dynevent.c
index 5d64a18cacac..d06854bd32b3 100644
--- a/kernel/trace/trace_dynevent.c
+++ b/kernel/trace/trace_dynevent.c
@@ -230,6 +230,10 @@ static int dyn_event_open(struct inode *inode, struct file *file)
{
int ret;
+ ret = security_locked_down(LOCKDOWN_TRACEFS);
+ if (ret)
+ return ret;
+
ret = tracing_check_open_get_tr(NULL);
if (ret)
return ret;
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x b99dd77076bd3fddac6f7f1cbfa081c38fde17f5
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025092903-gambling-usable-65ed@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From b99dd77076bd3fddac6f7f1cbfa081c38fde17f5 Mon Sep 17 00:00:00 2001
From: Lukasz Czapnik <lukasz.czapnik(a)intel.com>
Date: Wed, 13 Aug 2025 12:45:18 +0200
Subject: [PATCH] i40e: improve VF MAC filters accounting
When adding new VM MAC, driver checks only *active* filters in
vsi->mac_filter_hash. Each MAC, even in non-active state is using resources.
To determine number of MACs VM uses, count VSI filters in *any* state.
Add i40e_count_all_filters() to simply count all filters, and rename
i40e_count_filters() to i40e_count_active_filters() to avoid ambiguity.
Fixes: cfb1d572c986 ("i40e: Add ensurance of MacVlan resources for every trusted VF")
Cc: stable(a)vger.kernel.org
Signed-off-by: Lukasz Czapnik <lukasz.czapnik(a)intel.com>
Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov(a)intel.com>
Signed-off-by: Przemek Kitszel <przemyslaw.kitszel(a)intel.com>
Reviewed-by: Simon Horman <horms(a)kernel.org>
Tested-by: Rafal Romanowski <rafal.romanowski(a)intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen(a)intel.com>
diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h
index 49aa4497efce..801a57a925da 100644
--- a/drivers/net/ethernet/intel/i40e/i40e.h
+++ b/drivers/net/ethernet/intel/i40e/i40e.h
@@ -1278,7 +1278,8 @@ struct i40e_mac_filter *i40e_add_mac_filter(struct i40e_vsi *vsi,
const u8 *macaddr);
int i40e_del_mac_filter(struct i40e_vsi *vsi, const u8 *macaddr);
bool i40e_is_vsi_in_vlan(struct i40e_vsi *vsi);
-int i40e_count_filters(struct i40e_vsi *vsi);
+int i40e_count_all_filters(struct i40e_vsi *vsi);
+int i40e_count_active_filters(struct i40e_vsi *vsi);
struct i40e_mac_filter *i40e_find_mac(struct i40e_vsi *vsi, const u8 *macaddr);
void i40e_vlan_stripping_enable(struct i40e_vsi *vsi);
static inline bool i40e_is_sw_dcb(struct i40e_pf *pf)
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index b14019d44b58..529d5501baac 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -1243,12 +1243,30 @@ void i40e_update_stats(struct i40e_vsi *vsi)
}
/**
- * i40e_count_filters - counts VSI mac filters
+ * i40e_count_all_filters - counts VSI MAC filters
* @vsi: the VSI to be searched
*
- * Returns count of mac filters
- **/
-int i40e_count_filters(struct i40e_vsi *vsi)
+ * Return: count of MAC filters in any state.
+ */
+int i40e_count_all_filters(struct i40e_vsi *vsi)
+{
+ struct i40e_mac_filter *f;
+ struct hlist_node *h;
+ int bkt, cnt = 0;
+
+ hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist)
+ cnt++;
+
+ return cnt;
+}
+
+/**
+ * i40e_count_active_filters - counts VSI MAC filters
+ * @vsi: the VSI to be searched
+ *
+ * Return: count of active MAC filters.
+ */
+int i40e_count_active_filters(struct i40e_vsi *vsi)
{
struct i40e_mac_filter *f;
struct hlist_node *h;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
index f9b2197f0942..081a4526a2f0 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
@@ -2862,24 +2862,6 @@ static int i40e_vc_get_stats_msg(struct i40e_vf *vf, u8 *msg)
(u8 *)&stats, sizeof(stats));
}
-/**
- * i40e_can_vf_change_mac
- * @vf: pointer to the VF info
- *
- * Return true if the VF is allowed to change its MAC filters, false otherwise
- */
-static bool i40e_can_vf_change_mac(struct i40e_vf *vf)
-{
- /* If the VF MAC address has been set administratively (via the
- * ndo_set_vf_mac command), then deny permission to the VF to
- * add/delete unicast MAC addresses, unless the VF is trusted
- */
- if (vf->pf_set_mac && !vf->trusted)
- return false;
-
- return true;
-}
-
#define I40E_MAX_MACVLAN_PER_HW 3072
#define I40E_MAX_MACVLAN_PER_PF(num_ports) (I40E_MAX_MACVLAN_PER_HW / \
(num_ports))
@@ -2918,8 +2900,10 @@ static inline int i40e_check_vf_permission(struct i40e_vf *vf,
struct i40e_pf *pf = vf->pf;
struct i40e_vsi *vsi = pf->vsi[vf->lan_vsi_idx];
struct i40e_hw *hw = &pf->hw;
- int mac2add_cnt = 0;
- int i;
+ int i, mac_add_max, mac_add_cnt = 0;
+ bool vf_trusted;
+
+ vf_trusted = test_bit(I40E_VIRTCHNL_VF_CAP_PRIVILEGE, &vf->vf_caps);
for (i = 0; i < al->num_elements; i++) {
struct i40e_mac_filter *f;
@@ -2939,9 +2923,8 @@ static inline int i40e_check_vf_permission(struct i40e_vf *vf,
* The VF may request to set the MAC address filter already
* assigned to it so do not return an error in that case.
*/
- if (!i40e_can_vf_change_mac(vf) &&
- !is_multicast_ether_addr(addr) &&
- !ether_addr_equal(addr, vf->default_lan_addr.addr)) {
+ if (!vf_trusted && !is_multicast_ether_addr(addr) &&
+ vf->pf_set_mac && !ether_addr_equal(addr, vf->default_lan_addr.addr)) {
dev_err(&pf->pdev->dev,
"VF attempting to override administratively set MAC address, bring down and up the VF interface to resume normal operation\n");
return -EPERM;
@@ -2950,29 +2933,33 @@ static inline int i40e_check_vf_permission(struct i40e_vf *vf,
/*count filters that really will be added*/
f = i40e_find_mac(vsi, addr);
if (!f)
- ++mac2add_cnt;
+ ++mac_add_cnt;
}
/* If this VF is not privileged, then we can't add more than a limited
- * number of addresses. Check to make sure that the additions do not
- * push us over the limit.
- */
- if (!test_bit(I40E_VIRTCHNL_VF_CAP_PRIVILEGE, &vf->vf_caps)) {
- if ((i40e_count_filters(vsi) + mac2add_cnt) >
- I40E_VC_MAX_MAC_ADDR_PER_VF) {
- dev_err(&pf->pdev->dev,
- "Cannot add more MAC addresses, VF is not trusted, switch the VF to trusted to add more functionality\n");
- return -EPERM;
- }
- /* If this VF is trusted, it can use more resources than untrusted.
+ * number of addresses.
+ *
+ * If this VF is trusted, it can use more resources than untrusted.
* However to ensure that every trusted VF has appropriate number of
* resources, divide whole pool of resources per port and then across
* all VFs.
*/
- } else {
- if ((i40e_count_filters(vsi) + mac2add_cnt) >
- I40E_VC_MAX_MACVLAN_PER_TRUSTED_VF(pf->num_alloc_vfs,
- hw->num_ports)) {
+ if (!vf_trusted)
+ mac_add_max = I40E_VC_MAX_MAC_ADDR_PER_VF;
+ else
+ mac_add_max = I40E_VC_MAX_MACVLAN_PER_TRUSTED_VF(pf->num_alloc_vfs, hw->num_ports);
+
+ /* VF can replace all its filters in one step, in this case mac_add_max
+ * will be added as active and another mac_add_max will be in
+ * a to-be-removed state. Account for that.
+ */
+ if ((i40e_count_active_filters(vsi) + mac_add_cnt) > mac_add_max ||
+ (i40e_count_all_filters(vsi) + mac_add_cnt) > 2 * mac_add_max) {
+ if (!vf_trusted) {
+ dev_err(&pf->pdev->dev,
+ "Cannot add more MAC addresses, VF is not trusted, switch the VF to trusted to add more functionality\n");
+ return -EPERM;
+ } else {
dev_err(&pf->pdev->dev,
"Cannot add more MAC addresses, trusted VF exhausted it's resources\n");
return -EPERM;
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x b99dd77076bd3fddac6f7f1cbfa081c38fde17f5
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025092902-unranked-reboot-ae0d@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From b99dd77076bd3fddac6f7f1cbfa081c38fde17f5 Mon Sep 17 00:00:00 2001
From: Lukasz Czapnik <lukasz.czapnik(a)intel.com>
Date: Wed, 13 Aug 2025 12:45:18 +0200
Subject: [PATCH] i40e: improve VF MAC filters accounting
When adding new VM MAC, driver checks only *active* filters in
vsi->mac_filter_hash. Each MAC, even in non-active state is using resources.
To determine number of MACs VM uses, count VSI filters in *any* state.
Add i40e_count_all_filters() to simply count all filters, and rename
i40e_count_filters() to i40e_count_active_filters() to avoid ambiguity.
Fixes: cfb1d572c986 ("i40e: Add ensurance of MacVlan resources for every trusted VF")
Cc: stable(a)vger.kernel.org
Signed-off-by: Lukasz Czapnik <lukasz.czapnik(a)intel.com>
Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov(a)intel.com>
Signed-off-by: Przemek Kitszel <przemyslaw.kitszel(a)intel.com>
Reviewed-by: Simon Horman <horms(a)kernel.org>
Tested-by: Rafal Romanowski <rafal.romanowski(a)intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen(a)intel.com>
diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h
index 49aa4497efce..801a57a925da 100644
--- a/drivers/net/ethernet/intel/i40e/i40e.h
+++ b/drivers/net/ethernet/intel/i40e/i40e.h
@@ -1278,7 +1278,8 @@ struct i40e_mac_filter *i40e_add_mac_filter(struct i40e_vsi *vsi,
const u8 *macaddr);
int i40e_del_mac_filter(struct i40e_vsi *vsi, const u8 *macaddr);
bool i40e_is_vsi_in_vlan(struct i40e_vsi *vsi);
-int i40e_count_filters(struct i40e_vsi *vsi);
+int i40e_count_all_filters(struct i40e_vsi *vsi);
+int i40e_count_active_filters(struct i40e_vsi *vsi);
struct i40e_mac_filter *i40e_find_mac(struct i40e_vsi *vsi, const u8 *macaddr);
void i40e_vlan_stripping_enable(struct i40e_vsi *vsi);
static inline bool i40e_is_sw_dcb(struct i40e_pf *pf)
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index b14019d44b58..529d5501baac 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -1243,12 +1243,30 @@ void i40e_update_stats(struct i40e_vsi *vsi)
}
/**
- * i40e_count_filters - counts VSI mac filters
+ * i40e_count_all_filters - counts VSI MAC filters
* @vsi: the VSI to be searched
*
- * Returns count of mac filters
- **/
-int i40e_count_filters(struct i40e_vsi *vsi)
+ * Return: count of MAC filters in any state.
+ */
+int i40e_count_all_filters(struct i40e_vsi *vsi)
+{
+ struct i40e_mac_filter *f;
+ struct hlist_node *h;
+ int bkt, cnt = 0;
+
+ hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist)
+ cnt++;
+
+ return cnt;
+}
+
+/**
+ * i40e_count_active_filters - counts VSI MAC filters
+ * @vsi: the VSI to be searched
+ *
+ * Return: count of active MAC filters.
+ */
+int i40e_count_active_filters(struct i40e_vsi *vsi)
{
struct i40e_mac_filter *f;
struct hlist_node *h;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
index f9b2197f0942..081a4526a2f0 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
@@ -2862,24 +2862,6 @@ static int i40e_vc_get_stats_msg(struct i40e_vf *vf, u8 *msg)
(u8 *)&stats, sizeof(stats));
}
-/**
- * i40e_can_vf_change_mac
- * @vf: pointer to the VF info
- *
- * Return true if the VF is allowed to change its MAC filters, false otherwise
- */
-static bool i40e_can_vf_change_mac(struct i40e_vf *vf)
-{
- /* If the VF MAC address has been set administratively (via the
- * ndo_set_vf_mac command), then deny permission to the VF to
- * add/delete unicast MAC addresses, unless the VF is trusted
- */
- if (vf->pf_set_mac && !vf->trusted)
- return false;
-
- return true;
-}
-
#define I40E_MAX_MACVLAN_PER_HW 3072
#define I40E_MAX_MACVLAN_PER_PF(num_ports) (I40E_MAX_MACVLAN_PER_HW / \
(num_ports))
@@ -2918,8 +2900,10 @@ static inline int i40e_check_vf_permission(struct i40e_vf *vf,
struct i40e_pf *pf = vf->pf;
struct i40e_vsi *vsi = pf->vsi[vf->lan_vsi_idx];
struct i40e_hw *hw = &pf->hw;
- int mac2add_cnt = 0;
- int i;
+ int i, mac_add_max, mac_add_cnt = 0;
+ bool vf_trusted;
+
+ vf_trusted = test_bit(I40E_VIRTCHNL_VF_CAP_PRIVILEGE, &vf->vf_caps);
for (i = 0; i < al->num_elements; i++) {
struct i40e_mac_filter *f;
@@ -2939,9 +2923,8 @@ static inline int i40e_check_vf_permission(struct i40e_vf *vf,
* The VF may request to set the MAC address filter already
* assigned to it so do not return an error in that case.
*/
- if (!i40e_can_vf_change_mac(vf) &&
- !is_multicast_ether_addr(addr) &&
- !ether_addr_equal(addr, vf->default_lan_addr.addr)) {
+ if (!vf_trusted && !is_multicast_ether_addr(addr) &&
+ vf->pf_set_mac && !ether_addr_equal(addr, vf->default_lan_addr.addr)) {
dev_err(&pf->pdev->dev,
"VF attempting to override administratively set MAC address, bring down and up the VF interface to resume normal operation\n");
return -EPERM;
@@ -2950,29 +2933,33 @@ static inline int i40e_check_vf_permission(struct i40e_vf *vf,
/*count filters that really will be added*/
f = i40e_find_mac(vsi, addr);
if (!f)
- ++mac2add_cnt;
+ ++mac_add_cnt;
}
/* If this VF is not privileged, then we can't add more than a limited
- * number of addresses. Check to make sure that the additions do not
- * push us over the limit.
- */
- if (!test_bit(I40E_VIRTCHNL_VF_CAP_PRIVILEGE, &vf->vf_caps)) {
- if ((i40e_count_filters(vsi) + mac2add_cnt) >
- I40E_VC_MAX_MAC_ADDR_PER_VF) {
- dev_err(&pf->pdev->dev,
- "Cannot add more MAC addresses, VF is not trusted, switch the VF to trusted to add more functionality\n");
- return -EPERM;
- }
- /* If this VF is trusted, it can use more resources than untrusted.
+ * number of addresses.
+ *
+ * If this VF is trusted, it can use more resources than untrusted.
* However to ensure that every trusted VF has appropriate number of
* resources, divide whole pool of resources per port and then across
* all VFs.
*/
- } else {
- if ((i40e_count_filters(vsi) + mac2add_cnt) >
- I40E_VC_MAX_MACVLAN_PER_TRUSTED_VF(pf->num_alloc_vfs,
- hw->num_ports)) {
+ if (!vf_trusted)
+ mac_add_max = I40E_VC_MAX_MAC_ADDR_PER_VF;
+ else
+ mac_add_max = I40E_VC_MAX_MACVLAN_PER_TRUSTED_VF(pf->num_alloc_vfs, hw->num_ports);
+
+ /* VF can replace all its filters in one step, in this case mac_add_max
+ * will be added as active and another mac_add_max will be in
+ * a to-be-removed state. Account for that.
+ */
+ if ((i40e_count_active_filters(vsi) + mac_add_cnt) > mac_add_max ||
+ (i40e_count_all_filters(vsi) + mac_add_cnt) > 2 * mac_add_max) {
+ if (!vf_trusted) {
+ dev_err(&pf->pdev->dev,
+ "Cannot add more MAC addresses, VF is not trusted, switch the VF to trusted to add more functionality\n");
+ return -EPERM;
+ } else {
dev_err(&pf->pdev->dev,
"Cannot add more MAC addresses, trusted VF exhausted it's resources\n");
return -EPERM;
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x b99dd77076bd3fddac6f7f1cbfa081c38fde17f5
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025092901-doorpost-cure-351c@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From b99dd77076bd3fddac6f7f1cbfa081c38fde17f5 Mon Sep 17 00:00:00 2001
From: Lukasz Czapnik <lukasz.czapnik(a)intel.com>
Date: Wed, 13 Aug 2025 12:45:18 +0200
Subject: [PATCH] i40e: improve VF MAC filters accounting
When adding new VM MAC, driver checks only *active* filters in
vsi->mac_filter_hash. Each MAC, even in non-active state is using resources.
To determine number of MACs VM uses, count VSI filters in *any* state.
Add i40e_count_all_filters() to simply count all filters, and rename
i40e_count_filters() to i40e_count_active_filters() to avoid ambiguity.
Fixes: cfb1d572c986 ("i40e: Add ensurance of MacVlan resources for every trusted VF")
Cc: stable(a)vger.kernel.org
Signed-off-by: Lukasz Czapnik <lukasz.czapnik(a)intel.com>
Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov(a)intel.com>
Signed-off-by: Przemek Kitszel <przemyslaw.kitszel(a)intel.com>
Reviewed-by: Simon Horman <horms(a)kernel.org>
Tested-by: Rafal Romanowski <rafal.romanowski(a)intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen(a)intel.com>
diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h
index 49aa4497efce..801a57a925da 100644
--- a/drivers/net/ethernet/intel/i40e/i40e.h
+++ b/drivers/net/ethernet/intel/i40e/i40e.h
@@ -1278,7 +1278,8 @@ struct i40e_mac_filter *i40e_add_mac_filter(struct i40e_vsi *vsi,
const u8 *macaddr);
int i40e_del_mac_filter(struct i40e_vsi *vsi, const u8 *macaddr);
bool i40e_is_vsi_in_vlan(struct i40e_vsi *vsi);
-int i40e_count_filters(struct i40e_vsi *vsi);
+int i40e_count_all_filters(struct i40e_vsi *vsi);
+int i40e_count_active_filters(struct i40e_vsi *vsi);
struct i40e_mac_filter *i40e_find_mac(struct i40e_vsi *vsi, const u8 *macaddr);
void i40e_vlan_stripping_enable(struct i40e_vsi *vsi);
static inline bool i40e_is_sw_dcb(struct i40e_pf *pf)
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index b14019d44b58..529d5501baac 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -1243,12 +1243,30 @@ void i40e_update_stats(struct i40e_vsi *vsi)
}
/**
- * i40e_count_filters - counts VSI mac filters
+ * i40e_count_all_filters - counts VSI MAC filters
* @vsi: the VSI to be searched
*
- * Returns count of mac filters
- **/
-int i40e_count_filters(struct i40e_vsi *vsi)
+ * Return: count of MAC filters in any state.
+ */
+int i40e_count_all_filters(struct i40e_vsi *vsi)
+{
+ struct i40e_mac_filter *f;
+ struct hlist_node *h;
+ int bkt, cnt = 0;
+
+ hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist)
+ cnt++;
+
+ return cnt;
+}
+
+/**
+ * i40e_count_active_filters - counts VSI MAC filters
+ * @vsi: the VSI to be searched
+ *
+ * Return: count of active MAC filters.
+ */
+int i40e_count_active_filters(struct i40e_vsi *vsi)
{
struct i40e_mac_filter *f;
struct hlist_node *h;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
index f9b2197f0942..081a4526a2f0 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
@@ -2862,24 +2862,6 @@ static int i40e_vc_get_stats_msg(struct i40e_vf *vf, u8 *msg)
(u8 *)&stats, sizeof(stats));
}
-/**
- * i40e_can_vf_change_mac
- * @vf: pointer to the VF info
- *
- * Return true if the VF is allowed to change its MAC filters, false otherwise
- */
-static bool i40e_can_vf_change_mac(struct i40e_vf *vf)
-{
- /* If the VF MAC address has been set administratively (via the
- * ndo_set_vf_mac command), then deny permission to the VF to
- * add/delete unicast MAC addresses, unless the VF is trusted
- */
- if (vf->pf_set_mac && !vf->trusted)
- return false;
-
- return true;
-}
-
#define I40E_MAX_MACVLAN_PER_HW 3072
#define I40E_MAX_MACVLAN_PER_PF(num_ports) (I40E_MAX_MACVLAN_PER_HW / \
(num_ports))
@@ -2918,8 +2900,10 @@ static inline int i40e_check_vf_permission(struct i40e_vf *vf,
struct i40e_pf *pf = vf->pf;
struct i40e_vsi *vsi = pf->vsi[vf->lan_vsi_idx];
struct i40e_hw *hw = &pf->hw;
- int mac2add_cnt = 0;
- int i;
+ int i, mac_add_max, mac_add_cnt = 0;
+ bool vf_trusted;
+
+ vf_trusted = test_bit(I40E_VIRTCHNL_VF_CAP_PRIVILEGE, &vf->vf_caps);
for (i = 0; i < al->num_elements; i++) {
struct i40e_mac_filter *f;
@@ -2939,9 +2923,8 @@ static inline int i40e_check_vf_permission(struct i40e_vf *vf,
* The VF may request to set the MAC address filter already
* assigned to it so do not return an error in that case.
*/
- if (!i40e_can_vf_change_mac(vf) &&
- !is_multicast_ether_addr(addr) &&
- !ether_addr_equal(addr, vf->default_lan_addr.addr)) {
+ if (!vf_trusted && !is_multicast_ether_addr(addr) &&
+ vf->pf_set_mac && !ether_addr_equal(addr, vf->default_lan_addr.addr)) {
dev_err(&pf->pdev->dev,
"VF attempting to override administratively set MAC address, bring down and up the VF interface to resume normal operation\n");
return -EPERM;
@@ -2950,29 +2933,33 @@ static inline int i40e_check_vf_permission(struct i40e_vf *vf,
/*count filters that really will be added*/
f = i40e_find_mac(vsi, addr);
if (!f)
- ++mac2add_cnt;
+ ++mac_add_cnt;
}
/* If this VF is not privileged, then we can't add more than a limited
- * number of addresses. Check to make sure that the additions do not
- * push us over the limit.
- */
- if (!test_bit(I40E_VIRTCHNL_VF_CAP_PRIVILEGE, &vf->vf_caps)) {
- if ((i40e_count_filters(vsi) + mac2add_cnt) >
- I40E_VC_MAX_MAC_ADDR_PER_VF) {
- dev_err(&pf->pdev->dev,
- "Cannot add more MAC addresses, VF is not trusted, switch the VF to trusted to add more functionality\n");
- return -EPERM;
- }
- /* If this VF is trusted, it can use more resources than untrusted.
+ * number of addresses.
+ *
+ * If this VF is trusted, it can use more resources than untrusted.
* However to ensure that every trusted VF has appropriate number of
* resources, divide whole pool of resources per port and then across
* all VFs.
*/
- } else {
- if ((i40e_count_filters(vsi) + mac2add_cnt) >
- I40E_VC_MAX_MACVLAN_PER_TRUSTED_VF(pf->num_alloc_vfs,
- hw->num_ports)) {
+ if (!vf_trusted)
+ mac_add_max = I40E_VC_MAX_MAC_ADDR_PER_VF;
+ else
+ mac_add_max = I40E_VC_MAX_MACVLAN_PER_TRUSTED_VF(pf->num_alloc_vfs, hw->num_ports);
+
+ /* VF can replace all its filters in one step, in this case mac_add_max
+ * will be added as active and another mac_add_max will be in
+ * a to-be-removed state. Account for that.
+ */
+ if ((i40e_count_active_filters(vsi) + mac_add_cnt) > mac_add_max ||
+ (i40e_count_all_filters(vsi) + mac_add_cnt) > 2 * mac_add_max) {
+ if (!vf_trusted) {
+ dev_err(&pf->pdev->dev,
+ "Cannot add more MAC addresses, VF is not trusted, switch the VF to trusted to add more functionality\n");
+ return -EPERM;
+ } else {
dev_err(&pf->pdev->dev,
"Cannot add more MAC addresses, trusted VF exhausted it's resources\n");
return -EPERM;
From: Matt Fleming <mfleming(a)cloudflare.com>
This reverts commit b7ca5743a2604156d6083b88cefacef983f3a3a6.
If we dequeue a task (task B) that was sched delayed then that task is
definitely no longer on the rq and not tracked in the rbtree.
Unfortunately, task_on_rq_queued(B) will still return true because
dequeue_task() doesn't update p->on_rq.
This inconsistency can lead to tasks (task A) spinning indefinitely in
wait_task_inactive(), e.g. when delivering a fatal signal to a thread
group, because it thinks the task B is still queued (it's not) and waits
forever for it to unschedule.
Task A Task B
arch_do_signal_or_restart()
get_signal()
do_coredump()
coredump_wait()
zap_threads() arch_do_signal_or_restart()
wait_task_inactive() <-- SPIN get_signal()
do_group_exit()
do_exit()
coredump_task_exit()
schedule() <--- never comes back
Not only will task A spin forever in wait_task_inactive(), but task B
will also trigger RCU stalls:
INFO: rcu_tasks detected stalls on tasks:
00000000a973a4d8: .. nvcsw: 2/2 holdout: 1 idle_cpu: -1/79
task:ffmpeg state:I stack:0 pid:665601 tgid:665155 ppid:668691 task_flags:0x400448 flags:0x00004006
Call Trace:
<TASK>
__schedule+0x4fb/0xbf0
? srso_return_thunk+0x5/0x5f
schedule+0x27/0xf0
do_exit+0xdd/0xaa0
? __pfx_futex_wake_mark+0x10/0x10
do_group_exit+0x30/0x80
get_signal+0x81e/0x860
? srso_return_thunk+0x5/0x5f
? futex_wake+0x177/0x1a0
arch_do_signal_or_restart+0x2e/0x1f0
? srso_return_thunk+0x5/0x5f
? srso_return_thunk+0x5/0x5f
? __x64_sys_futex+0x10c/0x1d0
syscall_exit_to_user_mode+0xa5/0x130
do_syscall_64+0x57/0x110
entry_SYSCALL_64_after_hwframe+0x76/0x7e
RIP: 0033:0x7f22d05b0f16
RSP: 002b:00007f2265761cf0 EFLAGS: 00000246 ORIG_RAX: 00000000000000ca
RAX: fffffffffffffe00 RBX: 0000000000000000 RCX: 00007f22d05b0f16
RDX: 0000000000000000 RSI: 0000000000000189 RDI: 00005629e320d97c
RBP: 0000000000000000 R08: 0000000000000000 R09: 00000000ffffffff
R10: 0000000000000000 R11: 0000000000000246 R12: 00005629e320d928
R13: 0000000000000000 R14: 0000000000000001 R15: 00005629e320d97c
</TASK>
Fixes: b7ca5743a260 ("sched/core: Tweak wait_task_inactive() to force dequeue sched_delayed tasks")
Cc: Peter Zijlstra <peterz(a)infradead.org>
Cc: Oleg Nesterov <oleg(a)redhat.com>
Cc: John Stultz <jstultz(a)google.com>
Cc: Chris Arges <carges(a)cloudflare.com>
Cc: stable(a)vger.kernel.org # v6.12
Signed-off-by: Matt Fleming <mfleming(a)cloudflare.com>
---
kernel/sched/core.c | 6 ------
1 file changed, 6 deletions(-)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index ccba6fc3c3fe..2dfc3977920d 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2293,12 +2293,6 @@ unsigned long wait_task_inactive(struct task_struct *p, unsigned int match_state
* just go back and repeat.
*/
rq = task_rq_lock(p, &rf);
- /*
- * If task is sched_delayed, force dequeue it, to avoid always
- * hitting the tick timeout in the queued case
- */
- if (p->se.sched_delayed)
- dequeue_task(rq, p, DEQUEUE_SLEEP | DEQUEUE_DELAYED);
trace_sched_wait_task(p);
running = task_on_cpu(rq, p);
queued = task_on_rq_queued(p);
--
2.34.1
DCCP sockets in DCCP_REQUESTING state do not check the sequence number
or acknowledgment number for incoming Reset, CloseReq, and Close packets.
As a result, an attacker can send a spoofed Reset packet while the client
is in the requesting state. The client will accept the packet without
verification and immediately close the connection, causing a denial of
service (DoS) attack.
This patch moves the processing of Reset, Close, and CloseReq packets
into dccp_rcv_request_sent_state_process() and validates the ack number
before accepting them.
This fix should apply to stable versions *only* in Linux 5.x and 6.x.
Note that DCCP was removed in Linux 6.16, so this patch is only relevant
for older versions. We tested it on Ubuntu 24.04 LTS (Linux 6.8) and
it worked as expected.
Signed-off-by: Yizhou Zhao <zhaoyz24(a)mails.tsinghua.edu.cn>
Cc: stable(a)vger.kernel.org
Signed-off-by: Yizhou Zhao <zhaoyz24(a)mails.tsinghua.edu.cn>
---
net/dccp/input.c | 54 ++++++++++++++++++++++++++++--------------------
1 file changed, 32 insertions(+), 22 deletions(-)
diff --git a/net/dccp/input.c b/net/dccp/input.c
index 2cbb757a8..0b1ffb044 100644
--- a/net/dccp/input.c
+++ b/net/dccp/input.c
@@ -397,21 +397,22 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk,
* / * Response processing continues in Step 10; Reset
* processing continues in Step 9 * /
*/
+ struct dccp_sock *dp = dccp_sk(sk);
+
+ if (!between48(DCCP_SKB_CB(skb)->dccpd_ack_seq,
+ dp->dccps_awl, dp->dccps_awh)) {
+ dccp_pr_debug("invalid ackno: S.AWL=%llu, "
+ "P.ackno=%llu, S.AWH=%llu\n",
+ (unsigned long long)dp->dccps_awl,
+ (unsigned long long)DCCP_SKB_CB(skb)->dccpd_ack_seq,
+ (unsigned long long)dp->dccps_awh);
+ goto out_invalid_packet;
+ }
+
if (dh->dccph_type == DCCP_PKT_RESPONSE) {
const struct inet_connection_sock *icsk = inet_csk(sk);
- struct dccp_sock *dp = dccp_sk(sk);
- long tstamp = dccp_timestamp();
-
- if (!between48(DCCP_SKB_CB(skb)->dccpd_ack_seq,
- dp->dccps_awl, dp->dccps_awh)) {
- dccp_pr_debug("invalid ackno: S.AWL=%llu, "
- "P.ackno=%llu, S.AWH=%llu\n",
- (unsigned long long)dp->dccps_awl,
- (unsigned long long)DCCP_SKB_CB(skb)->dccpd_ack_seq,
- (unsigned long long)dp->dccps_awh);
- goto out_invalid_packet;
- }
+ long tstamp = dccp_timestamp();
/*
* If option processing (Step 8) failed, return 1 here so that
* dccp_v4_do_rcv() sends a Reset. The Reset code depends on
@@ -496,6 +497,13 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk,
}
dccp_send_ack(sk);
return -1;
+ } else if (dh->dccph_type == DCCP_PKT_RESET) {
+ dccp_rcv_reset(sk, skb);
+ return 0;
+ } else if (dh->dccph_type == DCCP_PKT_CLOSEREQ) {
+ return dccp_rcv_closereq(sk, skb);
+ } else if (dh->dccph_type == DCCP_PKT_CLOSE) {
+ return dccp_rcv_close(sk, skb);
}
out_invalid_packet:
@@ -658,17 +666,19 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
* Set TIMEWAIT timer
* Drop packet and return
*/
- if (dh->dccph_type == DCCP_PKT_RESET) {
- dccp_rcv_reset(sk, skb);
- return 0;
- } else if (dh->dccph_type == DCCP_PKT_CLOSEREQ) { /* Step 13 */
- if (dccp_rcv_closereq(sk, skb))
- return 0;
- goto discard;
- } else if (dh->dccph_type == DCCP_PKT_CLOSE) { /* Step 14 */
- if (dccp_rcv_close(sk, skb))
+ if (sk->sk_state != DCCP_REQUESTING) {
+ if (dh->dccph_type == DCCP_PKT_RESET) {
+ dccp_rcv_reset(sk, skb);
return 0;
- goto discard;
+ } else if (dh->dccph_type == DCCP_PKT_CLOSEREQ) { /* Step 13 */
+ if (dccp_rcv_closereq(sk, skb))
+ return 0;
+ goto discard;
+ } else if (dh->dccph_type == DCCP_PKT_CLOSE) { /* Step 14 */
+ if (dccp_rcv_close(sk, skb))
+ return 0;
+ goto discard;
+ }
}
switch (sk->sk_state) {
--
2.34.1
Hi,
This series drops the QPIC interconnect and BCM nodes for the SDX75 SoC. The
reason is that this QPIC BCM resource is already defined as a RPMh clock in
clk-rpmh driver as like other SDX SoCs. So it is wrong to describe the same
resource in two different providers.
Also, without this series, the NAND driver fails to probe on SDX75 as the
interconnect sync state disables the QPIC nodes as there were no clients voting
for this ICC resource. However, the NAND driver had already voted for this BCM
resource through the clk-rpmh driver. Since both votes come from Linux, RPMh was
unable to distinguish between these two and ends up disabling the resource
during sync state.
Cc: linux-arm-msm(a)vger.kernel.org
Cc: linux-pm(a)vger.kernel.org
Cc: linux-kernel(a)vger.kernel.org
Cc: Krzysztof Kozlowski <krzysztof.kozlowski(a)linaro.org>
Cc: devicetree(a)vger.kernel.org
Cc: Manivannan Sadhasivam <mani(a)kernel.org>
Cc: Raviteja Laggyshetty <quic_rlaggysh(a)quicinc.com>
Cc: Lakshmi Sowjanya D <quic_laksd(a)quicinc.com>
To: Georgi Djakov <djakov(a)kernel.org>
To: Konrad Dybcio <konradybcio(a)kernel.org>
To: Rob Herring <robh(a)kernel.org>
To: Krzysztof Kozlowski <krzk+dt(a)kernel.org>
To: Conor Dooley <conor+dt(a)kernel.org>
Signed-off-by: Manivannan Sadhasivam <manivannan.sadhasivam(a)oss.qualcomm.com>
Changes in v2:
- Taken over the series from Raviteja
- Reordered the patches to avoid breaking build
- Improved the patch descriptions and kept the values for other defines
unchanged
---
Raviteja Laggyshetty (2):
interconnect: qcom: sdx75: Drop QPIC interconnect and BCM nodes
dt-bindings: interconnect: qcom: Drop QPIC_CORE IDs
drivers/interconnect/qcom/sdx75.c | 26 --------------------------
drivers/interconnect/qcom/sdx75.h | 2 --
include/dt-bindings/interconnect/qcom,sdx75.h | 2 --
3 files changed, 30 deletions(-)
---
base-commit: 8f5ae30d69d7543eee0d70083daf4de8fe15d585
change-id: 20250926-sdx75-icc-67c20b3de84a
Best regards,
--
Manivannan Sadhasivam <manivannan.sadhasivam(a)oss.qualcomm.com>
Struct ff_effect_compat is embedded twice inside
uinput_ff_upload_compat, contains internal padding. In particular, there
is a hole after struct ff_replay to satisfy alignment requirements for
the following union member. Without clearing the structure,
copy_to_user() may leak stack data to userspace.
Initialize ff_up_compat to zero before filling valid fields.
Fixes: 2d56f3a32c0e ("Input: refactor evdev 32bit compat to be shareable with uinput")
Cc: stable(a)vger.kernel.org
Signed-off-by: Zhen Ni <zhen.ni(a)easystack.cn>
---
drivers/input/misc/uinput.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/input/misc/uinput.c b/drivers/input/misc/uinput.c
index 2c51ea9d01d7..13336a2fd49c 100644
--- a/drivers/input/misc/uinput.c
+++ b/drivers/input/misc/uinput.c
@@ -775,6 +775,7 @@ static int uinput_ff_upload_to_user(char __user *buffer,
if (in_compat_syscall()) {
struct uinput_ff_upload_compat ff_up_compat;
+ memset(&ff_up_compat, 0, sizeof(ff_up_compat));
ff_up_compat.request_id = ff_up->request_id;
ff_up_compat.retval = ff_up->retval;
/*
--
2.20.1
Hi Zhang, hi Jiri,
In Debian Staffan Melin reported that after an update containing the
commit 1a8953f4f774 ("HID: Add IGNORE quirk for SMARTLINKTECHNOLOGY"),
the input device with same idVendor and idProduct, the Jieli
Technology USB Composite Device, does not get recognized anymore.
The full Debian report is at: https://bugs.debian.org/1114557
The issue is not specific to the 6.12.y series and confirmed in 6.16.3
as well.
Staffan Melin did bisect the kernels between 6.12.38 (which was still
working) and 6.1.41 (which was not), confirming by bisection that the
offending commit is
1a8953f4f774 ("HID: Add IGNORE quirk for SMARTLINKTECHNOLOGY")
#regzbot introduced: 1a8953f4f774
#regzbot monitor: https://bugs.debian.org/1114557
So it looks that the quirk applied is unfortunately affecting
negatively as well Staffan Melin case.
Can you have a look?
Regards,
Salvatore
The quilt patch titled
Subject: mm: swap: check for stable address space before operating on the VMA
has been removed from the -mm tree. Its filename was
mm-swap-check-for-stable-address-space-before-operating-on-the-vma.patch
This patch was dropped because it was merged into the mm-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Charan Teja Kalla <charan.kalla(a)oss.qualcomm.com>
Subject: mm: swap: check for stable address space before operating on the VMA
Date: Wed, 24 Sep 2025 23:41:38 +0530
It is possible to hit a zero entry while traversing the vmas in unuse_mm()
called from swapoff path and accessing it causes the OOPS:
Unable to handle kernel NULL pointer dereference at virtual address
0000000000000446--> Loading the memory from offset 0x40 on the
XA_ZERO_ENTRY as address.
Mem abort info:
ESR = 0x0000000096000005
EC = 0x25: DABT (current EL), IL = 32 bits
SET = 0, FnV = 0
EA = 0, S1PTW = 0
FSC = 0x05: level 1 translation fault
The issue is manifested from the below race between the fork() on a
process and swapoff:
fork(dup_mmap()) swapoff(unuse_mm)
--------------- -----------------
1) Identical mtree is built using
__mt_dup().
2) copy_pte_range()-->
copy_nonpresent_pte():
The dst mm is added into the
mmlist to be visible to the
swapoff operation.
3) Fatal signal is sent to the parent
process(which is the current during the
fork) thus skip the duplication of the
vmas and mark the vma range with
XA_ZERO_ENTRY as a marker for this process
that helps during exit_mmap().
4) swapoff is tried on the
'mm' added to the 'mmlist' as
part of the 2.
5) unuse_mm(), that iterates
through the vma's of this 'mm'
will hit the non-NULL zero entry
and operating on this zero entry
as a vma is resulting into the
oops.
The proper fix would be around not exposing this partially-valid tree to
others when droping the mmap lock, which is being solved with [1]. A
simpler solution would be checking for MMF_UNSTABLE, as it is set if
mm_struct is not fully initialized in dup_mmap().
Thanks to Liam/Lorenzo/David for all the suggestions in fixing this
issue.
Link: https://lkml.kernel.org/r/20250924181138.1762750-1-charan.kalla@oss.qualcom…
Link: https://lore.kernel.org/all/20250815191031.3769540-1-Liam.Howlett@oracle.co… [1]
Fixes: d24062914837 ("fork: use __mt_dup() to duplicate maple tree in dup_mmap()")
Signed-off-by: Charan Teja Kalla <charan.kalla(a)oss.qualcomm.com>
Suggested-by: David Hildenbrand <david(a)redhat.com>
Cc: Baoquan He <bhe(a)redhat.com>
Cc: Barry Song <baohua(a)kernel.org>
Cc: Chris Li <chrisl(a)kernel.org>
Cc: Kairui Song <kasong(a)tencent.com>
Cc: Kemeng Shi <shikemeng(a)huaweicloud.com>
Cc: Liam Howlett <liam.howlett(a)oracle.com>
Cc: Lorenzo Stoakes <lorenzo.stoakes(a)oracle.com>
Cc: Nhat Pham <nphamcs(a)gmail.com>
Cc: Peng Zhang <zhangpeng.00(a)bytedance.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/swapfile.c | 3 +++
1 file changed, 3 insertions(+)
--- a/mm/swapfile.c~mm-swap-check-for-stable-address-space-before-operating-on-the-vma
+++ a/mm/swapfile.c
@@ -2389,6 +2389,8 @@ static int unuse_mm(struct mm_struct *mm
VMA_ITERATOR(vmi, mm, 0);
mmap_read_lock(mm);
+ if (check_stable_address_space(mm))
+ goto unlock;
for_each_vma(vmi, vma) {
if (vma->anon_vma && !is_vm_hugetlb_page(vma)) {
ret = unuse_vma(vma, type);
@@ -2398,6 +2400,7 @@ static int unuse_mm(struct mm_struct *mm
cond_resched();
}
+unlock:
mmap_read_unlock(mm);
return ret;
}
_
Patches currently in -mm which might be from charan.kalla(a)oss.qualcomm.com are
The quilt patch titled
Subject: mm/ksm: fix incorrect KSM counter handling in mm_struct during fork
has been removed from the -mm tree. Its filename was
mm-ksm-fix-incorrect-ksm-counter-handling-in-mm_struct-during-fork.patch
This patch was dropped because it was merged into the mm-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Donet Tom <donettom(a)linux.ibm.com>
Subject: mm/ksm: fix incorrect KSM counter handling in mm_struct during fork
Date: Wed, 24 Sep 2025 00:16:59 +0530
Patch series "mm/ksm: Fix incorrect accounting of KSM counters during
fork", v3.
The first patch in this series fixes the incorrect accounting of KSM
counters such as ksm_merging_pages, ksm_rmap_items, and the global
ksm_zero_pages during fork.
The following patch add a selftest to verify the ksm_merging_pages counter
was updated correctly during fork.
Test Results
============
Without the first patch
-----------------------
# [RUN] test_fork_ksm_merging_page_count
not ok 10 ksm_merging_page in child: 32
With the first patch
--------------------
# [RUN] test_fork_ksm_merging_page_count
ok 10 ksm_merging_pages is not inherited after fork
This patch (of 2):
Currently, the KSM-related counters in `mm_struct`, such as
`ksm_merging_pages`, `ksm_rmap_items`, and `ksm_zero_pages`, are inherited
by the child process during fork. This results in inconsistent
accounting.
When a process uses KSM, identical pages are merged and an rmap item is
created for each merged page. The `ksm_merging_pages` and
`ksm_rmap_items` counters are updated accordingly. However, after a fork,
these counters are copied to the child while the corresponding rmap items
are not. As a result, when the child later triggers an unmerge, there are
no rmap items present in the child, so the counters remain stale, leading
to incorrect accounting.
A similar issue exists with `ksm_zero_pages`, which maintains both a
global counter and a per-process counter. During fork, the per-process
counter is inherited by the child, but the global counter is not
incremented. Since the child also references zero pages, the global
counter should be updated as well. Otherwise, during zero-page unmerge,
both the global and per-process counters are decremented, causing the
global counter to become inconsistent.
To fix this, ksm_merging_pages and ksm_rmap_items are reset to 0 during
fork, and the global ksm_zero_pages counter is updated with the
per-process ksm_zero_pages value inherited by the child. This ensures
that KSM statistics remain accurate and reflect the activity of each
process correctly.
Link: https://lkml.kernel.org/r/cover.1758648700.git.donettom@linux.ibm.com
Link: https://lkml.kernel.org/r/7b9870eb67ccc0d79593940d9dbd4a0b39b5d396.17586487…
Fixes: 7609385337a4 ("ksm: count ksm merging pages for each process")
Fixes: cb4df4cae4f2 ("ksm: count allocated ksm rmap_items for each process")
Fixes: e2942062e01d ("ksm: count all zero pages placed by KSM")
Signed-off-by: Donet Tom <donettom(a)linux.ibm.com>
Reviewed-by: Chengming Zhou <chengming.zhou(a)linux.dev>
Acked-by: David Hildenbrand <david(a)redhat.com>
Cc: Aboorva Devarajan <aboorvad(a)linux.ibm.com>
Cc: David Hildenbrand <david(a)redhat.com>
Cc: Donet Tom <donettom(a)linux.ibm.com>
Cc: "Ritesh Harjani (IBM)" <ritesh.list(a)gmail.com>
Cc: Wei Yang <richard.weiyang(a)gmail.com>
Cc: xu xin <xu.xin16(a)zte.com.cn>
Cc: <stable(a)vger.kernel.org> [6.6+]
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
include/linux/ksm.h | 8 +++++++-
1 file changed, 7 insertions(+), 1 deletion(-)
--- a/include/linux/ksm.h~mm-ksm-fix-incorrect-ksm-counter-handling-in-mm_struct-during-fork
+++ a/include/linux/ksm.h
@@ -56,8 +56,14 @@ static inline long mm_ksm_zero_pages(str
static inline void ksm_fork(struct mm_struct *mm, struct mm_struct *oldmm)
{
/* Adding mm to ksm is best effort on fork. */
- if (mm_flags_test(MMF_VM_MERGEABLE, oldmm))
+ if (mm_flags_test(MMF_VM_MERGEABLE, oldmm)) {
+ long nr_ksm_zero_pages = atomic_long_read(&mm->ksm_zero_pages);
+
+ mm->ksm_merging_pages = 0;
+ mm->ksm_rmap_items = 0;
+ atomic_long_add(nr_ksm_zero_pages, &ksm_zero_pages);
__ksm_enter(mm);
+ }
}
static inline int ksm_execve(struct mm_struct *mm)
_
Patches currently in -mm which might be from donettom(a)linux.ibm.com are