The patch below does not apply to the 6.8-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.8.y
git checkout FETCH_HEAD
git cherry-pick -x 7a1b3490f47e88ec4cbde65f1a77a0f4bc972282
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024040519-palpable-barrel-9103@gregkh' --subject-prefix 'PATCH 6.8.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 7a1b3490f47e88ec4cbde65f1a77a0f4bc972282 Mon Sep 17 00:00:00 2001
From: Davide Caratti <dcaratti(a)redhat.com>
Date: Fri, 29 Mar 2024 13:08:52 +0100
Subject: [PATCH] mptcp: don't account accept() of non-MPC client as fallback
to TCP
Current MPTCP servers increment MPTcpExtMPCapableFallbackACK when they
accept non-MPC connections. As reported by Christoph, this is "surprising"
because the counter might become greater than MPTcpExtMPCapableSYNRX.
MPTcpExtMPCapableFallbackACK counter's name suggests it should only be
incremented when a connection was seen using MPTCP options, then a
fallback to TCP has been done. Let's do that by incrementing it when
the subflow context of an inbound MPC connection attempt is dropped.
Also, update mptcp_connect.sh kselftest, to ensure that the
above MIB does not increment in case a pure TCP client connects to a
MPTCP server.
Fixes: fc518953bc9c ("mptcp: add and use MIB counter infrastructure")
Cc: stable(a)vger.kernel.org
Reported-by: Christoph Paasch <cpaasch(a)apple.com>
Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/449
Signed-off-by: Davide Caratti <dcaratti(a)redhat.com>
Reviewed-by: Mat Martineau <martineau(a)kernel.org>
Reviewed-by: Matthieu Baerts (NGI0) <matttbe(a)kernel.org>
Signed-off-by: Matthieu Baerts (NGI0) <matttbe(a)kernel.org>
Link: https://lore.kernel.org/r/20240329-upstream-net-20240329-fallback-mib-v1-1-…
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 3a1967bc7bad..7e74b812e366 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -3937,8 +3937,6 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
mptcp_set_state(newsk, TCP_CLOSE);
}
} else {
- MPTCP_INC_STATS(sock_net(ssk),
- MPTCP_MIB_MPCAPABLEPASSIVEFALLBACK);
tcpfallback:
newsk->sk_kern_sock = kern;
lock_sock(newsk);
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 1626dd20c68f..6042a47da61b 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -905,6 +905,8 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
return child;
fallback:
+ if (fallback)
+ SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MPCAPABLEPASSIVEFALLBACK);
mptcp_subflow_drop_ctx(child);
return child;
}
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
index 4c4248554826..4131f3263a48 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
@@ -383,12 +383,14 @@ do_transfer()
local stat_cookierx_last
local stat_csum_err_s
local stat_csum_err_c
+ local stat_tcpfb_last_l
stat_synrx_last_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableSYNRX")
stat_ackrx_last_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableACKRX")
stat_cookietx_last=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesSent")
stat_cookierx_last=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesRecv")
stat_csum_err_s=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtDataCsumErr")
stat_csum_err_c=$(mptcp_lib_get_counter "${connector_ns}" "MPTcpExtDataCsumErr")
+ stat_tcpfb_last_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableFallbackACK")
timeout ${timeout_test} \
ip netns exec ${listener_ns} \
@@ -457,11 +459,13 @@ do_transfer()
local stat_cookietx_now
local stat_cookierx_now
local stat_ooo_now
+ local stat_tcpfb_now_l
stat_synrx_now_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableSYNRX")
stat_ackrx_now_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableACKRX")
stat_cookietx_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesSent")
stat_cookierx_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesRecv")
stat_ooo_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtTCPOFOQueue")
+ stat_tcpfb_now_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableFallbackACK")
expect_synrx=$((stat_synrx_last_l))
expect_ackrx=$((stat_ackrx_last_l))
@@ -508,6 +512,11 @@ do_transfer()
fi
fi
+ if [ ${stat_ooo_now} -eq 0 ] && [ ${stat_tcpfb_last_l} -ne ${stat_tcpfb_now_l} ]; then
+ mptcp_lib_pr_fail "unexpected fallback to TCP"
+ rets=1
+ fi
+
if [ $cookies -eq 2 ];then
if [ $stat_cookietx_last -ge $stat_cookietx_now ] ;then
extra+=" WARN: CookieSent: did not advance"
From: Kan Liang <kan.liang(a)linux.intel.com>
A non-0 retire latency can be observed on a Raptorlake which doesn't
support the retire latency feature.
By design, the retire latency shares the PERF_SAMPLE_WEIGHT_STRUCT
sample type with other types of latency. That could avoid adding too
many different sample types to support all kinds of latency. For the
machine which doesn't support some kind of latency, 0 should be
returned.
Perf doesn’t clear/init all the fields of a sample data for the sake
of performance. It expects the later perf_{prepare,output}_sample() to
update the uninitialized field. However, the current implementation
doesn't touch the field of the retire latency if the feature is not
supported. The memory garbage is dumped into the perf data.
Clear the retire latency if the feature is not supported.
Fixes: c87a31093c70 ("perf/x86: Support Retire Latency")
Reported-by: "Bayduraev, Alexey V" <alexey.v.bayduraev(a)intel.com>
Tested-by: "Bayduraev, Alexey V" <alexey.v.bayduraev(a)intel.com>
Signed-off-by: Kan Liang <kan.liang(a)linux.intel.com>
Cc: stable(a)vger.kernel.org
---
arch/x86/events/intel/ds.c | 8 ++++++--
1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index f95cca6b632a..838f3e23bce9 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -1989,8 +1989,12 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event,
set_linear_ip(regs, basic->ip);
regs->flags = PERF_EFLAGS_EXACT;
- if ((sample_type & PERF_SAMPLE_WEIGHT_STRUCT) && (x86_pmu.flags & PMU_FL_RETIRE_LATENCY))
- data->weight.var3_w = format_size >> PEBS_RETIRE_LATENCY_OFFSET & PEBS_LATENCY_MASK;
+ if (sample_type & PERF_SAMPLE_WEIGHT_STRUCT) {
+ if (x86_pmu.flags & PMU_FL_RETIRE_LATENCY)
+ data->weight.var3_w = format_size >> PEBS_RETIRE_LATENCY_OFFSET & PEBS_LATENCY_MASK;
+ else
+ data->weight.var3_w = 0;
+ }
/*
* The record for MEMINFO is in front of GP
--
2.35.1
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 4535e1a4174c4111d92c5a9a21e542d232e0fcaa
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024033032-confess-monument-a6db@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 4535e1a4174c4111d92c5a9a21e542d232e0fcaa Mon Sep 17 00:00:00 2001
From: "Borislav Petkov (AMD)" <bp(a)alien8.de>
Date: Thu, 28 Mar 2024 13:59:05 +0100
Subject: [PATCH] x86/bugs: Fix the SRSO mitigation on Zen3/4
The original version of the mitigation would patch in the calls to the
untraining routines directly. That is, the alternative() in UNTRAIN_RET
will patch in the CALL to srso_alias_untrain_ret() directly.
However, even if commit e7c25c441e9e ("x86/cpu: Cleanup the untrain
mess") meant well in trying to clean up the situation, due to micro-
architectural reasons, the untraining routine srso_alias_untrain_ret()
must be the target of a CALL instruction and not of a JMP instruction as
it is done now.
Reshuffle the alternative macros to accomplish that.
Fixes: e7c25c441e9e ("x86/cpu: Cleanup the untrain mess")
Signed-off-by: Borislav Petkov (AMD) <bp(a)alien8.de>
Reviewed-by: Ingo Molnar <mingo(a)kernel.org>
Cc: stable(a)kernel.org
Signed-off-by: Linus Torvalds <torvalds(a)linux-foundation.org>
diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h
index 076bf8dee702..25466c4d2134 100644
--- a/arch/x86/include/asm/asm-prototypes.h
+++ b/arch/x86/include/asm/asm-prototypes.h
@@ -14,6 +14,7 @@
#include <asm/asm.h>
#include <asm/fred.h>
#include <asm/gsseg.h>
+#include <asm/nospec-branch.h>
#ifndef CONFIG_X86_CMPXCHG64
extern void cmpxchg8b_emu(void);
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index fc3a8a3c7ffe..170c89ed22fc 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -262,11 +262,20 @@
.Lskip_rsb_\@:
.endm
+/*
+ * The CALL to srso_alias_untrain_ret() must be patched in directly at
+ * the spot where untraining must be done, ie., srso_alias_untrain_ret()
+ * must be the target of a CALL instruction instead of indirectly
+ * jumping to a wrapper which then calls it. Therefore, this macro is
+ * called outside of __UNTRAIN_RET below, for the time being, before the
+ * kernel can support nested alternatives with arbitrary nesting.
+ */
+.macro CALL_UNTRAIN_RET
#if defined(CONFIG_MITIGATION_UNRET_ENTRY) || defined(CONFIG_MITIGATION_SRSO)
-#define CALL_UNTRAIN_RET "call entry_untrain_ret"
-#else
-#define CALL_UNTRAIN_RET ""
+ ALTERNATIVE_2 "", "call entry_untrain_ret", X86_FEATURE_UNRET, \
+ "call srso_alias_untrain_ret", X86_FEATURE_SRSO_ALIAS
#endif
+.endm
/*
* Mitigate RETBleed for AMD/Hygon Zen uarch. Requires KERNEL CR3 because the
@@ -282,8 +291,8 @@
.macro __UNTRAIN_RET ibpb_feature, call_depth_insns
#if defined(CONFIG_MITIGATION_RETHUNK) || defined(CONFIG_MITIGATION_IBPB_ENTRY)
VALIDATE_UNRET_END
- ALTERNATIVE_3 "", \
- CALL_UNTRAIN_RET, X86_FEATURE_UNRET, \
+ CALL_UNTRAIN_RET
+ ALTERNATIVE_2 "", \
"call entry_ibpb", \ibpb_feature, \
__stringify(\call_depth_insns), X86_FEATURE_CALL_DEPTH
#endif
@@ -342,6 +351,8 @@ extern void retbleed_return_thunk(void);
static inline void retbleed_return_thunk(void) {}
#endif
+extern void srso_alias_untrain_ret(void);
+
#ifdef CONFIG_MITIGATION_SRSO
extern void srso_return_thunk(void);
extern void srso_alias_return_thunk(void);
diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S
index 721b528da9ac..02cde194a99e 100644
--- a/arch/x86/lib/retpoline.S
+++ b/arch/x86/lib/retpoline.S
@@ -163,6 +163,7 @@ SYM_CODE_START_NOALIGN(srso_alias_untrain_ret)
lfence
jmp srso_alias_return_thunk
SYM_FUNC_END(srso_alias_untrain_ret)
+__EXPORT_THUNK(srso_alias_untrain_ret)
.popsection
.pushsection .text..__x86.rethunk_safe
@@ -224,10 +225,12 @@ SYM_CODE_START(srso_return_thunk)
SYM_CODE_END(srso_return_thunk)
#define JMP_SRSO_UNTRAIN_RET "jmp srso_untrain_ret"
-#define JMP_SRSO_ALIAS_UNTRAIN_RET "jmp srso_alias_untrain_ret"
#else /* !CONFIG_MITIGATION_SRSO */
+/* Dummy for the alternative in CALL_UNTRAIN_RET. */
+SYM_CODE_START(srso_alias_untrain_ret)
+ RET
+SYM_FUNC_END(srso_alias_untrain_ret)
#define JMP_SRSO_UNTRAIN_RET "ud2"
-#define JMP_SRSO_ALIAS_UNTRAIN_RET "ud2"
#endif /* CONFIG_MITIGATION_SRSO */
#ifdef CONFIG_MITIGATION_UNRET_ENTRY
@@ -319,9 +322,7 @@ SYM_FUNC_END(retbleed_untrain_ret)
#if defined(CONFIG_MITIGATION_UNRET_ENTRY) || defined(CONFIG_MITIGATION_SRSO)
SYM_FUNC_START(entry_untrain_ret)
- ALTERNATIVE_2 JMP_RETBLEED_UNTRAIN_RET, \
- JMP_SRSO_UNTRAIN_RET, X86_FEATURE_SRSO, \
- JMP_SRSO_ALIAS_UNTRAIN_RET, X86_FEATURE_SRSO_ALIAS
+ ALTERNATIVE JMP_RETBLEED_UNTRAIN_RET, JMP_SRSO_UNTRAIN_RET, X86_FEATURE_SRSO
SYM_FUNC_END(entry_untrain_ret)
__EXPORT_THUNK(entry_untrain_ret)
Some systems have ACPI tables that don't include everything that needs
to be mapped for a successful kexec. These systems rely on identity
maps that include the full gigabyte surrounding any smaller region
requested for kexec success. Without this, they fail to kexec and end
up doing a full firmware reboot.
So, reduce the use of GB pages only on systems where this is known to
be necessary (specifically, UV systems).
Signed-off-by: Steve Wahl <steve.wahl(a)hpe.com>
Fixes: d794734c9bbf ("x86/mm/ident_map: Use gbpages only where full GB page should be mapped.")
Reported-by: Pavin Joseph <me(a)pavinjoseph.com>
Closes: https://lore.kernel.org/all/3a1b9909-45ac-4f97-ad68-d16ef1ce99db@pavinjosep…
Tested-by: Pavin Joseph <me(a)pavinjoseph.com>
Tested-by: Eric Hagberg <ehagberg(a)gmail.com>
Tested-by: Sarah Brofeldt <srhb(a)dbc.dk>
---
arch/x86/include/asm/init.h | 1 +
arch/x86/kernel/machine_kexec_64.c | 3 +++
arch/x86/mm/ident_map.c | 13 +++++++------
3 files changed, 11 insertions(+), 6 deletions(-)
diff --git a/arch/x86/include/asm/init.h b/arch/x86/include/asm/init.h
index cc9ccf61b6bd..4ae843e8fefb 100644
--- a/arch/x86/include/asm/init.h
+++ b/arch/x86/include/asm/init.h
@@ -10,6 +10,7 @@ struct x86_mapping_info {
unsigned long page_flag; /* page flag for PMD or PUD entry */
unsigned long offset; /* ident mapping offset */
bool direct_gbpages; /* PUD level 1GB page support */
+ bool direct_gbpages_always; /* use 1GB pages exclusively */
unsigned long kernpg_flag; /* kernel pagetable flag override */
};
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index b180d8e497c3..1e1c6633bbec 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -28,6 +28,7 @@
#include <asm/setup.h>
#include <asm/set_memory.h>
#include <asm/cpu.h>
+#include <asm/uv/uv.h>
#ifdef CONFIG_ACPI
/*
@@ -212,6 +213,8 @@ static int init_pgtable(struct kimage *image, unsigned long start_pgtable)
if (direct_gbpages)
info.direct_gbpages = true;
+ if (!is_uv_system())
+ info.direct_gbpages_always = true;
for (i = 0; i < nr_pfn_mapped; i++) {
mstart = pfn_mapped[i].start << PAGE_SHIFT;
diff --git a/arch/x86/mm/ident_map.c b/arch/x86/mm/ident_map.c
index a204a332c71f..8039498b9713 100644
--- a/arch/x86/mm/ident_map.c
+++ b/arch/x86/mm/ident_map.c
@@ -39,12 +39,13 @@ static int ident_pud_init(struct x86_mapping_info *info, pud_t *pud_page,
/* Is using a gbpage allowed? */
use_gbpage = info->direct_gbpages;
- /* Don't use gbpage if it maps more than the requested region. */
- /* at the begining: */
- use_gbpage &= ((addr & ~PUD_MASK) == 0);
- /* ... or at the end: */
- use_gbpage &= ((next & ~PUD_MASK) == 0);
-
+ if (!info->direct_gbpages_always) {
+ /* Don't use gbpage if it maps more than the requested region. */
+ /* at the beginning: */
+ use_gbpage &= ((addr & ~PUD_MASK) == 0);
+ /* ... or at the end: */
+ use_gbpage &= ((next & ~PUD_MASK) == 0);
+ }
/* Never overwrite existing mappings */
use_gbpage &= !pud_present(*pud);
--
2.26.2
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.19.y
git checkout FETCH_HEAD
git cherry-pick -x fd819ad3ecf6f3c232a06b27423ce9ed8c20da89
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024040512-graves-stipend-babf@gregkh' --subject-prefix 'PATCH 4.19.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From fd819ad3ecf6f3c232a06b27423ce9ed8c20da89 Mon Sep 17 00:00:00 2001
From: Duoming Zhou <duoming(a)zju.edu.cn>
Date: Fri, 29 Mar 2024 09:50:23 +0800
Subject: [PATCH] ax25: fix use-after-free bugs caused by ax25_ds_del_timer
When the ax25 device is detaching, the ax25_dev_device_down()
calls ax25_ds_del_timer() to cleanup the slave_timer. When
the timer handler is running, the ax25_ds_del_timer() that
calls del_timer() in it will return directly. As a result,
the use-after-free bugs could happen, one of the scenarios
is shown below:
(Thread 1) | (Thread 2)
| ax25_ds_timeout()
ax25_dev_device_down() |
ax25_ds_del_timer() |
del_timer() |
ax25_dev_put() //FREE |
| ax25_dev-> //USE
In order to mitigate bugs, when the device is detaching, use
timer_shutdown_sync() to stop the timer.
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Signed-off-by: Duoming Zhou <duoming(a)zju.edu.cn>
Reviewed-by: Simon Horman <horms(a)kernel.org>
Link: https://lore.kernel.org/r/20240329015023.9223-1-duoming@zju.edu.cn
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
diff --git a/net/ax25/ax25_dev.c b/net/ax25/ax25_dev.c
index c5462486dbca..282ec581c072 100644
--- a/net/ax25/ax25_dev.c
+++ b/net/ax25/ax25_dev.c
@@ -105,7 +105,7 @@ void ax25_dev_device_down(struct net_device *dev)
spin_lock_bh(&ax25_dev_lock);
#ifdef CONFIG_AX25_DAMA_SLAVE
- ax25_ds_del_timer(ax25_dev);
+ timer_shutdown_sync(&ax25_dev->dama.slave_timer);
#endif
/*
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x fd819ad3ecf6f3c232a06b27423ce9ed8c20da89
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024040511-chamber-yelp-2ec9@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From fd819ad3ecf6f3c232a06b27423ce9ed8c20da89 Mon Sep 17 00:00:00 2001
From: Duoming Zhou <duoming(a)zju.edu.cn>
Date: Fri, 29 Mar 2024 09:50:23 +0800
Subject: [PATCH] ax25: fix use-after-free bugs caused by ax25_ds_del_timer
When the ax25 device is detaching, the ax25_dev_device_down()
calls ax25_ds_del_timer() to cleanup the slave_timer. When
the timer handler is running, the ax25_ds_del_timer() that
calls del_timer() in it will return directly. As a result,
the use-after-free bugs could happen, one of the scenarios
is shown below:
(Thread 1) | (Thread 2)
| ax25_ds_timeout()
ax25_dev_device_down() |
ax25_ds_del_timer() |
del_timer() |
ax25_dev_put() //FREE |
| ax25_dev-> //USE
In order to mitigate bugs, when the device is detaching, use
timer_shutdown_sync() to stop the timer.
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Signed-off-by: Duoming Zhou <duoming(a)zju.edu.cn>
Reviewed-by: Simon Horman <horms(a)kernel.org>
Link: https://lore.kernel.org/r/20240329015023.9223-1-duoming@zju.edu.cn
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
diff --git a/net/ax25/ax25_dev.c b/net/ax25/ax25_dev.c
index c5462486dbca..282ec581c072 100644
--- a/net/ax25/ax25_dev.c
+++ b/net/ax25/ax25_dev.c
@@ -105,7 +105,7 @@ void ax25_dev_device_down(struct net_device *dev)
spin_lock_bh(&ax25_dev_lock);
#ifdef CONFIG_AX25_DAMA_SLAVE
- ax25_ds_del_timer(ax25_dev);
+ timer_shutdown_sync(&ax25_dev->dama.slave_timer);
#endif
/*
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x fd819ad3ecf6f3c232a06b27423ce9ed8c20da89
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024040509-monetize-joyride-107c@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From fd819ad3ecf6f3c232a06b27423ce9ed8c20da89 Mon Sep 17 00:00:00 2001
From: Duoming Zhou <duoming(a)zju.edu.cn>
Date: Fri, 29 Mar 2024 09:50:23 +0800
Subject: [PATCH] ax25: fix use-after-free bugs caused by ax25_ds_del_timer
When the ax25 device is detaching, the ax25_dev_device_down()
calls ax25_ds_del_timer() to cleanup the slave_timer. When
the timer handler is running, the ax25_ds_del_timer() that
calls del_timer() in it will return directly. As a result,
the use-after-free bugs could happen, one of the scenarios
is shown below:
(Thread 1) | (Thread 2)
| ax25_ds_timeout()
ax25_dev_device_down() |
ax25_ds_del_timer() |
del_timer() |
ax25_dev_put() //FREE |
| ax25_dev-> //USE
In order to mitigate bugs, when the device is detaching, use
timer_shutdown_sync() to stop the timer.
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Signed-off-by: Duoming Zhou <duoming(a)zju.edu.cn>
Reviewed-by: Simon Horman <horms(a)kernel.org>
Link: https://lore.kernel.org/r/20240329015023.9223-1-duoming@zju.edu.cn
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
diff --git a/net/ax25/ax25_dev.c b/net/ax25/ax25_dev.c
index c5462486dbca..282ec581c072 100644
--- a/net/ax25/ax25_dev.c
+++ b/net/ax25/ax25_dev.c
@@ -105,7 +105,7 @@ void ax25_dev_device_down(struct net_device *dev)
spin_lock_bh(&ax25_dev_lock);
#ifdef CONFIG_AX25_DAMA_SLAVE
- ax25_ds_del_timer(ax25_dev);
+ timer_shutdown_sync(&ax25_dev->dama.slave_timer);
#endif
/*
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x fd819ad3ecf6f3c232a06b27423ce9ed8c20da89
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024040506-earmark-skydiver-ab1c@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From fd819ad3ecf6f3c232a06b27423ce9ed8c20da89 Mon Sep 17 00:00:00 2001
From: Duoming Zhou <duoming(a)zju.edu.cn>
Date: Fri, 29 Mar 2024 09:50:23 +0800
Subject: [PATCH] ax25: fix use-after-free bugs caused by ax25_ds_del_timer
When the ax25 device is detaching, the ax25_dev_device_down()
calls ax25_ds_del_timer() to cleanup the slave_timer. When
the timer handler is running, the ax25_ds_del_timer() that
calls del_timer() in it will return directly. As a result,
the use-after-free bugs could happen, one of the scenarios
is shown below:
(Thread 1) | (Thread 2)
| ax25_ds_timeout()
ax25_dev_device_down() |
ax25_ds_del_timer() |
del_timer() |
ax25_dev_put() //FREE |
| ax25_dev-> //USE
In order to mitigate bugs, when the device is detaching, use
timer_shutdown_sync() to stop the timer.
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Signed-off-by: Duoming Zhou <duoming(a)zju.edu.cn>
Reviewed-by: Simon Horman <horms(a)kernel.org>
Link: https://lore.kernel.org/r/20240329015023.9223-1-duoming@zju.edu.cn
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
diff --git a/net/ax25/ax25_dev.c b/net/ax25/ax25_dev.c
index c5462486dbca..282ec581c072 100644
--- a/net/ax25/ax25_dev.c
+++ b/net/ax25/ax25_dev.c
@@ -105,7 +105,7 @@ void ax25_dev_device_down(struct net_device *dev)
spin_lock_bh(&ax25_dev_lock);
#ifdef CONFIG_AX25_DAMA_SLAVE
- ax25_ds_del_timer(ax25_dev);
+ timer_shutdown_sync(&ax25_dev->dama.slave_timer);
#endif
/*
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x fd819ad3ecf6f3c232a06b27423ce9ed8c20da89
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024040503-tropical-fascism-219c@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From fd819ad3ecf6f3c232a06b27423ce9ed8c20da89 Mon Sep 17 00:00:00 2001
From: Duoming Zhou <duoming(a)zju.edu.cn>
Date: Fri, 29 Mar 2024 09:50:23 +0800
Subject: [PATCH] ax25: fix use-after-free bugs caused by ax25_ds_del_timer
When the ax25 device is detaching, the ax25_dev_device_down()
calls ax25_ds_del_timer() to cleanup the slave_timer. When
the timer handler is running, the ax25_ds_del_timer() that
calls del_timer() in it will return directly. As a result,
the use-after-free bugs could happen, one of the scenarios
is shown below:
(Thread 1) | (Thread 2)
| ax25_ds_timeout()
ax25_dev_device_down() |
ax25_ds_del_timer() |
del_timer() |
ax25_dev_put() //FREE |
| ax25_dev-> //USE
In order to mitigate bugs, when the device is detaching, use
timer_shutdown_sync() to stop the timer.
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Signed-off-by: Duoming Zhou <duoming(a)zju.edu.cn>
Reviewed-by: Simon Horman <horms(a)kernel.org>
Link: https://lore.kernel.org/r/20240329015023.9223-1-duoming@zju.edu.cn
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
diff --git a/net/ax25/ax25_dev.c b/net/ax25/ax25_dev.c
index c5462486dbca..282ec581c072 100644
--- a/net/ax25/ax25_dev.c
+++ b/net/ax25/ax25_dev.c
@@ -105,7 +105,7 @@ void ax25_dev_device_down(struct net_device *dev)
spin_lock_bh(&ax25_dev_lock);
#ifdef CONFIG_AX25_DAMA_SLAVE
- ax25_ds_del_timer(ax25_dev);
+ timer_shutdown_sync(&ax25_dev->dama.slave_timer);
#endif
/*
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x ea558de7238bb12c3435c47f0631e9d17bf4a09f
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024040510-impose-yonder-1ce4@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From ea558de7238bb12c3435c47f0631e9d17bf4a09f Mon Sep 17 00:00:00 2001
From: Ivan Vecera <ivecera(a)redhat.com>
Date: Sat, 16 Mar 2024 12:38:29 +0100
Subject: [PATCH] i40e: Enforce software interrupt during busy-poll exit
As for ice bug fixed by commit b7306b42beaf ("ice: manage interrupts
during poll exit") followed by commit 23be7075b318 ("ice: fix software
generating extra interrupts") I'm seeing the similar issue also with
i40e driver.
In certain situation when busy-loop is enabled together with adaptive
coalescing, the driver occasionally misses that there are outstanding
descriptors to clean when exiting busy poll.
Try to catch the remaining work by triggering a software interrupt
when exiting busy poll. No extra interrupts will be generated when
busy polling is not used.
The issue was found when running sockperf ping-pong tcp test with
adaptive coalescing and busy poll enabled (50 as value busy_pool
and busy_read sysctl knobs) and results in huge latency spikes
with more than 100000us.
The fix is inspired from the ice driver and do the following:
1) During napi poll exit in case of busy-poll (napo_complete_done()
returns false) this is recorded to q_vector that we were in busy
loop.
2) Extends i40e_buildreg_itr() to be able to add an enforced software
interrupt into built value
2) In i40e_update_enable_itr() enforces a software interrupt trigger
if we are exiting busy poll to catch any pending clean-ups
3) Reuses unused 3rd ITR (interrupt throttle) index and set it to
20K interrupts per second to limit the number of these sw interrupts.
Test results
============
Prior:
[root@dell-per640-07 net]# sockperf ping-pong -i 10.9.9.1 --tcp -m 1000 --mps=max -t 120
sockperf: == version #3.10-no.git ==
sockperf[CLIENT] send on:sockperf: using recvfrom() to block on socket(s)
[ 0] IP = 10.9.9.1 PORT = 11111 # TCP
sockperf: Warmup stage (sending a few dummy messages)...
sockperf: Starting test...
sockperf: Test end (interrupted by timer)
sockperf: Test ended
sockperf: [Total Run] RunTime=119.999 sec; Warm up time=400 msec; SentMessages=2438563; ReceivedMessages=2438562
sockperf: ========= Printing statistics for Server No: 0
sockperf: [Valid Duration] RunTime=119.549 sec; SentMessages=2429473; ReceivedMessages=2429473
sockperf: ====> avg-latency=24.571 (std-dev=93.297, mean-ad=4.904, median-ad=1.510, siqr=1.063, cv=3.797, std-error=0.060, 99.0% ci=[24.417, 24.725])
sockperf: # dropped messages = 0; # duplicated messages = 0; # out-of-order messages = 0
sockperf: Summary: Latency is 24.571 usec
sockperf: Total 2429473 observations; each percentile contains 24294.73 observations
sockperf: ---> <MAX> observation = 103294.331
sockperf: ---> percentile 99.999 = 45.633
sockperf: ---> percentile 99.990 = 37.013
sockperf: ---> percentile 99.900 = 35.910
sockperf: ---> percentile 99.000 = 33.390
sockperf: ---> percentile 90.000 = 28.626
sockperf: ---> percentile 75.000 = 27.741
sockperf: ---> percentile 50.000 = 26.743
sockperf: ---> percentile 25.000 = 25.614
sockperf: ---> <MIN> observation = 12.220
After:
[root@dell-per640-07 net]# sockperf ping-pong -i 10.9.9.1 --tcp -m 1000 --mps=max -t 120
sockperf: == version #3.10-no.git ==
sockperf[CLIENT] send on:sockperf: using recvfrom() to block on socket(s)
[ 0] IP = 10.9.9.1 PORT = 11111 # TCP
sockperf: Warmup stage (sending a few dummy messages)...
sockperf: Starting test...
sockperf: Test end (interrupted by timer)
sockperf: Test ended
sockperf: [Total Run] RunTime=119.999 sec; Warm up time=400 msec; SentMessages=2400055; ReceivedMessages=2400054
sockperf: ========= Printing statistics for Server No: 0
sockperf: [Valid Duration] RunTime=119.549 sec; SentMessages=2391186; ReceivedMessages=2391186
sockperf: ====> avg-latency=24.965 (std-dev=5.934, mean-ad=4.642, median-ad=1.485, siqr=1.067, cv=0.238, std-error=0.004, 99.0% ci=[24.955, 24.975])
sockperf: # dropped messages = 0; # duplicated messages = 0; # out-of-order messages = 0
sockperf: Summary: Latency is 24.965 usec
sockperf: Total 2391186 observations; each percentile contains 23911.86 observations
sockperf: ---> <MAX> observation = 195.841
sockperf: ---> percentile 99.999 = 45.026
sockperf: ---> percentile 99.990 = 39.009
sockperf: ---> percentile 99.900 = 35.922
sockperf: ---> percentile 99.000 = 33.482
sockperf: ---> percentile 90.000 = 28.902
sockperf: ---> percentile 75.000 = 27.821
sockperf: ---> percentile 50.000 = 26.860
sockperf: ---> percentile 25.000 = 25.685
sockperf: ---> <MIN> observation = 12.277
Fixes: 0bcd952feec7 ("ethernet/intel: consolidate NAPI and NAPI exit")
Reported-by: Hugo Ferreira <hferreir(a)redhat.com>
Reviewed-by: Michal Schmidt <mschmidt(a)redhat.com>
Signed-off-by: Ivan Vecera <ivecera(a)redhat.com>
Reviewed-by: Jesse Brandeburg <jesse.brandeburg(a)intel.com>
Tested-by: Pucha Himasekhar Reddy <himasekharx.reddy.pucha(a)intel.com> (A Contingent worker at Intel)
Signed-off-by: Tony Nguyen <anthony.l.nguyen(a)intel.com>
diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h
index ba24f3fa92c3..2fbabcdb5bb5 100644
--- a/drivers/net/ethernet/intel/i40e/i40e.h
+++ b/drivers/net/ethernet/intel/i40e/i40e.h
@@ -955,6 +955,7 @@ struct i40e_q_vector {
struct rcu_head rcu; /* to avoid race with update stats on free */
char name[I40E_INT_NAME_STR_LEN];
bool arm_wb_state;
+ bool in_busy_poll;
int irq_num; /* IRQ assigned to this q_vector */
} ____cacheline_internodealigned_in_smp;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index f86578857e8a..6576a0081093 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -3911,6 +3911,12 @@ static void i40e_vsi_configure_msix(struct i40e_vsi *vsi)
q_vector->tx.target_itr >> 1);
q_vector->tx.current_itr = q_vector->tx.target_itr;
+ /* Set ITR for software interrupts triggered after exiting
+ * busy-loop polling.
+ */
+ wr32(hw, I40E_PFINT_ITRN(I40E_SW_ITR, vector - 1),
+ I40E_ITR_20K);
+
wr32(hw, I40E_PFINT_RATEN(vector - 1),
i40e_intrl_usec_to_reg(vsi->int_rate_limit));
diff --git a/drivers/net/ethernet/intel/i40e/i40e_register.h b/drivers/net/ethernet/intel/i40e/i40e_register.h
index 14ab642cafdb..432afbb64201 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_register.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_register.h
@@ -333,8 +333,11 @@
#define I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT 3
#define I40E_PFINT_DYN_CTLN_ITR_INDX_MASK I40E_MASK(0x3, I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT)
#define I40E_PFINT_DYN_CTLN_INTERVAL_SHIFT 5
+#define I40E_PFINT_DYN_CTLN_INTERVAL_MASK I40E_MASK(0xFFF, I40E_PFINT_DYN_CTLN_INTERVAL_SHIFT)
#define I40E_PFINT_DYN_CTLN_SW_ITR_INDX_ENA_SHIFT 24
#define I40E_PFINT_DYN_CTLN_SW_ITR_INDX_ENA_MASK I40E_MASK(0x1, I40E_PFINT_DYN_CTLN_SW_ITR_INDX_ENA_SHIFT)
+#define I40E_PFINT_DYN_CTLN_SW_ITR_INDX_SHIFT 25
+#define I40E_PFINT_DYN_CTLN_SW_ITR_INDX_MASK I40E_MASK(0x3, I40E_PFINT_DYN_CTLN_SW_ITR_INDX_SHIFT)
#define I40E_PFINT_ICR0 0x00038780 /* Reset: CORER */
#define I40E_PFINT_ICR0_INTEVENT_SHIFT 0
#define I40E_PFINT_ICR0_INTEVENT_MASK I40E_MASK(0x1, I40E_PFINT_ICR0_INTEVENT_SHIFT)
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index 0d7177083708..1a12b732818e 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -2630,7 +2630,22 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget,
return failure ? budget : (int)total_rx_packets;
}
-static inline u32 i40e_buildreg_itr(const int type, u16 itr)
+/**
+ * i40e_buildreg_itr - build a value for writing to I40E_PFINT_DYN_CTLN register
+ * @itr_idx: interrupt throttling index
+ * @interval: interrupt throttling interval value in usecs
+ * @force_swint: force software interrupt
+ *
+ * The function builds a value for I40E_PFINT_DYN_CTLN register that
+ * is used to update interrupt throttling interval for specified ITR index
+ * and optionally enforces a software interrupt. If the @itr_idx is equal
+ * to I40E_ITR_NONE then no interval change is applied and only @force_swint
+ * parameter is taken into account. If the interval change and enforced
+ * software interrupt are not requested then the built value just enables
+ * appropriate vector interrupt.
+ **/
+static u32 i40e_buildreg_itr(enum i40e_dyn_idx itr_idx, u16 interval,
+ bool force_swint)
{
u32 val;
@@ -2644,23 +2659,33 @@ static inline u32 i40e_buildreg_itr(const int type, u16 itr)
* an event in the PBA anyway so we need to rely on the automask
* to hold pending events for us until the interrupt is re-enabled
*
- * The itr value is reported in microseconds, and the register
- * value is recorded in 2 microsecond units. For this reason we
- * only need to shift by the interval shift - 1 instead of the
- * full value.
+ * We have to shift the given value as it is reported in microseconds
+ * and the register value is recorded in 2 microsecond units.
*/
- itr &= I40E_ITR_MASK;
+ interval >>= 1;
+ /* 1. Enable vector interrupt
+ * 2. Update the interval for the specified ITR index
+ * (I40E_ITR_NONE in the register is used to indicate that
+ * no interval update is requested)
+ */
val = I40E_PFINT_DYN_CTLN_INTENA_MASK |
- (type << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT) |
- (itr << (I40E_PFINT_DYN_CTLN_INTERVAL_SHIFT - 1));
+ FIELD_PREP(I40E_PFINT_DYN_CTLN_ITR_INDX_MASK, itr_idx) |
+ FIELD_PREP(I40E_PFINT_DYN_CTLN_INTERVAL_MASK, interval);
+
+ /* 3. Enforce software interrupt trigger if requested
+ * (These software interrupts rate is limited by ITR2 that is
+ * set to 20K interrupts per second)
+ */
+ if (force_swint)
+ val |= I40E_PFINT_DYN_CTLN_SWINT_TRIG_MASK |
+ I40E_PFINT_DYN_CTLN_SW_ITR_INDX_ENA_MASK |
+ FIELD_PREP(I40E_PFINT_DYN_CTLN_SW_ITR_INDX_MASK,
+ I40E_SW_ITR);
return val;
}
-/* a small macro to shorten up some long lines */
-#define INTREG I40E_PFINT_DYN_CTLN
-
/* The act of updating the ITR will cause it to immediately trigger. In order
* to prevent this from throwing off adaptive update statistics we defer the
* update so that it can only happen so often. So after either Tx or Rx are
@@ -2679,8 +2704,10 @@ static inline u32 i40e_buildreg_itr(const int type, u16 itr)
static inline void i40e_update_enable_itr(struct i40e_vsi *vsi,
struct i40e_q_vector *q_vector)
{
+ enum i40e_dyn_idx itr_idx = I40E_ITR_NONE;
struct i40e_hw *hw = &vsi->back->hw;
- u32 intval;
+ u16 interval = 0;
+ u32 itr_val;
/* If we don't have MSIX, then we only need to re-enable icr0 */
if (!test_bit(I40E_FLAG_MSIX_ENA, vsi->back->flags)) {
@@ -2702,8 +2729,8 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi,
*/
if (q_vector->rx.target_itr < q_vector->rx.current_itr) {
/* Rx ITR needs to be reduced, this is highest priority */
- intval = i40e_buildreg_itr(I40E_RX_ITR,
- q_vector->rx.target_itr);
+ itr_idx = I40E_RX_ITR;
+ interval = q_vector->rx.target_itr;
q_vector->rx.current_itr = q_vector->rx.target_itr;
q_vector->itr_countdown = ITR_COUNTDOWN_START;
} else if ((q_vector->tx.target_itr < q_vector->tx.current_itr) ||
@@ -2712,25 +2739,36 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi,
/* Tx ITR needs to be reduced, this is second priority
* Tx ITR needs to be increased more than Rx, fourth priority
*/
- intval = i40e_buildreg_itr(I40E_TX_ITR,
- q_vector->tx.target_itr);
+ itr_idx = I40E_TX_ITR;
+ interval = q_vector->tx.target_itr;
q_vector->tx.current_itr = q_vector->tx.target_itr;
q_vector->itr_countdown = ITR_COUNTDOWN_START;
} else if (q_vector->rx.current_itr != q_vector->rx.target_itr) {
/* Rx ITR needs to be increased, third priority */
- intval = i40e_buildreg_itr(I40E_RX_ITR,
- q_vector->rx.target_itr);
+ itr_idx = I40E_RX_ITR;
+ interval = q_vector->rx.target_itr;
q_vector->rx.current_itr = q_vector->rx.target_itr;
q_vector->itr_countdown = ITR_COUNTDOWN_START;
} else {
/* No ITR update, lowest priority */
- intval = i40e_buildreg_itr(I40E_ITR_NONE, 0);
if (q_vector->itr_countdown)
q_vector->itr_countdown--;
}
- if (!test_bit(__I40E_VSI_DOWN, vsi->state))
- wr32(hw, INTREG(q_vector->reg_idx), intval);
+ /* Do not update interrupt control register if VSI is down */
+ if (test_bit(__I40E_VSI_DOWN, vsi->state))
+ return;
+
+ /* Update ITR interval if necessary and enforce software interrupt
+ * if we are exiting busy poll.
+ */
+ if (q_vector->in_busy_poll) {
+ itr_val = i40e_buildreg_itr(itr_idx, interval, true);
+ q_vector->in_busy_poll = false;
+ } else {
+ itr_val = i40e_buildreg_itr(itr_idx, interval, false);
+ }
+ wr32(hw, I40E_PFINT_DYN_CTLN(q_vector->reg_idx), itr_val);
}
/**
@@ -2845,6 +2883,8 @@ int i40e_napi_poll(struct napi_struct *napi, int budget)
*/
if (likely(napi_complete_done(napi, work_done)))
i40e_update_enable_itr(vsi, q_vector);
+ else
+ q_vector->in_busy_poll = true;
return min(work_done, budget - 1);
}
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
index abf15067eb5d..2cdc7de6301c 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
@@ -68,6 +68,7 @@ enum i40e_dyn_idx {
/* these are indexes into ITRN registers */
#define I40E_RX_ITR I40E_IDX_ITR0
#define I40E_TX_ITR I40E_IDX_ITR1
+#define I40E_SW_ITR I40E_IDX_ITR2
/* Supported RSS offloads */
#define I40E_DEFAULT_RSS_HENA ( \