6.18-stable review patch. If anyone has any objections, please let me know.
------------------
From: Stefan Metzmacher <metze(a)samba.org>
[ Upstream commit dc10cf1368af8cb816dcaa2502ba7d44fff20612 ]
sc->first_error might already be set, and sc->status is then
in an unexpected state, so avoid the WARN[_ON]_ONCE() when
sc->first_error is already set and take a usable error path
instead. While at it, set sc->first_error as soon as possible.

This is based on a problem seen in similar places on the
server, where a meaningful WARN_ONCE() that prints details
about the connection was already very useful in tracking
down the problem.

This is much more useful:
[ 309.560973] expected[NEGOTIATE_NEEDED] != RDMA_CONNECT_RUNNING
first_error=0 local=192.168.0.200:445 remote=192.168.0.100:60445
[ 309.561034] WARNING: CPU: 2 PID: 78 at transport_rdma.c:643
recv_done+0x2fa/0x3d0 [ksmbd]
than what we had before (only):
[ 894.140316] WARNING: CPU: 1 PID: 116 at
fs/smb/server/transport_rdma.c:642 recv_done+0x308/0x360 [ksmbd]
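
Note: the SMBDIRECT_CHECK_STATUS_WARN()/SMBDIRECT_CHECK_STATUS_DISCONNECT()
helpers used in the hunks below live in the common smbdirect headers and
are not part of this diff; the following is only a hypothetical sketch of
their shape (the real macros also print the local/remote addresses seen
above):

    /*
     * Sketch only. Evaluates to true (after warning once) when the
     * socket is not in the expected state, so callers can branch to
     * an error path.
     */
    #define SMBDIRECT_CHECK_STATUS_WARN(__sc, __expected) ({           \
            struct smbdirect_socket *_s = (__sc);                      \
            bool _bad = (_s->status != (__expected));                  \
            WARN_ONCE(_bad, "expected[%s] != %s first_error=%d\n",     \
                      smbdirect_socket_status_string(__expected),      \
                      smbdirect_socket_status_string(_s->status),      \
                      _s->first_error);                                \
            _bad;                                                      \
    })

    /*
     * As above, but also tears down the connection on mismatch via
     * __SMBDIRECT_SOCKET_DISCONNECT(), which the client maps to
     * smbd_disconnect_rdma_connection() in the hunk below.
     */
    #define SMBDIRECT_CHECK_STATUS_DISCONNECT(__sc, __expected) ({    \
            struct smbdirect_socket *_s2 = (__sc);                    \
            bool _bad2 = SMBDIRECT_CHECK_STATUS_WARN(_s2, (__expected)); \
            if (_bad2)                                                 \
                    __SMBDIRECT_SOCKET_DISCONNECT(_s2);                \
            _bad2;                                                     \
    })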
Fixes: 58dfba8a2d4e ("smb: client/smbdirect: replace SMBDIRECT_SOCKET_CONNECTING with more detailed states")
Cc: Steve French <smfrench(a)gmail.com>
Cc: Tom Talpey <tom(a)talpey.com>
Cc: Long Li <longli(a)microsoft.com>
Cc: Namjae Jeon <linkinjeon(a)kernel.org>
Cc: Paulo Alcantara <pc(a)manguebit.org>
Cc: linux-cifs(a)vger.kernel.org
Cc: samba-technical(a)lists.samba.org
Signed-off-by: Stefan Metzmacher <metze(a)samba.org>
Acked-by: Namjae Jeon <linkinjeon(a)kernel.org>
Signed-off-by: Steve French <stfrench(a)microsoft.com>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
fs/smb/client/smbdirect.c | 28 +++++++++++++++-------------
1 file changed, 15 insertions(+), 13 deletions(-)
diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c
index c6c428c2e08dd..788a0670c4a8d 100644
--- a/fs/smb/client/smbdirect.c
+++ b/fs/smb/client/smbdirect.c
@@ -7,6 +7,7 @@
#include <linux/module.h>
#include <linux/highmem.h>
#include <linux/folio_queue.h>
+#define __SMBDIRECT_SOCKET_DISCONNECT(__sc) smbd_disconnect_rdma_connection(__sc)
#include "../common/smbdirect/smbdirect_pdu.h"
#include "smbdirect.h"
#include "cifs_debug.h"
@@ -186,6 +187,9 @@ static void smbd_disconnect_rdma_work(struct work_struct *work)
struct smbdirect_socket *sc =
container_of(work, struct smbdirect_socket, disconnect_work);
+ if (sc->first_error == 0)
+ sc->first_error = -ECONNABORTED;
+
/*
* make sure this and other work is not queued again
* but here we don't block and avoid
@@ -197,9 +201,6 @@ static void smbd_disconnect_rdma_work(struct work_struct *work)
disable_work(&sc->idle.immediate_work);
disable_delayed_work(&sc->idle.timer_work);
- if (sc->first_error == 0)
- sc->first_error = -ECONNABORTED;
-
switch (sc->status) {
case SMBDIRECT_SOCKET_NEGOTIATE_NEEDED:
case SMBDIRECT_SOCKET_NEGOTIATE_RUNNING:
@@ -242,6 +243,9 @@ static void smbd_disconnect_rdma_work(struct work_struct *work)
static void smbd_disconnect_rdma_connection(struct smbdirect_socket *sc)
{
+ if (sc->first_error == 0)
+ sc->first_error = -ECONNABORTED;
+
/*
* make sure other work (than disconnect_work) is
* not queued again but here we don't block and avoid
@@ -252,9 +256,6 @@ static void smbd_disconnect_rdma_connection(struct smbdirect_socket *sc)
disable_work(&sc->idle.immediate_work);
disable_delayed_work(&sc->idle.timer_work);
- if (sc->first_error == 0)
- sc->first_error = -ECONNABORTED;
-
switch (sc->status) {
case SMBDIRECT_SOCKET_RESOLVE_ADDR_FAILED:
case SMBDIRECT_SOCKET_RESOLVE_ROUTE_FAILED:
@@ -322,27 +323,27 @@ static int smbd_conn_upcall(
switch (event->event) {
case RDMA_CM_EVENT_ADDR_RESOLVED:
- WARN_ON_ONCE(sc->status != SMBDIRECT_SOCKET_RESOLVE_ADDR_RUNNING);
+ if (SMBDIRECT_CHECK_STATUS_DISCONNECT(sc, SMBDIRECT_SOCKET_RESOLVE_ADDR_RUNNING))
+ break;
sc->status = SMBDIRECT_SOCKET_RESOLVE_ROUTE_NEEDED;
wake_up(&sc->status_wait);
break;
case RDMA_CM_EVENT_ROUTE_RESOLVED:
- WARN_ON_ONCE(sc->status != SMBDIRECT_SOCKET_RESOLVE_ROUTE_RUNNING);
+ if (SMBDIRECT_CHECK_STATUS_DISCONNECT(sc, SMBDIRECT_SOCKET_RESOLVE_ROUTE_RUNNING))
+ break;
sc->status = SMBDIRECT_SOCKET_RDMA_CONNECT_NEEDED;
wake_up(&sc->status_wait);
break;
case RDMA_CM_EVENT_ADDR_ERROR:
log_rdma_event(ERR, "connecting failed event=%s\n", event_name);
- WARN_ON_ONCE(sc->status != SMBDIRECT_SOCKET_RESOLVE_ADDR_RUNNING);
sc->status = SMBDIRECT_SOCKET_RESOLVE_ADDR_FAILED;
smbd_disconnect_rdma_work(&sc->disconnect_work);
break;
case RDMA_CM_EVENT_ROUTE_ERROR:
log_rdma_event(ERR, "connecting failed event=%s\n", event_name);
- WARN_ON_ONCE(sc->status != SMBDIRECT_SOCKET_RESOLVE_ROUTE_RUNNING);
sc->status = SMBDIRECT_SOCKET_RESOLVE_ROUTE_FAILED;
smbd_disconnect_rdma_work(&sc->disconnect_work);
break;
@@ -428,7 +429,8 @@ static int smbd_conn_upcall(
min_t(u8, sp->responder_resources,
peer_responder_resources);
- WARN_ON_ONCE(sc->status != SMBDIRECT_SOCKET_RDMA_CONNECT_RUNNING);
+ if (SMBDIRECT_CHECK_STATUS_DISCONNECT(sc, SMBDIRECT_SOCKET_RDMA_CONNECT_RUNNING))
+ break;
sc->status = SMBDIRECT_SOCKET_NEGOTIATE_NEEDED;
wake_up(&sc->status_wait);
break;
@@ -437,7 +439,6 @@ static int smbd_conn_upcall(
case RDMA_CM_EVENT_UNREACHABLE:
case RDMA_CM_EVENT_REJECTED:
log_rdma_event(ERR, "connecting failed event=%s\n", event_name);
- WARN_ON_ONCE(sc->status != SMBDIRECT_SOCKET_RDMA_CONNECT_RUNNING);
sc->status = SMBDIRECT_SOCKET_RDMA_CONNECT_FAILED;
smbd_disconnect_rdma_work(&sc->disconnect_work);
break;
@@ -699,7 +700,8 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc)
negotiate_done =
process_negotiation_response(response, wc->byte_len);
put_receive_buffer(sc, response);
- WARN_ON_ONCE(sc->status != SMBDIRECT_SOCKET_NEGOTIATE_RUNNING);
+ if (SMBDIRECT_CHECK_STATUS_WARN(sc, SMBDIRECT_SOCKET_NEGOTIATE_RUNNING))
+ negotiate_done = false;
if (!negotiate_done) {
sc->status = SMBDIRECT_SOCKET_NEGOTIATE_FAILED;
smbd_disconnect_rdma_connection(sc);
--
2.51.0
6.18-stable review patch. If anyone has any objections, please let me know.
------------------
From: Stefan Metzmacher <metze(a)samba.org>
[ Upstream commit 1f3fd108c5c5a9885c6c276a2489c49b60a6b90d ]
This can be used like this:

    int err = somefunc();

    pr_warn("err=%1pe\n", SMBDIRECT_DEBUG_ERR_PTR(err));

This will be used in the following fixes in order to be
able to identify real-world problems more easily.
Cc: Steve French <smfrench(a)gmail.com>
Cc: Tom Talpey <tom(a)talpey.com>
Cc: Long Li <longli(a)microsoft.com>
Cc: Namjae Jeon <linkinjeon(a)kernel.org>
Cc: Paulo Alcantara <pc(a)manguebit.org>
Cc: linux-cifs(a)vger.kernel.org
Cc: samba-technical(a)lists.samba.org
Signed-off-by: Stefan Metzmacher <metze(a)samba.org>
Acked-by: Namjae Jeon <linkinjeon(a)kernel.org>
Signed-off-by: Steve French <stfrench(a)microsoft.com>
Stable-dep-of: 425c32750b48 ("smb: server: relax WARN_ON_ONCE(SMBDIRECT_SOCKET_*) checks in recv_done() and smb_direct_cm_handler()")
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
fs/smb/common/smbdirect/smbdirect_socket.h | 13 +++++++++++++
1 file changed, 13 insertions(+)
diff --git a/fs/smb/common/smbdirect/smbdirect_socket.h b/fs/smb/common/smbdirect/smbdirect_socket.h
index ee5a90d691c89..611986827a5e2 100644
--- a/fs/smb/common/smbdirect/smbdirect_socket.h
+++ b/fs/smb/common/smbdirect/smbdirect_socket.h
@@ -74,6 +74,19 @@ const char *smbdirect_socket_status_string(enum smbdirect_socket_status status)
return "<unknown>";
}
+/*
+ * This can be used with %1pe to print errors as strings or '0'
+ * And it avoids warnings like: warn: passing zero to 'ERR_PTR'
+ * from smatch -p=kernel --pedantic
+ */
+static __always_inline
+const void * __must_check SMBDIRECT_DEBUG_ERR_PTR(long error)
+{
+ if (error == 0)
+ return NULL;
+ return ERR_PTR(error);
+}
+
enum smbdirect_keepalive_status {
SMBDIRECT_KEEPALIVE_NONE,
SMBDIRECT_KEEPALIVE_PENDING,
--
2.51.0
6.18-stable review patch. If anyone has any objections, please let me know.
------------------
From: Yu Kuai <yukuai(a)fnnas.com>
[ Upstream commit a913d1f6a7f607c110aeef8b58c8988f47a4b24e ]
The following test can cause an IO hang:

    mdadm -CvR /dev/md0 -l10 -n4 /dev/sd[abcd] --assume-clean --chunk=64K --bitmap=none
    sleep 5
    echo 1 > /sys/block/sda/device/delete
    echo 1 > /sys/block/sdb/device/delete
    echo 1 > /sys/block/sdc/device/delete
    echo 1 > /sys/block/sdd/device/delete
    dd if=/dev/md0 of=/dev/null bs=8k count=1 iflag=direct
Root cause:

1) all disks are removed, however all rdevs in the array are still
   in sync, and IO will be issued normally;
2) IO failure from sda, and setting badblocks fails; sda becomes
   faulty and MD_SB_CHANGE_PENDING is set;
3) error recovery tries to recover this IO from the other disks, and
   IO is issued to sdb, sdc, and sdd;
4) IO failure from sdb, and setting badblocks fails again; now the
   array is broken and will become read-only;
5) IO failure from sdc and sdd, however, the stripe can't be handled
   anymore because MD_SB_CHANGE_PENDING is set:

   handle_stripe
    handle_stripe
     if (test_bit MD_SB_CHANGE_PENDING)
      set_bit STRIPE_HANDLE
      goto finish
      // skip handling failed stripe
    release_stripe
     if (test_bit STRIPE_HANDLE)
      list_add_tail conf->handle_list

6) later raid5d can't handle the failed stripe either:

   raid5d
    md_check_recovery
     md_update_sb
      if (!md_is_rdwr())
       // can't clear pending bit
       return
    if (test_bit MD_SB_CHANGE_PENDING)
     break
     // can't handle failed stripe

Since MD_SB_CHANGE_PENDING can never be cleared for a read-only
array, fix this problem by skipping this check for read-only arrays.
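
For reference, md_is_rdwr() is the small helper in drivers/md/md.h
that the fix relies on; roughly (the exact definition may differ
between trees):

    static inline bool md_is_rdwr(struct mddev *mddev)
    {
            return (mddev->ro == MD_RDWR);
    }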
Link: https://lore.kernel.org/linux-raid/20251117085557.770572-3-yukuai@fnnas.com
Fixes: d87f064f5874 ("md: never update metadata when array is read-only.")
Signed-off-by: Yu Kuai <yukuai(a)fnnas.com>
Reviewed-by: Li Nan <linan122(a)huawei.com>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
drivers/md/raid5.c | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 24b32a0c95b40..8b5f8a12d4179 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -4956,7 +4956,8 @@ static void handle_stripe(struct stripe_head *sh)
goto finish;
if (s.handle_bad_blocks ||
- test_bit(MD_SB_CHANGE_PENDING, &conf->mddev->sb_flags)) {
+ (md_is_rdwr(conf->mddev) &&
+ test_bit(MD_SB_CHANGE_PENDING, &conf->mddev->sb_flags))) {
set_bit(STRIPE_HANDLE, &sh->state);
goto finish;
}
@@ -6768,7 +6769,8 @@ static void raid5d(struct md_thread *thread)
int batch_size, released;
unsigned int offset;
- if (test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags))
+ if (md_is_rdwr(mddev) &&
+ test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags))
break;
released = release_stripe_list(conf, conf->temp_inactive_list);
--
2.51.0
6.18-stable review patch. If anyone has any objections, please let me know.
------------------
From: Alexandru Gagniuc <mr.nuke.me(a)gmail.com>
[ Upstream commit 7e81fa8d809ed1e67ae9ecd52d20a20c2c65d877 ]
The "qcom,halt-regs" consists of a phandle reference followed by the
three offsets within syscon for halt registers. Thus, we need to
request 4 integers from of_property_read_variable_u32_array(), with
the halt_reg ofsets at indexes 1, 2, and 3. Offset 0 is the phandle.
With MAX_HALT_REG at 3, of_property_read_variable_u32_array() returns
-EOVERFLOW, causing .probe() to fail.
Increase MAX_HALT_REG to 4, and update the indexes accordingly.
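
As a rough sketch of the parsing described above (the real code lives
in q6v5_wcss_init_mmio(); the device-tree offsets in the comment are
invented for illustration):

    /*
     * With a node like
     *     qcom,halt-regs = <&tcsr 0x18000 0x1b000 0x1e000>;
     * the array receives 4 u32 cells: the phandle at index 0 and the
     * three halt register offsets at indexes 1..3. With sz_max of 3,
     * the call below returns -EOVERFLOW for such a property.
     */
    u32 halt_reg[MAX_HALT_REG];
    int ret;

    ret = of_property_read_variable_u32_array(dev->of_node,
                                              "qcom,halt-regs",
                                              halt_reg, 0, MAX_HALT_REG);
    if (ret < 0)
            return ret;

    wcss->halt_q6 = halt_reg[1];
    wcss->halt_wcss = halt_reg[2];
    wcss->halt_nc = halt_reg[3];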
Fixes: 0af65b9b915e ("remoteproc: qcom: wcss: Add non pas wcss Q6 support for QCS404")
Signed-off-by: Alexandru Gagniuc <mr.nuke.me(a)gmail.com>
Link: https://lore.kernel.org/r/20251129013207.3981517-1-mr.nuke.me@gmail.com
Signed-off-by: Bjorn Andersson <andersson(a)kernel.org>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
drivers/remoteproc/qcom_q6v5_wcss.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/drivers/remoteproc/qcom_q6v5_wcss.c b/drivers/remoteproc/qcom_q6v5_wcss.c
index 07c88623f5978..23ec87827d4f8 100644
--- a/drivers/remoteproc/qcom_q6v5_wcss.c
+++ b/drivers/remoteproc/qcom_q6v5_wcss.c
@@ -85,7 +85,7 @@
#define TCSR_WCSS_CLK_MASK 0x1F
#define TCSR_WCSS_CLK_ENABLE 0x14
-#define MAX_HALT_REG 3
+#define MAX_HALT_REG 4
enum {
WCSS_IPQ8074,
WCSS_QCS404,
@@ -864,9 +864,9 @@ static int q6v5_wcss_init_mmio(struct q6v5_wcss *wcss,
return -EINVAL;
}
- wcss->halt_q6 = halt_reg[0];
- wcss->halt_wcss = halt_reg[1];
- wcss->halt_nc = halt_reg[2];
+ wcss->halt_q6 = halt_reg[1];
+ wcss->halt_wcss = halt_reg[2];
+ wcss->halt_nc = halt_reg[3];
return 0;
}
--
2.51.0
6.18-stable review patch. If anyone has any objections, please let me know.
------------------
From: Kumar Kartikeya Dwivedi <memxor(a)gmail.com>
[ Upstream commit 81d5a6a438595e46be191d602e5c2d6d73992fdc ]
When in an NMI, only defer to the trylock fallback when an rqspinlock
waiter is already queued on the current CPU; this is detected by a
non-zero node index. This allows an NMI waiter to join the waiter
queue if it isn't interrupting an existing rqspinlock waiter,
increasing its chances of fairly obtaining the lock, performing
deadlock detection as the head, and not being starved while attempting
the trylock.
The trylock path in particular is unlikely to succeed under contention,
as it relies on the lock word becoming 0, which indicates no contention.
This means that the most likely result for NMIs attempting a trylock is
a timeout under contention if they don't hit an AA or ABBA case.
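
For context, queued_spin_trylock() only succeeds when the entire lock
word is zero; a sketch close to the generic implementation in
include/asm-generic/qspinlock.h:

    static __always_inline int queued_spin_trylock(struct qspinlock *lock)
    {
            int val = atomic_read(&lock->val);

            /* Any owner, pending bit, or queue tail means contention. */
            if (unlikely(val))
                    return 0;

            return likely(atomic_try_cmpxchg_acquire(&lock->val, &val,
                                                     _Q_LOCKED_VAL));
    }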
The core problem being addressed through the fixed commit was removing
the dependency edge between an NMI queue waiter and the queue waiter
it is interrupting. Whenever a circular dependency forms, with no way
to break it (as non-head waiters don't poll for deadlocks or
timeouts), we would enter a deadlock. A trylock breaks such an edge
either by probing for deadlocks, or by eventually terminating the
waiting loop using a timeout.
By excluding queueing for NMIs on CPUs where the node index is
non-zero, this sort of dependency is broken. The CPU enters the
trylock path for those cases, and falls back to deadlock checks and
timeouts. However, in the other case, where the NMI doesn't interrupt
a CPU that is queued on the lock in the slow path, it can join the
queue as a normal waiter, and avoid trylock-associated starvation and
subsequent timeouts.
There are a few remaining cases here that matter: the NMI can still
preempt the owner in its critical section, and if it queues as a
non-head waiter, it can end up impeding the progress of the owner. While
this won't deadlock, since the head waiter will eventually signal the
NMI waiter to stop (due to a timeout), it can still lead to long
timeouts. These gaps will be addressed in subsequent commits.
Note that while the node count detection approach is less conservative
than simply deferring NMIs to trylock, it is going to return errors
where attempts to lock B in NMI happen while waiters for lock A are in a
lower context on the same CPU. However, this only occurs when the lower
context is queued in the slow path, and the NMI attempt can proceed
without failure in all other cases. To continue to prevent AA deadlocks
(or ABBA in a similar NMI-interrupting-lower-context pattern), we'd
need a more fleshed-out algorithm to unlink NMI waiters after they
queue and detect such cases. However, all that complexity isn't yet
appealing just to reduce the failure rate in the small window inside
the slow path.
It is important to note that reentrancy in the slow path can also happen
through trace_contention_{begin,end}, but in those cases, unlike an NMI,
the forward progress of the head waiter (or the predecessor in general)
is not being blocked.
Fixes: 0d80e7f951be ("rqspinlock: Choose trylock fallback for NMI waiters")
Reported-by: Ritesh Oedayrajsingh Varma <ritesh(a)superluminal.eu>
Suggested-by: Alexei Starovoitov <ast(a)kernel.org>
Signed-off-by: Kumar Kartikeya Dwivedi <memxor(a)gmail.com>
Link: https://lore.kernel.org/r/20251128232802.1031906-4-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast(a)kernel.org>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
kernel/bpf/rqspinlock.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/kernel/bpf/rqspinlock.c b/kernel/bpf/rqspinlock.c
index f4c534fa4e87b..3faf9cbd6c753 100644
--- a/kernel/bpf/rqspinlock.c
+++ b/kernel/bpf/rqspinlock.c
@@ -465,7 +465,7 @@ int __lockfunc resilient_queued_spin_lock_slowpath(rqspinlock_t *lock, u32 val)
* any MCS node. This is not the most elegant solution, but is
* simple enough.
*/
- if (unlikely(idx >= _Q_MAX_NODES || in_nmi())) {
+ if (unlikely(idx >= _Q_MAX_NODES || (in_nmi() && idx > 0))) {
lockevent_inc(lock_no_node);
RES_RESET_TIMEOUT(ts, RES_DEF_TIMEOUT);
while (!queued_spin_trylock(lock)) {
--
2.51.0