The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
Thanks,
Sasha
------------------ original commit in Linus's tree ------------------
From 7af03e688792293ba33149fb8df619a8dff90e80 Mon Sep 17 00:00:00 2001
From: Jani Nikula <jani.nikula(a)intel.com>
Date: Fri, 8 Mar 2024 18:03:39 +0200
Subject: [PATCH] drm/probe-helper: warn about negative .get_modes()
The .get_modes() callback is supposed to return the number of modes,
never a negative error code. If a negative value is returned, it'll just
be interpreted as a negative count, and added to previous calculations.
Document the rules, but handle the negative values gracefully with an
error message.
Cc: stable(a)vger.kernel.org
Acked-by: Thomas Zimmermann <tzimmermann(a)suse.de>
Link: https://patchwork.freedesktop.org/patch/msgid/50208c866facc33226a3c77b82bb9…
Signed-off-by: Jani Nikula <jani.nikula(a)intel.com>
---
drivers/gpu/drm/drm_probe_helper.c | 7 +++++++
include/drm/drm_modeset_helper_vtables.h | 3 ++-
2 files changed, 9 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/drm_probe_helper.c b/drivers/gpu/drm/drm_probe_helper.c
index 19ecb749704be..75f84753f6ee3 100644
--- a/drivers/gpu/drm/drm_probe_helper.c
+++ b/drivers/gpu/drm/drm_probe_helper.c
@@ -422,6 +422,13 @@ static int drm_helper_probe_get_modes(struct drm_connector *connector)
count = connector_funcs->get_modes(connector);
+ /* The .get_modes() callback should not return negative values. */
+ if (count < 0) {
+ drm_err(connector->dev, ".get_modes() returned %pe\n",
+ ERR_PTR(count));
+ count = 0;
+ }
+
/*
* Fallback for when DDC probe failed in drm_get_edid() and thus skipped
* override/firmware EDID.
diff --git a/include/drm/drm_modeset_helper_vtables.h b/include/drm/drm_modeset_helper_vtables.h
index 881b03e4dc288..9ed42469540eb 100644
--- a/include/drm/drm_modeset_helper_vtables.h
+++ b/include/drm/drm_modeset_helper_vtables.h
@@ -898,7 +898,8 @@ struct drm_connector_helper_funcs {
*
* RETURNS:
*
- * The number of modes added by calling drm_mode_probed_add().
+ * The number of modes added by calling drm_mode_probed_add(). Return 0
+ * on failures (no modes) instead of negative error codes.
*/
int (*get_modes)(struct drm_connector *connector);
--
2.43.0
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
Thanks,
Sasha
------------------ original commit in Linus's tree ------------------
From 7fb19d9510937121a1f285894cffd30bc96572e3 Mon Sep 17 00:00:00 2001
From: Josip Pavic <josip.pavic(a)amd.com>
Date: Fri, 9 Feb 2024 16:05:18 -0500
Subject: [PATCH] drm/amd/display: Allow dirty rects to be sent to dmub when
abm is active
[WHY]
It's beneficial for ABM to know when new frame data are available.
[HOW]
Add new condition to allow dirty rects to be sent to DMUB when ABM is
active. ABM will use this as a signal that a new frame has arrived.
Cc: Mario Limonciello <mario.limonciello(a)amd.com>
Cc: Alex Deucher <alexander.deucher(a)amd.com>
Cc: stable(a)vger.kernel.org
Reviewed-by: Anthony Koo <anthony.koo(a)amd.com>
Acked-by: Alex Hung <alex.hung(a)amd.com>
Signed-off-by: Josip Pavic <josip.pavic(a)amd.com>
Tested-by: Daniel Wheeler <daniel.wheeler(a)amd.com>
Signed-off-by: Alex Deucher <alexander.deucher(a)amd.com>
---
drivers/gpu/drm/amd/display/dc/core/dc.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c
index 5211c1c0f3c0c..613d09c42f3b9 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
@@ -3270,6 +3270,9 @@ static bool dc_dmub_should_send_dirty_rect_cmd(struct dc *dc, struct dc_stream_s
if (stream->link->replay_settings.config.replay_supported)
return true;
+ if (stream->ctx->dce_version >= DCN_VERSION_3_5 && stream->abm_level)
+ return true;
+
return false;
}
--
2.43.0
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
Thanks,
Sasha
------------------ original commit in Linus's tree ------------------
From 9c68ece8b2a5c5ff9b2fcaea923dd73efeb174cd Mon Sep 17 00:00:00 2001
From: Huacai Chen <chenhuacai(a)loongson.cn>
Date: Tue, 19 Mar 2024 15:50:34 +0800
Subject: [PATCH] LoongArch: Define the __io_aw() hook as mmiowb()
Commit fb24ea52f78e0d595852e ("drivers: Remove explicit invocations of
mmiowb()") remove all mmiowb() in drivers, but it says:
"NOTE: mmiowb() has only ever guaranteed ordering in conjunction with
spin_unlock(). However, pairing each mmiowb() removal in this patch with
the corresponding call to spin_unlock() is not at all trivial, so there
is a small chance that this change may regress any drivers incorrectly
relying on mmiowb() to order MMIO writes between CPUs using lock-free
synchronisation."
The mmio in radeon_ring_commit() is protected by a mutex rather than a
spinlock, but in the mutex fastpath it behaves similar to spinlock. We
can add mmiowb() calls in the radeon driver but the maintainer says he
doesn't like such a workaround, and radeon is not the only example of
mutex protected mmio.
So we should extend the mmiowb tracking system from spinlock to mutex,
and maybe other locking primitives. This is not easy and error prone, so
we solve it in the architectural code, by simply defining the __io_aw()
hook as mmiowb(). And we no longer need to override queued_spin_unlock()
so use the generic definition.
Without this, we get such an error when run 'glxgears' on weak ordering
architectures such as LoongArch:
radeon 0000:04:00.0: ring 0 stalled for more than 10324msec
radeon 0000:04:00.0: ring 3 stalled for more than 10240msec
radeon 0000:04:00.0: GPU lockup (current fence id 0x000000000001f412 last fence id 0x000000000001f414 on ring 3)
radeon 0000:04:00.0: GPU lockup (current fence id 0x000000000000f940 last fence id 0x000000000000f941 on ring 0)
radeon 0000:04:00.0: scheduling IB failed (-35).
[drm:radeon_gem_va_ioctl [radeon]] *ERROR* Couldn't update BO_VA (-35)
radeon 0000:04:00.0: scheduling IB failed (-35).
[drm:radeon_gem_va_ioctl [radeon]] *ERROR* Couldn't update BO_VA (-35)
radeon 0000:04:00.0: scheduling IB failed (-35).
[drm:radeon_gem_va_ioctl [radeon]] *ERROR* Couldn't update BO_VA (-35)
radeon 0000:04:00.0: scheduling IB failed (-35).
[drm:radeon_gem_va_ioctl [radeon]] *ERROR* Couldn't update BO_VA (-35)
radeon 0000:04:00.0: scheduling IB failed (-35).
[drm:radeon_gem_va_ioctl [radeon]] *ERROR* Couldn't update BO_VA (-35)
radeon 0000:04:00.0: scheduling IB failed (-35).
[drm:radeon_gem_va_ioctl [radeon]] *ERROR* Couldn't update BO_VA (-35)
radeon 0000:04:00.0: scheduling IB failed (-35).
[drm:radeon_gem_va_ioctl [radeon]] *ERROR* Couldn't update BO_VA (-35)
Link: https://lore.kernel.org/dri-devel/29df7e26-d7a8-4f67-b988-44353c4270ac@amd.…
Link: https://lore.kernel.org/linux-arch/20240301130532.3953167-1-chenhuacai@loon…
Cc: stable(a)vger.kernel.org
Signed-off-by: Huacai Chen <chenhuacai(a)loongson.cn>
---
arch/loongarch/include/asm/Kbuild | 1 +
arch/loongarch/include/asm/io.h | 2 ++
arch/loongarch/include/asm/qspinlock.h | 18 ------------------
3 files changed, 3 insertions(+), 18 deletions(-)
delete mode 100644 arch/loongarch/include/asm/qspinlock.h
diff --git a/arch/loongarch/include/asm/Kbuild b/arch/loongarch/include/asm/Kbuild
index a97c0edbb866a..2dbec7853ae86 100644
--- a/arch/loongarch/include/asm/Kbuild
+++ b/arch/loongarch/include/asm/Kbuild
@@ -6,6 +6,7 @@ generic-y += mcs_spinlock.h
generic-y += parport.h
generic-y += early_ioremap.h
generic-y += qrwlock.h
+generic-y += qspinlock.h
generic-y += rwsem.h
generic-y += segment.h
generic-y += user.h
diff --git a/arch/loongarch/include/asm/io.h b/arch/loongarch/include/asm/io.h
index c486c2341b662..4a8adcca329b8 100644
--- a/arch/loongarch/include/asm/io.h
+++ b/arch/loongarch/include/asm/io.h
@@ -71,6 +71,8 @@ extern void __memcpy_fromio(void *to, const volatile void __iomem *from, size_t
#define memcpy_fromio(a, c, l) __memcpy_fromio((a), (c), (l))
#define memcpy_toio(c, a, l) __memcpy_toio((c), (a), (l))
+#define __io_aw() mmiowb()
+
#include <asm-generic/io.h>
#define ARCH_HAS_VALID_PHYS_ADDR_RANGE
diff --git a/arch/loongarch/include/asm/qspinlock.h b/arch/loongarch/include/asm/qspinlock.h
deleted file mode 100644
index 34f43f8ad5912..0000000000000
--- a/arch/loongarch/include/asm/qspinlock.h
+++ /dev/null
@@ -1,18 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_QSPINLOCK_H
-#define _ASM_QSPINLOCK_H
-
-#include <asm-generic/qspinlock_types.h>
-
-#define queued_spin_unlock queued_spin_unlock
-
-static inline void queued_spin_unlock(struct qspinlock *lock)
-{
- compiletime_assert_atomic_type(lock->locked);
- c_sync();
- WRITE_ONCE(lock->locked, 0);
-}
-
-#include <asm-generic/qspinlock.h>
-
-#endif /* _ASM_QSPINLOCK_H */
--
2.43.0
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
Thanks,
Sasha
------------------ original commit in Linus's tree ------------------
From 13c0a74747cb7fdadf58c5d3a7d52cfca2d51736 Mon Sep 17 00:00:00 2001
From: Shyam Prasad N <sprasad(a)microsoft.com>
Date: Wed, 13 Mar 2024 10:40:41 +0000
Subject: [PATCH] cifs: make sure server interfaces are requested only for
SMB3+
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Some code paths for querying server interfaces make a false
assumption that it will only get called for SMB3+. Since this
function now can get called from a generic code paths, the correct
thing to do is to have specific handler for this functionality
per SMB dialect, and call this handler.
This change adds such a handler and implements this handler only
for SMB 3.0 and 3.1.1.
Cc: stable(a)vger.kernel.org
Cc: Jan Čermák <sairon(a)sairon.cz>
Reported-by: Paulo Alcantara <pc(a)manguebit.com>
Signed-off-by: Shyam Prasad N <sprasad(a)microsoft.com>
Signed-off-by: Steve French <stfrench(a)microsoft.com>
---
fs/smb/client/cifsglob.h | 3 +++
fs/smb/client/connect.c | 6 +++++-
fs/smb/client/smb2ops.c | 2 ++
fs/smb/client/smb2pdu.c | 5 +++--
4 files changed, 13 insertions(+), 3 deletions(-)
diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h
index 8be62ed053a25..3da625d532359 100644
--- a/fs/smb/client/cifsglob.h
+++ b/fs/smb/client/cifsglob.h
@@ -355,6 +355,9 @@ struct smb_version_operations {
/* informational QFS call */
void (*qfs_tcon)(const unsigned int, struct cifs_tcon *,
struct cifs_sb_info *);
+ /* query for server interfaces */
+ int (*query_server_interfaces)(const unsigned int, struct cifs_tcon *,
+ bool);
/* check if a path is accessible or not */
int (*is_path_accessible)(const unsigned int, struct cifs_tcon *,
struct cifs_sb_info *, const char *);
diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c
index 86ae578904a26..4cbb79418e506 100644
--- a/fs/smb/client/connect.c
+++ b/fs/smb/client/connect.c
@@ -123,12 +123,16 @@ static void smb2_query_server_interfaces(struct work_struct *work)
struct cifs_tcon *tcon = container_of(work,
struct cifs_tcon,
query_interfaces.work);
+ struct TCP_Server_Info *server = tcon->ses->server;
/*
* query server network interfaces, in case they change
*/
+ if (!server->ops->query_server_interfaces)
+ return;
+
xid = get_xid();
- rc = SMB3_request_interfaces(xid, tcon, false);
+ rc = server->ops->query_server_interfaces(xid, tcon, false);
free_xid(xid);
if (rc) {
diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c
index 6ee22d0dbc006..2ed456948f34c 100644
--- a/fs/smb/client/smb2ops.c
+++ b/fs/smb/client/smb2ops.c
@@ -5290,6 +5290,7 @@ struct smb_version_operations smb30_operations = {
.tree_connect = SMB2_tcon,
.tree_disconnect = SMB2_tdis,
.qfs_tcon = smb3_qfs_tcon,
+ .query_server_interfaces = SMB3_request_interfaces,
.is_path_accessible = smb2_is_path_accessible,
.can_echo = smb2_can_echo,
.echo = SMB2_echo,
@@ -5405,6 +5406,7 @@ struct smb_version_operations smb311_operations = {
.tree_connect = SMB2_tcon,
.tree_disconnect = SMB2_tdis,
.qfs_tcon = smb3_qfs_tcon,
+ .query_server_interfaces = SMB3_request_interfaces,
.is_path_accessible = smb2_is_path_accessible,
.can_echo = smb2_can_echo,
.echo = SMB2_echo,
diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c
index e5e6b14f8cae3..3ea688558e6c9 100644
--- a/fs/smb/client/smb2pdu.c
+++ b/fs/smb/client/smb2pdu.c
@@ -409,14 +409,15 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon,
spin_unlock(&ses->ses_lock);
if (!rc &&
- (server->capabilities & SMB2_GLOBAL_CAP_MULTI_CHANNEL)) {
+ (server->capabilities & SMB2_GLOBAL_CAP_MULTI_CHANNEL) &&
+ server->ops->query_server_interfaces) {
mutex_unlock(&ses->session_mutex);
/*
* query server network interfaces, in case they change
*/
xid = get_xid();
- rc = SMB3_request_interfaces(xid, tcon, false);
+ rc = server->ops->query_server_interfaces(xid, tcon, false);
free_xid(xid);
if (rc == -EOPNOTSUPP && ses->chan_count > 1) {
--
2.43.0
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
Thanks,
Sasha
------------------ original commit in Linus's tree ------------------
From 16a57d7681110b25708c7042688412238e6f73a9 Mon Sep 17 00:00:00 2001
From: Shyam Prasad N <sprasad(a)microsoft.com>
Date: Wed, 13 Mar 2024 10:40:40 +0000
Subject: [PATCH] cifs: reduce warning log level for server not advertising
interfaces
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Several users have reported this log getting dumped too regularly to
kernel log. The likely root cause has been identified, and it suggests
that this situation is expected for some configurations
(for example SMB2.1).
Since the function returns appropriately even for such cases, it is
fairly harmless to make this a debug log. When needed, the verbosity
can be increased to capture this log.
Cc: stable(a)vger.kernel.org
Reported-by: Jan Čermák <sairon(a)sairon.cz>
Signed-off-by: Shyam Prasad N <sprasad(a)microsoft.com>
Signed-off-by: Steve French <stfrench(a)microsoft.com>
---
fs/smb/client/sess.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/fs/smb/client/sess.c b/fs/smb/client/sess.c
index 8f37373fd3334..3216f786908fb 100644
--- a/fs/smb/client/sess.c
+++ b/fs/smb/client/sess.c
@@ -230,7 +230,7 @@ int cifs_try_adding_channels(struct cifs_ses *ses)
spin_lock(&ses->iface_lock);
if (!ses->iface_count) {
spin_unlock(&ses->iface_lock);
- cifs_dbg(VFS, "server %s does not advertise interfaces\n",
+ cifs_dbg(ONCE, "server %s does not advertise interfaces\n",
ses->server->hostname);
break;
}
@@ -396,7 +396,7 @@ cifs_chan_update_iface(struct cifs_ses *ses, struct TCP_Server_Info *server)
spin_lock(&ses->iface_lock);
if (!ses->iface_count) {
spin_unlock(&ses->iface_lock);
- cifs_dbg(VFS, "server %s does not advertise interfaces\n", ses->server->hostname);
+ cifs_dbg(ONCE, "server %s does not advertise interfaces\n", ses->server->hostname);
return;
}
--
2.43.0
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
Thanks,
Sasha
------------------ original commit in Linus's tree ------------------
From 16c4770c75b1223998adbeb7286f9a15c65fba73 Mon Sep 17 00:00:00 2001
From: Yu Kuai <yukuai3(a)huawei.com>
Date: Tue, 5 Mar 2024 15:23:02 +0800
Subject: [PATCH] dm-raid: really frozen sync_thread during suspend
1) commit f52f5c71f3d4 ("md: fix stopping sync thread") remove
MD_RECOVERY_FROZEN from __md_stop_writes() and doesn't realize that
dm-raid relies on __md_stop_writes() to frozen sync_thread
indirectly. Fix this problem by adding MD_RECOVERY_FROZEN in
md_stop_writes(), and since stop_sync_thread() is only used for
dm-raid in this case, also move stop_sync_thread() to
md_stop_writes().
2) The flag MD_RECOVERY_FROZEN doesn't mean that sync thread is frozen,
it only prevent new sync_thread to start, and it can't stop the
running sync thread; In order to frozen sync_thread, after seting the
flag, stop_sync_thread() should be used.
3) The flag MD_RECOVERY_FROZEN doesn't mean that writes are stopped, use
it as condition for md_stop_writes() in raid_postsuspend() doesn't
look correct. Consider that reentrant stop_sync_thread() do nothing,
always call md_stop_writes() in raid_postsuspend().
4) raid_message can set/clear the flag MD_RECOVERY_FROZEN at anytime,
and if MD_RECOVERY_FROZEN is cleared while the array is suspended,
new sync_thread can start unexpected. Fix this by disallow
raid_message() to change sync_thread status during suspend.
Note that after commit f52f5c71f3d4 ("md: fix stopping sync thread"), the
test shell/lvconvert-raid-reshape.sh start to hang in stop_sync_thread(),
and with previous fixes, the test won't hang there anymore, however, the
test will still fail and complain that ext4 is corrupted. And with this
patch, the test won't hang due to stop_sync_thread() or fail due to ext4
is corrupted anymore. However, there is still a deadlock related to
dm-raid456 that will be fixed in following patches.
Reported-by: Mikulas Patocka <mpatocka(a)redhat.com>
Closes: https://lore.kernel.org/all/e5e8afe2-e9a8-49a2-5ab0-958d4065c55e@redhat.com/
Fixes: 1af2048a3e87 ("dm raid: fix deadlock caused by premature md_stop_writes()")
Fixes: 9dbd1aa3a81c ("dm raid: add reshaping support to the target")
Fixes: f52f5c71f3d4 ("md: fix stopping sync thread")
Cc: stable(a)vger.kernel.org # v6.7+
Signed-off-by: Yu Kuai <yukuai3(a)huawei.com>
Signed-off-by: Xiao Ni <xni(a)redhat.com>
Acked-by: Mike Snitzer <snitzer(a)kernel.org>
Signed-off-by: Song Liu <song(a)kernel.org>
Link: https://lore.kernel.org/r/20240305072306.2562024-6-yukuai1@huaweicloud.com
---
drivers/md/dm-raid.c | 25 +++++++++++++++----------
drivers/md/md.c | 3 ++-
2 files changed, 17 insertions(+), 11 deletions(-)
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index eb009d6bb03a1..e2d7a73c0f874 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -3240,11 +3240,12 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv)
rs->md.ro = 1;
rs->md.in_sync = 1;
- /* Keep array frozen until resume. */
- set_bit(MD_RECOVERY_FROZEN, &rs->md.recovery);
-
/* Has to be held on running the array */
mddev_suspend_and_lock_nointr(&rs->md);
+
+ /* Keep array frozen until resume. */
+ md_frozen_sync_thread(&rs->md);
+
r = md_run(&rs->md);
rs->md.in_sync = 0; /* Assume already marked dirty */
if (r) {
@@ -3722,6 +3723,9 @@ static int raid_message(struct dm_target *ti, unsigned int argc, char **argv,
if (!mddev->pers || !mddev->pers->sync_request)
return -EINVAL;
+ if (test_bit(RT_FLAG_RS_SUSPENDED, &rs->runtime_flags))
+ return -EBUSY;
+
if (!strcasecmp(argv[0], "frozen"))
set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
else
@@ -3796,10 +3800,11 @@ static void raid_postsuspend(struct dm_target *ti)
struct raid_set *rs = ti->private;
if (!test_and_set_bit(RT_FLAG_RS_SUSPENDED, &rs->runtime_flags)) {
- /* Writes have to be stopped before suspending to avoid deadlocks. */
- if (!test_bit(MD_RECOVERY_FROZEN, &rs->md.recovery))
- md_stop_writes(&rs->md);
-
+ /*
+ * sync_thread must be stopped during suspend, and writes have
+ * to be stopped before suspending to avoid deadlocks.
+ */
+ md_stop_writes(&rs->md);
mddev_suspend(&rs->md, false);
}
}
@@ -4012,8 +4017,6 @@ static int raid_preresume(struct dm_target *ti)
}
/* Check for any resize/reshape on @rs and adjust/initiate */
- /* Be prepared for mddev_resume() in raid_resume() */
- set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
if (mddev->recovery_cp && mddev->recovery_cp < MaxSector) {
set_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
mddev->resync_min = mddev->recovery_cp;
@@ -4055,10 +4058,12 @@ static void raid_resume(struct dm_target *ti)
if (mddev->delta_disks < 0)
rs_set_capacity(rs);
+ WARN_ON_ONCE(!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery));
+ WARN_ON_ONCE(test_bit(MD_RECOVERY_RUNNING, &mddev->recovery));
mddev_lock_nointr(mddev);
- clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
mddev->ro = 0;
mddev->in_sync = 0;
+ md_unfrozen_sync_thread(mddev);
mddev_unlock_and_resume(mddev);
}
}
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 55ecc05c17c65..167db77442392 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -6364,7 +6364,6 @@ static void md_clean(struct mddev *mddev)
static void __md_stop_writes(struct mddev *mddev)
{
- stop_sync_thread(mddev, true, false);
del_timer_sync(&mddev->safemode_timer);
if (mddev->pers && mddev->pers->quiesce) {
@@ -6389,6 +6388,8 @@ static void __md_stop_writes(struct mddev *mddev)
void md_stop_writes(struct mddev *mddev)
{
mddev_lock_nointr(mddev);
+ set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
+ stop_sync_thread(mddev, true, false);
__md_stop_writes(mddev);
mddev_unlock(mddev);
}
--
2.43.0
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
Thanks,
Sasha
------------------ original commit in Linus's tree ------------------
From 18c198c96a815c962adc2b9b77909eec0be7df4d Mon Sep 17 00:00:00 2001
From: Alex Williamson <alex.williamson(a)redhat.com>
Date: Fri, 8 Mar 2024 16:05:25 -0700
Subject: [PATCH] vfio/pci: Create persistent INTx handler
A vulnerability exists where the eventfd for INTx signaling can be
deconfigured, which unregisters the IRQ handler but still allows
eventfds to be signaled with a NULL context through the SET_IRQS ioctl
or through unmask irqfd if the device interrupt is pending.
Ideally this could be solved with some additional locking; the igate
mutex serializes the ioctl and config space accesses, and the interrupt
handler is unregistered relative to the trigger, but the irqfd path
runs asynchronous to those. The igate mutex cannot be acquired from the
atomic context of the eventfd wake function. Disabling the irqfd
relative to the eventfd registration is potentially incompatible with
existing userspace.
As a result, the solution implemented here moves configuration of the
INTx interrupt handler to track the lifetime of the INTx context object
and irq_type configuration, rather than registration of a particular
trigger eventfd. Synchronization is added between the ioctl path and
eventfd_signal() wrapper such that the eventfd trigger can be
dynamically updated relative to in-flight interrupts or irqfd callbacks.
Cc: <stable(a)vger.kernel.org>
Fixes: 89e1f7d4c66d ("vfio: Add PCI device driver")
Reported-by: Reinette Chatre <reinette.chatre(a)intel.com>
Reviewed-by: Kevin Tian <kevin.tian(a)intel.com>
Reviewed-by: Reinette Chatre <reinette.chatre(a)intel.com>
Reviewed-by: Eric Auger <eric.auger(a)redhat.com>
Link: https://lore.kernel.org/r/20240308230557.805580-5-alex.williamson@redhat.com
Signed-off-by: Alex Williamson <alex.williamson(a)redhat.com>
---
drivers/vfio/pci/vfio_pci_intrs.c | 145 ++++++++++++++++--------------
1 file changed, 78 insertions(+), 67 deletions(-)
diff --git a/drivers/vfio/pci/vfio_pci_intrs.c b/drivers/vfio/pci/vfio_pci_intrs.c
index 75c85eec21b3c..fb5392b749fff 100644
--- a/drivers/vfio/pci/vfio_pci_intrs.c
+++ b/drivers/vfio/pci/vfio_pci_intrs.c
@@ -90,11 +90,15 @@ static void vfio_send_intx_eventfd(void *opaque, void *unused)
if (likely(is_intx(vdev) && !vdev->virq_disabled)) {
struct vfio_pci_irq_ctx *ctx;
+ struct eventfd_ctx *trigger;
ctx = vfio_irq_ctx_get(vdev, 0);
if (WARN_ON_ONCE(!ctx))
return;
- eventfd_signal(ctx->trigger);
+
+ trigger = READ_ONCE(ctx->trigger);
+ if (likely(trigger))
+ eventfd_signal(trigger);
}
}
@@ -253,100 +257,100 @@ static irqreturn_t vfio_intx_handler(int irq, void *dev_id)
return ret;
}
-static int vfio_intx_enable(struct vfio_pci_core_device *vdev)
+static int vfio_intx_enable(struct vfio_pci_core_device *vdev,
+ struct eventfd_ctx *trigger)
{
+ struct pci_dev *pdev = vdev->pdev;
struct vfio_pci_irq_ctx *ctx;
+ unsigned long irqflags;
+ char *name;
+ int ret;
if (!is_irq_none(vdev))
return -EINVAL;
- if (!vdev->pdev->irq)
+ if (!pdev->irq)
return -ENODEV;
+ name = kasprintf(GFP_KERNEL_ACCOUNT, "vfio-intx(%s)", pci_name(pdev));
+ if (!name)
+ return -ENOMEM;
+
ctx = vfio_irq_ctx_alloc(vdev, 0);
if (!ctx)
return -ENOMEM;
+ ctx->name = name;
+ ctx->trigger = trigger;
+
/*
- * If the virtual interrupt is masked, restore it. Devices
- * supporting DisINTx can be masked at the hardware level
- * here, non-PCI-2.3 devices will have to wait until the
- * interrupt is enabled.
+ * Fill the initial masked state based on virq_disabled. After
+ * enable, changing the DisINTx bit in vconfig directly changes INTx
+ * masking. igate prevents races during setup, once running masked
+ * is protected via irqlock.
+ *
+ * Devices supporting DisINTx also reflect the current mask state in
+ * the physical DisINTx bit, which is not affected during IRQ setup.
+ *
+ * Devices without DisINTx support require an exclusive interrupt.
+ * IRQ masking is performed at the IRQ chip. Again, igate protects
+ * against races during setup and IRQ handlers and irqfds are not
+ * yet active, therefore masked is stable and can be used to
+ * conditionally auto-enable the IRQ.
+ *
+ * irq_type must be stable while the IRQ handler is registered,
+ * therefore it must be set before request_irq().
*/
ctx->masked = vdev->virq_disabled;
- if (vdev->pci_2_3)
- pci_intx(vdev->pdev, !ctx->masked);
+ if (vdev->pci_2_3) {
+ pci_intx(pdev, !ctx->masked);
+ irqflags = IRQF_SHARED;
+ } else {
+ irqflags = ctx->masked ? IRQF_NO_AUTOEN : 0;
+ }
vdev->irq_type = VFIO_PCI_INTX_IRQ_INDEX;
+ ret = request_irq(pdev->irq, vfio_intx_handler,
+ irqflags, ctx->name, vdev);
+ if (ret) {
+ vdev->irq_type = VFIO_PCI_NUM_IRQS;
+ kfree(name);
+ vfio_irq_ctx_free(vdev, ctx, 0);
+ return ret;
+ }
+
return 0;
}
-static int vfio_intx_set_signal(struct vfio_pci_core_device *vdev, int fd)
+static int vfio_intx_set_signal(struct vfio_pci_core_device *vdev,
+ struct eventfd_ctx *trigger)
{
struct pci_dev *pdev = vdev->pdev;
- unsigned long irqflags = IRQF_SHARED;
struct vfio_pci_irq_ctx *ctx;
- struct eventfd_ctx *trigger;
- unsigned long flags;
- int ret;
+ struct eventfd_ctx *old;
ctx = vfio_irq_ctx_get(vdev, 0);
if (WARN_ON_ONCE(!ctx))
return -EINVAL;
- if (ctx->trigger) {
- free_irq(pdev->irq, vdev);
- kfree(ctx->name);
- eventfd_ctx_put(ctx->trigger);
- ctx->trigger = NULL;
- }
-
- if (fd < 0) /* Disable only */
- return 0;
-
- ctx->name = kasprintf(GFP_KERNEL_ACCOUNT, "vfio-intx(%s)",
- pci_name(pdev));
- if (!ctx->name)
- return -ENOMEM;
-
- trigger = eventfd_ctx_fdget(fd);
- if (IS_ERR(trigger)) {
- kfree(ctx->name);
- return PTR_ERR(trigger);
- }
+ old = ctx->trigger;
- ctx->trigger = trigger;
+ WRITE_ONCE(ctx->trigger, trigger);
- /*
- * Devices without DisINTx support require an exclusive interrupt,
- * IRQ masking is performed at the IRQ chip. The masked status is
- * protected by vdev->irqlock. Setup the IRQ without auto-enable and
- * unmask as necessary below under lock. DisINTx is unmodified by
- * the IRQ configuration and may therefore use auto-enable.
- */
- if (!vdev->pci_2_3)
- irqflags = IRQF_NO_AUTOEN;
-
- ret = request_irq(pdev->irq, vfio_intx_handler,
- irqflags, ctx->name, vdev);
- if (ret) {
- ctx->trigger = NULL;
- kfree(ctx->name);
- eventfd_ctx_put(trigger);
- return ret;
+ /* Releasing an old ctx requires synchronizing in-flight users */
+ if (old) {
+ synchronize_irq(pdev->irq);
+ vfio_virqfd_flush_thread(&ctx->unmask);
+ eventfd_ctx_put(old);
}
- spin_lock_irqsave(&vdev->irqlock, flags);
- if (!vdev->pci_2_3 && !ctx->masked)
- enable_irq(pdev->irq);
- spin_unlock_irqrestore(&vdev->irqlock, flags);
-
return 0;
}
static void vfio_intx_disable(struct vfio_pci_core_device *vdev)
{
+ struct pci_dev *pdev = vdev->pdev;
struct vfio_pci_irq_ctx *ctx;
ctx = vfio_irq_ctx_get(vdev, 0);
@@ -354,10 +358,13 @@ static void vfio_intx_disable(struct vfio_pci_core_device *vdev)
if (ctx) {
vfio_virqfd_disable(&ctx->unmask);
vfio_virqfd_disable(&ctx->mask);
+ free_irq(pdev->irq, vdev);
+ if (ctx->trigger)
+ eventfd_ctx_put(ctx->trigger);
+ kfree(ctx->name);
+ vfio_irq_ctx_free(vdev, ctx, 0);
}
- vfio_intx_set_signal(vdev, -1);
vdev->irq_type = VFIO_PCI_NUM_IRQS;
- vfio_irq_ctx_free(vdev, ctx, 0);
}
/*
@@ -641,19 +648,23 @@ static int vfio_pci_set_intx_trigger(struct vfio_pci_core_device *vdev,
return -EINVAL;
if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
+ struct eventfd_ctx *trigger = NULL;
int32_t fd = *(int32_t *)data;
int ret;
- if (is_intx(vdev))
- return vfio_intx_set_signal(vdev, fd);
+ if (fd >= 0) {
+ trigger = eventfd_ctx_fdget(fd);
+ if (IS_ERR(trigger))
+ return PTR_ERR(trigger);
+ }
- ret = vfio_intx_enable(vdev);
- if (ret)
- return ret;
+ if (is_intx(vdev))
+ ret = vfio_intx_set_signal(vdev, trigger);
+ else
+ ret = vfio_intx_enable(vdev, trigger);
- ret = vfio_intx_set_signal(vdev, fd);
- if (ret)
- vfio_intx_disable(vdev);
+ if (ret && trigger)
+ eventfd_ctx_put(trigger);
return ret;
}
--
2.43.0