The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From ae4d37b5df749926891583d42a6801b5da11e3c1 Mon Sep 17 00:00:00 2001
From: Xiaomeng Tong <xiam0nd.tong(a)gmail.com>
Date: Wed, 6 Apr 2022 21:04:44 +0200
Subject: [PATCH] drbd: fix an invalid memory access caused by incorrect use of
list iterator
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
The bug is here:
idr_remove(&connection->peer_devices, vnr);
If the previous for_each_connection() doesn't exit early (no goto hit
inside the loop), the iterator 'connection' after the loop will be a
bogus pointer to an invalid structure object containing the HEAD
(&resource->connections). As a result, the use of 'connection' above
will lead to an invalid memory access (including a possible invalid free
as idr_remove could call free_layer).
The original intention should have been to remove all peer_devices,
but the following lines have already done the work. So just remove
this line and the unneeded label, to fix this bug.
Cc: stable(a)vger.kernel.org
Fixes: c06ece6ba6f1b ("drbd: Turn connection->volumes into connection->peer_devices")
Signed-off-by: Xiaomeng Tong <xiam0nd.tong(a)gmail.com>
Reviewed-by: Christoph Böhmwalder <christoph.boehmwalder(a)linbit.com>
Reviewed-by: Lars Ellenberg <lars.ellenberg(a)linbit.com>
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 9676a1d214bc..d6dfa286ddb3 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -2773,12 +2773,12 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig
if (init_submitter(device)) {
err = ERR_NOMEM;
- goto out_idr_remove_vol;
+ goto out_idr_remove_from_resource;
}
err = add_disk(disk);
if (err)
- goto out_idr_remove_vol;
+ goto out_idr_remove_from_resource;
/* inherit the connection state */
device->state.conn = first_connection(resource)->cstate;
@@ -2792,8 +2792,6 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig
drbd_debugfs_device_add(device);
return NO_ERROR;
-out_idr_remove_vol:
- idr_remove(&connection->peer_devices, vnr);
out_idr_remove_from_resource:
for_each_connection(connection, resource) {
peer_device = idr_remove(&connection->peer_devices, vnr);
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From ae4d37b5df749926891583d42a6801b5da11e3c1 Mon Sep 17 00:00:00 2001
From: Xiaomeng Tong <xiam0nd.tong(a)gmail.com>
Date: Wed, 6 Apr 2022 21:04:44 +0200
Subject: [PATCH] drbd: fix an invalid memory access caused by incorrect use of
list iterator
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
The bug is here:
idr_remove(&connection->peer_devices, vnr);
If the previous for_each_connection() doesn't exit early (no goto hit
inside the loop), the iterator 'connection' after the loop will be a
bogus pointer to an invalid structure object containing the HEAD
(&resource->connections). As a result, the use of 'connection' above
will lead to an invalid memory access (including a possible invalid free
as idr_remove could call free_layer).
The original intention should have been to remove all peer_devices,
but the following lines have already done the work. So just remove
this line and the unneeded label, to fix this bug.
Cc: stable(a)vger.kernel.org
Fixes: c06ece6ba6f1b ("drbd: Turn connection->volumes into connection->peer_devices")
Signed-off-by: Xiaomeng Tong <xiam0nd.tong(a)gmail.com>
Reviewed-by: Christoph Böhmwalder <christoph.boehmwalder(a)linbit.com>
Reviewed-by: Lars Ellenberg <lars.ellenberg(a)linbit.com>
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 9676a1d214bc..d6dfa286ddb3 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -2773,12 +2773,12 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig
if (init_submitter(device)) {
err = ERR_NOMEM;
- goto out_idr_remove_vol;
+ goto out_idr_remove_from_resource;
}
err = add_disk(disk);
if (err)
- goto out_idr_remove_vol;
+ goto out_idr_remove_from_resource;
/* inherit the connection state */
device->state.conn = first_connection(resource)->cstate;
@@ -2792,8 +2792,6 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig
drbd_debugfs_device_add(device);
return NO_ERROR;
-out_idr_remove_vol:
- idr_remove(&connection->peer_devices, vnr);
out_idr_remove_from_resource:
for_each_connection(connection, resource) {
peer_device = idr_remove(&connection->peer_devices, vnr);
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From ae4d37b5df749926891583d42a6801b5da11e3c1 Mon Sep 17 00:00:00 2001
From: Xiaomeng Tong <xiam0nd.tong(a)gmail.com>
Date: Wed, 6 Apr 2022 21:04:44 +0200
Subject: [PATCH] drbd: fix an invalid memory access caused by incorrect use of
list iterator
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
The bug is here:
idr_remove(&connection->peer_devices, vnr);
If the previous for_each_connection() doesn't exit early (no goto hit
inside the loop), the iterator 'connection' after the loop will be a
bogus pointer to an invalid structure object containing the HEAD
(&resource->connections). As a result, the use of 'connection' above
will lead to an invalid memory access (including a possible invalid free
as idr_remove could call free_layer).
The original intention should have been to remove all peer_devices,
but the following lines have already done the work. So just remove
this line and the unneeded label, to fix this bug.
Cc: stable(a)vger.kernel.org
Fixes: c06ece6ba6f1b ("drbd: Turn connection->volumes into connection->peer_devices")
Signed-off-by: Xiaomeng Tong <xiam0nd.tong(a)gmail.com>
Reviewed-by: Christoph Böhmwalder <christoph.boehmwalder(a)linbit.com>
Reviewed-by: Lars Ellenberg <lars.ellenberg(a)linbit.com>
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 9676a1d214bc..d6dfa286ddb3 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -2773,12 +2773,12 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig
if (init_submitter(device)) {
err = ERR_NOMEM;
- goto out_idr_remove_vol;
+ goto out_idr_remove_from_resource;
}
err = add_disk(disk);
if (err)
- goto out_idr_remove_vol;
+ goto out_idr_remove_from_resource;
/* inherit the connection state */
device->state.conn = first_connection(resource)->cstate;
@@ -2792,8 +2792,6 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig
drbd_debugfs_device_add(device);
return NO_ERROR;
-out_idr_remove_vol:
- idr_remove(&connection->peer_devices, vnr);
out_idr_remove_from_resource:
for_each_connection(connection, resource) {
peer_device = idr_remove(&connection->peer_devices, vnr);
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From ae4d37b5df749926891583d42a6801b5da11e3c1 Mon Sep 17 00:00:00 2001
From: Xiaomeng Tong <xiam0nd.tong(a)gmail.com>
Date: Wed, 6 Apr 2022 21:04:44 +0200
Subject: [PATCH] drbd: fix an invalid memory access caused by incorrect use of
list iterator
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
The bug is here:
idr_remove(&connection->peer_devices, vnr);
If the previous for_each_connection() doesn't exit early (no goto hit
inside the loop), the iterator 'connection' after the loop will be a
bogus pointer to an invalid structure object containing the HEAD
(&resource->connections). As a result, the use of 'connection' above
will lead to an invalid memory access (including a possible invalid free
as idr_remove could call free_layer).
The original intention should have been to remove all peer_devices,
but the following lines have already done the work. So just remove
this line and the unneeded label, to fix this bug.
Cc: stable(a)vger.kernel.org
Fixes: c06ece6ba6f1b ("drbd: Turn connection->volumes into connection->peer_devices")
Signed-off-by: Xiaomeng Tong <xiam0nd.tong(a)gmail.com>
Reviewed-by: Christoph Böhmwalder <christoph.boehmwalder(a)linbit.com>
Reviewed-by: Lars Ellenberg <lars.ellenberg(a)linbit.com>
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 9676a1d214bc..d6dfa286ddb3 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -2773,12 +2773,12 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig
if (init_submitter(device)) {
err = ERR_NOMEM;
- goto out_idr_remove_vol;
+ goto out_idr_remove_from_resource;
}
err = add_disk(disk);
if (err)
- goto out_idr_remove_vol;
+ goto out_idr_remove_from_resource;
/* inherit the connection state */
device->state.conn = first_connection(resource)->cstate;
@@ -2792,8 +2792,6 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig
drbd_debugfs_device_add(device);
return NO_ERROR;
-out_idr_remove_vol:
- idr_remove(&connection->peer_devices, vnr);
out_idr_remove_from_resource:
for_each_connection(connection, resource) {
peer_device = idr_remove(&connection->peer_devices, vnr);
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From ae4d37b5df749926891583d42a6801b5da11e3c1 Mon Sep 17 00:00:00 2001
From: Xiaomeng Tong <xiam0nd.tong(a)gmail.com>
Date: Wed, 6 Apr 2022 21:04:44 +0200
Subject: [PATCH] drbd: fix an invalid memory access caused by incorrect use of
list iterator
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
The bug is here:
idr_remove(&connection->peer_devices, vnr);
If the previous for_each_connection() doesn't exit early (no goto hit
inside the loop), the iterator 'connection' after the loop will be a
bogus pointer to an invalid structure object containing the HEAD
(&resource->connections). As a result, the use of 'connection' above
will lead to an invalid memory access (including a possible invalid free
as idr_remove could call free_layer).
The original intention should have been to remove all peer_devices,
but the following lines have already done the work. So just remove
this line and the unneeded label, to fix this bug.
Cc: stable(a)vger.kernel.org
Fixes: c06ece6ba6f1b ("drbd: Turn connection->volumes into connection->peer_devices")
Signed-off-by: Xiaomeng Tong <xiam0nd.tong(a)gmail.com>
Reviewed-by: Christoph Böhmwalder <christoph.boehmwalder(a)linbit.com>
Reviewed-by: Lars Ellenberg <lars.ellenberg(a)linbit.com>
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 9676a1d214bc..d6dfa286ddb3 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -2773,12 +2773,12 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig
if (init_submitter(device)) {
err = ERR_NOMEM;
- goto out_idr_remove_vol;
+ goto out_idr_remove_from_resource;
}
err = add_disk(disk);
if (err)
- goto out_idr_remove_vol;
+ goto out_idr_remove_from_resource;
/* inherit the connection state */
device->state.conn = first_connection(resource)->cstate;
@@ -2792,8 +2792,6 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig
drbd_debugfs_device_add(device);
return NO_ERROR;
-out_idr_remove_vol:
- idr_remove(&connection->peer_devices, vnr);
out_idr_remove_from_resource:
for_each_connection(connection, resource) {
peer_device = idr_remove(&connection->peer_devices, vnr);
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From ae4d37b5df749926891583d42a6801b5da11e3c1 Mon Sep 17 00:00:00 2001
From: Xiaomeng Tong <xiam0nd.tong(a)gmail.com>
Date: Wed, 6 Apr 2022 21:04:44 +0200
Subject: [PATCH] drbd: fix an invalid memory access caused by incorrect use of
list iterator
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
The bug is here:
idr_remove(&connection->peer_devices, vnr);
If the previous for_each_connection() doesn't exit early (no goto hit
inside the loop), the iterator 'connection' after the loop will be a
bogus pointer to an invalid structure object containing the HEAD
(&resource->connections). As a result, the use of 'connection' above
will lead to an invalid memory access (including a possible invalid free
as idr_remove could call free_layer).
The original intention should have been to remove all peer_devices,
but the following lines have already done the work. So just remove
this line and the unneeded label, to fix this bug.
Cc: stable(a)vger.kernel.org
Fixes: c06ece6ba6f1b ("drbd: Turn connection->volumes into connection->peer_devices")
Signed-off-by: Xiaomeng Tong <xiam0nd.tong(a)gmail.com>
Reviewed-by: Christoph Böhmwalder <christoph.boehmwalder(a)linbit.com>
Reviewed-by: Lars Ellenberg <lars.ellenberg(a)linbit.com>
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 9676a1d214bc..d6dfa286ddb3 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -2773,12 +2773,12 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig
if (init_submitter(device)) {
err = ERR_NOMEM;
- goto out_idr_remove_vol;
+ goto out_idr_remove_from_resource;
}
err = add_disk(disk);
if (err)
- goto out_idr_remove_vol;
+ goto out_idr_remove_from_resource;
/* inherit the connection state */
device->state.conn = first_connection(resource)->cstate;
@@ -2792,8 +2792,6 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig
drbd_debugfs_device_add(device);
return NO_ERROR;
-out_idr_remove_vol:
- idr_remove(&connection->peer_devices, vnr);
out_idr_remove_from_resource:
for_each_connection(connection, resource) {
peer_device = idr_remove(&connection->peer_devices, vnr);
The following commit has been merged into the irq/urgent branch of tip:
Commit-ID: 08d835dff916bfe8f45acc7b92c7af6c4081c8a7
Gitweb: https://git.kernel.org/tip/08d835dff916bfe8f45acc7b92c7af6c4081c8a7
Author: Rei Yamamoto <yamamoto.rei(a)jp.fujitsu.com>
AuthorDate: Thu, 31 Mar 2022 09:33:09 +09:00
Committer: Thomas Gleixner <tglx(a)linutronix.de>
CommitterDate: Mon, 11 Apr 2022 09:58:03 +02:00
genirq/affinity: Consider that CPUs on nodes can be unbalanced
If CPUs on a node are offline at boot time, the number of nodes is
different when building affinity masks for present cpus and when building
affinity masks for possible cpus. This causes the following problem:
In the case that the number of vectors is less than the number of nodes
there are cases where bits of masks for present cpus are overwritten when
building masks for possible cpus.
Fix this by excluding CPUs, which are not part of the current build mask
(present/possible).
[ tglx: Massaged changelog and added comment ]
Fixes: b82592199032 ("genirq/affinity: Spread IRQs to all available NUMA nodes")
Signed-off-by: Rei Yamamoto <yamamoto.rei(a)jp.fujitsu.com>
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Reviewed-by: Ming Lei <ming.lei(a)redhat.com>
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/r/20220331003309.10891-1-yamamoto.rei@jp.fujitsu.com
---
kernel/irq/affinity.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/kernel/irq/affinity.c b/kernel/irq/affinity.c
index f7ff891..fdf1704 100644
--- a/kernel/irq/affinity.c
+++ b/kernel/irq/affinity.c
@@ -269,8 +269,9 @@ static int __irq_build_affinity_masks(unsigned int startvec,
*/
if (numvecs <= nodes) {
for_each_node_mask(n, nodemsk) {
- cpumask_or(&masks[curvec].mask, &masks[curvec].mask,
- node_to_cpumask[n]);
+ /* Ensure that only CPUs which are in both masks are set */
+ cpumask_and(nmsk, cpu_mask, node_to_cpumask[n]);
+ cpumask_or(&masks[curvec].mask, &masks[curvec].mask, nmsk);
if (++curvec == last_affv)
curvec = firstvec;
}
The patch below does not apply to the 5.17-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 62ed0bf7315b524973bb5fb9174b60e353289835 Mon Sep 17 00:00:00 2001
From: Johannes Thumshirn <johannes.thumshirn(a)wdc.com>
Date: Mon, 7 Mar 2022 02:47:18 -0800
Subject: [PATCH] btrfs: zoned: remove left over ASSERT checking for single
profile
With commit dcf5652291f6 ("btrfs: zoned: allow DUP on meta-data block
groups") we started allowing DUP on metadata block groups, so the
ASSERT()s in btrfs_can_activate_zone() and btrfs_zoned_get_device() are
no longer valid and in fact even harmful.
Fixes: dcf5652291f6 ("btrfs: zoned: allow DUP on meta-data block groups")
CC: stable(a)vger.kernel.org # 5.17
Signed-off-by: Johannes Thumshirn <johannes.thumshirn(a)wdc.com>
Reviewed-by: David Sterba <dsterba(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index 61125aec8723..1b1b310c3c51 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -1801,7 +1801,6 @@ struct btrfs_device *btrfs_zoned_get_device(struct btrfs_fs_info *fs_info,
map = em->map_lookup;
/* We only support single profile for now */
- ASSERT(map->num_stripes == 1);
device = map->stripes[0].dev;
free_extent_map(em);
@@ -1983,9 +1982,6 @@ bool btrfs_can_activate_zone(struct btrfs_fs_devices *fs_devices, u64 flags)
if (!btrfs_is_zoned(fs_info))
return true;
- /* Non-single profiles are not supported yet */
- ASSERT((flags & BTRFS_BLOCK_GROUP_PROFILE_MASK) == 0);
-
/* Check if there is a device with active zones left */
mutex_lock(&fs_info->chunk_mutex);
list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) {
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 60021bd754c6ca0addc6817994f20290a321d8d6 Mon Sep 17 00:00:00 2001
From: Kaiwen Hu <kevinhu(a)synology.com>
Date: Wed, 23 Mar 2022 15:10:32 +0800
Subject: [PATCH] btrfs: prevent subvol with swapfile from being deleted
A subvolume with an active swapfile must not be deleted; otherwise it
would not be possible to deactivate the swapfile.
After the subvolume is deleted, we cannot swapoff the swapfile in this
deleted subvolume because the path is unreachable. Since the swapfile is
still active and holding references, the filesystem cannot be unmounted.
The test looks like this:
mkfs.btrfs -f $dev > /dev/null
mount $dev $mnt
btrfs sub create $mnt/subvol
touch $mnt/subvol/swapfile
chmod 600 $mnt/subvol/swapfile
chattr +C $mnt/subvol/swapfile
dd if=/dev/zero of=$mnt/subvol/swapfile bs=1K count=4096
mkswap $mnt/subvol/swapfile
swapon $mnt/subvol/swapfile
btrfs sub delete $mnt/subvol
swapoff $mnt/subvol/swapfile # failed: No such file or directory
swapoff --all
umount $mnt # target is busy.
To prevent the above issue, we simply check whether the subvolume
contains any active swapfile and, if so, stop the deletion. This
behavior mirrors how the snapshot ioctl deals with a swapfile.
CC: stable(a)vger.kernel.org # 5.4+
Reviewed-by: Robbie Ko <robbieko(a)synology.com>
Reviewed-by: Qu Wenruo <wqu(a)suse.com>
Reviewed-by: Filipe Manana <fdmanana(a)suse.com>
Signed-off-by: Kaiwen Hu <kevinhu(a)synology.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index b976f757571f..5aab6af88349 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -4487,6 +4487,13 @@ int btrfs_delete_subvolume(struct inode *dir, struct dentry *dentry)
dest->root_key.objectid);
return -EPERM;
}
+ if (atomic_read(&dest->nr_swapfiles)) {
+ spin_unlock(&dest->root_item_lock);
+ btrfs_warn(fs_info,
+ "attempt to delete subvolume %llu with active swapfile",
+ root->root_key.objectid);
+ return -EPERM;
+ }
root_flags = btrfs_root_flags(&dest->root_item);
btrfs_set_root_flags(&dest->root_item,
root_flags | BTRFS_ROOT_SUBVOL_DEAD);
@@ -11110,8 +11117,23 @@ static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,
* set. We use this counter to prevent snapshots. We must increment it
* before walking the extents because we don't want a concurrent
* snapshot to run after we've already checked the extents.
+ *
+ * It is possible that subvolume is marked for deletion but still not
+ * removed yet. To prevent this race, we check the root status before
+ * activating the swapfile.
*/
+ spin_lock(&root->root_item_lock);
+ if (btrfs_root_dead(root)) {
+ spin_unlock(&root->root_item_lock);
+
+ btrfs_exclop_finish(fs_info);
+ btrfs_warn(fs_info,
+ "cannot activate swapfile because subvolume %llu is being deleted",
+ root->root_key.objectid);
+ return -EPERM;
+ }
atomic_inc(&root->nr_swapfiles);
+ spin_unlock(&root->root_item_lock);
isize = ALIGN_DOWN(inode->i_size, fs_info->sectorsize);
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From bbac58698a55cc0a6f0c0d69a6dcd3f9f3134c11 Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu(a)suse.com>
Date: Tue, 8 Mar 2022 13:36:38 +0800
Subject: [PATCH] btrfs: remove device item and update super block in the same
transaction
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
[BUG]
There is a report of a btrfs filesystem with a bad super block num devices
value. This makes btrfs reject the fs completely.
BTRFS error (device sdd3): super_num_devices 3 mismatch with num_devices 2 found here
BTRFS error (device sdd3): failed to read chunk tree: -22
BTRFS error (device sdd3): open_ctree failed
[CAUSE]
During btrfs device removal, chunk tree and super block num devs are
updated in two different transactions:
btrfs_rm_device()
|- btrfs_rm_dev_item(device)
| |- trans = btrfs_start_transaction()
| | Now we got transaction X
| |
| |- btrfs_del_item()
| | Now device item is removed from chunk tree
| |
| |- btrfs_commit_transaction()
| Transaction X got committed, super num devs untouched,
| but device item removed from chunk tree.
| (AKA, super num devs is already incorrect)
|
|- cur_devices->num_devices--;
|- cur_devices->total_devices--;
|- btrfs_set_super_num_devices()
All those operations are not in transaction X, thus it will
only be written back to disk in next transaction.
So if a power loss happens after transaction X in btrfs_rm_dev_item() is
committed, but before transaction X+1 (which can be minutes away), we
get the super num devices mismatch.
[FIX]
Instead of starting and committing a transaction inside
btrfs_rm_dev_item(), start a transaction inside btrfs_rm_device() and
pass it to btrfs_rm_dev_item().
And only commit the transaction after everything is done.
Reported-by: Luca Béla Palkovics <luca.bela.palkovics(a)gmail.com>
Link: https://lore.kernel.org/linux-btrfs/CA+8xDSpvdm_U0QLBAnrH=zqDq_cWCOH5TiV46C…
CC: stable(a)vger.kernel.org # 4.14+
Reviewed-by: Anand Jain <anand.jain(a)oracle.com>
Signed-off-by: Qu Wenruo <wqu(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 1be7cb2f955f..2cfbc74a3b4e 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1896,23 +1896,18 @@ static void update_dev_time(const char *device_path)
path_put(&path);
}
-static int btrfs_rm_dev_item(struct btrfs_device *device)
+static int btrfs_rm_dev_item(struct btrfs_trans_handle *trans,
+ struct btrfs_device *device)
{
struct btrfs_root *root = device->fs_info->chunk_root;
int ret;
struct btrfs_path *path;
struct btrfs_key key;
- struct btrfs_trans_handle *trans;
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
- trans = btrfs_start_transaction(root, 0);
- if (IS_ERR(trans)) {
- btrfs_free_path(path);
- return PTR_ERR(trans);
- }
key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
key.type = BTRFS_DEV_ITEM_KEY;
key.offset = device->devid;
@@ -1923,21 +1918,12 @@ static int btrfs_rm_dev_item(struct btrfs_device *device)
if (ret) {
if (ret > 0)
ret = -ENOENT;
- btrfs_abort_transaction(trans, ret);
- btrfs_end_transaction(trans);
goto out;
}
ret = btrfs_del_item(trans, root, path);
- if (ret) {
- btrfs_abort_transaction(trans, ret);
- btrfs_end_transaction(trans);
- }
-
out:
btrfs_free_path(path);
- if (!ret)
- ret = btrfs_commit_transaction(trans);
return ret;
}
@@ -2078,6 +2064,7 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info,
struct btrfs_dev_lookup_args *args,
struct block_device **bdev, fmode_t *mode)
{
+ struct btrfs_trans_handle *trans;
struct btrfs_device *device;
struct btrfs_fs_devices *cur_devices;
struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
@@ -2098,7 +2085,7 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info,
ret = btrfs_check_raid_min_devices(fs_info, num_devices - 1);
if (ret)
- goto out;
+ return ret;
device = btrfs_find_device(fs_info->fs_devices, args);
if (!device) {
@@ -2106,27 +2093,22 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info,
ret = BTRFS_ERROR_DEV_MISSING_NOT_FOUND;
else
ret = -ENOENT;
- goto out;
+ return ret;
}
if (btrfs_pinned_by_swapfile(fs_info, device)) {
btrfs_warn_in_rcu(fs_info,
"cannot remove device %s (devid %llu) due to active swapfile",
rcu_str_deref(device->name), device->devid);
- ret = -ETXTBSY;
- goto out;
+ return -ETXTBSY;
}
- if (test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)) {
- ret = BTRFS_ERROR_DEV_TGT_REPLACE;
- goto out;
- }
+ if (test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state))
+ return BTRFS_ERROR_DEV_TGT_REPLACE;
if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state) &&
- fs_info->fs_devices->rw_devices == 1) {
- ret = BTRFS_ERROR_DEV_ONLY_WRITABLE;
- goto out;
- }
+ fs_info->fs_devices->rw_devices == 1)
+ return BTRFS_ERROR_DEV_ONLY_WRITABLE;
if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) {
mutex_lock(&fs_info->chunk_mutex);
@@ -2139,14 +2121,22 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info,
if (ret)
goto error_undo;
- /*
- * TODO: the superblock still includes this device in its num_devices
- * counter although write_all_supers() is not locked out. This
- * could give a filesystem state which requires a degraded mount.
- */
- ret = btrfs_rm_dev_item(device);
- if (ret)
+ trans = btrfs_start_transaction(fs_info->chunk_root, 0);
+ if (IS_ERR(trans)) {
+ ret = PTR_ERR(trans);
goto error_undo;
+ }
+
+ ret = btrfs_rm_dev_item(trans, device);
+ if (ret) {
+ /* Any error in dev item removal is critical */
+ btrfs_crit(fs_info,
+ "failed to remove device item for devid %llu: %d",
+ device->devid, ret);
+ btrfs_abort_transaction(trans, ret);
+ btrfs_end_transaction(trans);
+ return ret;
+ }
clear_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state);
btrfs_scrub_cancel_dev(device);
@@ -2229,7 +2219,8 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info,
free_fs_devices(cur_devices);
}
-out:
+ ret = btrfs_commit_transaction(trans);
+
return ret;
error_undo:
@@ -2240,7 +2231,7 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info,
device->fs_devices->rw_devices++;
mutex_unlock(&fs_info->chunk_mutex);
}
- goto out;
+ return ret;
}
void btrfs_rm_dev_replace_remove_srcdev(struct btrfs_device *srcdev)