The patch below does not apply to the 4.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 827aa18e7b903c5ff3b3cd8fec328a99b1dbd411 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana(a)suse.com>
Date: Mon, 10 Dec 2018 17:53:35 +0000
Subject: [PATCH] Btrfs: use nofs context when initializing security xattrs to
avoid deadlock
When initializing the security xattrs, we are holding a transaction handle
therefore we need to use a GFP_NOFS context in order to avoid a deadlock
with reclaim in case it's triggered.
Fixes: 39a27ec1004e8 ("btrfs: use GFP_KERNEL for xattr and acl allocations")
Reviewed-by: Nikolay Borisov <nborisov(a)suse.com>
Signed-off-by: Filipe Manana <fdmanana(a)suse.com>
Reviewed-by: David Sterba <dsterba(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c
index ea78c3d6dcfc..f141b45ce349 100644
--- a/fs/btrfs/xattr.c
+++ b/fs/btrfs/xattr.c
@@ -11,6 +11,7 @@
#include <linux/security.h>
#include <linux/posix_acl_xattr.h>
#include <linux/iversion.h>
+#include <linux/sched/mm.h>
#include "ctree.h"
#include "btrfs_inode.h"
#include "transaction.h"
@@ -422,9 +423,15 @@ static int btrfs_initxattrs(struct inode *inode,
{
const struct xattr *xattr;
struct btrfs_trans_handle *trans = fs_info;
+ unsigned int nofs_flag;
char *name;
int err = 0;
+ /*
+ * We're holding a transaction handle, so use a NOFS memory allocation
+ * context to avoid deadlock if reclaim happens.
+ */
+ nofs_flag = memalloc_nofs_save();
for (xattr = xattr_array; xattr->name != NULL; xattr++) {
name = kmalloc(XATTR_SECURITY_PREFIX_LEN +
strlen(xattr->name) + 1, GFP_KERNEL);
@@ -440,6 +447,7 @@ static int btrfs_initxattrs(struct inode *inode,
if (err < 0)
break;
}
+ memalloc_nofs_restore(nofs_flag);
return err;
}
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 827aa18e7b903c5ff3b3cd8fec328a99b1dbd411 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana(a)suse.com>
Date: Mon, 10 Dec 2018 17:53:35 +0000
Subject: [PATCH] Btrfs: use nofs context when initializing security xattrs to
avoid deadlock
When initializing the security xattrs, we are holding a transaction handle
therefore we need to use a GFP_NOFS context in order to avoid a deadlock
with reclaim in case it's triggered.
Fixes: 39a27ec1004e8 ("btrfs: use GFP_KERNEL for xattr and acl allocations")
Reviewed-by: Nikolay Borisov <nborisov(a)suse.com>
Signed-off-by: Filipe Manana <fdmanana(a)suse.com>
Reviewed-by: David Sterba <dsterba(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c
index ea78c3d6dcfc..f141b45ce349 100644
--- a/fs/btrfs/xattr.c
+++ b/fs/btrfs/xattr.c
@@ -11,6 +11,7 @@
#include <linux/security.h>
#include <linux/posix_acl_xattr.h>
#include <linux/iversion.h>
+#include <linux/sched/mm.h>
#include "ctree.h"
#include "btrfs_inode.h"
#include "transaction.h"
@@ -422,9 +423,15 @@ static int btrfs_initxattrs(struct inode *inode,
{
const struct xattr *xattr;
struct btrfs_trans_handle *trans = fs_info;
+ unsigned int nofs_flag;
char *name;
int err = 0;
+ /*
+ * We're holding a transaction handle, so use a NOFS memory allocation
+ * context to avoid deadlock if reclaim happens.
+ */
+ nofs_flag = memalloc_nofs_save();
for (xattr = xattr_array; xattr->name != NULL; xattr++) {
name = kmalloc(XATTR_SECURITY_PREFIX_LEN +
strlen(xattr->name) + 1, GFP_KERNEL);
@@ -440,6 +447,7 @@ static int btrfs_initxattrs(struct inode *inode,
if (err < 0)
break;
}
+ memalloc_nofs_restore(nofs_flag);
return err;
}
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 827aa18e7b903c5ff3b3cd8fec328a99b1dbd411 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana(a)suse.com>
Date: Mon, 10 Dec 2018 17:53:35 +0000
Subject: [PATCH] Btrfs: use nofs context when initializing security xattrs to
avoid deadlock
When initializing the security xattrs, we are holding a transaction handle
therefore we need to use a GFP_NOFS context in order to avoid a deadlock
with reclaim in case it's triggered.
Fixes: 39a27ec1004e8 ("btrfs: use GFP_KERNEL for xattr and acl allocations")
Reviewed-by: Nikolay Borisov <nborisov(a)suse.com>
Signed-off-by: Filipe Manana <fdmanana(a)suse.com>
Reviewed-by: David Sterba <dsterba(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c
index ea78c3d6dcfc..f141b45ce349 100644
--- a/fs/btrfs/xattr.c
+++ b/fs/btrfs/xattr.c
@@ -11,6 +11,7 @@
#include <linux/security.h>
#include <linux/posix_acl_xattr.h>
#include <linux/iversion.h>
+#include <linux/sched/mm.h>
#include "ctree.h"
#include "btrfs_inode.h"
#include "transaction.h"
@@ -422,9 +423,15 @@ static int btrfs_initxattrs(struct inode *inode,
{
const struct xattr *xattr;
struct btrfs_trans_handle *trans = fs_info;
+ unsigned int nofs_flag;
char *name;
int err = 0;
+ /*
+ * We're holding a transaction handle, so use a NOFS memory allocation
+ * context to avoid deadlock if reclaim happens.
+ */
+ nofs_flag = memalloc_nofs_save();
for (xattr = xattr_array; xattr->name != NULL; xattr++) {
name = kmalloc(XATTR_SECURITY_PREFIX_LEN +
strlen(xattr->name) + 1, GFP_KERNEL);
@@ -440,6 +447,7 @@ static int btrfs_initxattrs(struct inode *inode,
if (err < 0)
break;
}
+ memalloc_nofs_restore(nofs_flag);
return err;
}
The patch below does not apply to the 4.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 9a6f209e36500efac51528132a3e3083586eda5f Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana(a)suse.com>
Date: Mon, 19 Nov 2018 14:15:36 +0000
Subject: [PATCH] Btrfs: fix deadlock when enabling quotas due to concurrent
snapshot creation
If the quota enable and snapshot creation ioctls are called concurrently
we can get into a deadlock where the task enabling quotas will deadlock
on the fs_info->qgroup_ioctl_lock mutex because it attempts to lock it
twice, or the task creating a snapshot tries to commit the transaction
while the task enabling quota waits for the former task to commit the
transaction while holding the mutex. The following time diagrams show how
both cases happen.
First scenario:
CPU 0 CPU 1
btrfs_ioctl()
btrfs_ioctl_quota_ctl()
btrfs_quota_enable()
mutex_lock(fs_info->qgroup_ioctl_lock)
btrfs_start_transaction()
btrfs_ioctl()
btrfs_ioctl_snap_create_v2
create_snapshot()
--> adds snapshot to the
list pending_snapshots
of the current
transaction
btrfs_commit_transaction()
create_pending_snapshots()
create_pending_snapshot()
qgroup_account_snapshot()
btrfs_qgroup_inherit()
mutex_lock(fs_info->qgroup_ioctl_lock)
--> deadlock, mutex already locked
by this task at
btrfs_quota_enable()
Second scenario:
CPU 0 CPU 1
btrfs_ioctl()
btrfs_ioctl_quota_ctl()
btrfs_quota_enable()
mutex_lock(fs_info->qgroup_ioctl_lock)
btrfs_start_transaction()
btrfs_ioctl()
btrfs_ioctl_snap_create_v2
create_snapshot()
--> adds snapshot to the
list pending_snapshots
of the current
transaction
btrfs_commit_transaction()
--> waits for task at
CPU 0 to release
its transaction
handle
btrfs_commit_transaction()
--> sees another task started
the transaction commit first
--> releases its transaction
handle
--> waits for the transaction
commit to be completed by
the task at CPU 1
create_pending_snapshot()
qgroup_account_snapshot()
btrfs_qgroup_inherit()
mutex_lock(fs_info->qgroup_ioctl_lock)
--> deadlock, task at CPU 0
has the mutex locked but
it is waiting for us to
finish the transaction
commit
So fix this by setting the quota enabled flag in fs_info after committing
the transaction at btrfs_quota_enable(). This ends up serializing quota
enable and snapshot creation as if the snapshot creation happened just
before the quota enable request. The quota rescan task, scheduled after
committing the transaction in btrfs_quote_enable(), will do the accounting.
Fixes: 6426c7ad697d ("btrfs: qgroup: Fix qgroup accounting when creating snapshot")
Signed-off-by: Filipe Manana <fdmanana(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index b53d2e7b938d..2272419ade7e 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1013,16 +1013,22 @@ int btrfs_quota_enable(struct btrfs_fs_info *fs_info)
btrfs_abort_transaction(trans, ret);
goto out_free_path;
}
- spin_lock(&fs_info->qgroup_lock);
- fs_info->quota_root = quota_root;
- set_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
- spin_unlock(&fs_info->qgroup_lock);
ret = btrfs_commit_transaction(trans);
trans = NULL;
if (ret)
goto out_free_path;
+ /*
+ * Set quota enabled flag after committing the transaction, to avoid
+ * deadlocks on fs_info->qgroup_ioctl_lock with concurrent snapshot
+ * creation.
+ */
+ spin_lock(&fs_info->qgroup_lock);
+ fs_info->quota_root = quota_root;
+ set_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
+ spin_unlock(&fs_info->qgroup_lock);
+
ret = qgroup_rescan_init(fs_info, 0, 1);
if (!ret) {
qgroup_rescan_zero_tracking(fs_info);
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 9a6f209e36500efac51528132a3e3083586eda5f Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana(a)suse.com>
Date: Mon, 19 Nov 2018 14:15:36 +0000
Subject: [PATCH] Btrfs: fix deadlock when enabling quotas due to concurrent
snapshot creation
If the quota enable and snapshot creation ioctls are called concurrently
we can get into a deadlock where the task enabling quotas will deadlock
on the fs_info->qgroup_ioctl_lock mutex because it attempts to lock it
twice, or the task creating a snapshot tries to commit the transaction
while the task enabling quota waits for the former task to commit the
transaction while holding the mutex. The following time diagrams show how
both cases happen.
First scenario:
CPU 0 CPU 1
btrfs_ioctl()
btrfs_ioctl_quota_ctl()
btrfs_quota_enable()
mutex_lock(fs_info->qgroup_ioctl_lock)
btrfs_start_transaction()
btrfs_ioctl()
btrfs_ioctl_snap_create_v2
create_snapshot()
--> adds snapshot to the
list pending_snapshots
of the current
transaction
btrfs_commit_transaction()
create_pending_snapshots()
create_pending_snapshot()
qgroup_account_snapshot()
btrfs_qgroup_inherit()
mutex_lock(fs_info->qgroup_ioctl_lock)
--> deadlock, mutex already locked
by this task at
btrfs_quota_enable()
Second scenario:
CPU 0 CPU 1
btrfs_ioctl()
btrfs_ioctl_quota_ctl()
btrfs_quota_enable()
mutex_lock(fs_info->qgroup_ioctl_lock)
btrfs_start_transaction()
btrfs_ioctl()
btrfs_ioctl_snap_create_v2
create_snapshot()
--> adds snapshot to the
list pending_snapshots
of the current
transaction
btrfs_commit_transaction()
--> waits for task at
CPU 0 to release
its transaction
handle
btrfs_commit_transaction()
--> sees another task started
the transaction commit first
--> releases its transaction
handle
--> waits for the transaction
commit to be completed by
the task at CPU 1
create_pending_snapshot()
qgroup_account_snapshot()
btrfs_qgroup_inherit()
mutex_lock(fs_info->qgroup_ioctl_lock)
--> deadlock, task at CPU 0
has the mutex locked but
it is waiting for us to
finish the transaction
commit
So fix this by setting the quota enabled flag in fs_info after committing
the transaction at btrfs_quota_enable(). This ends up serializing quota
enable and snapshot creation as if the snapshot creation happened just
before the quota enable request. The quota rescan task, scheduled after
committing the transaction in btrfs_quote_enable(), will do the accounting.
Fixes: 6426c7ad697d ("btrfs: qgroup: Fix qgroup accounting when creating snapshot")
Signed-off-by: Filipe Manana <fdmanana(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index b53d2e7b938d..2272419ade7e 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1013,16 +1013,22 @@ int btrfs_quota_enable(struct btrfs_fs_info *fs_info)
btrfs_abort_transaction(trans, ret);
goto out_free_path;
}
- spin_lock(&fs_info->qgroup_lock);
- fs_info->quota_root = quota_root;
- set_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
- spin_unlock(&fs_info->qgroup_lock);
ret = btrfs_commit_transaction(trans);
trans = NULL;
if (ret)
goto out_free_path;
+ /*
+ * Set quota enabled flag after committing the transaction, to avoid
+ * deadlocks on fs_info->qgroup_ioctl_lock with concurrent snapshot
+ * creation.
+ */
+ spin_lock(&fs_info->qgroup_lock);
+ fs_info->quota_root = quota_root;
+ set_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
+ spin_unlock(&fs_info->qgroup_lock);
+
ret = qgroup_rescan_init(fs_info, 0, 1);
if (!ret) {
qgroup_rescan_zero_tracking(fs_info);
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 9a6f209e36500efac51528132a3e3083586eda5f Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana(a)suse.com>
Date: Mon, 19 Nov 2018 14:15:36 +0000
Subject: [PATCH] Btrfs: fix deadlock when enabling quotas due to concurrent
snapshot creation
If the quota enable and snapshot creation ioctls are called concurrently
we can get into a deadlock where the task enabling quotas will deadlock
on the fs_info->qgroup_ioctl_lock mutex because it attempts to lock it
twice, or the task creating a snapshot tries to commit the transaction
while the task enabling quota waits for the former task to commit the
transaction while holding the mutex. The following time diagrams show how
both cases happen.
First scenario:
CPU 0 CPU 1
btrfs_ioctl()
btrfs_ioctl_quota_ctl()
btrfs_quota_enable()
mutex_lock(fs_info->qgroup_ioctl_lock)
btrfs_start_transaction()
btrfs_ioctl()
btrfs_ioctl_snap_create_v2
create_snapshot()
--> adds snapshot to the
list pending_snapshots
of the current
transaction
btrfs_commit_transaction()
create_pending_snapshots()
create_pending_snapshot()
qgroup_account_snapshot()
btrfs_qgroup_inherit()
mutex_lock(fs_info->qgroup_ioctl_lock)
--> deadlock, mutex already locked
by this task at
btrfs_quota_enable()
Second scenario:
CPU 0 CPU 1
btrfs_ioctl()
btrfs_ioctl_quota_ctl()
btrfs_quota_enable()
mutex_lock(fs_info->qgroup_ioctl_lock)
btrfs_start_transaction()
btrfs_ioctl()
btrfs_ioctl_snap_create_v2
create_snapshot()
--> adds snapshot to the
list pending_snapshots
of the current
transaction
btrfs_commit_transaction()
--> waits for task at
CPU 0 to release
its transaction
handle
btrfs_commit_transaction()
--> sees another task started
the transaction commit first
--> releases its transaction
handle
--> waits for the transaction
commit to be completed by
the task at CPU 1
create_pending_snapshot()
qgroup_account_snapshot()
btrfs_qgroup_inherit()
mutex_lock(fs_info->qgroup_ioctl_lock)
--> deadlock, task at CPU 0
has the mutex locked but
it is waiting for us to
finish the transaction
commit
So fix this by setting the quota enabled flag in fs_info after committing
the transaction at btrfs_quota_enable(). This ends up serializing quota
enable and snapshot creation as if the snapshot creation happened just
before the quota enable request. The quota rescan task, scheduled after
committing the transaction in btrfs_quote_enable(), will do the accounting.
Fixes: 6426c7ad697d ("btrfs: qgroup: Fix qgroup accounting when creating snapshot")
Signed-off-by: Filipe Manana <fdmanana(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index b53d2e7b938d..2272419ade7e 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1013,16 +1013,22 @@ int btrfs_quota_enable(struct btrfs_fs_info *fs_info)
btrfs_abort_transaction(trans, ret);
goto out_free_path;
}
- spin_lock(&fs_info->qgroup_lock);
- fs_info->quota_root = quota_root;
- set_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
- spin_unlock(&fs_info->qgroup_lock);
ret = btrfs_commit_transaction(trans);
trans = NULL;
if (ret)
goto out_free_path;
+ /*
+ * Set quota enabled flag after committing the transaction, to avoid
+ * deadlocks on fs_info->qgroup_ioctl_lock with concurrent snapshot
+ * creation.
+ */
+ spin_lock(&fs_info->qgroup_lock);
+ fs_info->quota_root = quota_root;
+ set_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
+ spin_unlock(&fs_info->qgroup_lock);
+
ret = qgroup_rescan_init(fs_info, 0, 1);
if (!ret) {
qgroup_rescan_zero_tracking(fs_info);
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 5a8067c0d17feb7579db0476191417b441a8996e Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana(a)suse.com>
Date: Mon, 19 Nov 2018 09:48:12 +0000
Subject: [PATCH] Btrfs: fix access to available allocation bits when starting
balance
The available allocation bits members from struct btrfs_fs_info are
protected by a sequence lock, and when starting balance we access them
incorrectly in two different ways:
1) In the read sequence lock loop at btrfs_balance() we use the values we
read from fs_info->avail_*_alloc_bits and we can immediately do actions
that have side effects and can not be undone (printing a message and
jumping to a label). This is wrong because a retry might be needed, so
our actions must not have side effects and must be repeatable as long
as read_seqretry() returns a non-zero value. In other words, we were
essentially ignoring the sequence lock;
2) Right below the read sequence lock loop, we were reading the values
from avail_metadata_alloc_bits and avail_data_alloc_bits without any
protection from concurrent writers, that is, reading them outside of
the read sequence lock critical section.
So fix this by making sure we only read the available allocation bits
while in a read sequence lock critical section and that what we do in the
critical section is repeatable (has nothing that can not be undone) so
that any eventual retry that is needed is handled properly.
Fixes: de98ced9e743 ("Btrfs: use seqlock to protect fs_info->avail_{data, metadata, system}_alloc_bits")
Fixes: 14506127979a ("btrfs: fix a bogus warning when converting only data or metadata")
Reviewed-by: Nikolay Borisov <nborisov(a)suse.com>
Signed-off-by: Filipe Manana <fdmanana(a)suse.com>
Reviewed-by: David Sterba <dsterba(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index a89596c3de56..2ebc827bed88 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -3938,6 +3938,7 @@ int btrfs_balance(struct btrfs_fs_info *fs_info,
int ret;
u64 num_devices;
unsigned seq;
+ bool reducing_integrity;
if (btrfs_fs_closing(fs_info) ||
atomic_read(&fs_info->balance_pause_req) ||
@@ -4017,24 +4018,30 @@ int btrfs_balance(struct btrfs_fs_info *fs_info,
!(bctl->sys.target & allowed)) ||
((bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
(fs_info->avail_metadata_alloc_bits & allowed) &&
- !(bctl->meta.target & allowed))) {
- if (bctl->flags & BTRFS_BALANCE_FORCE) {
- btrfs_info(fs_info,
- "balance: force reducing metadata integrity");
- } else {
- btrfs_err(fs_info,
- "balance: reduces metadata integrity, use --force if you want this");
- ret = -EINVAL;
- goto out;
- }
- }
+ !(bctl->meta.target & allowed)))
+ reducing_integrity = true;
+ else
+ reducing_integrity = false;
+
+ /* if we're not converting, the target field is uninitialized */
+ meta_target = (bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) ?
+ bctl->meta.target : fs_info->avail_metadata_alloc_bits;
+ data_target = (bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) ?
+ bctl->data.target : fs_info->avail_data_alloc_bits;
} while (read_seqretry(&fs_info->profiles_lock, seq));
- /* if we're not converting, the target field is uninitialized */
- meta_target = (bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) ?
- bctl->meta.target : fs_info->avail_metadata_alloc_bits;
- data_target = (bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) ?
- bctl->data.target : fs_info->avail_data_alloc_bits;
+ if (reducing_integrity) {
+ if (bctl->flags & BTRFS_BALANCE_FORCE) {
+ btrfs_info(fs_info,
+ "balance: force reducing metadata integrity");
+ } else {
+ btrfs_err(fs_info,
+ "balance: reduces metadata integrity, use --force if you want this");
+ ret = -EINVAL;
+ goto out;
+ }
+ }
+
if (btrfs_get_num_tolerated_disk_barrier_failures(meta_target) <
btrfs_get_num_tolerated_disk_barrier_failures(data_target)) {
int meta_index = btrfs_bg_flags_to_raid_index(meta_target);
The patch below does not apply to the 4.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 5a8067c0d17feb7579db0476191417b441a8996e Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana(a)suse.com>
Date: Mon, 19 Nov 2018 09:48:12 +0000
Subject: [PATCH] Btrfs: fix access to available allocation bits when starting
balance
The available allocation bits members from struct btrfs_fs_info are
protected by a sequence lock, and when starting balance we access them
incorrectly in two different ways:
1) In the read sequence lock loop at btrfs_balance() we use the values we
read from fs_info->avail_*_alloc_bits and we can immediately do actions
that have side effects and can not be undone (printing a message and
jumping to a label). This is wrong because a retry might be needed, so
our actions must not have side effects and must be repeatable as long
as read_seqretry() returns a non-zero value. In other words, we were
essentially ignoring the sequence lock;
2) Right below the read sequence lock loop, we were reading the values
from avail_metadata_alloc_bits and avail_data_alloc_bits without any
protection from concurrent writers, that is, reading them outside of
the read sequence lock critical section.
So fix this by making sure we only read the available allocation bits
while in a read sequence lock critical section and that what we do in the
critical section is repeatable (has nothing that can not be undone) so
that any eventual retry that is needed is handled properly.
Fixes: de98ced9e743 ("Btrfs: use seqlock to protect fs_info->avail_{data, metadata, system}_alloc_bits")
Fixes: 14506127979a ("btrfs: fix a bogus warning when converting only data or metadata")
Reviewed-by: Nikolay Borisov <nborisov(a)suse.com>
Signed-off-by: Filipe Manana <fdmanana(a)suse.com>
Reviewed-by: David Sterba <dsterba(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index a89596c3de56..2ebc827bed88 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -3938,6 +3938,7 @@ int btrfs_balance(struct btrfs_fs_info *fs_info,
int ret;
u64 num_devices;
unsigned seq;
+ bool reducing_integrity;
if (btrfs_fs_closing(fs_info) ||
atomic_read(&fs_info->balance_pause_req) ||
@@ -4017,24 +4018,30 @@ int btrfs_balance(struct btrfs_fs_info *fs_info,
!(bctl->sys.target & allowed)) ||
((bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
(fs_info->avail_metadata_alloc_bits & allowed) &&
- !(bctl->meta.target & allowed))) {
- if (bctl->flags & BTRFS_BALANCE_FORCE) {
- btrfs_info(fs_info,
- "balance: force reducing metadata integrity");
- } else {
- btrfs_err(fs_info,
- "balance: reduces metadata integrity, use --force if you want this");
- ret = -EINVAL;
- goto out;
- }
- }
+ !(bctl->meta.target & allowed)))
+ reducing_integrity = true;
+ else
+ reducing_integrity = false;
+
+ /* if we're not converting, the target field is uninitialized */
+ meta_target = (bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) ?
+ bctl->meta.target : fs_info->avail_metadata_alloc_bits;
+ data_target = (bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) ?
+ bctl->data.target : fs_info->avail_data_alloc_bits;
} while (read_seqretry(&fs_info->profiles_lock, seq));
- /* if we're not converting, the target field is uninitialized */
- meta_target = (bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) ?
- bctl->meta.target : fs_info->avail_metadata_alloc_bits;
- data_target = (bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) ?
- bctl->data.target : fs_info->avail_data_alloc_bits;
+ if (reducing_integrity) {
+ if (bctl->flags & BTRFS_BALANCE_FORCE) {
+ btrfs_info(fs_info,
+ "balance: force reducing metadata integrity");
+ } else {
+ btrfs_err(fs_info,
+ "balance: reduces metadata integrity, use --force if you want this");
+ ret = -EINVAL;
+ goto out;
+ }
+ }
+
if (btrfs_get_num_tolerated_disk_barrier_failures(meta_target) <
btrfs_get_num_tolerated_disk_barrier_failures(data_target)) {
int meta_index = btrfs_bg_flags_to_raid_index(meta_target);
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 5a8067c0d17feb7579db0476191417b441a8996e Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana(a)suse.com>
Date: Mon, 19 Nov 2018 09:48:12 +0000
Subject: [PATCH] Btrfs: fix access to available allocation bits when starting
balance
The available allocation bits members from struct btrfs_fs_info are
protected by a sequence lock, and when starting balance we access them
incorrectly in two different ways:
1) In the read sequence lock loop at btrfs_balance() we use the values we
read from fs_info->avail_*_alloc_bits and we can immediately do actions
that have side effects and can not be undone (printing a message and
jumping to a label). This is wrong because a retry might be needed, so
our actions must not have side effects and must be repeatable as long
as read_seqretry() returns a non-zero value. In other words, we were
essentially ignoring the sequence lock;
2) Right below the read sequence lock loop, we were reading the values
from avail_metadata_alloc_bits and avail_data_alloc_bits without any
protection from concurrent writers, that is, reading them outside of
the read sequence lock critical section.
So fix this by making sure we only read the available allocation bits
while in a read sequence lock critical section and that what we do in the
critical section is repeatable (has nothing that can not be undone) so
that any eventual retry that is needed is handled properly.
Fixes: de98ced9e743 ("Btrfs: use seqlock to protect fs_info->avail_{data, metadata, system}_alloc_bits")
Fixes: 14506127979a ("btrfs: fix a bogus warning when converting only data or metadata")
Reviewed-by: Nikolay Borisov <nborisov(a)suse.com>
Signed-off-by: Filipe Manana <fdmanana(a)suse.com>
Reviewed-by: David Sterba <dsterba(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index a89596c3de56..2ebc827bed88 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -3938,6 +3938,7 @@ int btrfs_balance(struct btrfs_fs_info *fs_info,
int ret;
u64 num_devices;
unsigned seq;
+ bool reducing_integrity;
if (btrfs_fs_closing(fs_info) ||
atomic_read(&fs_info->balance_pause_req) ||
@@ -4017,24 +4018,30 @@ int btrfs_balance(struct btrfs_fs_info *fs_info,
!(bctl->sys.target & allowed)) ||
((bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
(fs_info->avail_metadata_alloc_bits & allowed) &&
- !(bctl->meta.target & allowed))) {
- if (bctl->flags & BTRFS_BALANCE_FORCE) {
- btrfs_info(fs_info,
- "balance: force reducing metadata integrity");
- } else {
- btrfs_err(fs_info,
- "balance: reduces metadata integrity, use --force if you want this");
- ret = -EINVAL;
- goto out;
- }
- }
+ !(bctl->meta.target & allowed)))
+ reducing_integrity = true;
+ else
+ reducing_integrity = false;
+
+ /* if we're not converting, the target field is uninitialized */
+ meta_target = (bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) ?
+ bctl->meta.target : fs_info->avail_metadata_alloc_bits;
+ data_target = (bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) ?
+ bctl->data.target : fs_info->avail_data_alloc_bits;
} while (read_seqretry(&fs_info->profiles_lock, seq));
- /* if we're not converting, the target field is uninitialized */
- meta_target = (bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) ?
- bctl->meta.target : fs_info->avail_metadata_alloc_bits;
- data_target = (bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) ?
- bctl->data.target : fs_info->avail_data_alloc_bits;
+ if (reducing_integrity) {
+ if (bctl->flags & BTRFS_BALANCE_FORCE) {
+ btrfs_info(fs_info,
+ "balance: force reducing metadata integrity");
+ } else {
+ btrfs_err(fs_info,
+ "balance: reduces metadata integrity, use --force if you want this");
+ ret = -EINVAL;
+ goto out;
+ }
+ }
+
if (btrfs_get_num_tolerated_disk_barrier_failures(meta_target) <
btrfs_get_num_tolerated_disk_barrier_failures(data_target)) {
int meta_index = btrfs_bg_flags_to_raid_index(meta_target);
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 53290432145a8eb143fe29e06e9c1465d43dc723 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon(a)arm.com>
Date: Thu, 3 Jan 2019 18:00:39 +0000
Subject: [PATCH] arm64: compat: Don't pull syscall number from regs in
arm_compat_syscall
The syscall number may have been changed by a tracer, so we should pass
the actual number in from the caller instead of pulling it from the
saved r7 value directly.
Cc: <stable(a)vger.kernel.org>
Cc: Pi-Hsun Shih <pihsun(a)chromium.org>
Reviewed-by: Dave Martin <Dave.Martin(a)arm.com>
Signed-off-by: Will Deacon <will.deacon(a)arm.com>
diff --git a/arch/arm64/kernel/sys_compat.c b/arch/arm64/kernel/sys_compat.c
index a79db4e485a6..bc348ab3dd6b 100644
--- a/arch/arm64/kernel/sys_compat.c
+++ b/arch/arm64/kernel/sys_compat.c
@@ -66,12 +66,11 @@ do_compat_cache_op(unsigned long start, unsigned long end, int flags)
/*
* Handle all unrecognised system calls.
*/
-long compat_arm_syscall(struct pt_regs *regs)
+long compat_arm_syscall(struct pt_regs *regs, int scno)
{
- unsigned int no = regs->regs[7];
void __user *addr;
- switch (no) {
+ switch (scno) {
/*
* Flush a region from virtual address 'r0' to virtual address 'r1'
* _exclusive_. There is no alignment requirement on either address;
@@ -107,7 +106,7 @@ long compat_arm_syscall(struct pt_regs *regs)
* way the calling program can gracefully determine whether
* a feature is supported.
*/
- if (no < __ARM_NR_COMPAT_END)
+ if (scno < __ARM_NR_COMPAT_END)
return -ENOSYS;
break;
}
@@ -116,6 +115,6 @@ long compat_arm_syscall(struct pt_regs *regs)
(compat_thumb_mode(regs) ? 2 : 4);
arm64_notify_die("Oops - bad compat syscall(2)", regs,
- SIGILL, ILL_ILLTRP, addr, no);
+ SIGILL, ILL_ILLTRP, addr, scno);
return 0;
}
diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c
index 032d22312881..5610ac01c1ec 100644
--- a/arch/arm64/kernel/syscall.c
+++ b/arch/arm64/kernel/syscall.c
@@ -13,16 +13,15 @@
#include <asm/thread_info.h>
#include <asm/unistd.h>
-long compat_arm_syscall(struct pt_regs *regs);
-
+long compat_arm_syscall(struct pt_regs *regs, int scno);
long sys_ni_syscall(void);
-asmlinkage long do_ni_syscall(struct pt_regs *regs)
+static long do_ni_syscall(struct pt_regs *regs, int scno)
{
#ifdef CONFIG_COMPAT
long ret;
if (is_compat_task()) {
- ret = compat_arm_syscall(regs);
+ ret = compat_arm_syscall(regs, scno);
if (ret != -ENOSYS)
return ret;
}
@@ -47,7 +46,7 @@ static void invoke_syscall(struct pt_regs *regs, unsigned int scno,
syscall_fn = syscall_table[array_index_nospec(scno, sc_nr)];
ret = __invoke_syscall(regs, syscall_fn);
} else {
- ret = do_ni_syscall(regs);
+ ret = do_ni_syscall(regs, scno);
}
regs->regs[0] = ret;