Commit 38d715f494f2 ("btrfs: use btrfs_start_delalloc_roots in
shrink_delalloc") cleaned up how we do delalloc shrinking by utilizing
some infrastructure we have in place to flush inodes that we use for
device replace and snapshot. However, this introduced a pretty serious
performance regression. To reproduce it, the user untarred the source
tarball of Firefox, and would see it take anywhere from 5 to 20 times as
long to untar in 5.10 compared to 5.9.
The root cause is that, before that commit, we would generally use the normal
writeback path to reclaim delalloc space, and for this we would provide
it with the number of pages we wanted to flush. The referenced commit
changed this to flush that many inodes, which drastically increased the
amount of space we were flushing in certain cases, which severely
affected performance.
We cannot revert this patch unfortunately, because Filipe has another
fix that requires the ability to skip flushing inodes that are being
cloned in certain scenarios, which means we need to keep using our
flushing infrastructure or risk re-introducing the deadlock.
Instead, to fix this problem, we can go back to providing
btrfs_start_delalloc_roots with a number of pages to flush, and then set
up a writeback_control and utilize sync_inode() to handle the flushing
for us. This gives us the same behavior we had prior to the fix, while
still allowing us to avoid the deadlock that was fixed by Filipe. I
redid the user's original test and got the following results:
5.9 0m54.258s
5.10 1m26.212s
Patched 0m38.800s
We are significantly faster because of the work I did around improving
ENOSPC flushing in 5.10 and 5.11, so reverting to the previous write out
behavior gave us a pretty big boost.
CC: stable(a)vger.kernel.org # 5.10
Reported-by: René Rebe <rene(a)exactcode.de>
Fixes: 38d715f494f2 ("btrfs: use btrfs_start_delalloc_roots in shrink_delalloc")
Signed-off-by: Josef Bacik <josef(a)toxicpanda.com>
---
v1->v2:
- Explicitly state what the regression was in the commit message.
fs/btrfs/inode.c | 60 +++++++++++++++++++++++++++++++------------
fs/btrfs/space-info.c | 4 ++-
2 files changed, 46 insertions(+), 18 deletions(-)
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 070716650df8..a8e0a6b038d3 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -9390,7 +9390,8 @@ static struct btrfs_delalloc_work *btrfs_alloc_delalloc_work(struct inode *inode
* some fairly slow code that needs optimization. This walks the list
* of all the inodes with pending delalloc and forces them to disk.
*/
-static int start_delalloc_inodes(struct btrfs_root *root, u64 *nr, bool snapshot,
+static int start_delalloc_inodes(struct btrfs_root *root,
+ struct writeback_control *wbc, bool snapshot,
bool in_reclaim_context)
{
struct btrfs_inode *binode;
@@ -9399,6 +9400,7 @@ static int start_delalloc_inodes(struct btrfs_root *root, u64 *nr, bool snapshot
struct list_head works;
struct list_head splice;
int ret = 0;
+ bool full_flush = wbc->nr_to_write == LONG_MAX;
INIT_LIST_HEAD(&works);
INIT_LIST_HEAD(&splice);
@@ -9427,18 +9429,24 @@ static int start_delalloc_inodes(struct btrfs_root *root, u64 *nr, bool snapshot
if (snapshot)
set_bit(BTRFS_INODE_SNAPSHOT_FLUSH,
&binode->runtime_flags);
- work = btrfs_alloc_delalloc_work(inode);
- if (!work) {
- iput(inode);
- ret = -ENOMEM;
- goto out;
- }
- list_add_tail(&work->list, &works);
- btrfs_queue_work(root->fs_info->flush_workers,
- &work->work);
- if (*nr != U64_MAX) {
- (*nr)--;
- if (*nr == 0)
+ if (full_flush) {
+ work = btrfs_alloc_delalloc_work(inode);
+ if (!work) {
+ iput(inode);
+ ret = -ENOMEM;
+ goto out;
+ }
+ list_add_tail(&work->list, &works);
+ btrfs_queue_work(root->fs_info->flush_workers,
+ &work->work);
+ } else {
+ ret = sync_inode(inode, wbc);
+ if (!ret &&
+ test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
+ &BTRFS_I(inode)->runtime_flags))
+ ret = sync_inode(inode, wbc);
+ btrfs_add_delayed_iput(inode);
+ if (ret || wbc->nr_to_write <= 0)
goto out;
}
cond_resched();
@@ -9464,18 +9472,29 @@ static int start_delalloc_inodes(struct btrfs_root *root, u64 *nr, bool snapshot
int btrfs_start_delalloc_snapshot(struct btrfs_root *root)
{
+ struct writeback_control wbc = {
+ .nr_to_write = LONG_MAX,
+ .sync_mode = WB_SYNC_NONE,
+ .range_start = 0,
+ .range_end = LLONG_MAX,
+ };
struct btrfs_fs_info *fs_info = root->fs_info;
- u64 nr = U64_MAX;
if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state))
return -EROFS;
- return start_delalloc_inodes(root, &nr, true, false);
+ return start_delalloc_inodes(root, &wbc, true, false);
}
int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, u64 nr,
bool in_reclaim_context)
{
+ struct writeback_control wbc = {
+ .nr_to_write = (nr == U64_MAX) ? LONG_MAX : (unsigned long)nr,
+ .sync_mode = WB_SYNC_NONE,
+ .range_start = 0,
+ .range_end = LLONG_MAX,
+ };
struct btrfs_root *root;
struct list_head splice;
int ret;
@@ -9489,6 +9508,13 @@ int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, u64 nr,
spin_lock(&fs_info->delalloc_root_lock);
list_splice_init(&fs_info->delalloc_roots, &splice);
while (!list_empty(&splice) && nr) {
+ /*
+ * Reset nr_to_write here so we know that we're doing a full
+ * flush.
+ */
+ if (nr == U64_MAX)
+ wbc.nr_to_write = LONG_MAX;
+
root = list_first_entry(&splice, struct btrfs_root,
delalloc_root);
root = btrfs_grab_root(root);
@@ -9497,9 +9523,9 @@ int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, u64 nr,
&fs_info->delalloc_roots);
spin_unlock(&fs_info->delalloc_root_lock);
- ret = start_delalloc_inodes(root, &nr, false, in_reclaim_context);
+ ret = start_delalloc_inodes(root, &wbc, false, in_reclaim_context);
btrfs_put_root(root);
- if (ret < 0)
+ if (ret < 0 || wbc.nr_to_write <= 0)
goto out;
spin_lock(&fs_info->delalloc_root_lock);
}
diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c
index 67e55c5479b8..e8347461c8dd 100644
--- a/fs/btrfs/space-info.c
+++ b/fs/btrfs/space-info.c
@@ -532,7 +532,9 @@ static void shrink_delalloc(struct btrfs_fs_info *fs_info,
loops = 0;
while ((delalloc_bytes || dio_bytes) && loops < 3) {
- btrfs_start_delalloc_roots(fs_info, items, true);
+ u64 nr_pages = min(delalloc_bytes, to_reclaim) >> PAGE_SHIFT;
+
+ btrfs_start_delalloc_roots(fs_info, nr_pages, true);
loops++;
if (wait_ordered && !trans) {
--
2.26.2
Ouya fails to detect the eMMC module when booted via certain bootloaders.
Fastboot and hard-kexec bootloaders fail while u-boot does not. It was
discovered that the issue manifests if the sdmmc4 alternate configuration
clock pin has its input disabled.
Ouya uses sdmmc4 in the primary pin configuration. It is unknown why this
occurs, though it is likely related to other eMMC limitations experienced
on Ouya.
For now, fix it by enabling input on cam_mclk_pcc0.
Cc: stable(a)vger.kernel.org # 5.10+
Fixes: d7195ac5c9c5 ("ARM: tegra: Add device-tree for Ouya")
Reported-by: Matt Merhar <mattmerhar(a)protonmail.com>
Tested-by: Matt Merhar <mattmerhar(a)protonmail.com>
Signed-off-by: Peter Geis <pgwipeout(a)gmail.com>
---
Changes v2:
-Added stable tag.
-Improved commit message.
arch/arm/boot/dts/tegra30-ouya.dts | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/arch/arm/boot/dts/tegra30-ouya.dts b/arch/arm/boot/dts/tegra30-ouya.dts
index 74da1360d297..0368b3b816ef 100644
--- a/arch/arm/boot/dts/tegra30-ouya.dts
+++ b/arch/arm/boot/dts/tegra30-ouya.dts
@@ -4352,8 +4352,8 @@ cam_mclk_pcc0 {
nvidia,pins = "cam_mclk_pcc0";
nvidia,function = "vi_alt3";
nvidia,pull = <TEGRA_PIN_PULL_NONE>;
- nvidia,tristate = <TEGRA_PIN_ENABLE>;
- nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
};
pcc1 {
nvidia,pins = "pcc1";
--
2.25.1
This is the start of the stable review cycle for the 4.9.250 release.
There are 32 patches in this series, all will be posted as a response
to this one. If anyone has any issues with these being applied, please
let me know.
Responses should be made by Sat, 09 Jan 2021 14:08:13 +0000.
Anything received after that time might be too late.
The whole patch series can be found in one patch at:
https://www.kernel.org/pub/linux/kernel/v4.x/stable-review/patch-4.9.250-rc1.gz
or in the git tree and branch at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git linux-4.9.y
and the diffstat can be found below.
thanks,
greg k-h
-------------
Pseudo-Shortlog of commits:
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Linux 4.9.250-rc1
Jonathan Cameron <Jonathan.Cameron(a)huawei.com>
iio:magnetometer:mag3110: Fix alignment and data leak issues.
Jonathan Cameron <Jonathan.Cameron(a)huawei.com>
iio:imu:bmi160: Fix alignment and data leak issues
Jonathan Cameron <Jonathan.Cameron(a)huawei.com>
iio:imu:bmi160: Fix too large a buffer.
sayli karnik <karniksayli1995(a)gmail.com>
iio: bmi160_core: Fix sparse warning due to incorrect type in assignment
SeongJae Park <sjpark(a)amazon.de>
xenbus/xenbus_backend: Disallow pending watch messages
SeongJae Park <sjpark(a)amazon.de>
xen/xenbus: Count pending messages for each watch
SeongJae Park <sjpark(a)amazon.de>
xen/xenbus/xen_bus_type: Support will_handle watch callback
SeongJae Park <sjpark(a)amazon.de>
xen/xenbus: Add 'will_handle' callback support in xenbus_watch_path()
SeongJae Park <sjpark(a)amazon.de>
xen/xenbus: Allow watches discard events before queueing
Josh Poimboeuf <jpoimboe(a)redhat.com>
kdev_t: always inline major/minor helper functions
Jessica Yu <jeyu(a)kernel.org>
module: delay kobject uevent until after module init call
Qinglang Miao <miaoqinglang(a)huawei.com>
powerpc: sysdev: add missing iounmap() on error in mpic_msgr_probe()
Jan Kara <jack(a)suse.cz>
quota: Don't overflow quota file offsets
Miroslav Benes <mbenes(a)suse.cz>
module: set MODULE_STATE_GOING state when a module fails to load
Takashi Iwai <tiwai(a)suse.de>
ALSA: seq: Use bool for snd_seq_queue internal flags
Mauro Carvalho Chehab <mchehab+huawei(a)kernel.org>
media: gp8psk: initialize stats at power control logic
Anant Thazhemadam <anant.thazhemadam(a)gmail.com>
misc: vmw_vmci: fix kernel info-leak by initializing dbells in vmci_ctx_get_chkpt_doorbells()
Rustam Kovhaev <rkovhaev(a)gmail.com>
reiserfs: add check for an invalid ih_entry_count
Johan Hovold <johan(a)kernel.org>
of: fix linker-section match-table corruption
Petr Vorel <petr.vorel(a)gmail.com>
uapi: move constants from <linux/kernel.h> to <linux/const.h>
Paolo Abeni <pabeni(a)redhat.com>
l2tp: fix races with ipv4-mapped ipv6 addresses
Paolo Abeni <pabeni(a)redhat.com>
net: ipv6: keep sk status consistent after datagram connect failure
Johan Hovold <johan(a)kernel.org>
USB: serial: digi_acceleport: fix write-wakeup deadlocks
Stefan Haberland <sth(a)linux.ibm.com>
s390/dasd: fix hanging device offline processing
Eric Auger <eric.auger(a)redhat.com>
vfio/pci: Move dummy_resources_list init in vfio_pci_probe()
Kailang Yang <kailang(a)realtek.com>
ALSA: hda/realtek - Dell headphone has noise on unmute for ALC236
Hui Wang <hui.wang(a)canonical.com>
ALSA: hda - Fix a wrong FIXUP for alc289 on Dell machines
Kailang Yang <kailang(a)realtek.com>
ALSA: hda/realtek - Support Dell headset mode for ALC3271
Johan Hovold <johan(a)kernel.org>
ALSA: usb-audio: fix sync-ep altsetting sanity check
Alberto Aguirre <albaguirre(a)gmail.com>
ALSA: usb-audio: simplify set_sync_ep_implicit_fb_quirk
Takashi Iwai <tiwai(a)suse.de>
ALSA: hda/ca0132 - Fix work handling in delayed HP detection
Jan Beulich <JBeulich(a)suse.com>
x86/entry/64: Add instruction suffix
-------------
Diffstat:
Makefile | 4 +--
arch/powerpc/sysdev/mpic_msgr.c | 2 +-
arch/x86/entry/entry_64.S | 2 +-
drivers/block/xen-blkback/xenbus.c | 3 +-
drivers/iio/imu/bmi160/bmi160_core.c | 12 +++++--
drivers/iio/magnetometer/mag3110.c | 13 +++++---
drivers/media/usb/dvb-usb/gp8psk.c | 2 +-
drivers/misc/vmw_vmci/vmci_context.c | 2 +-
drivers/net/xen-netback/xenbus.c | 4 ++-
drivers/s390/block/dasd_alias.c | 10 +++++-
drivers/usb/serial/digi_acceleport.c | 45 ++++++++------------------
drivers/vfio/pci/vfio_pci.c | 4 +--
drivers/xen/xen-pciback/xenbus.c | 2 +-
drivers/xen/xenbus/xenbus_client.c | 8 ++++-
drivers/xen/xenbus/xenbus_probe.c | 1 +
drivers/xen/xenbus/xenbus_probe.h | 2 ++
drivers/xen/xenbus/xenbus_probe_backend.c | 7 +++++
drivers/xen/xenbus/xenbus_xs.c | 38 ++++++++++++++--------
fs/quota/quota_tree.c | 8 ++---
fs/reiserfs/stree.c | 6 ++++
include/linux/kdev_t.h | 22 ++++++-------
include/linux/of.h | 1 +
include/uapi/linux/const.h | 5 +++
include/uapi/linux/ethtool.h | 2 +-
include/uapi/linux/kernel.h | 9 +-----
include/uapi/linux/lightnvm.h | 2 +-
include/uapi/linux/mroute6.h | 2 +-
include/uapi/linux/netfilter/x_tables.h | 2 +-
include/uapi/linux/netlink.h | 2 +-
include/uapi/linux/sysctl.h | 2 +-
include/xen/xenbus.h | 15 ++++++++-
kernel/module.c | 6 ++--
net/ipv6/datagram.c | 21 ++++++++++---
net/l2tp/l2tp_core.c | 38 +++++++++++-----------
net/l2tp/l2tp_core.h | 3 --
sound/core/seq/seq_queue.h | 8 ++---
sound/pci/hda/patch_ca0132.c | 16 ++++++++--
sound/pci/hda/patch_realtek.c | 25 +++++++++++++--
sound/usb/pcm.c | 52 ++++++++++++-------------------
39 files changed, 242 insertions(+), 166 deletions(-)