The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 5f9062a48db260fd6b53d86ecfb4d5dc59266316
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024102858-yield-gestate-e6bc@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 5f9062a48db260fd6b53d86ecfb4d5dc59266316 Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu(a)suse.com>
Date: Tue, 10 Sep 2024 15:21:04 +0930
Subject: [PATCH] btrfs: qgroup: set a more sane default value for subtree drop
threshold
Since commit 011b46c30476 ("btrfs: skip subtree scan if it's too high to
avoid low stall in btrfs_commit_transaction()"), btrfs qgroup can
automatically skip large subtree scan at the cost of marking qgroup
inconsistent.
It's designed to address the final performance problem of snapshot drop
with qgroup enabled, but to be safe the default value is
BTRFS_MAX_LEVEL, requiring a user space daemon to set a different value
to make it work.
I'd say it's not a good idea to rely on user space tool to set this
default value, especially when some operations (snapshot dropping) can
be triggered immediately after mount, leaving a very small window to
that that sysfs interface.
So instead of disabling this new feature by default, enable it with a
low threshold (3), so that large subvolume tree drop at mount time won't
cause huge qgroup workload.
CC: stable(a)vger.kernel.org # 6.1
Signed-off-by: Qu Wenruo <wqu(a)suse.com>
Reviewed-by: David Sterba <dsterba(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 1238a38c59b2..5afb68c0304b 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1959,7 +1959,7 @@ static void btrfs_init_qgroup(struct btrfs_fs_info *fs_info)
fs_info->qgroup_seq = 1;
fs_info->qgroup_ulist = NULL;
fs_info->qgroup_rescan_running = false;
- fs_info->qgroup_drop_subtree_thres = BTRFS_MAX_LEVEL;
+ fs_info->qgroup_drop_subtree_thres = BTRFS_QGROUP_DROP_SUBTREE_THRES_DEFAULT;
mutex_init(&fs_info->qgroup_rescan_lock);
}
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 1332ec59c539..a0e8deca87a7 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1407,7 +1407,7 @@ int btrfs_quota_disable(struct btrfs_fs_info *fs_info)
fs_info->quota_root = NULL;
fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_ON;
fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_SIMPLE_MODE;
- fs_info->qgroup_drop_subtree_thres = BTRFS_MAX_LEVEL;
+ fs_info->qgroup_drop_subtree_thres = BTRFS_QGROUP_DROP_SUBTREE_THRES_DEFAULT;
spin_unlock(&fs_info->qgroup_lock);
btrfs_free_qgroup_config(fs_info);
diff --git a/fs/btrfs/qgroup.h b/fs/btrfs/qgroup.h
index 98adf4ec7b01..c229256d6fd5 100644
--- a/fs/btrfs/qgroup.h
+++ b/fs/btrfs/qgroup.h
@@ -121,6 +121,8 @@ struct btrfs_inode;
#define BTRFS_QGROUP_RUNTIME_FLAG_CANCEL_RESCAN (1ULL << 63)
#define BTRFS_QGROUP_RUNTIME_FLAG_NO_ACCOUNTING (1ULL << 62)
+#define BTRFS_QGROUP_DROP_SUBTREE_THRES_DEFAULT (3)
+
/*
* Record a dirty extent, and info qgroup to update quota on it
*/
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x 5f9062a48db260fd6b53d86ecfb4d5dc59266316
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024102856-chance-seventh-bedd@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 5f9062a48db260fd6b53d86ecfb4d5dc59266316 Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu(a)suse.com>
Date: Tue, 10 Sep 2024 15:21:04 +0930
Subject: [PATCH] btrfs: qgroup: set a more sane default value for subtree drop
threshold
Since commit 011b46c30476 ("btrfs: skip subtree scan if it's too high to
avoid low stall in btrfs_commit_transaction()"), btrfs qgroup can
automatically skip large subtree scan at the cost of marking qgroup
inconsistent.
It's designed to address the final performance problem of snapshot drop
with qgroup enabled, but to be safe the default value is
BTRFS_MAX_LEVEL, requiring a user space daemon to set a different value
to make it work.
I'd say it's not a good idea to rely on user space tool to set this
default value, especially when some operations (snapshot dropping) can
be triggered immediately after mount, leaving a very small window to
that that sysfs interface.
So instead of disabling this new feature by default, enable it with a
low threshold (3), so that large subvolume tree drop at mount time won't
cause huge qgroup workload.
CC: stable(a)vger.kernel.org # 6.1
Signed-off-by: Qu Wenruo <wqu(a)suse.com>
Reviewed-by: David Sterba <dsterba(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 1238a38c59b2..5afb68c0304b 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1959,7 +1959,7 @@ static void btrfs_init_qgroup(struct btrfs_fs_info *fs_info)
fs_info->qgroup_seq = 1;
fs_info->qgroup_ulist = NULL;
fs_info->qgroup_rescan_running = false;
- fs_info->qgroup_drop_subtree_thres = BTRFS_MAX_LEVEL;
+ fs_info->qgroup_drop_subtree_thres = BTRFS_QGROUP_DROP_SUBTREE_THRES_DEFAULT;
mutex_init(&fs_info->qgroup_rescan_lock);
}
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 1332ec59c539..a0e8deca87a7 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1407,7 +1407,7 @@ int btrfs_quota_disable(struct btrfs_fs_info *fs_info)
fs_info->quota_root = NULL;
fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_ON;
fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_SIMPLE_MODE;
- fs_info->qgroup_drop_subtree_thres = BTRFS_MAX_LEVEL;
+ fs_info->qgroup_drop_subtree_thres = BTRFS_QGROUP_DROP_SUBTREE_THRES_DEFAULT;
spin_unlock(&fs_info->qgroup_lock);
btrfs_free_qgroup_config(fs_info);
diff --git a/fs/btrfs/qgroup.h b/fs/btrfs/qgroup.h
index 98adf4ec7b01..c229256d6fd5 100644
--- a/fs/btrfs/qgroup.h
+++ b/fs/btrfs/qgroup.h
@@ -121,6 +121,8 @@ struct btrfs_inode;
#define BTRFS_QGROUP_RUNTIME_FLAG_CANCEL_RESCAN (1ULL << 63)
#define BTRFS_QGROUP_RUNTIME_FLAG_NO_ACCOUNTING (1ULL << 62)
+#define BTRFS_QGROUP_DROP_SUBTREE_THRES_DEFAULT (3)
+
/*
* Record a dirty extent, and info qgroup to update quota on it
*/
The Synopsys DesignWare mmc controller on the JH7110 SoC
(dw_mmc-starfive.c driver) is using a 32-bit IDMAC address bus width,
and thus requires the use of SWIOTLB.
The commit 8396c793ffdf ("mmc: dw_mmc: Fix IDMAC operation with pages
bigger than 4K") increased the max_seq_size, even for 4K pages, causing
"swiotlb buffer is full" to happen because swiotlb can only handle a
memory size up to 256kB only.
Fix the issue, by making sure the dw_mmc driver doesn't use segments
bigger than what SWIOTLB can handle.
Reported-by: Ron Economos <re(a)w6rz.net>
Reported-by: Jing Luo <jing(a)jing.rocks>
Fixes: 8396c793ffdf ("mmc: dw_mmc: Fix IDMAC operation with pages bigger than 4K")
Cc: stable(a)vger.kernel.org
Signed-off-by: Aurelien Jarno <aurelien(a)aurel32.net>
---
drivers/mmc/host/dw_mmc.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/drivers/mmc/host/dw_mmc.c b/drivers/mmc/host/dw_mmc.c
index 41e451235f637..dc0d6201f7b73 100644
--- a/drivers/mmc/host/dw_mmc.c
+++ b/drivers/mmc/host/dw_mmc.c
@@ -2958,7 +2958,8 @@ static int dw_mci_init_slot(struct dw_mci *host)
mmc->max_segs = host->ring_size;
mmc->max_blk_size = 65535;
mmc->max_req_size = DW_MCI_DESC_DATA_LENGTH * host->ring_size;
- mmc->max_seg_size = mmc->max_req_size;
+ mmc->max_seg_size =
+ min_t(size_t, mmc->max_req_size, dma_max_mapping_size(host->dev));
mmc->max_blk_count = mmc->max_req_size / 512;
} else if (host->use_dma == TRANS_MODE_EDMAC) {
mmc->max_segs = 64;
--
2.45.2
The following commit has been merged into the timers/urgent branch of tip:
Commit-ID: b5413156bad91dc2995a5c4eab1b05e56914638a
Gitweb: https://git.kernel.org/tip/b5413156bad91dc2995a5c4eab1b05e56914638a
Author: Benjamin Segall <bsegall(a)google.com>
AuthorDate: Fri, 25 Oct 2024 18:35:35 -07:00
Committer: Thomas Gleixner <tglx(a)linutronix.de>
CommitterDate: Sun, 27 Oct 2024 10:36:04 +01:00
posix-cpu-timers: Clear TICK_DEP_BIT_POSIX_TIMER on clone
When cloning a new thread, its posix_cputimers are not inherited, and
are cleared by posix_cputimers_init(). However, this does not clear the
tick dependency it creates in tsk->tick_dep_mask, and the handler does
not reach the code to clear the dependency if there were no timers to
begin with.
Thus if a thread has a cputimer running before clone/fork, all
descendants will prevent nohz_full unless they create a cputimer of
their own.
Fix this by entirely clearing the tick_dep_mask in copy_process().
(There is currently no inherited state that needs a tick dependency)
Process-wide timers do not have this problem because fork does not copy
signal_struct as a baseline, it creates one from scratch.
Fixes: b78783000d5c ("posix-cpu-timers: Migrate to use new tick dependency mask model")
Signed-off-by: Ben Segall <bsegall(a)google.com>
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Reviewed-by: Frederic Weisbecker <frederic(a)kernel.org>
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/all/xm26o737bq8o.fsf@google.com
---
include/linux/tick.h | 8 ++++++++
kernel/fork.c | 2 ++
2 files changed, 10 insertions(+)
diff --git a/include/linux/tick.h b/include/linux/tick.h
index 7274463..99c9c5a 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -251,12 +251,19 @@ static inline void tick_dep_set_task(struct task_struct *tsk,
if (tick_nohz_full_enabled())
tick_nohz_dep_set_task(tsk, bit);
}
+
static inline void tick_dep_clear_task(struct task_struct *tsk,
enum tick_dep_bits bit)
{
if (tick_nohz_full_enabled())
tick_nohz_dep_clear_task(tsk, bit);
}
+
+static inline void tick_dep_init_task(struct task_struct *tsk)
+{
+ atomic_set(&tsk->tick_dep_mask, 0);
+}
+
static inline void tick_dep_set_signal(struct task_struct *tsk,
enum tick_dep_bits bit)
{
@@ -290,6 +297,7 @@ static inline void tick_dep_set_task(struct task_struct *tsk,
enum tick_dep_bits bit) { }
static inline void tick_dep_clear_task(struct task_struct *tsk,
enum tick_dep_bits bit) { }
+static inline void tick_dep_init_task(struct task_struct *tsk) { }
static inline void tick_dep_set_signal(struct task_struct *tsk,
enum tick_dep_bits bit) { }
static inline void tick_dep_clear_signal(struct signal_struct *signal,
diff --git a/kernel/fork.c b/kernel/fork.c
index 89ceb4a..6fa9fe6 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -105,6 +105,7 @@
#include <linux/rseq.h>
#include <uapi/linux/pidfd.h>
#include <linux/pidfs.h>
+#include <linux/tick.h>
#include <asm/pgalloc.h>
#include <linux/uaccess.h>
@@ -2292,6 +2293,7 @@ __latent_entropy struct task_struct *copy_process(
acct_clear_integrals(p);
posix_cputimers_init(&p->posix_cputimers);
+ tick_dep_init_task(p);
p->io_context = NULL;
audit_set_context(p, NULL);
Dear Linux Community Members,
Over the years, various informal groups have formed within our community,
serving purposes such as maintaining connections with companies and external
bodies, handling sensitive information, making challenging decisions, and,
at times, representing the community as a whole. These groups contribute significantly
to our community's development and deserve our recognition and appreciation.
I'll name a few below that I identified from `Documentation/`:
- Code of Conduct Committee <conduct(a)kernel.org>
- Linux kernel security team <security(a)kernel.org>
- Linux kernel hardware security team <hardware-security(a)kernel.org>
- Kernel CVE assignment team <cve(a)kernel.org>
- Stable Team for unpublished vulnerabilities <stable(a)kernel.org>
(I suspect it's just an alias to regular stable team, but I found no evidence).
Over recent events, I've taken a closer look at how our community's governance
operates, only to find that there's remarkably little public information available
about those informal groups. With the exception of the Linux kernel hardware security
team, it seems none of these groups maintain a public list of members that I can
easily find.
Upon digging into the details, I’d like to raise a few concerns and offer some thoughts
for further discussion:
- Absence of a Membership Register
Our community is built on mutual trust. Without knowing who comprises these groups,
it's understandably difficult for people to have full confidence in their work.
A publicly available membership list would not only foster trust but also allow us to
address our recognition and appreciation.
- Lack of Guidelines for Actions
Many of these groups appear to operate without documented guidelines. While I trust each
respectful individual's integrity, documented guidelines would enable the wider community
to better understand and appreciate the roles and responsibilities involved.
- Insufficient Transparency in Decision-Making
I fully respect the need for confidentiality in handling security matters, yet some
degree of openness around decision-making processes is essential in my opinion.
Releasing communications post-embargo, for instance, could promote understanding and
prevent potential abuse of confidential procedures.
- No Conflict of Interest Policy
Particularly in the case of the Code of Conduct Committee, there may arise situations
where individuals face challenging decisions involving personal connections. A conflict
of interest policy would provide valuable guidance in such circumstances.
Thank you for reading. I know none of us enjoy being pulled away by these non-technical
concerns, we love coding after all. However, I feel these concerns are vital for the
community's continued health. It might be a candidate of Linux TAB discussion.
I'm looking forward to everyone's input.
Thanks
- Jiaxun
Since commit 50ea5449c563 ("can: mcp251xfd: fix ring configuration
when switching from CAN-CC to CAN-FD mode"), the current ring and
coalescing configuration is passed to can_ram_get_layout(). That fixed
the issue when switching between CAN-CC and CAN-FD mode with
configured ring (rx, tx) and/or coalescing parameters (rx-frames-irq,
tx-frames-irq).
However 50ea5449c563 ("can: mcp251xfd: fix ring configuration when
switching from CAN-CC to CAN-FD mode"), introduced a regression when
switching CAN modes with disabled coalescing configuration: Even if
the previous CAN mode has no coalescing configured, the new mode is
configured with active coalescing. This leads to delayed receiving of
CAN-FD frames.
This comes from the fact, that ethtool uses usecs = 0 and max_frames =
1 to disable coalescing, however the driver uses internally
priv->{rx,tx}_obj_num_coalesce_irq = 0 to indicate disabled
coalescing.
Fix the regression by assigning struct ethtool_coalesce
ec->{rx,tx}_max_coalesced_frames_irq = 1 if coalescing is disabled in
the driver as can_ram_get_layout() expects this.
Reported-by: https://github.com/vdh-robothania
Closes: https://github.com/raspberrypi/linux/issues/6407
Fixes: 50ea5449c563 ("can: mcp251xfd: fix ring configuration when switching from CAN-CC to CAN-FD mode")
Cc: stable(a)vger.kernel.org
Signed-off-by: Marc Kleine-Budde <mkl(a)pengutronix.de>
---
drivers/net/can/spi/mcp251xfd/mcp251xfd-ring.c | 8 +++++---
1 file changed, 5 insertions(+), 3 deletions(-)
diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-ring.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-ring.c
index e684991fa3917d4f6b6ebda8329f72971237574e..7209a831f0f2089e409c6be635f0e5dc7b2271da 100644
--- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-ring.c
+++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-ring.c
@@ -2,7 +2,7 @@
//
// mcp251xfd - Microchip MCP251xFD Family CAN controller driver
//
-// Copyright (c) 2019, 2020, 2021 Pengutronix,
+// Copyright (c) 2019, 2020, 2021, 2024 Pengutronix,
// Marc Kleine-Budde <kernel(a)pengutronix.de>
//
// Based on:
@@ -483,9 +483,11 @@ int mcp251xfd_ring_alloc(struct mcp251xfd_priv *priv)
};
const struct ethtool_coalesce ec = {
.rx_coalesce_usecs_irq = priv->rx_coalesce_usecs_irq,
- .rx_max_coalesced_frames_irq = priv->rx_obj_num_coalesce_irq,
+ .rx_max_coalesced_frames_irq = priv->rx_obj_num_coalesce_irq == 0 ?
+ 1 : priv->rx_obj_num_coalesce_irq,
.tx_coalesce_usecs_irq = priv->tx_coalesce_usecs_irq,
- .tx_max_coalesced_frames_irq = priv->tx_obj_num_coalesce_irq,
+ .tx_max_coalesced_frames_irq = priv->tx_obj_num_coalesce_irq == 0 ?
+ 1 : priv->tx_obj_num_coalesce_irq,
};
struct can_ram_layout layout;
---
base-commit: 9efc44fb2dba6138b0575826319200049078679a
change-id: 20241010-mcp251xfd-fix-coalesing-f373066dd42e
Best regards,
--
Marc Kleine-Budde <mkl(a)pengutronix.de>
The quilt patch titled
Subject: mm: avoid unconditional one-tick sleep when swapcache_prepare fails
has been removed from the -mm tree. Its filename was
mm-avoid-unconditional-one-tick-sleep-when-swapcache_prepare-fails.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Barry Song <v-songbaohua(a)oppo.com>
Subject: mm: avoid unconditional one-tick sleep when swapcache_prepare fails
Date: Fri, 27 Sep 2024 09:19:36 +1200
Commit 13ddaf26be32 ("mm/swap: fix race when skipping swapcache")
introduced an unconditional one-tick sleep when `swapcache_prepare()`
fails, which has led to reports of UI stuttering on latency-sensitive
Android devices. To address this, we can use a waitqueue to wake up tasks
that fail `swapcache_prepare()` sooner, instead of always sleeping for a
full tick. While tasks may occasionally be woken by an unrelated
`do_swap_page()`, this method is preferable to two scenarios: rapid
re-entry into page faults, which can cause livelocks, and multiple
millisecond sleeps, which visibly degrade user experience.
Oven's testing shows that a single waitqueue resolves the UI stuttering
issue. If a 'thundering herd' problem becomes apparent later, a waitqueue
hash similar to `folio_wait_table[PAGE_WAIT_TABLE_SIZE]` for page bit
locks can be introduced.
[v-songbaohua(a)oppo.com: wake_up only when swapcache_wq waitqueue is active]
Link: https://lkml.kernel.org/r/20241008130807.40833-1-21cnbao@gmail.com
Link: https://lkml.kernel.org/r/20240926211936.75373-1-21cnbao@gmail.com
Fixes: 13ddaf26be32 ("mm/swap: fix race when skipping swapcache")
Signed-off-by: Barry Song <v-songbaohua(a)oppo.com>
Reported-by: Oven Liyang <liyangouwen1(a)oppo.com>
Tested-by: Oven Liyang <liyangouwen1(a)oppo.com>
Cc: Kairui Song <kasong(a)tencent.com>
Cc: "Huang, Ying" <ying.huang(a)intel.com>
Cc: Yu Zhao <yuzhao(a)google.com>
Cc: David Hildenbrand <david(a)redhat.com>
Cc: Chris Li <chrisl(a)kernel.org>
Cc: Hugh Dickins <hughd(a)google.com>
Cc: Johannes Weiner <hannes(a)cmpxchg.org>
Cc: Matthew Wilcox (Oracle) <willy(a)infradead.org>
Cc: Michal Hocko <mhocko(a)suse.com>
Cc: Minchan Kim <minchan(a)kernel.org>
Cc: Yosry Ahmed <yosryahmed(a)google.com>
Cc: SeongJae Park <sj(a)kernel.org>
Cc: Kalesh Singh <kaleshsingh(a)google.com>
Cc: Suren Baghdasaryan <surenb(a)google.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/memory.c | 15 +++++++++++++--
1 file changed, 13 insertions(+), 2 deletions(-)
--- a/mm/memory.c~mm-avoid-unconditional-one-tick-sleep-when-swapcache_prepare-fails
+++ a/mm/memory.c
@@ -4187,6 +4187,8 @@ static struct folio *alloc_swap_folio(st
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+static DECLARE_WAIT_QUEUE_HEAD(swapcache_wq);
+
/*
* We enter with non-exclusive mmap_lock (to exclude vma changes,
* but allow concurrent faults), and pte mapped but not yet locked.
@@ -4199,6 +4201,7 @@ vm_fault_t do_swap_page(struct vm_fault
{
struct vm_area_struct *vma = vmf->vma;
struct folio *swapcache, *folio = NULL;
+ DECLARE_WAITQUEUE(wait, current);
struct page *page;
struct swap_info_struct *si = NULL;
rmap_t rmap_flags = RMAP_NONE;
@@ -4297,7 +4300,9 @@ vm_fault_t do_swap_page(struct vm_fault
* Relax a bit to prevent rapid
* repeated page faults.
*/
+ add_wait_queue(&swapcache_wq, &wait);
schedule_timeout_uninterruptible(1);
+ remove_wait_queue(&swapcache_wq, &wait);
goto out_page;
}
need_clear_cache = true;
@@ -4604,8 +4609,11 @@ unlock:
pte_unmap_unlock(vmf->pte, vmf->ptl);
out:
/* Clear the swap cache pin for direct swapin after PTL unlock */
- if (need_clear_cache)
+ if (need_clear_cache) {
swapcache_clear(si, entry, nr_pages);
+ if (waitqueue_active(&swapcache_wq))
+ wake_up(&swapcache_wq);
+ }
if (si)
put_swap_device(si);
return ret;
@@ -4620,8 +4628,11 @@ out_release:
folio_unlock(swapcache);
folio_put(swapcache);
}
- if (need_clear_cache)
+ if (need_clear_cache) {
swapcache_clear(si, entry, nr_pages);
+ if (waitqueue_active(&swapcache_wq))
+ wake_up(&swapcache_wq);
+ }
if (si)
put_swap_device(si);
return ret;
_
Patches currently in -mm which might be from v-songbaohua(a)oppo.com are
mm-fix-pswpin-counter-for-large-folios-swap-in.patch