The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x b8ada54fa1b83f3b6480d4cced71354301750153
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023062303-crazily-recent-78b0@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From b8ada54fa1b83f3b6480d4cced71354301750153 Mon Sep 17 00:00:00 2001
From: Sergey Shtylyov <s.shtylyov(a)omp.ru>
Date: Sat, 17 Jun 2023 23:36:12 +0300
Subject: [PATCH] mmc: meson-gx: fix deferred probing
The driver overrides the error codes and IRQ0 returned by platform_get_irq()
to -EINVAL, so if it returns -EPROBE_DEFER, the driver will fail the probe
permanently instead of the deferred probing. Switch to propagating the error
codes upstream. Since commit ce753ad1549c ("platform: finally disallow IRQ0
in platform_get_irq() and its ilk") IRQ0 is no longer returned by those APIs,
so we now can safely ignore it...
Fixes: cbcaac6d7dd2 ("mmc: meson-gx-mmc: Fix platform_get_irq's error checking")
Cc: stable(a)vger.kernel.org # v5.19+
Signed-off-by: Sergey Shtylyov <s.shtylyov(a)omp.ru>
Reviewed-by: Neil Armstrong <neil.armstrong(a)linaro.org>
Link: https://lore.kernel.org/r/20230617203622.6812-3-s.shtylyov@omp.ru
Signed-off-by: Ulf Hansson <ulf.hansson(a)linaro.org>
diff --git a/drivers/mmc/host/meson-gx-mmc.c b/drivers/mmc/host/meson-gx-mmc.c
index f90b0fd8d8b0..ee9a25b900ae 100644
--- a/drivers/mmc/host/meson-gx-mmc.c
+++ b/drivers/mmc/host/meson-gx-mmc.c
@@ -1186,8 +1186,8 @@ static int meson_mmc_probe(struct platform_device *pdev)
return PTR_ERR(host->regs);
host->irq = platform_get_irq(pdev, 0);
- if (host->irq <= 0)
- return -EINVAL;
+ if (host->irq < 0)
+ return host->irq;
cd_irq = platform_get_irq_optional(pdev, 1);
mmc_gpio_set_cd_irq(mmc, cd_irq);
Hi,
I notice a regression report on Bugzilla [1]. Quoting from it:
> after updating from 6.2.x to 6.3.x, vmalloc error messages started to appear in the dmesg
>
>
>
> # free
> total used free shared buff/cache available
> Mem: 16183724 1473068 205664 33472 14504992 14335700
> Swap: 16777212 703596 16073616
>
>
> (zswap enabled)
See bugzilla for the full thread and attached dmesg.
On the report, the reporter can't perform the required bisection,
unfortunately.
Anyway, I'm adding it to regzbot:
#regzbot introduced: v6.2..v6.3 https://bugzilla.kernel.org/show_bug.cgi?id=217466
#regzbot title: btrfs_work_helper dealloc error in v6.3.x
Thanks.
[1]: https://bugzilla.kernel.org/show_bug.cgi?id=217466
--
An old man doll... just what I always wanted! - Clara
pl330_pause() does not set anything to indicate paused condition which
causes pl330_tx_status() to return DMA_IN_PROGRESS. This breaks 8250
DMA flush after the fix in commit 57e9af7831dc ("serial: 8250_dma: Fix
DMA Rx rearm race"). The function comment for pl330_pause() claims
pause is supported but resume is not which is enough for 8250 DMA flush
to work as long as DMA status reports DMA_PAUSED when appropriate.
Add PAUSED state for descriptor and mark BUSY descriptors with PAUSED
in pl330_pause(). Return DMA_PAUSED from pl330_tx_status() when the
descriptor is PAUSED.
Reported-by: Richard Tresidder <rtresidd(a)electromag.com.au>
Tested-by: Richard Tresidder <rtresidd(a)electromag.com.au>
Fixes: 88987d2c7534 ("dmaengine: pl330: add DMA_PAUSE feature")
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/linux-serial/f8a86ecd-64b1-573f-c2fa-59f541083f1a@e…
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen(a)linux.intel.com>
---
$ diff -u <(git grep -l -e '\.device_pause' -e '->device_pause') <(git grep -l DMA_PAUSED)
...tells there might a few other drivers which do not properly return
DMA_PAUSED status despite having a pause function.
drivers/dma/pl330.c | 18 ++++++++++++++++--
1 file changed, 16 insertions(+), 2 deletions(-)
diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c
index 0d9257fbdfb0..daad25f2c498 100644
--- a/drivers/dma/pl330.c
+++ b/drivers/dma/pl330.c
@@ -403,6 +403,12 @@ enum desc_status {
* of a channel can be BUSY at any time.
*/
BUSY,
+ /*
+ * Pause was called while descriptor was BUSY. Due to hardware
+ * limitations, only termination is possible for descriptors
+ * that have been paused.
+ */
+ PAUSED,
/*
* Sitting on the channel work_list but xfer done
* by PL330 core
@@ -2041,7 +2047,7 @@ static inline void fill_queue(struct dma_pl330_chan *pch)
list_for_each_entry(desc, &pch->work_list, node) {
/* If already submitted */
- if (desc->status == BUSY)
+ if (desc->status == BUSY || desc->status == PAUSED)
continue;
ret = pl330_submit_req(pch->thread, desc);
@@ -2326,6 +2332,7 @@ static int pl330_pause(struct dma_chan *chan)
{
struct dma_pl330_chan *pch = to_pchan(chan);
struct pl330_dmac *pl330 = pch->dmac;
+ struct dma_pl330_desc *desc;
unsigned long flags;
pm_runtime_get_sync(pl330->ddma.dev);
@@ -2335,6 +2342,10 @@ static int pl330_pause(struct dma_chan *chan)
_stop(pch->thread);
spin_unlock(&pl330->lock);
+ list_for_each_entry(desc, &pch->work_list, node) {
+ if (desc->status == BUSY)
+ desc->status = PAUSED;
+ }
spin_unlock_irqrestore(&pch->lock, flags);
pm_runtime_mark_last_busy(pl330->ddma.dev);
pm_runtime_put_autosuspend(pl330->ddma.dev);
@@ -2425,7 +2436,7 @@ pl330_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
else if (running && desc == running)
transferred =
pl330_get_current_xferred_count(pch, desc);
- else if (desc->status == BUSY)
+ else if (desc->status == BUSY || desc->status == PAUSED)
/*
* Busy but not running means either just enqueued,
* or finished and not yet marked done
@@ -2442,6 +2453,9 @@ pl330_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
case DONE:
ret = DMA_COMPLETE;
break;
+ case PAUSED:
+ ret = DMA_PAUSED;
+ break;
case PREP:
case BUSY:
ret = DMA_IN_PROGRESS;
--
2.30.2
Currently the KernelAllocator simply passes the size of the type Layout
to krealloc(), and in theory the alignment requirement from the type
Layout may be larger than the guarantee provided by SLAB, which means
the allocated object is mis-aligned.
Fixes this by adjusting the allocation size to the nearest power of two,
which SLAB always guarantees a size-aligned allocation. And because Rust
guarantees that original size must be a multiple of alignment and the
alignment must be a power of two, then the alignment requirement is
satisfied.
Suggested-by: Vlastimil Babka <vbabka(a)suse.cz>
Co-developed-by: Andreas Hindborg (Samsung) <nmi(a)metaspace.dk>
Signed-off-by: Andreas Hindborg (Samsung) <nmi(a)metaspace.dk>
Signed-off-by: Boqun Feng <boqun.feng(a)gmail.com>
Cc: stable(a)vger.kernel.org # v6.1+
---
Some more explanation:
* Layout is a data structure describing a particular memory layout,
conceptionally it has two fields: align and size.
* align is guaranteed to be a power of two.
* size can be smaller than align (only when the Layout is created via
Layout::from_align_size())
* After pad_to_align(), the size is guaranteed to be a multiple of
align
For more information, please see:
https://doc.rust-lang.org/stable/std/alloc/struct.Layout.html
rust/bindings/bindings_helper.h | 1 +
rust/kernel/allocator.rs | 17 ++++++++++++++++-
2 files changed, 17 insertions(+), 1 deletion(-)
diff --git a/rust/bindings/bindings_helper.h b/rust/bindings/bindings_helper.h
index 3e601ce2548d..6619ce95dd37 100644
--- a/rust/bindings/bindings_helper.h
+++ b/rust/bindings/bindings_helper.h
@@ -15,3 +15,4 @@
/* `bindgen` gets confused at certain things. */
const gfp_t BINDINGS_GFP_KERNEL = GFP_KERNEL;
const gfp_t BINDINGS___GFP_ZERO = __GFP_ZERO;
+const size_t BINDINGS_ARCH_SLAB_MINALIGN = ARCH_SLAB_MINALIGN;
diff --git a/rust/kernel/allocator.rs b/rust/kernel/allocator.rs
index 397a3dd57a9b..66575cf87ce2 100644
--- a/rust/kernel/allocator.rs
+++ b/rust/kernel/allocator.rs
@@ -11,9 +11,24 @@
unsafe impl GlobalAlloc for KernelAllocator {
unsafe fn alloc(&self, layout: Layout) -> *mut u8 {
+ // Customized layouts from `Layout::from_size_align()` can have size < align, so pads first.
+ let layout = layout.pad_to_align();
+
+ let mut size = layout.size();
+
+ if layout.align() > bindings::BINDINGS_ARCH_SLAB_MINALIGN {
+ // The alignment requirement exceeds the slab guarantee, then tries to enlarges the size
+ // to use the "power-of-two" size/alignment guarantee (see comments in kmalloc() for
+ // more information).
+ //
+ // Note that `layout.size()` (after padding) is guaranteed to be muliples of
+ // `layout.align()`, so `next_power_of_two` gives enough alignment guarantee.
+ size = size.next_power_of_two();
+ }
+
// `krealloc()` is used instead of `kmalloc()` because the latter is
// an inline function and cannot be bound to as a result.
- unsafe { bindings::krealloc(ptr::null(), layout.size(), bindings::GFP_KERNEL) as *mut u8 }
+ unsafe { bindings::krealloc(ptr::null(), size, bindings::GFP_KERNEL) as *mut u8 }
}
unsafe fn dealloc(&self, ptr: *mut u8, _layout: Layout) {
--
2.39.2
When a grant entry is still in use by the remote domain, Linux must put
it on a deferred list. Normally, this list is very short, because
the PV network and block protocols expect the backend to unmap the grant
first. However, Qubes OS's GUI protocol is subject to the constraints
of the X Window System, and as such winds up with the frontend unmapping
the window first. As a result, the list can grow very large, resulting
in a massive memory leak and eventual VM freeze.
To partially solve this problem, make the number of entries that the VM
will attempt to free at each iteration tunable. The default is still
10, but it can be overridden at compile-time (via Kconfig), boot-time
(via a kernel command-line option), or runtime (via sysfs).
This is Cc: stable because (when combined with appropriate userspace
changes) it fixes a severe performance and stability problem for Qubes
OS users.
Cc: stable(a)vger.kernel.org
Signed-off-by: Demi Marie Obenour <demi(a)invisiblethingslab.com>
---
drivers/xen/grant-table.c | 40 ++++++++++++++++++++++++++++-----------
1 file changed, 29 insertions(+), 11 deletions(-)
Changes since v2:
- use atomic_inc_return(x) and atomic_dec_return(x) instead of
atomic_add_return(1, x) and atomic_sub_return(1, x) respectively.
- move module_param macro closer to the definition of
free_per_iteration.
- add blank line between declarations and statements.
Changes since v1:
- drop setting default via Kconfig
diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
index e1ec725c2819d4d5dede063eb00d86a6d52944c0..f13c3b76ad1eb7110e2a2981e9fa4e504174e431 100644
--- a/drivers/xen/grant-table.c
+++ b/drivers/xen/grant-table.c
@@ -498,14 +498,21 @@ static LIST_HEAD(deferred_list);
static void gnttab_handle_deferred(struct timer_list *);
static DEFINE_TIMER(deferred_timer, gnttab_handle_deferred);
+static atomic64_t deferred_count;
+static atomic64_t leaked_count;
+static unsigned int free_per_iteration = 10;
+module_param(free_per_iteration, uint, 0600);
+
static void gnttab_handle_deferred(struct timer_list *unused)
{
- unsigned int nr = 10;
+ unsigned int nr = READ_ONCE(free_per_iteration);
+ const bool ignore_limit = nr == 0;
struct deferred_entry *first = NULL;
unsigned long flags;
+ size_t freed = 0;
spin_lock_irqsave(&gnttab_list_lock, flags);
- while (nr--) {
+ while ((ignore_limit || nr--) && !list_empty(&deferred_list)) {
struct deferred_entry *entry
= list_first_entry(&deferred_list,
struct deferred_entry, list);
@@ -515,10 +522,14 @@ static void gnttab_handle_deferred(struct timer_list *unused)
list_del(&entry->list);
spin_unlock_irqrestore(&gnttab_list_lock, flags);
if (_gnttab_end_foreign_access_ref(entry->ref)) {
+ uint64_t ret = atomic64_dec_return(&deferred_count);
+
put_free_entry(entry->ref);
- pr_debug("freeing g.e. %#x (pfn %#lx)\n",
- entry->ref, page_to_pfn(entry->page));
+ pr_debug("freeing g.e. %#x (pfn %#lx), %llu remaining\n",
+ entry->ref, page_to_pfn(entry->page),
+ (unsigned long long)ret);
put_page(entry->page);
+ freed++;
kfree(entry);
entry = NULL;
} else {
@@ -530,21 +541,22 @@ static void gnttab_handle_deferred(struct timer_list *unused)
spin_lock_irqsave(&gnttab_list_lock, flags);
if (entry)
list_add_tail(&entry->list, &deferred_list);
- else if (list_empty(&deferred_list))
- break;
}
- if (!list_empty(&deferred_list) && !timer_pending(&deferred_timer)) {
+ if (list_empty(&deferred_list))
+ WARN_ON(atomic64_read(&deferred_count));
+ else if (!timer_pending(&deferred_timer)) {
deferred_timer.expires = jiffies + HZ;
add_timer(&deferred_timer);
}
spin_unlock_irqrestore(&gnttab_list_lock, flags);
+ pr_debug("Freed %zu references", freed);
}
static void gnttab_add_deferred(grant_ref_t ref, struct page *page)
{
struct deferred_entry *entry;
gfp_t gfp = (in_atomic() || irqs_disabled()) ? GFP_ATOMIC : GFP_KERNEL;
- const char *what = KERN_WARNING "leaking";
+ uint64_t leaked, deferred;
entry = kmalloc(sizeof(*entry), gfp);
if (!page) {
@@ -567,10 +579,16 @@ static void gnttab_add_deferred(grant_ref_t ref, struct page *page)
add_timer(&deferred_timer);
}
spin_unlock_irqrestore(&gnttab_list_lock, flags);
- what = KERN_DEBUG "deferring";
+ deferred = atomic64_inc_return(&deferred_count);
+ leaked = atomic64_read(&leaked_count);
+ pr_debug("deferring g.e. %#x (pfn %#lx) (total deferred %llu, total leaked %llu)\n",
+ ref, page ? page_to_pfn(page) : -1, deferred, leaked);
+ } else {
+ deferred = atomic64_read(&deferred_count);
+ leaked = atomic64_inc_return(&leaked_count);
+ pr_warn("leaking g.e. %#x (pfn %#lx) (total deferred %llu, total leaked %llu)\n",
+ ref, page ? page_to_pfn(page) : -1, deferred, leaked);
}
- printk("%s g.e. %#x (pfn %#lx)\n",
- what, ref, page ? page_to_pfn(page) : -1);
}
int gnttab_try_end_foreign_access(grant_ref_t ref)
--
Sincerely,
Demi Marie Obenour (she/her/hers)
Invisible Things Lab