Commit 7ded842b356d ("s390/bpf: Fix bpf_plt pointer arithmetic") has
accidentally removed the critical piece of commit c730fce7c70c
("s390/bpf: Fix bpf_arch_text_poke() with new_addr == NULL"), causing
intermittent kernel panics in e.g. perf's on_switch() prog to reappear.
Restore the fix and add a comment.
Fixes: 7ded842b356d ("s390/bpf: Fix bpf_plt pointer arithmetic")
Cc: stable(a)vger.kernel.org
Signed-off-by: Ilya Leoshkevich <iii(a)linux.ibm.com>
---
arch/s390/net/bpf_jit_comp.c | 10 +++++++++-
1 file changed, 9 insertions(+), 1 deletion(-)
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index 8bb738f1b1b6..bb17efe29d65 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -576,7 +576,15 @@ static void bpf_jit_plt(struct bpf_plt *plt, void *ret, void *target)
{
memcpy(plt, &bpf_plt, sizeof(*plt));
plt->ret = ret;
- plt->target = target;
+ /*
+ * (target == NULL) implies that the branch to this PLT entry was
+ * patched and became a no-op. However, some CPU could have jumped
+ * to this PLT entry before patching and may be still executing it.
+ *
+ * Since the intention in this case is to make the PLT entry a no-op,
+ * make the target point to the return label instead of NULL.
+ */
+ plt->target = target ?: ret;
}
/*
--
2.50.1
When a call is released, rxrpc takes the spinlock and removes it from
->recvmsg_q in an effort to prevent racing recvmsg() invocations from
seeing the same call. Now, rxrpc_recvmsg() only takes the spinlock when
actually removing a call from the queue; it doesn't, however, take it in
the lead up to that when it checks to see if the queue is empty. It *does*
hold the socket lock, which prevents a recvmsg/recvmsg race - but this
doesn't prevent sendmsg from ending the call because sendmsg() drops the
socket lock and relies on the call->user_mutex.
Fix this by firstly removing the bit in rxrpc_release_call() that dequeues
the released call and, instead, rely on recvmsg() to simply discard
released calls (done in a preceding fix).
Secondly, rxrpc_notify_socket() is abandoned if the call is already marked
as released rather than trying to be clever by setting both pointers in
call->recvmsg_link to NULL to trick list_empty(). This isn't perfect and
can still race, resulting in a released call on the queue, but recvmsg()
will now clean that up.
Fixes: 17926a79320a ("[AF_RXRPC]: Provide secure RxRPC sockets for use by userspace and kernel both")
Signed-off-by: David Howells <dhowells(a)redhat.com>
Reviewed-by: Jeffrey Altman <jaltman(a)auristor.com>
cc: Marc Dionne <marc.dionne(a)auristor.com>
cc: Junvyyang, Tencent Zhuque Lab <zhuque(a)tencent.com>
cc: LePremierHomme <kwqcheii(a)proton.me>
cc: Jakub Kicinski <kuba(a)kernel.org>
cc: Paolo Abeni <pabeni(a)redhat.com>
cc: "David S. Miller" <davem(a)davemloft.net>
cc: Eric Dumazet <edumazet(a)google.com>
cc: Simon Horman <horms(a)kernel.org>
cc: linux-afs(a)lists.infradead.org
cc: netdev(a)vger.kernel.org
cc: stable(a)vger.kernel.org
---
include/trace/events/rxrpc.h | 2 +-
net/rxrpc/call_object.c | 28 ++++++++++++----------------
net/rxrpc/recvmsg.c | 4 ++++
3 files changed, 17 insertions(+), 17 deletions(-)
diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h
index e7dcfb1369b6..8e5a73eb5268 100644
--- a/include/trace/events/rxrpc.h
+++ b/include/trace/events/rxrpc.h
@@ -325,7 +325,6 @@
EM(rxrpc_call_put_release_sock, "PUT rls-sock") \
EM(rxrpc_call_put_release_sock_tba, "PUT rls-sk-a") \
EM(rxrpc_call_put_sendmsg, "PUT sendmsg ") \
- EM(rxrpc_call_put_unnotify, "PUT unnotify") \
EM(rxrpc_call_put_userid_exists, "PUT u-exists") \
EM(rxrpc_call_put_userid, "PUT user-id ") \
EM(rxrpc_call_see_accept, "SEE accept ") \
@@ -338,6 +337,7 @@
EM(rxrpc_call_see_disconnected, "SEE disconn ") \
EM(rxrpc_call_see_distribute_error, "SEE dist-err") \
EM(rxrpc_call_see_input, "SEE input ") \
+ EM(rxrpc_call_see_notify_released, "SEE nfy-rlsd") \
EM(rxrpc_call_see_recvmsg, "SEE recvmsg ") \
EM(rxrpc_call_see_release, "SEE release ") \
EM(rxrpc_call_see_userid_exists, "SEE u-exists") \
diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c
index 15067ff7b1f2..918f41d97a2f 100644
--- a/net/rxrpc/call_object.c
+++ b/net/rxrpc/call_object.c
@@ -561,7 +561,7 @@ static void rxrpc_cleanup_rx_buffers(struct rxrpc_call *call)
void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call)
{
struct rxrpc_connection *conn = call->conn;
- bool put = false, putu = false;
+ bool putu = false;
_enter("{%d,%d}", call->debug_id, refcount_read(&call->ref));
@@ -573,23 +573,13 @@ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call)
rxrpc_put_call_slot(call);
- /* Make sure we don't get any more notifications */
+ /* Note that at this point, the call may still be on or may have been
+ * added back on to the socket receive queue. recvmsg() must discard
+ * released calls. The CALL_RELEASED flag should prevent further
+ * notifications.
+ */
spin_lock_irq(&rx->recvmsg_lock);
-
- if (!list_empty(&call->recvmsg_link)) {
- _debug("unlinking once-pending call %p { e=%lx f=%lx }",
- call, call->events, call->flags);
- list_del(&call->recvmsg_link);
- put = true;
- }
-
- /* list_empty() must return false in rxrpc_notify_socket() */
- call->recvmsg_link.next = NULL;
- call->recvmsg_link.prev = NULL;
-
spin_unlock_irq(&rx->recvmsg_lock);
- if (put)
- rxrpc_put_call(call, rxrpc_call_put_unnotify);
write_lock(&rx->call_lock);
@@ -638,6 +628,12 @@ void rxrpc_release_calls_on_socket(struct rxrpc_sock *rx)
rxrpc_put_call(call, rxrpc_call_put_release_sock);
}
+ while ((call = list_first_entry_or_null(&rx->recvmsg_q,
+ struct rxrpc_call, recvmsg_link))) {
+ list_del_init(&call->recvmsg_link);
+ rxrpc_put_call(call, rxrpc_call_put_release_recvmsg_q);
+ }
+
_leave("");
}
diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c
index 6990e37697de..7fa7e77f6bb9 100644
--- a/net/rxrpc/recvmsg.c
+++ b/net/rxrpc/recvmsg.c
@@ -29,6 +29,10 @@ void rxrpc_notify_socket(struct rxrpc_call *call)
if (!list_empty(&call->recvmsg_link))
return;
+ if (test_bit(RXRPC_CALL_RELEASED, &call->flags)) {
+ rxrpc_see_call(call, rxrpc_call_see_notify_released);
+ return;
+ }
rcu_read_lock();
After a recent change in clang to expose uninitialized warnings from
const variables [1], there is a warning in cxacru_heavy_init():
drivers/usb/atm/cxacru.c:1104:6: error: variable 'bp' is used uninitialized whenever 'if' condition is false [-Werror,-Wsometimes-uninitialized]
1104 | if (instance->modem_type->boot_rom_patch) {
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
drivers/usb/atm/cxacru.c:1113:39: note: uninitialized use occurs here
1113 | cxacru_upload_firmware(instance, fw, bp);
| ^~
drivers/usb/atm/cxacru.c:1104:2: note: remove the 'if' if its condition is always true
1104 | if (instance->modem_type->boot_rom_patch) {
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
drivers/usb/atm/cxacru.c:1095:32: note: initialize the variable 'bp' to silence this warning
1095 | const struct firmware *fw, *bp;
| ^
| = NULL
This warning occurs in clang's frontend before inlining occurs, so it
cannot notice that bp is only used within cxacru_upload_firmware() under
the same condition that initializes it in cxacru_heavy_init(). Just
initialize bp to NULL to silence the warning without functionally
changing the code, which is what happens with modern compilers when they
support '-ftrivial-auto-var-init=zero' (CONFIG_INIT_STACK_ALL_ZERO=y).
Cc: stable(a)vger.kernel.org
Fixes: 1b0e61465234 ("[PATCH] USB ATM: driver for the Conexant AccessRunner chipset cxacru")
Closes: https://github.com/ClangBuiltLinux/linux/issues/2102
Link: https://github.com/llvm/llvm-project/commit/2464313eef01c5b1edf0eccf57a32cd… [1]
Signed-off-by: Nathan Chancellor <nathan(a)kernel.org>
---
drivers/usb/atm/cxacru.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/usb/atm/cxacru.c b/drivers/usb/atm/cxacru.c
index a12ab90b3db7..b7c3b224a759 100644
--- a/drivers/usb/atm/cxacru.c
+++ b/drivers/usb/atm/cxacru.c
@@ -1092,7 +1092,7 @@ static int cxacru_find_firmware(struct cxacru_data *instance,
static int cxacru_heavy_init(struct usbatm_data *usbatm_instance,
struct usb_interface *usb_intf)
{
- const struct firmware *fw, *bp;
+ const struct firmware *fw, *bp = NULL;
struct cxacru_data *instance = usbatm_instance->driver_data;
int ret = cxacru_find_firmware(instance, "fw", &fw);
---
base-commit: fdfa018c6962c86d2faa183187669569be4d513f
change-id: 20250715-usb-cxacru-fix-clang-21-uninit-warning-9430d96c6bc1
Best regards,
--
Nathan Chancellor <nathan(a)kernel.org>
From: Lance Yang <lance.yang(a)linux.dev>
As pointed out by David[1], the batched unmap logic in try_to_unmap_one()
may read past the end of a PTE table when a large folio's PTE mappings
are not fully contained within a single page table.
While this scenario might be rare, an issue triggerable from userspace must
be fixed regardless of its likelihood. This patch fixes the out-of-bounds
access by refactoring the logic into a new helper, folio_unmap_pte_batch().
The new helper correctly calculates the safe batch size by capping the scan
at both the VMA and PMD boundaries. To simplify the code, it also supports
partial batching (i.e., any number of pages from 1 up to the calculated
safe maximum), as there is no strong reason to special-case for fully
mapped folios.
[1] https://lore.kernel.org/linux-mm/a694398c-9f03-4737-81b9-7e49c857fcbe@redha…
Cc: <stable(a)vger.kernel.org>
Reported-by: David Hildenbrand <david(a)redhat.com>
Closes: https://lore.kernel.org/linux-mm/a694398c-9f03-4737-81b9-7e49c857fcbe@redha…
Fixes: 354dffd29575 ("mm: support batched unmap for lazyfree large folios during reclamation")
Suggested-by: Barry Song <baohua(a)kernel.org>
Acked-by: Barry Song <baohua(a)kernel.org>
Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes(a)oracle.com>
Acked-by: David Hildenbrand <david(a)redhat.com>
Signed-off-by: Lance Yang <lance.yang(a)linux.dev>
---
v3 -> v4:
- Add Reported-by + Closes tags (per David)
- Pick RB from Lorenzo - thanks!
- Pick AB from David - thanks!
- https://lore.kernel.org/linux-mm/20250630011305.23754-1-lance.yang@linux.dev
v2 -> v3:
- Tweak changelog (per Barry and David)
- Pick AB from Barry - thanks!
- https://lore.kernel.org/linux-mm/20250627062319.84936-1-lance.yang@linux.dev
v1 -> v2:
- Update subject and changelog (per Barry)
- https://lore.kernel.org/linux-mm/20250627025214.30887-1-lance.yang@linux.dev
mm/rmap.c | 46 ++++++++++++++++++++++++++++------------------
1 file changed, 28 insertions(+), 18 deletions(-)
diff --git a/mm/rmap.c b/mm/rmap.c
index fb63d9256f09..1320b88fab74 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1845,23 +1845,32 @@ void folio_remove_rmap_pud(struct folio *folio, struct page *page,
#endif
}
-/* We support batch unmapping of PTEs for lazyfree large folios */
-static inline bool can_batch_unmap_folio_ptes(unsigned long addr,
- struct folio *folio, pte_t *ptep)
+static inline unsigned int folio_unmap_pte_batch(struct folio *folio,
+ struct page_vma_mapped_walk *pvmw,
+ enum ttu_flags flags, pte_t pte)
{
const fpb_t fpb_flags = FPB_IGNORE_DIRTY | FPB_IGNORE_SOFT_DIRTY;
- int max_nr = folio_nr_pages(folio);
- pte_t pte = ptep_get(ptep);
+ unsigned long end_addr, addr = pvmw->address;
+ struct vm_area_struct *vma = pvmw->vma;
+ unsigned int max_nr;
+
+ if (flags & TTU_HWPOISON)
+ return 1;
+ if (!folio_test_large(folio))
+ return 1;
+ /* We may only batch within a single VMA and a single page table. */
+ end_addr = pmd_addr_end(addr, vma->vm_end);
+ max_nr = (end_addr - addr) >> PAGE_SHIFT;
+
+ /* We only support lazyfree batching for now ... */
if (!folio_test_anon(folio) || folio_test_swapbacked(folio))
- return false;
+ return 1;
if (pte_unused(pte))
- return false;
- if (pte_pfn(pte) != folio_pfn(folio))
- return false;
+ return 1;
- return folio_pte_batch(folio, addr, ptep, pte, max_nr, fpb_flags, NULL,
- NULL, NULL) == max_nr;
+ return folio_pte_batch(folio, addr, pvmw->pte, pte, max_nr, fpb_flags,
+ NULL, NULL, NULL);
}
/*
@@ -2024,9 +2033,7 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
if (pte_dirty(pteval))
folio_mark_dirty(folio);
} else if (likely(pte_present(pteval))) {
- if (folio_test_large(folio) && !(flags & TTU_HWPOISON) &&
- can_batch_unmap_folio_ptes(address, folio, pvmw.pte))
- nr_pages = folio_nr_pages(folio);
+ nr_pages = folio_unmap_pte_batch(folio, &pvmw, flags, pteval);
end_addr = address + nr_pages * PAGE_SIZE;
flush_cache_range(vma, address, end_addr);
@@ -2206,13 +2213,16 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
hugetlb_remove_rmap(folio);
} else {
folio_remove_rmap_ptes(folio, subpage, nr_pages, vma);
- folio_ref_sub(folio, nr_pages - 1);
}
if (vma->vm_flags & VM_LOCKED)
mlock_drain_local();
- folio_put(folio);
- /* We have already batched the entire folio */
- if (nr_pages > 1)
+ folio_put_refs(folio, nr_pages);
+
+ /*
+ * If we are sure that we batched the entire folio and cleared
+ * all PTEs, we can just optimize and stop right here.
+ */
+ if (nr_pages == folio_nr_pages(folio))
goto walk_done;
continue;
walk_abort:
--
2.49.0