The .get_modes() callback is supposed to return the number of modes,
never a negative error code. If a negative value is returned, it'll just
be interpreted as a negative count, and added to previous calculations.
Document the rules, but handle the negative values gracefully with an
error message.
Cc: stable(a)vger.kernel.org
Signed-off-by: Jani Nikula <jani.nikula(a)intel.com>
---
drivers/gpu/drm/drm_probe_helper.c | 7 +++++++
include/drm/drm_modeset_helper_vtables.h | 3 ++-
2 files changed, 9 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/drm_probe_helper.c b/drivers/gpu/drm/drm_probe_helper.c
index 4d60cc810b57..bf2dd1f46b6c 100644
--- a/drivers/gpu/drm/drm_probe_helper.c
+++ b/drivers/gpu/drm/drm_probe_helper.c
@@ -422,6 +422,13 @@ static int drm_helper_probe_get_modes(struct drm_connector *connector)
count = connector_funcs->get_modes(connector);
+ /* The .get_modes() callback should not return negative values. */
+ if (count < 0) {
+ drm_err(connector->dev, ".get_modes() returned %pe\n",
+ ERR_PTR(count));
+ count = 0;
+ }
+
/*
* Fallback for when DDC probe failed in drm_get_edid() and thus skipped
* override/firmware EDID.
diff --git a/include/drm/drm_modeset_helper_vtables.h b/include/drm/drm_modeset_helper_vtables.h
index 881b03e4dc28..9ed42469540e 100644
--- a/include/drm/drm_modeset_helper_vtables.h
+++ b/include/drm/drm_modeset_helper_vtables.h
@@ -898,7 +898,8 @@ struct drm_connector_helper_funcs {
*
* RETURNS:
*
- * The number of modes added by calling drm_mode_probed_add().
+ * The number of modes added by calling drm_mode_probed_add(). Return 0
+ * on failures (no modes) instead of negative error codes.
*/
int (*get_modes)(struct drm_connector *connector);
--
2.39.2
From: Ard Biesheuvel <ardb(a)kernel.org>
Our efi_tcg2_tagged_event is not defined in the EFI spec, but it is not
a local invention either: it was taken from the TCG PC Client spec,
where it is called TCG_PCClientTaggedEvent.
This spec also contains some guidance on how to populate it, which
is not being followed closely at the moment; the event size should cover
the TCG_PCClientTaggedEvent and its payload only, but it currently
covers the preceding efi_tcg2_event too, and this may result in trailing
garbage being measured into the TPM.
So rename the struct and document its provenance, and fix up the use so
only the tagged event data is represented in the size field.
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Ard Biesheuvel <ardb(a)kernel.org>
---
drivers/firmware/efi/libstub/efi-stub-helper.c | 20 +++++++++++---------
drivers/firmware/efi/libstub/efistub.h | 12 ++++++------
2 files changed, 17 insertions(+), 15 deletions(-)
diff --git a/drivers/firmware/efi/libstub/efi-stub-helper.c b/drivers/firmware/efi/libstub/efi-stub-helper.c
index bfa30625f5d0..16843ab9b64d 100644
--- a/drivers/firmware/efi/libstub/efi-stub-helper.c
+++ b/drivers/firmware/efi/libstub/efi-stub-helper.c
@@ -11,6 +11,7 @@
#include <linux/efi.h>
#include <linux/kernel.h>
+#include <linux/overflow.h>
#include <asm/efi.h>
#include <asm/setup.h>
@@ -219,23 +220,24 @@ static const struct {
},
};
+struct efistub_measured_event {
+ efi_tcg2_event_t event_data;
+ TCG_PCClientTaggedEvent tagged_event;
+} __packed;
+
static efi_status_t efi_measure_tagged_event(unsigned long load_addr,
unsigned long load_size,
enum efistub_event event)
{
+ struct efistub_measured_event *evt;
+ int size = struct_size(&evt->tagged_event, tagged_event_data,
+ events[event].event_data_len);
efi_guid_t tcg2_guid = EFI_TCG2_PROTOCOL_GUID;
efi_tcg2_protocol_t *tcg2 = NULL;
efi_status_t status;
efi_bs_call(locate_protocol, &tcg2_guid, NULL, (void **)&tcg2);
if (tcg2) {
- struct efi_measured_event {
- efi_tcg2_event_t event_data;
- efi_tcg2_tagged_event_t tagged_event;
- u8 tagged_event_data[];
- } *evt;
- int size = sizeof(*evt) + events[event].event_data_len;
-
status = efi_bs_call(allocate_pool, EFI_LOADER_DATA, size,
(void **)&evt);
if (status != EFI_SUCCESS)
@@ -249,12 +251,12 @@ static efi_status_t efi_measure_tagged_event(unsigned long load_addr,
.event_header.event_type = EV_EVENT_TAG,
};
- evt->tagged_event = (struct efi_tcg2_tagged_event){
+ evt->tagged_event = (TCG_PCClientTaggedEvent){
.tagged_event_id = events[event].event_id,
.tagged_event_data_size = events[event].event_data_len,
};
- memcpy(evt->tagged_event_data, events[event].event_data,
+ memcpy(evt->tagged_event.tagged_event_data, events[event].event_data,
events[event].event_data_len);
status = efi_call_proto(tcg2, hash_log_extend_event, 0,
diff --git a/drivers/firmware/efi/libstub/efistub.h b/drivers/firmware/efi/libstub/efistub.h
index c04b82ea40f2..043a3ff435f3 100644
--- a/drivers/firmware/efi/libstub/efistub.h
+++ b/drivers/firmware/efi/libstub/efistub.h
@@ -843,14 +843,14 @@ struct efi_tcg2_event {
/* u8[] event follows here */
} __packed;
-struct efi_tcg2_tagged_event {
- u32 tagged_event_id;
- u32 tagged_event_data_size;
- /* u8 tagged event data follows here */
-} __packed;
+/* from TCG PC Client Platform Firmware Profile Specification */
+typedef struct tdTCG_PCClientTaggedEvent {
+ u32 tagged_event_id;
+ u32 tagged_event_data_size;
+ u8 tagged_event_data[];
+} TCG_PCClientTaggedEvent;
typedef struct efi_tcg2_event efi_tcg2_event_t;
-typedef struct efi_tcg2_tagged_event efi_tcg2_tagged_event_t;
typedef union efi_tcg2_protocol efi_tcg2_protocol_t;
union efi_tcg2_protocol {
--
2.44.0.278.ge034bb2e1d-goog
From: Jeff Vanhoof <qjv001(a)motorola.com>
arm-smmu related crashes seen after a Missed ISOC interrupt when
no_interrupt=1 is used. This can happen if the hardware is still using
the data associated with a TRB after the usb_request's ->complete call
has been made. Instead of immediately releasing a request when a Missed
ISOC interrupt has occurred, this change will add logic to cancel the
request instead where it will eventually be released when the
END_TRANSFER command has completed. This logic is similar to some of the
cleanup done in dwc3_gadget_ep_dequeue.
Fixes: 6d8a019614f3 ("usb: dwc3: gadget: check for Missed Isoc from event status")
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Jeff Vanhoof <qjv001(a)motorola.com>
Co-developed-by: Dan Vacura <w36195(a)motorola.com>
Signed-off-by: Dan Vacura <w36195(a)motorola.com>
---
V1 -> V3:
- no change, new patch in series
drivers/usb/dwc3/core.h | 1 +
drivers/usb/dwc3/gadget.c | 38 ++++++++++++++++++++++++++------------
2 files changed, 27 insertions(+), 12 deletions(-)
diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h
index 8f9959ba9fd4..9b005d912241 100644
--- a/drivers/usb/dwc3/core.h
+++ b/drivers/usb/dwc3/core.h
@@ -943,6 +943,7 @@ struct dwc3_request {
#define DWC3_REQUEST_STATUS_DEQUEUED 3
#define DWC3_REQUEST_STATUS_STALLED 4
#define DWC3_REQUEST_STATUS_COMPLETED 5
+#define DWC3_REQUEST_STATUS_MISSED_ISOC 6
#define DWC3_REQUEST_STATUS_UNKNOWN -1
u8 epnum;
diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index 079cd333632e..411532c5c378 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -2021,6 +2021,9 @@ static void dwc3_gadget_ep_cleanup_cancelled_requests(struct dwc3_ep *dep)
case DWC3_REQUEST_STATUS_STALLED:
dwc3_gadget_giveback(dep, req, -EPIPE);
break;
+ case DWC3_REQUEST_STATUS_MISSED_ISOC:
+ dwc3_gadget_giveback(dep, req, -EXDEV);
+ break;
default:
dev_err(dwc->dev, "request cancelled with wrong reason:%d\n", req->status);
dwc3_gadget_giveback(dep, req, -ECONNRESET);
@@ -3402,21 +3405,32 @@ static bool dwc3_gadget_endpoint_trbs_complete(struct dwc3_ep *dep,
struct dwc3 *dwc = dep->dwc;
bool no_started_trb = true;
- dwc3_gadget_ep_cleanup_completed_requests(dep, event, status);
+ if (status == -EXDEV) {
+ struct dwc3_request *tmp;
+ struct dwc3_request *req;
- if (dep->flags & DWC3_EP_END_TRANSFER_PENDING)
- goto out;
+ if (!(dep->flags & DWC3_EP_END_TRANSFER_PENDING))
+ dwc3_stop_active_transfer(dep, true, true);
- if (!dep->endpoint.desc)
- return no_started_trb;
+ list_for_each_entry_safe(req, tmp, &dep->started_list, list)
+ dwc3_gadget_move_cancelled_request(req,
+ DWC3_REQUEST_STATUS_MISSED_ISOC);
+ } else {
+ dwc3_gadget_ep_cleanup_completed_requests(dep, event, status);
- if (usb_endpoint_xfer_isoc(dep->endpoint.desc) &&
- list_empty(&dep->started_list) &&
- (list_empty(&dep->pending_list) || status == -EXDEV))
- dwc3_stop_active_transfer(dep, true, true);
- else if (dwc3_gadget_ep_should_continue(dep))
- if (__dwc3_gadget_kick_transfer(dep) == 0)
- no_started_trb = false;
+ if (dep->flags & DWC3_EP_END_TRANSFER_PENDING)
+ goto out;
+
+ if (!dep->endpoint.desc)
+ return no_started_trb;
+
+ if (usb_endpoint_xfer_isoc(dep->endpoint.desc) &&
+ list_empty(&dep->started_list) && list_empty(&dep->pending_list))
+ dwc3_stop_active_transfer(dep, true, true);
+ else if (dwc3_gadget_ep_should_continue(dep))
+ if (__dwc3_gadget_kick_transfer(dep) == 0)
+ no_started_trb = false;
+ }
out:
/*
--
2.34.1
Since kernel version 5.4.217 LTS, there has been an issue with the kernel live patching feature becoming unavailable.
When compiling the sample code for kernel live patching, the following message is displayed when enabled:
livepatch: klp_check_stack: kworker/u256:6:23490 has an unreliable stack
Reproduction steps:
1. git checkout v5.4.269 -b v5.4.269
2. make defconfig
3. Set CONFIG_LIVEPATCH=y, CONFIG_SAMPLE_LIVEPATCH=m
4. make -j bzImage
5. make samples/livepatch/livepatch-sample.ko
6. qemu-system-x86_64 -kernel arch/x86_64/boot/bzImage -nographic -append "console=ttyS0" -initrd initrd.img -m 1024M
7. insmod livepatch-sample.ko
Kernel live patch cannot complete successfully.
After some debugging, the immediate cause of the patch failure is an error in stack checking. The logs are as follows:
[ 340.974853] livepatch: klp_check_stack: kworker/u256:0:23486 has an unreliable stack
[ 340.974858] livepatch: klp_check_stack: kworker/u256:1:23487 has an unreliable stack
[ 340.974863] livepatch: klp_check_stack: kworker/u256:2:23488 has an unreliable stack
[ 340.974868] livepatch: klp_check_stack: kworker/u256:5:23489 has an unreliable stack
[ 340.974872] livepatch: klp_check_stack: kworker/u256:6:23490 has an unreliable stack
......
BTW, if you use the v5.4.217 tag for testing, make sure to set CONFIG_RETPOLINE=y and CONFIG_LIVEPATCH=y; the other steps are the same as for v5.4.269.
After investigation, the problem is strongly related to the commit 8afd1c7da2b0 ("x86/speculation: Change FILL_RETURN_BUFFER to work with objtool"),
which causes incorrect ORC entries to be generated; on v5.4.217, reverting this commit makes kernel livepatch work normally.
It is a back-ported upstream patch with some code adjustments; in the git log, the author also mentioned that there is no intra-function call validation support.
Based on commit 6e1f54a4985b63bc1b55a09e5e75a974c5d6719b (Linux 5.4.269), this patchset adds stack validation support for intra-function calls,
allowing the kernel live patching feature to work correctly.
v3 - v2
- fix the compile error in arch/x86/kvm/svm.c; the error message is: ../arch/x86/include/asm/nospec-branch.h: 313: Error: no such instruction: 'unwind_hint_empty'
v2 - v1
- add the tag "Cc: stable(a)vger.kernel.org" in the sign-off area for patch x86/speculation: Support intra-function call
- add my own Signed-off to all patches
Alexandre Chartre (2):
objtool: is_fentry_call() crashes if call has no destination
objtool: Add support for intra-function calls
Rui Qi (1):
x86/speculation: Support intra-function call validation
arch/x86/include/asm/nospec-branch.h | 7 ++
arch/x86/include/asm/unwind_hints.h | 2 +-
include/linux/frame.h | 11 ++++
.../Documentation/stack-validation.txt | 8 +++
tools/objtool/arch/x86/decode.c | 6 ++
tools/objtool/check.c | 64 +++++++++++++++++--
6 files changed, 92 insertions(+), 6 deletions(-)
--
2.20.1
There was previously a theoretical window where swapoff() could run and
teardown a swap_info_struct while a call to free_swap_and_cache() was
running in another thread. This could cause, amongst other bad
possibilities, swap_page_trans_huge_swapped() (called by
free_swap_and_cache()) to access the freed memory for swap_map.
This is a theoretical problem and I haven't been able to provoke it from
a test case. But there has been agreement based on code review that this
is possible (see link below).
Fix it by using get_swap_device()/put_swap_device(), which will stall
swapoff(). There was an extra check in _swap_info_get() to confirm that
the swap entry was valid. This wasn't present in get_swap_device() so
I've added it. I couldn't find any existing get_swap_device() call sites
where this extra check would cause any false alarms.
Details of how to provoke one possible issue (thanks to David Hildenbrand
for deriving this):
--8<-----
__swap_entry_free() might be the last user and result in
"count == SWAP_HAS_CACHE".
swapoff->try_to_unuse() will stop as soon as si->inuse_pages==0.
So the question is: could someone reclaim the folio and turn
si->inuse_pages==0, before we completed swap_page_trans_huge_swapped().
Imagine the following: 2 MiB folio in the swapcache. Only 2 subpages are
still referenced by swap entries.
Process 1 still references subpage 0 via swap entry.
Process 2 still references subpage 1 via swap entry.
Process 1 quits. Calls free_swap_and_cache().
-> count == SWAP_HAS_CACHE
[then, preempted in the hypervisor etc.]
Process 2 quits. Calls free_swap_and_cache().
-> count == SWAP_HAS_CACHE
Process 2 goes ahead, passes swap_page_trans_huge_swapped(), and calls
__try_to_reclaim_swap().
__try_to_reclaim_swap()->folio_free_swap()->delete_from_swap_cache()->
put_swap_folio()->free_swap_slot()->swapcache_free_entries()->
swap_entry_free()->swap_range_free()->
...
WRITE_ONCE(si->inuse_pages, si->inuse_pages - nr_entries);
What stops swapoff from succeeding after process 2 reclaimed the swap cache
but before process1 finished its call to swap_page_trans_huge_swapped()?
--8<-----
Fixes: 7c00bafee87c ("mm/swap: free swap slots in batch")
Closes: https://lore.kernel.org/linux-mm/65a66eb9-41f8-4790-8db2-0c70ea15979f@redha…
Cc: stable(a)vger.kernel.org
Signed-off-by: Ryan Roberts <ryan.roberts(a)arm.com>
---
Applies on top of v6.8-rc6 and mm-unstable (b38c34939fe4).
Thanks,
Ryan
mm/swapfile.c | 14 +++++++++++---
1 file changed, 11 insertions(+), 3 deletions(-)
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 2b3a2d85e350..f580e6abc674 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -1281,7 +1281,9 @@ struct swap_info_struct *get_swap_device(swp_entry_t entry)
smp_rmb();
offset = swp_offset(entry);
if (offset >= si->max)
- goto put_out;
+ goto bad_offset;
+ if (data_race(!si->swap_map[swp_offset(entry)]))
+ goto bad_free;
return si;
bad_nofile:
@@ -1289,9 +1291,14 @@ struct swap_info_struct *get_swap_device(swp_entry_t entry)
out:
return NULL;
put_out:
- pr_err("%s: %s%08lx\n", __func__, Bad_offset, entry.val);
percpu_ref_put(&si->users);
return NULL;
+bad_offset:
+ pr_err("%s: %s%08lx\n", __func__, Bad_offset, entry.val);
+ goto put_out;
+bad_free:
+ pr_err("%s: %s%08lx\n", __func__, Unused_offset, entry.val);
+ goto put_out;
}
static unsigned char __swap_entry_free(struct swap_info_struct *p,
@@ -1609,13 +1616,14 @@ int free_swap_and_cache(swp_entry_t entry)
if (non_swap_entry(entry))
return 1;
- p = _swap_info_get(entry);
+ p = get_swap_device(entry);
if (p) {
count = __swap_entry_free(p, entry);
if (count == SWAP_HAS_CACHE &&
!swap_page_trans_huge_swapped(p, entry))
__try_to_reclaim_swap(p, swp_offset(entry),
TTRS_UNMAPPED | TTRS_FULL);
+ put_swap_device(p);
}
return p != NULL;
}
--
2.25.1
Hi,
I have to admit that v3 was a lazy attempt. This one should be on
the right path.
this series does basically two things:
1. Disables automatic load balancing as advised by the hardware
workaround.
2. Assigns all the CCS slices to one single user engine. The user
will then be able to query only one CCS engine
I'm using here the "Requires: " tag, but I'm not sure the commit
id will be valid, on the other hand, I don't know what commit id
I should use.
Thanks Tvrtko, Matt, John and Joonas for your reviews!
Andi
Changelog
=========
v3 -> v4
- Reword correctly the comment in the workaround
- Fix a buffer overflow (Thanks Joonas)
- Handle properly the fused engines when setting the CCS mode.
v2 -> v3
- Simplified the algorithm for creating the list of the exported
uabi engines. (Patch 1) (Thanks, Tvrtko)
- Consider the fused engines when creating the uabi engine list
(Patch 2) (Thanks, Matt)
- Patch 4 now uses the refactoring from patch 1, with a cleaner
outcome.
v1 -> v2
- In Patch 1 use the correct workaround number (thanks Matt).
- In Patch 2 do not add the extra CCS engines to the exposed UABI
engine list and adapt the engine counting accordingly (thanks
Tvrtko).
- Reword the commit of Patch 2 (thanks John).
Andi Shyti (3):
drm/i915/gt: Disable HW load balancing for CCS
drm/i915/gt: Refactor uabi engine class/instance list creation
drm/i915/gt: Enable only one CCS for compute workload
drivers/gpu/drm/i915/gt/intel_engine_user.c | 40 ++++++++++++++-------
drivers/gpu/drm/i915/gt/intel_gt.c | 23 ++++++++++++
drivers/gpu/drm/i915/gt/intel_gt_regs.h | 6 ++++
drivers/gpu/drm/i915/gt/intel_workarounds.c | 5 +++
4 files changed, 62 insertions(+), 12 deletions(-)
--
2.43.0
Hi,
On Bugzilla, danilrybakov249(a)gmail.com reported a stable-specific ACPI error
regression that led to high CPU temperature [1]. He wrote:
> Overview:
>
> After updating from lts v6.6.14-2 to lts v6.6.17-1 noticed high CPU temperature and lag. After running htop noticed that journald was using 30-60% of CPU. Afterwards, tried switching to stable, or lts v6.6.18-1, but encountered the same issue.
>
> Running journalctl -f gives these lines over and over again:
>
> Feb 19 21:09:12 danirybe kernel: ACPI Error: Could not disable RealTimeClock events (20230628/evxfevnt-243)
> Feb 19 21:09:12 danirybe kernel: ACPI Error: No handler or method for GPE 08, disabling event (20230628/evgpe-839)
> Feb 19 21:09:12 danirybe kernel: ACPI Error: No handler or method for GPE 0A, disabling event (20230628/evgpe-839)
> Feb 19 21:09:12 danirybe kernel: ACPI Error: No handler or method for GPE 0B, disabling event (20230628/evgpe-839)
> Feb 19 21:09:12 danirybe kernel: ACPI Error: No installed handler for fixed event - PM_Timer (0), disabling (20230628/evevent-255)
> Feb 19 21:09:12 danirybe kernel: ACPI Error: No installed handler for fixed event - PowerButton (2), disabling (20230628/evevent-255)
> Feb 19 21:09:12 danirybe kernel: ACPI Error: No installed handler for fixed event - SleepButton (3), disabling (20230628/evevent-255)
>
> My system info:
>
> Laptop model: ASUS VivoBook D540NV-GQ065T
> OS: Arch Linux x86_64
> Kernel: 6.6.14-2-lts
> WM: sway
> CPU: Intel Pentium N420 (4) @ 2.500GHz
> GPU1: Intel Apollo Lake [HD Graphics 505]
> GPU2: NVIDIA GeForce 920MX
>
> I've pinned down the commit after which the problem occurs:
>
> 847e1eb30e269a094da046c08273abe3f3361cf2 is the first bad commit
> commit 847e1eb30e269a094da046c08273abe3f3361cf2
> Author: Shin'ichiro Kawasaki <shinichiro.kawasaki(a)wdc.com>
> Date: Mon Jan 8 15:20:58 2024 +0900
>
> platform/x86: p2sb: Allow p2sb_bar() calls during PCI device probe
>
> commit 5913320eb0b3ec88158cfcb0fa5e996bf4ef681b upstream.
>
> <snipped>...
See Bugzilla for the full thread.
Thanks.
[1]: https://bugzilla.kernel.org/show_bug.cgi?id=218531
--
An old man doll... just what I always wanted! - Clara
From: Vitor Soares <vitor.soares(a)toradex.com>
When the mcp251xfd_start_xmit() function fails, the driver stops
processing messages, and the interrupt routine does not return,
running indefinitely even after killing the running application.
Error messages:
[ 441.298819] mcp251xfd spi2.0 can0: ERROR in mcp251xfd_start_xmit: -16
[ 441.306498] mcp251xfd spi2.0 can0: Transmit Event FIFO buffer not empty. (seq=0x000017c7, tef_tail=0x000017cf, tef_head=0x000017d0, tx_head=0x000017d3).
... and repeat forever.
The issue can be triggered when multiple devices share the same
SPI interface. And there is concurrent access to the bus.
The problem occurs because tx_ring->head increments even if
mcp251xfd_start_xmit() fails. Consequently, the driver skips one
TX package while still expecting a response in
mcp251xfd_handle_tefif_one().
This patch resolves the issue by decreasing tx_ring->head if
mcp251xfd_start_xmit() fails. With the fix, if we attempt to trigger
the issue again, the driver prints an error and discards the message.
Fixes: 55e5b97f003e ("can: mcp25xxfd: add driver for Microchip MCP25xxFD SPI CAN")
Cc: stable(a)vger.kernel.org
Signed-off-by: Vitor Soares <vitor.soares(a)toradex.com>
---
drivers/net/can/spi/mcp251xfd/mcp251xfd-tx.c | 27 ++++++++++----------
1 file changed, 14 insertions(+), 13 deletions(-)
diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-tx.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-tx.c
index 160528d3cc26..a8eb941c1b95 100644
--- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-tx.c
+++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-tx.c
@@ -181,25 +181,26 @@ netdev_tx_t mcp251xfd_start_xmit(struct sk_buff *skb,
tx_obj = mcp251xfd_get_tx_obj_next(tx_ring);
mcp251xfd_tx_obj_from_skb(priv, tx_obj, skb, tx_ring->head);
- /* Stop queue if we occupy the complete TX FIFO */
tx_head = mcp251xfd_get_tx_head(tx_ring);
- tx_ring->head++;
- if (mcp251xfd_get_tx_free(tx_ring) == 0)
- netif_stop_queue(ndev);
-
frame_len = can_skb_get_frame_len(skb);
- err = can_put_echo_skb(skb, ndev, tx_head, frame_len);
- if (!err)
- netdev_sent_queue(priv->ndev, frame_len);
+ can_put_echo_skb(skb, ndev, tx_head, frame_len);
+
+ tx_ring->head++;
err = mcp251xfd_tx_obj_write(priv, tx_obj);
- if (err)
- goto out_err;
+ if (err) {
+ can_free_echo_skb(ndev, tx_head, NULL);
- return NETDEV_TX_OK;
+ tx_ring->head--;
+
+ netdev_err(priv->ndev, "ERROR in %s: %d\n", __func__, err);
+ } else {
+ /* Stop queue if we occupy the complete TX FIFO */
+ if (mcp251xfd_get_tx_free(tx_ring) == 0)
+ netif_stop_queue(ndev);
- out_err:
- netdev_err(priv->ndev, "ERROR in %s: %d\n", __func__, err);
+ netdev_sent_queue(priv->ndev, frame_len);
+ }
return NETDEV_TX_OK;
}
--
2.34.1
From: Muhammad Ahmed <ahmed.ahmed(a)amd.com>
[WHY]
Blackscreen hang @ PC EF000025 when trying to wake up from S0i3. DCN
gets powered off due to dc_power_down_on_boot() being called after
timeout.
[HOW]
Setting the power_down_on_boot function pointer to null since we don't
expect the function to be called for APU.
Cc: Mario Limonciello <mario.limonciello(a)amd.com>
Cc: Alex Deucher <alexander.deucher(a)amd.com>
Cc: stable(a)vger.kernel.org
Reviewed-by: Nicholas Kazlauskas <nicholas.kazlauskas(a)amd.com>
Acked-by: Alex Hung <alex.hung(a)amd.com>
Signed-off-by: Muhammad Ahmed <ahmed.ahmed(a)amd.com>
---
drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c
index dce620d359a6..d4e0abbef28e 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c
@@ -39,7 +39,7 @@
static const struct hw_sequencer_funcs dcn35_funcs = {
.program_gamut_remap = dcn30_program_gamut_remap,
.init_hw = dcn35_init_hw,
- .power_down_on_boot = dcn35_power_down_on_boot,
+ .power_down_on_boot = NULL,
.apply_ctx_to_hw = dce110_apply_ctx_to_hw,
.apply_ctx_for_surface = NULL,
.program_front_end_for_ctx = dcn20_program_front_end_for_ctx,
--
2.34.1
We need to avoid the first page if we don't read it entirely.
We need to avoid the last page if we don't read it entirely.
While rather simple, this logic was implemented incorrectly in the
previous fix. This time I wrote about 30 unit tests locally to check each
possible condition, hopefully I covered them all.
Reported-by: Christophe Kerello <christophe.kerello(a)foss.st.com>
Closes: https://lore.kernel.org/linux-mtd/20240221175327.42f7076d@xps-13/T/#m399bac…
Suggested-by: Christophe Kerello <christophe.kerello(a)foss.st.com>
Fixes: 828f6df1bcba ("mtd: rawnand: Clarify conditions to enable continuous reads")
Cc: stable(a)vger.kernel.org
Signed-off-by: Miquel Raynal <miquel.raynal(a)bootlin.com>
---
drivers/mtd/nand/raw/nand_base.c | 38 ++++++++++++++++++--------------
1 file changed, 22 insertions(+), 16 deletions(-)
diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c
index 3b3ce2926f5d..bcfd99a1699f 100644
--- a/drivers/mtd/nand/raw/nand_base.c
+++ b/drivers/mtd/nand/raw/nand_base.c
@@ -3466,30 +3466,36 @@ static void rawnand_enable_cont_reads(struct nand_chip *chip, unsigned int page,
u32 readlen, int col)
{
struct mtd_info *mtd = nand_to_mtd(chip);
- unsigned int end_page, end_col;
+ unsigned int first_page, last_page;
chip->cont_read.ongoing = false;
if (!chip->controller->supported_op.cont_read)
return;
- end_page = DIV_ROUND_UP(col + readlen, mtd->writesize);
- end_col = (col + readlen) % mtd->writesize;
+ /*
+ * Don't bother making any calculations if the length is too small.
+ * Side effect: avoids possible integer underflows below.
+ */
+ if (readlen < (2 * mtd->writesize))
+ return;
+ /* Derive the page where continuous read should start (the first full page read) */
+ first_page = page;
if (col)
- page++;
-
- if (end_col && end_page)
- end_page--;
-
- if (page + 1 > end_page)
- return;
-
- chip->cont_read.first_page = page;
- chip->cont_read.last_page = end_page;
- chip->cont_read.ongoing = true;
-
- rawnand_cap_cont_reads(chip);
+ first_page++;
+
+ /* Derive the page where continuous read should stop (the last full page read) */
+ last_page = page + ((col + readlen) / mtd->writesize) - 1;
+
+ /* Configure and enable continuous read when suitable */
+ if (first_page < last_page) {
+ chip->cont_read.first_page = first_page;
+ chip->cont_read.last_page = last_page;
+ chip->cont_read.ongoing = true;
+ /* May reset the ongoing flag */
+ rawnand_cap_cont_reads(chip);
+ }
}
static void rawnand_cont_read_skip_first_page(struct nand_chip *chip, unsigned int page)
--
2.34.1
While crossing a LUN boundary, it is probably safer (and clearer) to
keep all members of the continuous read structure aligned, including the
pause page (which is the last page of the lun or the last page of the
continuous read). Once these members are properly in sync, we can use the
rawnand_cap_cont_reads() helper everywhere to "prepare" the next
continuous read if there is one.
Fixes: bbcd80f53a5e ("mtd: rawnand: Prevent crossing LUN boundaries during sequential reads")
Cc: stable(a)vger.kernel.org
Signed-off-by: Miquel Raynal <miquel.raynal(a)bootlin.com>
---
This is not 100% a fix but I believe it is worth backporting as there
may be corner cases which were not identified with the initial
implementation.
---
drivers/mtd/nand/raw/nand_base.c | 23 ++++++++++++++---------
1 file changed, 14 insertions(+), 9 deletions(-)
diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c
index d6a27e08b112..4d5a663e4e05 100644
--- a/drivers/mtd/nand/raw/nand_base.c
+++ b/drivers/mtd/nand/raw/nand_base.c
@@ -1232,6 +1232,15 @@ static void rawnand_cap_cont_reads(struct nand_chip *chip)
chip->cont_read.pause_page = rawnand_last_page_of_lun(ppl, first_lun);
else
chip->cont_read.pause_page = chip->cont_read.last_page;
+
+ if (chip->cont_read.first_page == chip->cont_read.pause_page) {
+ chip->cont_read.first_page++;
+ chip->cont_read.pause_page = min(chip->cont_read.last_page,
+ rawnand_last_page_of_lun(ppl, first_lun + 1));
+ }
+
+ if (chip->cont_read.first_page >= chip->cont_read.last_page)
+ chip->cont_read.ongoing = false;
}
static int nand_lp_exec_cont_read_page_op(struct nand_chip *chip, unsigned int page,
@@ -1298,12 +1307,11 @@ static int nand_lp_exec_cont_read_page_op(struct nand_chip *chip, unsigned int p
if (!chip->cont_read.ongoing)
return 0;
- if (page == chip->cont_read.pause_page &&
- page != chip->cont_read.last_page) {
- chip->cont_read.first_page = chip->cont_read.pause_page + 1;
- rawnand_cap_cont_reads(chip);
- } else if (page == chip->cont_read.last_page) {
+ if (page == chip->cont_read.last_page) {
chip->cont_read.ongoing = false;
+ } else if (page == chip->cont_read.pause_page) {
+ chip->cont_read.first_page++;
+ rawnand_cap_cont_reads(chip);
}
return 0;
@@ -3510,10 +3518,7 @@ static void rawnand_cont_read_skip_first_page(struct nand_chip *chip, unsigned i
return;
chip->cont_read.first_page++;
- if (chip->cont_read.first_page == chip->cont_read.pause_page)
- chip->cont_read.first_page++;
- if (chip->cont_read.first_page >= chip->cont_read.last_page)
- chip->cont_read.ongoing = false;
+ rawnand_cap_cont_reads(chip);
}
/**
--
2.34.1
To add another datapoint to this - I've seen the same problem on Dell
PowerEdge R6615 servers... but no others.
The problem also crept into the 6.1.79 kernel with the commit
mentioned earlier, and is fixed by reverting that commit. Adding
nogbpages to the kernel command line can cause the failure to
reproduce on that hardware as well.
From: Jakub Kicinski <kuba(a)kernel.org>
[ Upstream commit e01e3934a1b2d122919f73bc6ddbe1cdafc4bbdb ]
Similarly to previous commit, the submitting thread (recvmsg/sendmsg)
may exit as soon as the async crypto handler calls complete().
Reorder scheduling the work before calling complete().
This seems more logical in the first place, as it's
the inverse order of what the submitting thread will do.
Reported-by: valis <sec(a)valis.email>
Fixes: a42055e8d2c3 ("net/tls: Add support for async encryption of records for performance")
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
Reviewed-by: Simon Horman <horms(a)kernel.org>
Reviewed-by: Sabrina Dubroca <sd(a)queasysnail.net>
Signed-off-by: David S. Miller <davem(a)davemloft.net>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
(cherry picked from commit 6db22d6c7a6dc914b12c0469b94eb639b6a8a146)
[Lee: Fixed merge-conflict in Stable branches linux-6.1.y and older]
Signed-off-by: Lee Jones <lee(a)kernel.org>
---
net/tls/tls_sw.c | 16 ++++++----------
1 file changed, 6 insertions(+), 10 deletions(-)
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 910da98d6bfb3..58d9b5c06cf89 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -440,7 +440,6 @@ static void tls_encrypt_done(struct crypto_async_request *req, int err)
struct scatterlist *sge;
struct sk_msg *msg_en;
struct tls_rec *rec;
- bool ready = false;
int pending;
rec = container_of(aead_req, struct tls_rec, aead_req);
@@ -472,8 +471,12 @@ static void tls_encrypt_done(struct crypto_async_request *req, int err)
/* If received record is at head of tx_list, schedule tx */
first_rec = list_first_entry(&ctx->tx_list,
struct tls_rec, list);
- if (rec == first_rec)
- ready = true;
+ if (rec == first_rec) {
+ /* Schedule the transmission */
+ if (!test_and_set_bit(BIT_TX_SCHEDULED,
+ &ctx->tx_bitmask))
+ schedule_delayed_work(&ctx->tx_work.work, 1);
+ }
}
spin_lock_bh(&ctx->encrypt_compl_lock);
@@ -482,13 +485,6 @@ static void tls_encrypt_done(struct crypto_async_request *req, int err)
if (!pending && ctx->async_notify)
complete(&ctx->async_wait.completion);
spin_unlock_bh(&ctx->encrypt_compl_lock);
-
- if (!ready)
- return;
-
- /* Schedule the transmission */
- if (!test_and_set_bit(BIT_TX_SCHEDULED, &ctx->tx_bitmask))
- schedule_delayed_work(&ctx->tx_work.work, 1);
}
static int tls_do_encryption(struct sock *sk,
--
2.44.0.278.ge034bb2e1d-goog
From: Jakub Kicinski <kuba(a)kernel.org>
[ Upstream commit e01e3934a1b2d122919f73bc6ddbe1cdafc4bbdb ]
Similarly to previous commit, the submitting thread (recvmsg/sendmsg)
may exit as soon as the async crypto handler calls complete().
Reorder scheduling the work before calling complete().
This seems more logical in the first place, as it's
the inverse order of what the submitting thread will do.
Reported-by: valis <sec(a)valis.email>
Fixes: a42055e8d2c3 ("net/tls: Add support for async encryption of records for performance")
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
Reviewed-by: Simon Horman <horms(a)kernel.org>
Reviewed-by: Sabrina Dubroca <sd(a)queasysnail.net>
Signed-off-by: David S. Miller <davem(a)davemloft.net>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
(cherry picked from commit 6db22d6c7a6dc914b12c0469b94eb639b6a8a146)
[Lee: Fixed merge-conflict in Stable branches linux-6.1.y and older]
Signed-off-by: Lee Jones <lee(a)kernel.org>
---
net/tls/tls_sw.c | 16 ++++++----------
1 file changed, 6 insertions(+), 10 deletions(-)
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 46f1c19f7c60b..25a408206b3e0 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -443,7 +443,6 @@ static void tls_encrypt_done(struct crypto_async_request *req, int err)
struct scatterlist *sge;
struct sk_msg *msg_en;
struct tls_rec *rec;
- bool ready = false;
int pending;
rec = container_of(aead_req, struct tls_rec, aead_req);
@@ -475,8 +474,12 @@ static void tls_encrypt_done(struct crypto_async_request *req, int err)
/* If received record is at head of tx_list, schedule tx */
first_rec = list_first_entry(&ctx->tx_list,
struct tls_rec, list);
- if (rec == first_rec)
- ready = true;
+ if (rec == first_rec) {
+ /* Schedule the transmission */
+ if (!test_and_set_bit(BIT_TX_SCHEDULED,
+ &ctx->tx_bitmask))
+ schedule_delayed_work(&ctx->tx_work.work, 1);
+ }
}
spin_lock_bh(&ctx->encrypt_compl_lock);
@@ -485,13 +488,6 @@ static void tls_encrypt_done(struct crypto_async_request *req, int err)
if (!pending && ctx->async_notify)
complete(&ctx->async_wait.completion);
spin_unlock_bh(&ctx->encrypt_compl_lock);
-
- if (!ready)
- return;
-
- /* Schedule the transmission */
- if (!test_and_set_bit(BIT_TX_SCHEDULED, &ctx->tx_bitmask))
- schedule_delayed_work(&ctx->tx_work.work, 1);
}
static int tls_do_encryption(struct sock *sk,
--
2.44.0.278.ge034bb2e1d-goog
From: Jakub Kicinski <kuba(a)kernel.org>
[ Upstream commit e01e3934a1b2d122919f73bc6ddbe1cdafc4bbdb ]
Similarly to previous commit, the submitting thread (recvmsg/sendmsg)
may exit as soon as the async crypto handler calls complete().
Reorder scheduling the work before calling complete().
This seems more logical in the first place, as it's
the inverse order of what the submitting thread will do.
Reported-by: valis <sec(a)valis.email>
Fixes: a42055e8d2c3 ("net/tls: Add support for async encryption of records for performance")
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
Reviewed-by: Simon Horman <horms(a)kernel.org>
Reviewed-by: Sabrina Dubroca <sd(a)queasysnail.net>
Signed-off-by: David S. Miller <davem(a)davemloft.net>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
(cherry picked from commit 6db22d6c7a6dc914b12c0469b94eb639b6a8a146)
[Lee: Fixed merge-conflict in Stable branches linux-6.1.y and older]
Signed-off-by: Lee Jones <lee(a)kernel.org>
---
net/tls/tls_sw.c | 16 ++++++----------
1 file changed, 6 insertions(+), 10 deletions(-)
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index fc55b65695e5c..e51dc9d02b4e7 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -448,7 +448,6 @@ static void tls_encrypt_done(struct crypto_async_request *req, int err)
struct scatterlist *sge;
struct sk_msg *msg_en;
struct tls_rec *rec;
- bool ready = false;
int pending;
rec = container_of(aead_req, struct tls_rec, aead_req);
@@ -480,8 +479,12 @@ static void tls_encrypt_done(struct crypto_async_request *req, int err)
/* If received record is at head of tx_list, schedule tx */
first_rec = list_first_entry(&ctx->tx_list,
struct tls_rec, list);
- if (rec == first_rec)
- ready = true;
+ if (rec == first_rec) {
+ /* Schedule the transmission */
+ if (!test_and_set_bit(BIT_TX_SCHEDULED,
+ &ctx->tx_bitmask))
+ schedule_delayed_work(&ctx->tx_work.work, 1);
+ }
}
spin_lock_bh(&ctx->encrypt_compl_lock);
@@ -490,13 +493,6 @@ static void tls_encrypt_done(struct crypto_async_request *req, int err)
if (!pending && ctx->async_notify)
complete(&ctx->async_wait.completion);
spin_unlock_bh(&ctx->encrypt_compl_lock);
-
- if (!ready)
- return;
-
- /* Schedule the transmission */
- if (!test_and_set_bit(BIT_TX_SCHEDULED, &ctx->tx_bitmask))
- schedule_delayed_work(&ctx->tx_work.work, 1);
}
static int tls_do_encryption(struct sock *sk,
--
2.44.0.278.ge034bb2e1d-goog
This commit resolves an issue in the tegra-xudc USB gadget driver that
incorrectly fetched USB3 PHY instances. The problem stemmed from the
assumption of a one-to-one correspondence between USB2 and USB3 PHY
names and their association with physical USB ports in the device tree.
Previously, the driver associated USB3 PHY names directly with the USB3
instance number, leading to mismatches when mapping the physical USB
ports. For instance, if using USB3-1 PHY, the driver expects the
corresponding PHY name as 'usb3-1'. However, the physical USB ports in
the device tree were designated as USB2-0 and USB3-0 as we only have
one device controller, causing a misalignment.
This commit rectifies the issue by adjusting the PHY naming logic.
Now, the driver correctly correlates the USB2 and USB3 PHY instances,
allowing the USB2-0 and USB3-1 PHYs to form a physical USB port pair
while accurately reflecting their configuration in the device tree by
naming them USB2-0 and USB3-0, respectively.
The change ensures that the PHY and PHY names align appropriately,
resolving the mismatch between physical USB ports and their associated
names in the device tree.
Fixes: b4e19931c98a ("usb: gadget: tegra-xudc: Support multiple device modes")
Cc: stable(a)vger.kernel.org
Signed-off-by: Wayne Chang <waynec(a)nvidia.com>
---
V1 -> V2:no change
drivers/usb/gadget/udc/tegra-xudc.c | 39 ++++++++++++++++++-----------
1 file changed, 25 insertions(+), 14 deletions(-)
diff --git a/drivers/usb/gadget/udc/tegra-xudc.c b/drivers/usb/gadget/udc/tegra-xudc.c
index cb85168fd00c..7aa46d426f31 100644
--- a/drivers/usb/gadget/udc/tegra-xudc.c
+++ b/drivers/usb/gadget/udc/tegra-xudc.c
@@ -3491,8 +3491,8 @@ static void tegra_xudc_device_params_init(struct tegra_xudc *xudc)
static int tegra_xudc_phy_get(struct tegra_xudc *xudc)
{
- int err = 0, usb3;
- unsigned int i;
+ int err = 0, usb3_companion_port;
+ unsigned int i, j;
xudc->utmi_phy = devm_kcalloc(xudc->dev, xudc->soc->num_phys,
sizeof(*xudc->utmi_phy), GFP_KERNEL);
@@ -3520,7 +3520,7 @@ static int tegra_xudc_phy_get(struct tegra_xudc *xudc)
if (IS_ERR(xudc->utmi_phy[i])) {
err = PTR_ERR(xudc->utmi_phy[i]);
dev_err_probe(xudc->dev, err,
- "failed to get usb2-%d PHY\n", i);
+ "failed to get PHY for phy-name usb2-%d\n", i);
goto clean_up;
} else if (xudc->utmi_phy[i]) {
/* Get usb-phy, if utmi phy is available */
@@ -3539,19 +3539,30 @@ static int tegra_xudc_phy_get(struct tegra_xudc *xudc)
}
/* Get USB3 phy */
- usb3 = tegra_xusb_padctl_get_usb3_companion(xudc->padctl, i);
- if (usb3 < 0)
+ usb3_companion_port = tegra_xusb_padctl_get_usb3_companion(xudc->padctl, i);
+ if (usb3_companion_port < 0)
continue;
- snprintf(phy_name, sizeof(phy_name), "usb3-%d", usb3);
- xudc->usb3_phy[i] = devm_phy_optional_get(xudc->dev, phy_name);
- if (IS_ERR(xudc->usb3_phy[i])) {
- err = PTR_ERR(xudc->usb3_phy[i]);
- dev_err_probe(xudc->dev, err,
- "failed to get usb3-%d PHY\n", usb3);
- goto clean_up;
- } else if (xudc->usb3_phy[i])
- dev_dbg(xudc->dev, "usb3-%d PHY registered", usb3);
+ for (j = 0; j < xudc->soc->num_phys; j++) {
+ snprintf(phy_name, sizeof(phy_name), "usb3-%d", j);
+ xudc->usb3_phy[i] = devm_phy_optional_get(xudc->dev, phy_name);
+ if (IS_ERR(xudc->usb3_phy[i])) {
+ err = PTR_ERR(xudc->usb3_phy[i]);
+ dev_err_probe(xudc->dev, err,
+ "failed to get PHY for phy-name usb3-%d\n", j);
+ goto clean_up;
+ } else if (xudc->usb3_phy[i]) {
+ int usb2_port =
+ tegra_xusb_padctl_get_port_number(xudc->utmi_phy[i]);
+ int usb3_port =
+ tegra_xusb_padctl_get_port_number(xudc->usb3_phy[i]);
+ if (usb3_port == usb3_companion_port) {
+ dev_dbg(xudc->dev, "USB2 port %d is paired with USB3 port %d for device mode port %d\n",
+ usb2_port, usb3_port, i);
+ break;
+ }
+ }
+ }
}
return err;
--
2.25.1
When AOP_WRITEPAGE_ACTIVATE is returned (as NFS does when it detects
congestion) it is important that the folio is redirtied.
nfs_writepage_locked() doesn't do this, so files can become corrupted as
writes can be lost.
Note that this is not needed in v6.8 as AOP_WRITEPAGE_ACTIVATE cannot be
returned. It is needed for kernels v5.18..v6.7. Prior to 6.3 the patch
is different as it needs to mention "page", not "folio".
Reported-and-tested-by: Jacek Tomaka <Jacek.Tomaka(a)poczta.fm>
Fixes: 6df25e58532b ("nfs: remove reliance on bdi congestion")
Signed-off-by: NeilBrown <neilb(a)suse.de>
---
fs/nfs/write.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index b664caea8b4e..9e345d3c305a 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -668,8 +668,10 @@ static int nfs_writepage_locked(struct folio *folio,
int err;
if (wbc->sync_mode == WB_SYNC_NONE &&
- NFS_SERVER(inode)->write_congested)
+ NFS_SERVER(inode)->write_congested) {
+ folio_redirty_for_writepage(wbc, folio);
return AOP_WRITEPAGE_ACTIVATE;
+ }
nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE);
nfs_pageio_init_write(&pgio, inode, 0, false,
--
2.43.0
The commit 80dd33cf72d1 ("drivers: base: Fix device link removal")
introduces a workqueue to release the consumer and supplier devices used
in the devlink.
In the queued job, devices are released and in turn, when all the
references to these devices are dropped, the release function of the
device itself is called.
Nothing is present to provide some synchronisation with this workqueue
in order to ensure that all ongoing releasing operations are done and
so, some other operations can be started safely.
For instance, in the following sequence:
1) of_platform_depopulate()
2) of_overlay_remove()
During the step 1, devices are released and related devlinks are removed
(jobs pushed in the workqueue).
During the step 2, OF nodes are destroyed but, without any
synchronisation with devlink removal jobs, of_overlay_remove() can raise
warnings related to missing of_node_put():
ERROR: memory leak, expected refcount 1 instead of 2
Indeed, the missing of_node_put() call is going to be done, too late,
from the workqueue job execution.
Introduce device_link_wait_removal() to offer a way to synchronize
operations waiting for the end of devlink removals (i.e. end of
workqueue jobs).
Also, as a flushing operation is done on the workqueue, the workqueue
used is moved from a system-wide workqueue to a local one.
Cc: stable(a)vger.kernel.org
Signed-off-by: Herve Codina <herve.codina(a)bootlin.com>
Tested-by: Luca Ceresoli <luca.ceresoli(a)bootlin.com>
Reviewed-by: Nuno Sa <nuno.sa(a)analog.com>
---
drivers/base/core.c | 26 +++++++++++++++++++++++---
include/linux/device.h | 1 +
2 files changed, 24 insertions(+), 3 deletions(-)
diff --git a/drivers/base/core.c b/drivers/base/core.c
index d5f4e4aac09b..48b28c59c592 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -44,6 +44,7 @@ static bool fw_devlink_is_permissive(void);
static void __fw_devlink_link_to_consumers(struct device *dev);
static bool fw_devlink_drv_reg_done;
static bool fw_devlink_best_effort;
+static struct workqueue_struct *device_link_wq;
/**
* __fwnode_link_add - Create a link between two fwnode_handles.
@@ -532,12 +533,26 @@ static void devlink_dev_release(struct device *dev)
/*
* It may take a while to complete this work because of the SRCU
* synchronization in device_link_release_fn() and if the consumer or
- * supplier devices get deleted when it runs, so put it into the "long"
- * workqueue.
+ * supplier devices get deleted when it runs, so put it into the
+ * dedicated workqueue.
*/
- queue_work(system_long_wq, &link->rm_work);
+ queue_work(device_link_wq, &link->rm_work);
}
+/**
+ * device_link_wait_removal - Wait for ongoing devlink removal jobs to terminate
+ */
+void device_link_wait_removal(void)
+{
+ /*
+ * devlink removal jobs are queued in the dedicated work queue.
+ * To be sure that all removal jobs are terminated, ensure that any
+ * scheduled work has run to completion.
+ */
+ flush_workqueue(device_link_wq);
+}
+EXPORT_SYMBOL_GPL(device_link_wait_removal);
+
static struct class devlink_class = {
.name = "devlink",
.dev_groups = devlink_groups,
@@ -4099,9 +4114,14 @@ int __init devices_init(void)
sysfs_dev_char_kobj = kobject_create_and_add("char", dev_kobj);
if (!sysfs_dev_char_kobj)
goto char_kobj_err;
+ device_link_wq = alloc_workqueue("device_link_wq", 0, 0);
+ if (!device_link_wq)
+ goto wq_err;
return 0;
+ wq_err:
+ kobject_put(sysfs_dev_char_kobj);
char_kobj_err:
kobject_put(sysfs_dev_block_kobj);
block_kobj_err:
diff --git a/include/linux/device.h b/include/linux/device.h
index 1795121dee9a..d7d8305a72e8 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -1249,6 +1249,7 @@ void device_link_del(struct device_link *link);
void device_link_remove(void *consumer, struct device *supplier);
void device_links_supplier_sync_state_pause(void);
void device_links_supplier_sync_state_resume(void);
+void device_link_wait_removal(void);
/* Create alias, so I can be autoloaded. */
#define MODULE_ALIAS_CHARDEV(major,minor) \
--
2.43.0
In the following sequence:
1) of_platform_depopulate()
2) of_overlay_remove()
During the step 1, devices are destroyed and devlinks are removed.
During the step 2, OF nodes are destroyed but
__of_changeset_entry_destroy() can raise warnings related to missing
of_node_put():
ERROR: memory leak, expected refcount 1 instead of 2 ...
Indeed, during the devlink removals performed at step 1, the removal
itself releasing the device (and the attached of_node) is done by a job
queued in a workqueue and so, it is done asynchronously with respect to
function calls.
When the warning is present, of_node_put() will be called but wrongly
too late from the workqueue job.
In order to be sure that any ongoing devlink removals are done before
the of_node destruction, synchronize the of_changeset_destroy() with the
devlink removals.
Fixes: 80dd33cf72d1 ("drivers: base: Fix device link removal")
Cc: stable(a)vger.kernel.org
Signed-off-by: Herve Codina <herve.codina(a)bootlin.com>
Reviewed-by: Saravana Kannan <saravanak(a)google.com>
Tested-by: Luca Ceresoli <luca.ceresoli(a)bootlin.com>
Reviewed-by: Nuno Sa <nuno.sa(a)analog.com>
---
drivers/of/dynamic.c | 12 ++++++++++++
1 file changed, 12 insertions(+)
diff --git a/drivers/of/dynamic.c b/drivers/of/dynamic.c
index 3bf27052832f..4d57a4e34105 100644
--- a/drivers/of/dynamic.c
+++ b/drivers/of/dynamic.c
@@ -9,6 +9,7 @@
#define pr_fmt(fmt) "OF: " fmt
+#include <linux/device.h>
#include <linux/of.h>
#include <linux/spinlock.h>
#include <linux/slab.h>
@@ -667,6 +668,17 @@ void of_changeset_destroy(struct of_changeset *ocs)
{
struct of_changeset_entry *ce, *cen;
+ /*
+ * When a device is deleted, the device links to/from it are also queued
+ * for deletion. Until these device links are freed, the devices
+ * themselves aren't freed. If the device being deleted is due to an
+ * overlay change, this device might be holding a reference to a device
+ * node that will be freed. So, wait until all already pending device
+ * links are deleted before freeing a device node. This ensures we don't
+ * free any device node that has a non-zero reference count.
+ */
+ device_link_wait_removal();
+
list_for_each_entry_safe_reverse(ce, cen, &ocs->entries, node)
__of_changeset_entry_destroy(ce);
}
--
2.43.0
Hello,
54d217406afe250d7a768783baaa79a035f21d38 fixed an issue in
drm_dp_add_payload_part2 that led to a NULL pointer dereference in
case state is NULL.
The change was (accidentally?) reverted in
5aa1dfcdf0a429e4941e2eef75b006a8c7a8ac49 and the problem reappeared.
The issue is rather spurious, but I've had it appear when unplugging a
thunderbolt dock.
#regzbot introduced 5aa1dfcdf0a429e4941e2eef75b006a8c7a8ac49
There are reports that since version 6.7 update-grub fails to find the
device of the root on systems without initrd and on a single device.
This looks like the device name changed in the output of
/proc/self/mountinfo:
6.5-rc5 working
18 1 0:16 / / rw,noatime - btrfs /dev/sda8 ...
6.7 not working:
17 1 0:15 / / rw,noatime - btrfs /dev/root ...
and "update-grub" shows this error:
/usr/sbin/grub-probe: error: cannot find a device for / (is /dev mounted?)
This looks like it's related to the device name, but grub-probe
recognizes the "/dev/root" path and tries to find the underlying device.
However there's a special case for some filesystems, for btrfs in
particular.
The generic root device detection heuristic is not done and it all
relies on reading the device infos by a btrfs specific ioctl. This ioctl
returns the device name as it was saved at the time of device scan (in
this case it's /dev/root).
The change in 6.7 for temp_fsid to allow several single device
filesystems to exist with the same fsid (and transparently generate a new
UUID at mount time) was to skip caching/registering such devices.
This also skipped mounted devices. One step of scanning is to check if
the device name hasn't changed, and if yes then update the cached value.
This broke the grub-probe as it always read the device /dev/root and
couldn't find it in the system. A temporary workaround is to create a
symlink but this does not survive reboot.
The right fix is to allow updating the device path of a mounted
filesystem even if this is a single device one.
In the fix, check if the device's major:minor number matches with the
cached device. If they do, then we can allow the scan to happen so that
device_list_add() can take care of updating the device path. The file
descriptor remains unchanged.
This does not affect the temp_fsid feature, the UUID of the mounted
filesystem remains the same and the matching is based on device major:minor
which is unique per mounted filesystem.
This covers the path when the device (that exists for all mounted
devices) name changes, updating /dev/root to /dev/sdx. Any other single
device that has a filesystem and is not mounted is still skipped.
Note that if a system is booted and initial mount is done on the
/dev/root device, this will be the cached name of the device. Only after
the command "btrfs device scan" it will change as it triggers the
rename.
The fix was verified by users whose systems were affected.
CC: stable(a)vger.kernel.org # 6.7+
Fixes: bc27d6f0aa0e ("btrfs: scan but don't register device on single device filesystem")
Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=218353
Link: https://lore.kernel.org/lkml/CAKLYgeJ1tUuqLcsquwuFqjDXPSJpEiokrWK2gisPKDZLs…
Signed-off-by: Anand Jain <anand.jain(a)oracle.com>
Tested-by: Alex Romosan <aromosan(a)gmail.com>
Tested-by: CHECK_1234543212345(a)protonmail.com
---
v2:
Updated git commit log from [PATCH] with permission. Thx.
[PATCH] btrfs: always scan a single device when mounted
Add Tested-by.
fs/btrfs/volumes.c | 44 ++++++++++++++++++++++++++++++++++----------
1 file changed, 34 insertions(+), 10 deletions(-)
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 474ab7ed65ea..192c540a650c 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1299,6 +1299,31 @@ int btrfs_forget_devices(dev_t devt)
return ret;
}
+static bool btrfs_skip_registration(struct btrfs_super_block *disk_super,
+ dev_t devt, bool mount_arg_dev)
+{
+ struct btrfs_fs_devices *fs_devices;
+
+ list_for_each_entry(fs_devices, &fs_uuids, fs_list) {
+ struct btrfs_device *device;
+
+ mutex_lock(&fs_devices->device_list_mutex);
+ list_for_each_entry(device, &fs_devices->devices, dev_list) {
+ if (device->devt == devt) {
+ mutex_unlock(&fs_devices->device_list_mutex);
+ return false;
+ }
+ }
+ mutex_unlock(&fs_devices->device_list_mutex);
+ }
+
+ if (!mount_arg_dev && btrfs_super_num_devices(disk_super) == 1 &&
+ !(btrfs_super_flags(disk_super) & BTRFS_SUPER_FLAG_SEEDING))
+ return true;
+
+ return false;
+}
+
/*
* Look for a btrfs signature on a device. This may be called out of the mount path
* and we are not allowed to call set_blocksize during the scan. The superblock
@@ -1316,6 +1341,7 @@ struct btrfs_device *btrfs_scan_one_device(const char *path, blk_mode_t flags,
struct btrfs_device *device = NULL;
struct bdev_handle *bdev_handle;
u64 bytenr, bytenr_orig;
+ dev_t devt = 0;
int ret;
lockdep_assert_held(&uuid_mutex);
@@ -1355,18 +1381,16 @@ struct btrfs_device *btrfs_scan_one_device(const char *path, blk_mode_t flags,
goto error_bdev_put;
}
- if (!mount_arg_dev && btrfs_super_num_devices(disk_super) == 1 &&
- !(btrfs_super_flags(disk_super) & BTRFS_SUPER_FLAG_SEEDING)) {
- dev_t devt;
+ ret = lookup_bdev(path, &devt);
+ if (ret)
+ btrfs_warn(NULL, "lookup bdev failed for path %s: %d",
+ path, ret);
- ret = lookup_bdev(path, &devt);
- if (ret)
- btrfs_warn(NULL, "lookup bdev failed for path %s: %d",
- path, ret);
- else
+ if (btrfs_skip_registration(disk_super, devt, mount_arg_dev)) {
+ pr_debug("BTRFS: skip registering single non-seed device %s\n",
+ path);
+ if (devt)
btrfs_free_stale_devices(devt, NULL);
-
- pr_debug("BTRFS: skip registering single non-seed device %s\n", path);
device = NULL;
goto free_disk_super;
}
--
2.39.3
This commit resolves an issue in the tegra-xudc USB gadget driver that
incorrectly fetched USB3 PHY instances. The problem stemmed from the
assumption of a one-to-one correspondence between USB2 and USB3 PHY
names and their association with physical USB ports in the device tree.
Previously, the driver associated USB3 PHY names directly with the USB3
instance number, leading to mismatches when mapping the physical USB
ports. For instance, if using USB3-1 PHY, the driver expects the
corresponding PHY name as 'usb3-1'. However, the physical USB ports in
the device tree were designated as USB2-0 and USB3-0 as we only have
one device controller, causing a misalignment.
This commit rectifies the issue by adjusting the PHY naming logic.
Now, the driver correctly correlates the USB2 and USB3 PHY instances,
allowing the USB2-0 and USB3-1 PHYs to form a physical USB port pair
while accurately reflecting their configuration in the device tree by
naming them USB2-0 and USB3-0, respectively.
The change ensures that the PHY and PHY names align appropriately,
resolving the mismatch between physical USB ports and their associated
names in the device tree.
Fixes: b4e19931c98a ("usb: gadget: tegra-xudc: Support multiple device modes")
Cc: stable(a)vger.kernel.org
Signed-off-by: Wayne Chang <waynec(a)nvidia.com>
---
drivers/usb/gadget/udc/tegra-xudc.c | 39 ++++++++++++++++++-----------
1 file changed, 25 insertions(+), 14 deletions(-)
diff --git a/drivers/usb/gadget/udc/tegra-xudc.c b/drivers/usb/gadget/udc/tegra-xudc.c
index cb85168fd00c..7aa46d426f31 100644
--- a/drivers/usb/gadget/udc/tegra-xudc.c
+++ b/drivers/usb/gadget/udc/tegra-xudc.c
@@ -3491,8 +3491,8 @@ static void tegra_xudc_device_params_init(struct tegra_xudc *xudc)
static int tegra_xudc_phy_get(struct tegra_xudc *xudc)
{
- int err = 0, usb3;
- unsigned int i;
+ int err = 0, usb3_companion_port;
+ unsigned int i, j;
xudc->utmi_phy = devm_kcalloc(xudc->dev, xudc->soc->num_phys,
sizeof(*xudc->utmi_phy), GFP_KERNEL);
@@ -3520,7 +3520,7 @@ static int tegra_xudc_phy_get(struct tegra_xudc *xudc)
if (IS_ERR(xudc->utmi_phy[i])) {
err = PTR_ERR(xudc->utmi_phy[i]);
dev_err_probe(xudc->dev, err,
- "failed to get usb2-%d PHY\n", i);
+ "failed to get PHY for phy-name usb2-%d\n", i);
goto clean_up;
} else if (xudc->utmi_phy[i]) {
/* Get usb-phy, if utmi phy is available */
@@ -3539,19 +3539,30 @@ static int tegra_xudc_phy_get(struct tegra_xudc *xudc)
}
/* Get USB3 phy */
- usb3 = tegra_xusb_padctl_get_usb3_companion(xudc->padctl, i);
- if (usb3 < 0)
+ usb3_companion_port = tegra_xusb_padctl_get_usb3_companion(xudc->padctl, i);
+ if (usb3_companion_port < 0)
continue;
- snprintf(phy_name, sizeof(phy_name), "usb3-%d", usb3);
- xudc->usb3_phy[i] = devm_phy_optional_get(xudc->dev, phy_name);
- if (IS_ERR(xudc->usb3_phy[i])) {
- err = PTR_ERR(xudc->usb3_phy[i]);
- dev_err_probe(xudc->dev, err,
- "failed to get usb3-%d PHY\n", usb3);
- goto clean_up;
- } else if (xudc->usb3_phy[i])
- dev_dbg(xudc->dev, "usb3-%d PHY registered", usb3);
+ for (j = 0; j < xudc->soc->num_phys; j++) {
+ snprintf(phy_name, sizeof(phy_name), "usb3-%d", j);
+ xudc->usb3_phy[i] = devm_phy_optional_get(xudc->dev, phy_name);
+ if (IS_ERR(xudc->usb3_phy[i])) {
+ err = PTR_ERR(xudc->usb3_phy[i]);
+ dev_err_probe(xudc->dev, err,
+ "failed to get PHY for phy-name usb3-%d\n", j);
+ goto clean_up;
+ } else if (xudc->usb3_phy[i]) {
+ int usb2_port =
+ tegra_xusb_padctl_get_port_number(xudc->utmi_phy[i]);
+ int usb3_port =
+ tegra_xusb_padctl_get_port_number(xudc->usb3_phy[i]);
+ if (usb3_port == usb3_companion_port) {
+ dev_dbg(xudc->dev, "USB2 port %d is paired with USB3 port %d for device mode port %d\n",
+ usb2_port, usb3_port, i);
+ break;
+ }
+ }
+ }
}
return err;
--
2.25.1
From: Xiubo Li <xiubli(a)redhat.com>
The osd code has removed the cursor initializing code and this will make
the sparse read state go into an infinite loop. We should initialize
the cursor just before each sparse-read in messenger v2.
Cc: stable(a)vger.kernel.org
URL: https://tracker.ceph.com/issues/64607
Fixes: 8e46a2d068c9 ("libceph: just wait for more data to be available on the socket")
Reported-by: Luis Henriques <lhenriques(a)suse.de>
Signed-off-by: Xiubo Li <xiubli(a)redhat.com>
---
V2:
- Just removed the unnecessary 'sparse_read_total' check.
net/ceph/messenger_v2.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/net/ceph/messenger_v2.c b/net/ceph/messenger_v2.c
index a0ca5414b333..ab3ab130a911 100644
--- a/net/ceph/messenger_v2.c
+++ b/net/ceph/messenger_v2.c
@@ -2034,6 +2034,9 @@ static int prepare_sparse_read_data(struct ceph_connection *con)
if (!con_secure(con))
con->in_data_crc = -1;
+ ceph_msg_data_cursor_init(&con->v2.in_cursor, con->in_msg,
+ con->in_msg->sparse_read_total);
+
reset_in_kvecs(con);
con->v2.in_state = IN_S_PREPARE_SPARSE_DATA_CONT;
con->v2.data_len_remain = data_len(msg);
--
2.43.0
This is the start of the stable review cycle for the 5.15.151 release.
There are 83 patches in this series, all will be posted as a response
to this one. If anyone has any issues with these being applied, please
let me know.
Responses should be made by Thu, 07 Mar 2024 11:31:11 +0000.
Anything received after that time might be too late.
The whole patch series can be found in one patch at:
https://www.kernel.org/pub/linux/kernel/v5.x/stable-review/patch-5.15.151-r…
or in the git tree and branch at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git linux-5.15.y
and the diffstat can be found below.
thanks,
greg k-h
-------------
Pseudo-Shortlog of commits:
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Linux 5.15.151-rc2
Davide Caratti <dcaratti(a)redhat.com>
mptcp: fix double-free on socket dismantle
Gal Pressman <gal(a)nvidia.com>
Revert "tls: rx: move counting TlsDecryptErrors for sync"
Jakub Kicinski <kuba(a)kernel.org>
net: tls: fix async vs NIC crypto offload
Martynas Pumputis <m(a)lambda.lt>
bpf: Derive source IP addr via bpf_*_fib_lookup()
Louis DeLosSantos <louis.delos.devel(a)gmail.com>
bpf: Add table ID to bpf_fib_lookup BPF helper
Martin KaFai Lau <martin.lau(a)kernel.org>
bpf: Add BPF_FIB_LOOKUP_SKIP_NEIGH for bpf_fib_lookup
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Revert "interconnect: Teach lockdep about icc_bw_lock order"
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Revert "interconnect: Fix locking for runpm vs reclaim"
Bartosz Golaszewski <bartosz.golaszewski(a)linaro.org>
gpio: fix resource unwinding order in error path
Andy Shevchenko <andriy.shevchenko(a)linux.intel.com>
gpiolib: Fix the error path order in gpiochip_add_data_with_key()
Arturas Moskvinas <arturas.moskvinas(a)gmail.com>
gpio: 74x164: Enable output pins after registers are reset
Kuniyuki Iwashima <kuniyu(a)amazon.com>
af_unix: Drop oob_skb ref before purging queue in GC.
Max Krummenacher <max.krummenacher(a)toradex.com>
Revert "drm/bridge: lt8912b: Register and attach our DSI device at probe"
Oscar Salvador <osalvador(a)suse.de>
fs,hugetlb: fix NULL pointer dereference in hugetlbs_fill_super
Baokun Li <libaokun1(a)huawei.com>
cachefiles: fix memory leak in cachefiles_add_cache()
Paolo Abeni <pabeni(a)redhat.com>
mptcp: fix possible deadlock in subflow diag
Paolo Abeni <pabeni(a)redhat.com>
mptcp: push at DSS boundaries
Geliang Tang <tanggeliang(a)kylinos.cn>
mptcp: add needs_id for netlink appending addr
Jean Sacren <sakiwit(a)gmail.com>
mptcp: clean up harmless false expressions
Matthieu Baerts (NGI0) <matttbe(a)kernel.org>
selftests: mptcp: add missing kconfig for NF Filter in v6
Matthieu Baerts (NGI0) <matttbe(a)kernel.org>
selftests: mptcp: add missing kconfig for NF Filter
Paolo Abeni <pabeni(a)redhat.com>
mptcp: rename timer related helper to less confusing names
Paolo Abeni <pabeni(a)redhat.com>
mptcp: process pending subflow error on close
Paolo Abeni <pabeni(a)redhat.com>
mptcp: move __mptcp_error_report in protocol.c
Paolo Bonzini <pbonzini(a)redhat.com>
x86/cpu/intel: Detect TME keyid bits before setting MTRR mask registers
Bjorn Andersson <quic_bjorande(a)quicinc.com>
pmdomain: qcom: rpmhpd: Fix enabled_corner aggregation
Elad Nachman <enachman(a)marvell.com>
mmc: sdhci-xenon: fix PHY init clock stability
Elad Nachman <enachman(a)marvell.com>
mmc: sdhci-xenon: add timeout for PHY init complete
Ivan Semenov <ivan(a)semenov.dev>
mmc: core: Fix eMMC initialization with 1-bit bus connection
Curtis Klein <curtis.klein(a)hpe.com>
dmaengine: fsl-qdma: init irq after reg initialization
Tadeusz Struk <tstruk(a)gigaio.com>
dmaengine: ptdma: use consistent DMA masks
Peng Ma <peng.ma(a)nxp.com>
dmaengine: fsl-qdma: fix SoC may hang on 16 byte unaligned read
David Sterba <dsterba(a)suse.com>
btrfs: dev-replace: properly validate device names
Johannes Berg <johannes.berg(a)intel.com>
wifi: nl80211: reject iftype change with mesh ID change
Alexander Ofitserov <oficerovas(a)altlinux.org>
gtp: fix use-after-free and null-ptr-deref in gtp_newlink()
Takashi Sakamoto <o-takashi(a)sakamocchi.jp>
ALSA: firewire-lib: fix to check cycle continuity
Tetsuo Handa <penguin-kernel(a)I-love.SAKURA.ne.jp>
tomoyo: fix UAF write bug in tomoyo_write_control()
Dimitris Vlachos <dvlachos(a)ics.forth.gr>
riscv: Sparse-Memory/vmemmap out-of-bounds fix
David Howells <dhowells(a)redhat.com>
afs: Fix endless loop in directory parsing
Jiri Slaby (SUSE) <jirislaby(a)kernel.org>
fbcon: always restore the old font data in fbcon_do_set_font()
Takashi Iwai <tiwai(a)suse.de>
ALSA: Drop leftover snd-rtctimer stuff from Makefile
Hans de Goede <hdegoede(a)redhat.com>
power: supply: bq27xxx-i2c: Do not free non existing IRQ
Arnd Bergmann <arnd(a)arndb.de>
efi/capsule-loader: fix incorrect allocation size
Sabrina Dubroca <sd(a)queasysnail.net>
tls: decrement decrypt_pending if no async completion will be called
Jakub Kicinski <kuba(a)kernel.org>
tls: rx: use async as an in-out argument
Jakub Kicinski <kuba(a)kernel.org>
tls: rx: assume crypto always calls our callback
Jakub Kicinski <kuba(a)kernel.org>
tls: rx: move counting TlsDecryptErrors for sync
Jakub Kicinski <kuba(a)kernel.org>
tls: rx: don't track the async count
Jakub Kicinski <kuba(a)kernel.org>
tls: rx: factor out writing ContentType to cmsg
Jakub Kicinski <kuba(a)kernel.org>
tls: rx: wrap decryption arguments in a structure
Jakub Kicinski <kuba(a)kernel.org>
tls: rx: don't report text length from the bowels of decrypt
Jakub Kicinski <kuba(a)kernel.org>
tls: rx: drop unnecessary arguments from tls_setup_from_iter()
Jakub Kicinski <kuba(a)kernel.org>
tls: hw: rx: use return value of tls_device_decrypted() to carry status
Jakub Kicinski <kuba(a)kernel.org>
tls: rx: refactor decrypt_skb_update()
Jakub Kicinski <kuba(a)kernel.org>
tls: rx: don't issue wake ups when data is decrypted
Jakub Kicinski <kuba(a)kernel.org>
tls: rx: don't store the decryption status in socket context
Jakub Kicinski <kuba(a)kernel.org>
tls: rx: don't store the record type in socket context
Oleksij Rempel <linux(a)rempel-privat.de>
igb: extend PTP timestamp adjustments to i211
Lin Ma <linma(a)zju.edu.cn>
rtnetlink: fix error logic of IFLA_BRIDGE_FLAGS writing back
Florian Westphal <fw(a)strlen.de>
netfilter: bridge: confirm multicast packets before passing them up the stack
Florian Westphal <fw(a)strlen.de>
netfilter: let reset rules clean out conntrack entries
Florian Westphal <fw(a)strlen.de>
netfilter: make function op structures const
Florian Westphal <fw(a)strlen.de>
netfilter: core: move ip_ct_attach indirection to struct nf_ct_hook
Florian Westphal <fw(a)strlen.de>
netfilter: nfnetlink_queue: silence bogus compiler warning
Ignat Korchagin <ignat(a)cloudflare.com>
netfilter: nf_tables: allow NFPROTO_INET in nft_(match/target)_validate()
Kai-Heng Feng <kai.heng.feng(a)canonical.com>
Bluetooth: Enforce validation on max value of connection interval
Luiz Augusto von Dentz <luiz.von.dentz(a)intel.com>
Bluetooth: hci_event: Fix handling of HCI_EV_IO_CAPA_REQUEST
Zijun Hu <quic_zijuhu(a)quicinc.com>
Bluetooth: hci_event: Fix wrongly recorded wakeup BD_ADDR
Ying Hsu <yinghsu(a)chromium.org>
Bluetooth: Avoid potential use-after-free in hci_error_reset
Jakub Raczynski <j.raczynski(a)samsung.com>
stmmac: Clear variable when destroying workqueue
Justin Iurman <justin.iurman(a)uliege.be>
uapi: in6: replace temporary label with rfc9486
Javier Carrasco <javier.carrasco.cruz(a)gmail.com>
net: usb: dm9601: fix wrong return value in dm9601_mdio_read
Jakub Kicinski <kuba(a)kernel.org>
veth: try harder when allocating queue memory
Vasily Averin <vvs(a)openvz.org>
net: enable memcg accounting for veth queues
Oleksij Rempel <linux(a)rempel-privat.de>
lan78xx: enable auto speed configuration for LAN7850 if no EEPROM is detected
Eric Dumazet <edumazet(a)google.com>
ipv6: fix potential "struct net" leak in inet6_rtm_getaddr()
Jakub Kicinski <kuba(a)kernel.org>
net: veth: clear GRO when clearing XDP even when down
Doug Smythies <dsmythies(a)telus.net>
cpufreq: intel_pstate: fix pstate limits enforcement for adjust_perf call back
Yunjian Wang <wangyunjian(a)huawei.com>
tun: Fix xdp_rxq_info's queue_index when detaching
Florian Westphal <fw(a)strlen.de>
net: ip_tunnel: prevent perpetual headroom growth
Ryosuke Yasuoka <ryasuoka(a)redhat.com>
netlink: Fix kernel-infoleak-after-free in __skb_datagram_iter
Han Xu <han.xu(a)nxp.com>
mtd: spinand: gigadevice: Fix the get ecc status issue
Pablo Neira Ayuso <pablo(a)netfilter.org>
netfilter: nf_tables: disallow timeout for anonymous sets
-------------
Diffstat:
Makefile | 4 +-
arch/riscv/include/asm/pgtable.h | 2 +-
arch/x86/kernel/cpu/intel.c | 178 ++++++------
drivers/cpufreq/intel_pstate.c | 3 +
drivers/dma/fsl-qdma.c | 25 +-
drivers/dma/ptdma/ptdma-dmaengine.c | 2 -
drivers/firmware/efi/capsule-loader.c | 2 +-
drivers/gpio/gpio-74x164.c | 4 +-
drivers/gpio/gpiolib.c | 12 +-
drivers/gpu/drm/bridge/lontium-lt8912b.c | 11 +-
drivers/interconnect/core.c | 18 +-
drivers/mmc/core/mmc.c | 2 +
drivers/mmc/host/sdhci-xenon-phy.c | 48 +++-
drivers/mtd/nand/spi/gigadevice.c | 6 +-
drivers/net/ethernet/intel/igb/igb_ptp.c | 5 +-
drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 4 +-
drivers/net/gtp.c | 12 +-
drivers/net/tun.c | 1 +
drivers/net/usb/dm9601.c | 2 +-
drivers/net/usb/lan78xx.c | 3 +-
drivers/net/veth.c | 40 +--
drivers/power/supply/bq27xxx_battery_i2c.c | 4 +-
drivers/soc/qcom/rpmhpd.c | 7 +-
drivers/video/fbdev/core/fbcon.c | 8 +-
fs/afs/dir.c | 4 +-
fs/btrfs/dev-replace.c | 24 +-
fs/cachefiles/bind.c | 3 +
fs/hugetlbfs/inode.c | 6 +-
include/linux/netfilter.h | 14 +-
include/net/ipv6_stubs.h | 5 +
include/net/netfilter/nf_conntrack.h | 8 +
include/net/strparser.h | 4 +
include/net/tls.h | 11 +-
include/uapi/linux/bpf.h | 37 ++-
include/uapi/linux/in6.h | 2 +-
net/bluetooth/hci_core.c | 7 +-
net/bluetooth/hci_event.c | 13 +-
net/bluetooth/l2cap_core.c | 8 +-
net/bridge/br_netfilter_hooks.c | 96 +++++++
net/bridge/netfilter/nf_conntrack_bridge.c | 30 ++
net/core/filter.c | 67 ++++-
net/core/rtnetlink.c | 11 +-
net/ipv4/ip_tunnel.c | 28 +-
net/ipv4/netfilter/nf_reject_ipv4.c | 1 +
net/ipv6/addrconf.c | 7 +-
net/ipv6/af_inet6.c | 1 +
net/ipv6/netfilter/nf_reject_ipv6.c | 1 +
net/mptcp/diag.c | 3 +
net/mptcp/pm_netlink.c | 30 +-
net/mptcp/protocol.c | 123 +++++++--
net/mptcp/subflow.c | 36 ---
net/netfilter/core.c | 45 +--
net/netfilter/nf_conntrack_core.c | 21 +-
net/netfilter/nf_conntrack_netlink.c | 4 +-
net/netfilter/nf_conntrack_proto_tcp.c | 35 +++
net/netfilter/nf_nat_core.c | 2 +-
net/netfilter/nf_tables_api.c | 7 +
net/netfilter/nfnetlink_queue.c | 10 +-
net/netfilter/nft_compat.c | 20 ++
net/netlink/af_netlink.c | 2 +-
net/tls/tls_device.c | 6 +-
net/tls/tls_sw.c | 316 ++++++++++------------
net/unix/garbage.c | 22 +-
net/wireless/nl80211.c | 2 +
security/tomoyo/common.c | 3 +-
sound/core/Makefile | 1 -
sound/firewire/amdtp-stream.c | 2 +-
tools/include/uapi/linux/bpf.h | 37 ++-
tools/testing/selftests/net/mptcp/config | 2 +
69 files changed, 991 insertions(+), 529 deletions(-)
From: Dominique Martinet <dominique.martinet(a)atmark-techno.com>
Commit e7794c14fd73 ("mmc: rpmb: fixes pause retune on all RPMB
partitions.") added a mask check for 'part_type', but the mask used was
wrong leading to the code intended for rpmb also being executed for GP3.
On some MMCs (but not all) this would make gp3 partition inaccessible:
armadillo:~# head -c 1 < /dev/mmcblk2gp3
head: standard input: I/O error
armadillo:~# dmesg -c
[ 422.976583] mmc2: running CQE recovery
[ 423.058182] mmc2: running CQE recovery
[ 423.137607] mmc2: running CQE recovery
[ 423.137802] blk_update_request: I/O error, dev mmcblk2gp3, sector 0 op 0x0:(READ) flags 0x80700 phys_seg 4 prio class 0
[ 423.237125] mmc2: running CQE recovery
[ 423.318206] mmc2: running CQE recovery
[ 423.397680] mmc2: running CQE recovery
[ 423.397837] blk_update_request: I/O error, dev mmcblk2gp3, sector 0 op 0x0:(READ) flags 0x0 phys_seg 1 prio class 0
[ 423.408287] Buffer I/O error on dev mmcblk2gp3, logical block 0, async page read
The part_type values of interest here are defined as follows:
main 0
boot0 1
boot1 2
rpmb 3
gp0 4
gp1 5
gp2 6
gp3 7
so mask with EXT_CSD_PART_CONFIG_ACC_MASK (7) to correctly identify rpmb
Fixes: e7794c14fd73 ("mmc: rpmb: fixes pause retune on all RPMB partitions.")
Cc: stable(a)vger.kernel.org
Cc: Jorge Ramirez-Ortiz <jorge(a)foundries.io>
Signed-off-by: Dominique Martinet <dominique.martinet(a)atmark-techno.com>
---
A couple of notes:
- this doesn't fail on all eMMCs, I can still access gp3 on some models
but it seems to fail reliably with micron's "G1M15L"
- I've encountered this on the 5.10 backport (in 5.10.208), so that'll
need to be backported everywhere the fix was taken...
Thanks!
---
drivers/mmc/core/block.c | 10 ++++++----
1 file changed, 6 insertions(+), 4 deletions(-)
diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c
index 32d49100dff5..86efa6084696 100644
--- a/drivers/mmc/core/block.c
+++ b/drivers/mmc/core/block.c
@@ -874,10 +874,11 @@ static const struct block_device_operations mmc_bdops = {
static int mmc_blk_part_switch_pre(struct mmc_card *card,
unsigned int part_type)
{
- const unsigned int mask = EXT_CSD_PART_CONFIG_ACC_RPMB;
+ const unsigned int mask = EXT_CSD_PART_CONFIG_ACC_MASK;
+ const unsigned int rpmb = EXT_CSD_PART_CONFIG_ACC_RPMB;
int ret = 0;
- if ((part_type & mask) == mask) {
+ if ((part_type & mask) == rpmb) {
if (card->ext_csd.cmdq_en) {
ret = mmc_cmdq_disable(card);
if (ret)
@@ -892,10 +893,11 @@ static int mmc_blk_part_switch_pre(struct mmc_card *card,
static int mmc_blk_part_switch_post(struct mmc_card *card,
unsigned int part_type)
{
- const unsigned int mask = EXT_CSD_PART_CONFIG_ACC_RPMB;
+ const unsigned int mask = EXT_CSD_PART_CONFIG_ACC_MASK;
+ const unsigned int rpmb = EXT_CSD_PART_CONFIG_ACC_RPMB;
int ret = 0;
- if ((part_type & mask) == mask) {
+ if ((part_type & mask) == rpmb) {
mmc_retune_unpause(card->host);
if (card->reenable_cmdq && !card->ext_csd.cmdq_en)
ret = mmc_cmdq_enable(card);
---
base-commit: 5847c9777c303a792202c609bd761dceb60f4eed
change-id: 20240306-mmc-partswitch-c3a50b5084ae
Best regards,
--
Dominique Martinet | Asmadeus
In the following sequence:
1) of_platform_depopulate()
2) of_overlay_remove()
During the step 1, devices are destroyed and devlinks are removed.
During the step 2, OF nodes are destroyed but
__of_changeset_entry_destroy() can raise warnings related to missing
of_node_put():
ERROR: memory leak, expected refcount 1 instead of 2 ...
Indeed, during the devlink removals performed at step 1, the removal
itself releasing the device (and the attached of_node) is done by a job
queued in a workqueue and so, it is done asynchronously with respect to
function calls.
When the warning is present, of_node_put() will be called but wrongly
too late from the workqueue job.
In order to be sure that any ongoing devlink removals are done before
the of_node destruction, synchronize the of_changeset_destroy() with the
devlink removals.
Fixes: 80dd33cf72d1 ("drivers: base: Fix device link removal")
Cc: stable(a)vger.kernel.org
Signed-off-by: Herve Codina <herve.codina(a)bootlin.com>
---
drivers/of/dynamic.c | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/drivers/of/dynamic.c b/drivers/of/dynamic.c
index 3bf27052832f..169e2a9ae22f 100644
--- a/drivers/of/dynamic.c
+++ b/drivers/of/dynamic.c
@@ -9,6 +9,7 @@
#define pr_fmt(fmt) "OF: " fmt
+#include <linux/device.h>
#include <linux/of.h>
#include <linux/spinlock.h>
#include <linux/slab.h>
@@ -667,6 +668,12 @@ void of_changeset_destroy(struct of_changeset *ocs)
{
struct of_changeset_entry *ce, *cen;
+ /*
+ * Wait for any ongoing device link removals before destroying some of
+ * nodes.
+ */
+ device_link_wait_removal();
+
list_for_each_entry_safe_reverse(ce, cen, &ocs->entries, node)
__of_changeset_entry_destroy(ce);
}
--
2.43.0
The commit 80dd33cf72d1 ("drivers: base: Fix device link removal")
introduces a workqueue to release the consumer and supplier devices used
in the devlink.
In the queued job, devices are released and in turn, when all the
references to these devices are dropped, the release function of the
device itself is called.
Nothing is present to provide some synchronisation with this workqueue
in order to ensure that all ongoing releasing operations are done and
so, some other operations can be started safely.
For instance, in the following sequence:
1) of_platform_depopulate()
2) of_overlay_remove()
During the step 1, devices are released and related devlinks are removed
(jobs pushed in the workqueue).
During the step 2, OF nodes are destroyed but, without any
synchronisation with devlink removal jobs, of_overlay_remove() can raise
warnings related to missing of_node_put():
ERROR: memory leak, expected refcount 1 instead of 2
Indeed, the missing of_node_put() call is going to be done, too late,
from the workqueue job execution.
Introduce device_link_wait_removal() to offer a way to synchronize
operations waiting for the end of devlink removals (i.e. end of
workqueue jobs).
Also, as a flushing operation is done on the workqueue, the workqueue
used is moved from a system-wide workqueue to a local one.
Fixes: 80dd33cf72d1 ("drivers: base: Fix device link removal")
Cc: stable(a)vger.kernel.org
Signed-off-by: Herve Codina <herve.codina(a)bootlin.com>
---
drivers/base/core.c | 26 +++++++++++++++++++++++---
include/linux/device.h | 1 +
2 files changed, 24 insertions(+), 3 deletions(-)
diff --git a/drivers/base/core.c b/drivers/base/core.c
index d5f4e4aac09b..48b28c59c592 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -44,6 +44,7 @@ static bool fw_devlink_is_permissive(void);
static void __fw_devlink_link_to_consumers(struct device *dev);
static bool fw_devlink_drv_reg_done;
static bool fw_devlink_best_effort;
+static struct workqueue_struct *device_link_wq;
/**
* __fwnode_link_add - Create a link between two fwnode_handles.
@@ -532,12 +533,26 @@ static void devlink_dev_release(struct device *dev)
/*
* It may take a while to complete this work because of the SRCU
* synchronization in device_link_release_fn() and if the consumer or
- * supplier devices get deleted when it runs, so put it into the "long"
- * workqueue.
+ * supplier devices get deleted when it runs, so put it into the
+ * dedicated workqueue.
*/
- queue_work(system_long_wq, &link->rm_work);
+ queue_work(device_link_wq, &link->rm_work);
}
+/**
+ * device_link_wait_removal - Wait for ongoing devlink removal jobs to terminate
+ */
+void device_link_wait_removal(void)
+{
+ /*
+ * devlink removal jobs are queued in the dedicated work queue.
+ * To be sure that all removal jobs are terminated, ensure that any
+ * scheduled work has run to completion.
+ */
+ flush_workqueue(device_link_wq);
+}
+EXPORT_SYMBOL_GPL(device_link_wait_removal);
+
static struct class devlink_class = {
.name = "devlink",
.dev_groups = devlink_groups,
@@ -4099,9 +4114,14 @@ int __init devices_init(void)
sysfs_dev_char_kobj = kobject_create_and_add("char", dev_kobj);
if (!sysfs_dev_char_kobj)
goto char_kobj_err;
+ device_link_wq = alloc_workqueue("device_link_wq", 0, 0);
+ if (!device_link_wq)
+ goto wq_err;
return 0;
+ wq_err:
+ kobject_put(sysfs_dev_char_kobj);
char_kobj_err:
kobject_put(sysfs_dev_block_kobj);
block_kobj_err:
diff --git a/include/linux/device.h b/include/linux/device.h
index 1795121dee9a..d7d8305a72e8 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -1249,6 +1249,7 @@ void device_link_del(struct device_link *link);
void device_link_remove(void *consumer, struct device *supplier);
void device_links_supplier_sync_state_pause(void);
void device_links_supplier_sync_state_resume(void);
+void device_link_wait_removal(void);
/* Create alias, so I can be autoloaded. */
#define MODULE_ALIAS_CHARDEV(major,minor) \
--
2.43.0
The quilt patch titled
Subject: mm: swap: fix race between free_swap_and_cache() and swapoff()
has been removed from the -mm tree. Its filename was
mm-swap-fix-race-between-free_swap_and_cache-and-swapoff.patch
This patch was dropped because it was merged into the mm-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Ryan Roberts <ryan.roberts(a)arm.com>
Subject: mm: swap: fix race between free_swap_and_cache() and swapoff()
Date: Wed, 6 Mar 2024 14:03:56 +0000
There was previously a theoretical window where swapoff() could run and
teardown a swap_info_struct while a call to free_swap_and_cache() was
running in another thread. This could cause, amongst other bad
possibilities, swap_page_trans_huge_swapped() (called by
free_swap_and_cache()) to access the freed memory for swap_map.
This is a theoretical problem and I haven't been able to provoke it from a
test case. But there has been agreement based on code review that this is
possible (see link below).
Fix it by using get_swap_device()/put_swap_device(), which will stall
swapoff(). There was an extra check in _swap_info_get() to confirm that
the swap entry was not free. This isn't present in get_swap_device()
because it doesn't make sense in general due to the race between getting
the reference and swapoff. So I've added an equivalent check directly in
free_swap_and_cache().
Details of how to provoke one possible issue (thanks to David Hildenbrand
for deriving this):
--8<-----
__swap_entry_free() might be the last user and result in
"count == SWAP_HAS_CACHE".
swapoff->try_to_unuse() will stop as soon as si->inuse_pages==0.
So the question is: could someone reclaim the folio and turn
si->inuse_pages==0, before we completed swap_page_trans_huge_swapped().
Imagine the following: 2 MiB folio in the swapcache. Only 2 subpages are
still referenced by swap entries.
Process 1 still references subpage 0 via swap entry.
Process 2 still references subpage 1 via swap entry.
Process 1 quits. Calls free_swap_and_cache().
-> count == SWAP_HAS_CACHE
[then, preempted in the hypervisor etc.]
Process 2 quits. Calls free_swap_and_cache().
-> count == SWAP_HAS_CACHE
Process 2 goes ahead, passes swap_page_trans_huge_swapped(), and calls
__try_to_reclaim_swap().
__try_to_reclaim_swap()->folio_free_swap()->delete_from_swap_cache()->
put_swap_folio()->free_swap_slot()->swapcache_free_entries()->
swap_entry_free()->swap_range_free()->
...
WRITE_ONCE(si->inuse_pages, si->inuse_pages - nr_entries);
What stops swapoff from succeeding after process 2 reclaimed the swap cache
but before process 1 finished its call to swap_page_trans_huge_swapped()?
--8<-----
Link: https://lkml.kernel.org/r/20240306140356.3974886-1-ryan.roberts@arm.com
Fixes: 7c00bafee87c ("mm/swap: free swap slots in batch")
Closes: https://lore.kernel.org/linux-mm/65a66eb9-41f8-4790-8db2-0c70ea15979f@redha…
Signed-off-by: Ryan Roberts <ryan.roberts(a)arm.com>
Cc: David Hildenbrand <david(a)redhat.com>
Cc: "Huang, Ying" <ying.huang(a)intel.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/swapfile.c | 13 ++++++++++++-
1 file changed, 12 insertions(+), 1 deletion(-)
--- a/mm/swapfile.c~mm-swap-fix-race-between-free_swap_and_cache-and-swapoff
+++ a/mm/swapfile.c
@@ -1232,6 +1232,11 @@ static unsigned char __swap_entry_free_l
* with get_swap_device() and put_swap_device(), unless the swap
* functions call get/put_swap_device() by themselves.
*
+ * Note that when only holding the PTL, swapoff might succeed immediately
+ * after freeing a swap entry. Therefore, immediately after
+ * __swap_entry_free(), the swap info might become stale and should not
+ * be touched without a prior get_swap_device().
+ *
* Check whether swap entry is valid in the swap device. If so,
* return pointer to swap_info_struct, and keep the swap entry valid
* via preventing the swap device from being swapoff, until
@@ -1609,13 +1614,19 @@ int free_swap_and_cache(swp_entry_t entr
if (non_swap_entry(entry))
return 1;
- p = _swap_info_get(entry);
+ p = get_swap_device(entry);
if (p) {
+ if (WARN_ON(data_race(!p->swap_map[swp_offset(entry)]))) {
+ put_swap_device(p);
+ return 0;
+ }
+
count = __swap_entry_free(p, entry);
if (count == SWAP_HAS_CACHE &&
!swap_page_trans_huge_swapped(p, entry))
__try_to_reclaim_swap(p, swp_offset(entry),
TTRS_UNMAPPED | TTRS_FULL);
+ put_swap_device(p);
}
return p != NULL;
}
_
Patches currently in -mm which might be from ryan.roberts(a)arm.com are
The patch titled
Subject: mm: swap: fix race between free_swap_and_cache() and swapoff()
has been added to the -mm mm-unstable branch. Its filename is
mm-swap-fix-race-between-free_swap_and_cache-and-swapoff.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Ryan Roberts <ryan.roberts(a)arm.com>
Subject: mm: swap: fix race between free_swap_and_cache() and swapoff()
Date: Wed, 6 Mar 2024 14:03:56 +0000
There was previously a theoretical window where swapoff() could run and
teardown a swap_info_struct while a call to free_swap_and_cache() was
running in another thread. This could cause, amongst other bad
possibilities, swap_page_trans_huge_swapped() (called by
free_swap_and_cache()) to access the freed memory for swap_map.
This is a theoretical problem and I haven't been able to provoke it from a
test case. But there has been agreement based on code review that this is
possible (see link below).
Fix it by using get_swap_device()/put_swap_device(), which will stall
swapoff(). There was an extra check in _swap_info_get() to confirm that
the swap entry was not free. This isn't present in get_swap_device()
because it doesn't make sense in general due to the race between getting
the reference and swapoff. So I've added an equivalent check directly in
free_swap_and_cache().
Details of how to provoke one possible issue (thanks to David Hildenbrand
for deriving this):
--8<-----
__swap_entry_free() might be the last user and result in
"count == SWAP_HAS_CACHE".
swapoff->try_to_unuse() will stop as soon as si->inuse_pages==0.
So the question is: could someone reclaim the folio and turn
si->inuse_pages==0, before we completed swap_page_trans_huge_swapped().
Imagine the following: 2 MiB folio in the swapcache. Only 2 subpages are
still referenced by swap entries.
Process 1 still references subpage 0 via swap entry.
Process 2 still references subpage 1 via swap entry.
Process 1 quits. Calls free_swap_and_cache().
-> count == SWAP_HAS_CACHE
[then, preempted in the hypervisor etc.]
Process 2 quits. Calls free_swap_and_cache().
-> count == SWAP_HAS_CACHE
Process 2 goes ahead, passes swap_page_trans_huge_swapped(), and calls
__try_to_reclaim_swap().
__try_to_reclaim_swap()->folio_free_swap()->delete_from_swap_cache()->
put_swap_folio()->free_swap_slot()->swapcache_free_entries()->
swap_entry_free()->swap_range_free()->
...
WRITE_ONCE(si->inuse_pages, si->inuse_pages - nr_entries);
What stops swapoff from succeeding after process 2 reclaimed the swap cache
but before process 1 finished its call to swap_page_trans_huge_swapped()?
--8<-----
Link: https://lkml.kernel.org/r/20240306140356.3974886-1-ryan.roberts@arm.com
Fixes: 7c00bafee87c ("mm/swap: free swap slots in batch")
Closes: https://lore.kernel.org/linux-mm/65a66eb9-41f8-4790-8db2-0c70ea15979f@redha…
Signed-off-by: Ryan Roberts <ryan.roberts(a)arm.com>
Cc: David Hildenbrand <david(a)redhat.com>
Cc: "Huang, Ying" <ying.huang(a)intel.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/swapfile.c | 13 ++++++++++++-
1 file changed, 12 insertions(+), 1 deletion(-)
--- a/mm/swapfile.c~mm-swap-fix-race-between-free_swap_and_cache-and-swapoff
+++ a/mm/swapfile.c
@@ -1232,6 +1232,11 @@ static unsigned char __swap_entry_free_l
* with get_swap_device() and put_swap_device(), unless the swap
* functions call get/put_swap_device() by themselves.
*
+ * Note that when only holding the PTL, swapoff might succeed immediately
+ * after freeing a swap entry. Therefore, immediately after
+ * __swap_entry_free(), the swap info might become stale and should not
+ * be touched without a prior get_swap_device().
+ *
* Check whether swap entry is valid in the swap device. If so,
* return pointer to swap_info_struct, and keep the swap entry valid
* via preventing the swap device from being swapoff, until
@@ -1609,13 +1614,19 @@ int free_swap_and_cache(swp_entry_t entr
if (non_swap_entry(entry))
return 1;
- p = _swap_info_get(entry);
+ p = get_swap_device(entry);
if (p) {
+ if (WARN_ON(data_race(!p->swap_map[swp_offset(entry)]))) {
+ put_swap_device(p);
+ return 0;
+ }
+
count = __swap_entry_free(p, entry);
if (count == SWAP_HAS_CACHE &&
!swap_page_trans_huge_swapped(p, entry))
__try_to_reclaim_swap(p, swp_offset(entry),
TTRS_UNMAPPED | TTRS_FULL);
+ put_swap_device(p);
}
return p != NULL;
}
_
Patches currently in -mm which might be from ryan.roberts(a)arm.com are
mm-swap-fix-race-between-free_swap_and_cache-and-swapoff.patch
When entering hibernation with a udisk in the system, if the udisk is
gone or fails to resume in the thaw phase of hibernation, its state
will be set to NOTATTACHED. At this point, usb_hub_wq is already frozen
and cannot handle the disconnect event. Next, in the poweroff phase of
hibernation, a SYNCHRONIZE_CACHE SCSI command will be sent to this udisk
when powering off this SCSI device, which will cause uas_submit_urbs to be
called to submit URBs for the sense/data/cmd pipes. However, these URBs
will fail to submit because the device was set to the NOTATTACHED state.
Then, uas_submit_urbs will return SCSI_MLQUEUE_DEVICE_BUSY to the caller.
That will lead the SCSI layer into an ugly loop and the system fails to
go into hibernation.
On the other hand, when we specially check for -ENODEV in function
uas_queuecommand_lck, returning DID_ERROR to the SCSI layer will cause the
device poweroff to fail and the system to shut down instead of entering
hibernation.
To fix this issue, let uas_submit_urbs return the original generic error
when submitting a URB fails. At the same time, we need to translate -ENODEV
to DID_NOT_CONNECT for the SCSI layer.
Suggested-by: Oliver Neukum <oneukum(a)suse.com>
Cc: stable(a)vger.kernel.org
Signed-off-by: Weitao Wang <WeitaoWang-oc(a)zhaoxin.com>
---
v3->v4
- remove unused variable declaration in function uas_submit_urbs.
drivers/usb/storage/uas.c | 28 +++++++++++++---------------
1 file changed, 13 insertions(+), 15 deletions(-)
diff --git a/drivers/usb/storage/uas.c b/drivers/usb/storage/uas.c
index 9707f53cfda9..5930cfc03111 100644
--- a/drivers/usb/storage/uas.c
+++ b/drivers/usb/storage/uas.c
@@ -533,7 +533,7 @@ static struct urb *uas_alloc_cmd_urb(struct uas_dev_info *devinfo, gfp_t gfp,
* daft to me.
*/
-static struct urb *uas_submit_sense_urb(struct scsi_cmnd *cmnd, gfp_t gfp)
+static int uas_submit_sense_urb(struct scsi_cmnd *cmnd, gfp_t gfp)
{
struct uas_dev_info *devinfo = cmnd->device->hostdata;
struct urb *urb;
@@ -541,30 +541,28 @@ static struct urb *uas_submit_sense_urb(struct scsi_cmnd *cmnd, gfp_t gfp)
urb = uas_alloc_sense_urb(devinfo, gfp, cmnd);
if (!urb)
- return NULL;
+ return -ENOMEM;
usb_anchor_urb(urb, &devinfo->sense_urbs);
err = usb_submit_urb(urb, gfp);
if (err) {
usb_unanchor_urb(urb);
uas_log_cmd_state(cmnd, "sense submit err", err);
usb_free_urb(urb);
- return NULL;
}
- return urb;
+ return err;
}
static int uas_submit_urbs(struct scsi_cmnd *cmnd,
struct uas_dev_info *devinfo)
{
struct uas_cmd_info *cmdinfo = scsi_cmd_priv(cmnd);
- struct urb *urb;
int err;
lockdep_assert_held(&devinfo->lock);
if (cmdinfo->state & SUBMIT_STATUS_URB) {
- urb = uas_submit_sense_urb(cmnd, GFP_ATOMIC);
- if (!urb)
- return SCSI_MLQUEUE_DEVICE_BUSY;
+ err = uas_submit_sense_urb(cmnd, GFP_ATOMIC);
+ if (err)
+ return err;
cmdinfo->state &= ~SUBMIT_STATUS_URB;
}
@@ -572,7 +570,7 @@ static int uas_submit_urbs(struct scsi_cmnd *cmnd,
cmdinfo->data_in_urb = uas_alloc_data_urb(devinfo, GFP_ATOMIC,
cmnd, DMA_FROM_DEVICE);
if (!cmdinfo->data_in_urb)
- return SCSI_MLQUEUE_DEVICE_BUSY;
+ return -ENOMEM;
cmdinfo->state &= ~ALLOC_DATA_IN_URB;
}
@@ -582,7 +580,7 @@ static int uas_submit_urbs(struct scsi_cmnd *cmnd,
if (err) {
usb_unanchor_urb(cmdinfo->data_in_urb);
uas_log_cmd_state(cmnd, "data in submit err", err);
- return SCSI_MLQUEUE_DEVICE_BUSY;
+ return err;
}
cmdinfo->state &= ~SUBMIT_DATA_IN_URB;
cmdinfo->state |= DATA_IN_URB_INFLIGHT;
@@ -592,7 +590,7 @@ static int uas_submit_urbs(struct scsi_cmnd *cmnd,
cmdinfo->data_out_urb = uas_alloc_data_urb(devinfo, GFP_ATOMIC,
cmnd, DMA_TO_DEVICE);
if (!cmdinfo->data_out_urb)
- return SCSI_MLQUEUE_DEVICE_BUSY;
+ return -ENOMEM;
cmdinfo->state &= ~ALLOC_DATA_OUT_URB;
}
@@ -602,7 +600,7 @@ static int uas_submit_urbs(struct scsi_cmnd *cmnd,
if (err) {
usb_unanchor_urb(cmdinfo->data_out_urb);
uas_log_cmd_state(cmnd, "data out submit err", err);
- return SCSI_MLQUEUE_DEVICE_BUSY;
+ return err;
}
cmdinfo->state &= ~SUBMIT_DATA_OUT_URB;
cmdinfo->state |= DATA_OUT_URB_INFLIGHT;
@@ -611,7 +609,7 @@ static int uas_submit_urbs(struct scsi_cmnd *cmnd,
if (cmdinfo->state & ALLOC_CMD_URB) {
cmdinfo->cmd_urb = uas_alloc_cmd_urb(devinfo, GFP_ATOMIC, cmnd);
if (!cmdinfo->cmd_urb)
- return SCSI_MLQUEUE_DEVICE_BUSY;
+ return -ENOMEM;
cmdinfo->state &= ~ALLOC_CMD_URB;
}
@@ -621,7 +619,7 @@ static int uas_submit_urbs(struct scsi_cmnd *cmnd,
if (err) {
usb_unanchor_urb(cmdinfo->cmd_urb);
uas_log_cmd_state(cmnd, "cmd submit err", err);
- return SCSI_MLQUEUE_DEVICE_BUSY;
+ return err;
}
cmdinfo->cmd_urb = NULL;
cmdinfo->state &= ~SUBMIT_CMD_URB;
@@ -698,7 +696,7 @@ static int uas_queuecommand_lck(struct scsi_cmnd *cmnd)
* of queueing, no matter how fatal the error
*/
if (err == -ENODEV) {
- set_host_byte(cmnd, DID_ERROR);
+ set_host_byte(cmnd, DID_NO_CONNECT);
scsi_done(cmnd);
goto zombie;
}
--
2.32.0
When entering hibernation with a udisk in the system, if the udisk is
gone or fails to resume in the thaw phase of hibernation, its state
will be set to NOTATTACHED. At this point, usb_hub_wq is already frozen
and cannot handle the disconnect event. Next, in the poweroff phase of
hibernation, a SYNCHRONIZE_CACHE SCSI command is sent to this udisk
when powering off the SCSI device, which causes uas_submit_urbs to be
called to submit URBs for the sense/data/cmd pipes. However, these URBs
fail to submit because the device is in the NOTATTACHED state. Then,
uas_submit_urbs returns SCSI_MLQUEUE_DEVICE_BUSY to the caller. That makes
the SCSI layer go into an ugly loop and the system fails to enter hibernation.
On the other hand, when we specifically check for -ENODEV in function
uas_queuecommand_lck, returning DID_ERROR to the SCSI layer causes the device
poweroff to fail and the system to shut down instead of entering hibernation.
To fix this issue, let uas_submit_urbs return the original generic error
when submitting a URB fails. At the same time, we need to translate -ENODEV
to DID_NO_CONNECT for the SCSI layer.
Suggested-by: Oliver Neukum <oneukum(a)suse.com>
Cc: stable(a)vger.kernel.org
Signed-off-by: Weitao Wang <WeitaoWang-oc(a)zhaoxin.com>
---
v2->v3
- Modify the description of this patch.
- An error is returned directly when submitting URB fails.
drivers/usb/storage/uas.c | 27 +++++++++++++--------------
1 file changed, 13 insertions(+), 14 deletions(-)
diff --git a/drivers/usb/storage/uas.c b/drivers/usb/storage/uas.c
index 9707f53cfda9..689396777b6f 100644
--- a/drivers/usb/storage/uas.c
+++ b/drivers/usb/storage/uas.c
@@ -533,7 +533,7 @@ static struct urb *uas_alloc_cmd_urb(struct uas_dev_info *devinfo, gfp_t gfp,
* daft to me.
*/
-static struct urb *uas_submit_sense_urb(struct scsi_cmnd *cmnd, gfp_t gfp)
+static int uas_submit_sense_urb(struct scsi_cmnd *cmnd, gfp_t gfp)
{
struct uas_dev_info *devinfo = cmnd->device->hostdata;
struct urb *urb;
@@ -541,16 +541,15 @@ static struct urb *uas_submit_sense_urb(struct scsi_cmnd *cmnd, gfp_t gfp)
urb = uas_alloc_sense_urb(devinfo, gfp, cmnd);
if (!urb)
- return NULL;
+ return -ENOMEM;
usb_anchor_urb(urb, &devinfo->sense_urbs);
err = usb_submit_urb(urb, gfp);
if (err) {
usb_unanchor_urb(urb);
uas_log_cmd_state(cmnd, "sense submit err", err);
usb_free_urb(urb);
- return NULL;
}
- return urb;
+ return err;
}
static int uas_submit_urbs(struct scsi_cmnd *cmnd,
@@ -562,9 +561,9 @@ static int uas_submit_urbs(struct scsi_cmnd *cmnd,
lockdep_assert_held(&devinfo->lock);
if (cmdinfo->state & SUBMIT_STATUS_URB) {
- urb = uas_submit_sense_urb(cmnd, GFP_ATOMIC);
- if (!urb)
- return SCSI_MLQUEUE_DEVICE_BUSY;
+ err = uas_submit_sense_urb(cmnd, GFP_ATOMIC);
+ if (err)
+ return err;
cmdinfo->state &= ~SUBMIT_STATUS_URB;
}
@@ -572,7 +571,7 @@ static int uas_submit_urbs(struct scsi_cmnd *cmnd,
cmdinfo->data_in_urb = uas_alloc_data_urb(devinfo, GFP_ATOMIC,
cmnd, DMA_FROM_DEVICE);
if (!cmdinfo->data_in_urb)
- return SCSI_MLQUEUE_DEVICE_BUSY;
+ return -ENOMEM;
cmdinfo->state &= ~ALLOC_DATA_IN_URB;
}
@@ -582,7 +581,7 @@ static int uas_submit_urbs(struct scsi_cmnd *cmnd,
if (err) {
usb_unanchor_urb(cmdinfo->data_in_urb);
uas_log_cmd_state(cmnd, "data in submit err", err);
- return SCSI_MLQUEUE_DEVICE_BUSY;
+ return err;
}
cmdinfo->state &= ~SUBMIT_DATA_IN_URB;
cmdinfo->state |= DATA_IN_URB_INFLIGHT;
@@ -592,7 +591,7 @@ static int uas_submit_urbs(struct scsi_cmnd *cmnd,
cmdinfo->data_out_urb = uas_alloc_data_urb(devinfo, GFP_ATOMIC,
cmnd, DMA_TO_DEVICE);
if (!cmdinfo->data_out_urb)
- return SCSI_MLQUEUE_DEVICE_BUSY;
+ return -ENOMEM;
cmdinfo->state &= ~ALLOC_DATA_OUT_URB;
}
@@ -602,7 +601,7 @@ static int uas_submit_urbs(struct scsi_cmnd *cmnd,
if (err) {
usb_unanchor_urb(cmdinfo->data_out_urb);
uas_log_cmd_state(cmnd, "data out submit err", err);
- return SCSI_MLQUEUE_DEVICE_BUSY;
+ return err;
}
cmdinfo->state &= ~SUBMIT_DATA_OUT_URB;
cmdinfo->state |= DATA_OUT_URB_INFLIGHT;
@@ -611,7 +610,7 @@ static int uas_submit_urbs(struct scsi_cmnd *cmnd,
if (cmdinfo->state & ALLOC_CMD_URB) {
cmdinfo->cmd_urb = uas_alloc_cmd_urb(devinfo, GFP_ATOMIC, cmnd);
if (!cmdinfo->cmd_urb)
- return SCSI_MLQUEUE_DEVICE_BUSY;
+ return -ENOMEM;
cmdinfo->state &= ~ALLOC_CMD_URB;
}
@@ -621,7 +620,7 @@ static int uas_submit_urbs(struct scsi_cmnd *cmnd,
if (err) {
usb_unanchor_urb(cmdinfo->cmd_urb);
uas_log_cmd_state(cmnd, "cmd submit err", err);
- return SCSI_MLQUEUE_DEVICE_BUSY;
+ return err;
}
cmdinfo->cmd_urb = NULL;
cmdinfo->state &= ~SUBMIT_CMD_URB;
@@ -698,7 +697,7 @@ static int uas_queuecommand_lck(struct scsi_cmnd *cmnd)
* of queueing, no matter how fatal the error
*/
if (err == -ENODEV) {
- set_host_byte(cmnd, DID_ERROR);
+ set_host_byte(cmnd, DID_NO_CONNECT);
scsi_done(cmnd);
goto zombie;
}
--
2.32.0
From: Zi Yan <ziy(a)nvidia.com>
The tail pages in a THP can have swap entry information stored in their
private field. When migrating to a new page, all tail pages of the new
page need to update ->private to avoid future data corruption.
This fix is stable-only, since after commit 07e09c483cbe ("mm/huge_memory:
work on folio->swap instead of page->private when splitting folio"),
subpages of a swapcached THP no longer require this maintenance.
Adding THPs to the swapcache was introduced in commit
38d8b4e6bdc87 ("mm, THP, swap: delay splitting THP during swap out"),
where each subpage of a THP added to the swapcache had its own swapcache
entry and required the ->private field to point to the correct swapcache
entry. Later, when THP migration functionality was implemented in commit
616b8371539a6 ("mm: thp: enable thp migration in generic path"),
it initially did not handle the subpages of swapcached THPs, failing to
update their ->private fields or replace the subpage pointers in the
swapcache. Subsequently, commit e71769ae5260 ("mm: enable thp migration
for shmem thp") addressed the swapcache update aspect. This patch fixes
the update of subpage ->private fields.
Closes: https://lore.kernel.org/linux-mm/1707814102-22682-1-git-send-email-quic_cha…
Fixes: 616b8371539a ("mm: thp: enable thp migration in generic path")
Signed-off-by: Zi Yan <ziy(a)nvidia.com>
Acked-by: David Hildenbrand <david(a)redhat.com>
---
mm/migrate.c | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/mm/migrate.c b/mm/migrate.c
index c7d5566623ad..c37af50f312d 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -424,8 +424,12 @@ int migrate_page_move_mapping(struct address_space *mapping,
if (PageSwapBacked(page)) {
__SetPageSwapBacked(newpage);
if (PageSwapCache(page)) {
+ int i;
+
SetPageSwapCache(newpage);
- set_page_private(newpage, page_private(page));
+ for (i = 0; i < (1 << compound_order(page)); i++)
+ set_page_private(newpage + i,
+ page_private(page + i));
}
} else {
VM_BUG_ON_PAGE(PageSwapCache(page), page);
--
2.43.0
From: Zi Yan <ziy(a)nvidia.com>
The tail pages in a THP can have swap entry information stored in their
private field. When migrating to a new page, all tail pages of the new
page need to update ->private to avoid future data corruption.
This fix is stable-only, since after commit 07e09c483cbe ("mm/huge_memory:
work on folio->swap instead of page->private when splitting folio"),
subpages of a swapcached THP no longer require this maintenance.
Adding THPs to the swapcache was introduced in commit
38d8b4e6bdc87 ("mm, THP, swap: delay splitting THP during swap out"),
where each subpage of a THP added to the swapcache had its own swapcache
entry and required the ->private field to point to the correct swapcache
entry. Later, when THP migration functionality was implemented in commit
616b8371539a6 ("mm: thp: enable thp migration in generic path"),
it initially did not handle the subpages of swapcached THPs, failing to
update their ->private fields or replace the subpage pointers in the
swapcache. Subsequently, commit e71769ae5260 ("mm: enable thp migration
for shmem thp") addressed the swapcache update aspect. This patch fixes
the update of subpage ->private fields.
Closes: https://lore.kernel.org/linux-mm/1707814102-22682-1-git-send-email-quic_cha…
Fixes: 616b8371539a ("mm: thp: enable thp migration in generic path")
Signed-off-by: Zi Yan <ziy(a)nvidia.com>
Acked-by: David Hildenbrand <david(a)redhat.com>
---
mm/migrate.c | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/mm/migrate.c b/mm/migrate.c
index fcb7eb6a6eca..c0a8f3c9e256 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -447,8 +447,12 @@ int migrate_page_move_mapping(struct address_space *mapping,
if (PageSwapBacked(page)) {
__SetPageSwapBacked(newpage);
if (PageSwapCache(page)) {
+ int i;
+
SetPageSwapCache(newpage);
- set_page_private(newpage, page_private(page));
+ for (i = 0; i < (1 << compound_order(page)); i++)
+ set_page_private(newpage + i,
+ page_private(page + i));
}
} else {
VM_BUG_ON_PAGE(PageSwapCache(page), page);
--
2.43.0
From: Zi Yan <ziy(a)nvidia.com>
The tail pages in a THP can have swap entry information stored in their
private field. When migrating to a new page, all tail pages of the new
page need to update ->private to avoid future data corruption.
This fix is stable-only, since after commit 07e09c483cbe ("mm/huge_memory:
work on folio->swap instead of page->private when splitting folio"),
subpages of a swapcached THP no longer require this maintenance.
Adding THPs to the swapcache was introduced in commit
38d8b4e6bdc87 ("mm, THP, swap: delay splitting THP during swap out"),
where each subpage of a THP added to the swapcache had its own swapcache
entry and required the ->private field to point to the correct swapcache
entry. Later, when THP migration functionality was implemented in commit
616b8371539a6 ("mm: thp: enable thp migration in generic path"),
it initially did not handle the subpages of swapcached THPs, failing to
update their ->private fields or replace the subpage pointers in the
swapcache. Subsequently, commit e71769ae5260 ("mm: enable thp migration
for shmem thp") addressed the swapcache update aspect. This patch fixes
the update of subpage ->private fields.
Closes: https://lore.kernel.org/linux-mm/1707814102-22682-1-git-send-email-quic_cha…
Fixes: 616b8371539a ("mm: thp: enable thp migration in generic path")
Signed-off-by: Zi Yan <ziy(a)nvidia.com>
Acked-by: David Hildenbrand <david(a)redhat.com>
---
mm/migrate.c | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/mm/migrate.c b/mm/migrate.c
index 034b0662fd3b..9cfd53eaeb4e 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -441,8 +441,12 @@ int migrate_page_move_mapping(struct address_space *mapping,
if (PageSwapBacked(page)) {
__SetPageSwapBacked(newpage);
if (PageSwapCache(page)) {
+ int i;
+
SetPageSwapCache(newpage);
- set_page_private(newpage, page_private(page));
+ for (i = 0; i < (1 << compound_order(page)); i++)
+ set_page_private(newpage + i,
+ page_private(page + i));
}
} else {
VM_BUG_ON_PAGE(PageSwapCache(page), page);
--
2.43.0
Hello everyone,
#regzbot introduced v6.7.5..v6.7.6
I'm experiencing an issue where kexec does a full firmware reboot
instead of kexec reboot.
Issue first submitted at OpenSuse bugzilla [0].
OS details as follows:
Distributor ID: openSUSE
Description: openSUSE Tumbleweed-Slowroll
Release: 20240213
Issue has been reproduced by building kernel from source.
kexec works as expected in kernel v6.7.5.
kexec does full firmware reboot in kernel v6.7.6.
I followed the docs here [1] to perform git bisect and find the culprit,
hope it's alright as I'm quite out of my depth here.
Git bisect logs:
git bisect start
# status: waiting for both good and bad commits
# bad: [b631f5b445dc3379f67ff63a2e4c58f22d4975dc] Linux 6.7.6
git bisect bad b631f5b445dc3379f67ff63a2e4c58f22d4975dc
# status: waiting for good commit(s), bad commit known
# good: [004dcea13dc10acaf1486d9939be4c793834c13c] Linux 6.7.5
git bisect good 004dcea13dc10acaf1486d9939be4c793834c13c
Let me know if there's anything else I can do to help troubleshoot the
issue.
[0]: https://bugzilla.suse.com/show_bug.cgi?id=1220541
[1]: https://docs.kernel.org/admin-guide/bug-bisect.html
Kind regards,
Pavin Joseph.
I'm announcing the release of the 4.19.309 kernel.
All users of the 4.19 kernel series must upgrade.
The updated 4.19.y git tree can be found at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable.git linux-4.19.y
and can be browsed at the normal kernel.org git web browser:
https://git.kernel.org/?p=linux/kernel/git/stable/linux-stable.git;a=summary
thanks,
greg k-h
------------
Makefile | 2 +-
drivers/firmware/efi/capsule-loader.c | 2 +-
drivers/gpio/gpio-74x164.c | 4 ++--
drivers/mmc/core/mmc.c | 2 ++
drivers/net/gtp.c | 12 ++++++------
drivers/net/tun.c | 1 +
drivers/net/usb/dm9601.c | 2 +-
drivers/net/usb/lan78xx.c | 3 ++-
drivers/power/supply/bq27xxx_battery_i2c.c | 4 +++-
fs/btrfs/dev-replace.c | 24 ++++++++++++++++++++----
fs/cachefiles/bind.c | 3 +++
net/bluetooth/hci_core.c | 7 ++++---
net/bluetooth/hci_event.c | 9 ++++++++-
net/bluetooth/l2cap_core.c | 8 +++++++-
net/netlink/af_netlink.c | 2 +-
net/wireless/nl80211.c | 2 ++
sound/core/Makefile | 1 -
17 files changed, 64 insertions(+), 24 deletions(-)
Alexander Ofitserov (1):
gtp: fix use-after-free and null-ptr-deref in gtp_newlink()
Arnd Bergmann (1):
efi/capsule-loader: fix incorrect allocation size
Arturas Moskvinas (1):
gpio: 74x164: Enable output pins after registers are reset
Baokun Li (1):
cachefiles: fix memory leak in cachefiles_add_cache()
David Sterba (1):
btrfs: dev-replace: properly validate device names
Greg Kroah-Hartman (1):
Linux 4.19.309
Hans de Goede (1):
power: supply: bq27xxx-i2c: Do not free non existing IRQ
Ivan Semenov (1):
mmc: core: Fix eMMC initialization with 1-bit bus connection
Javier Carrasco (1):
net: usb: dm9601: fix wrong return value in dm9601_mdio_read
Johannes Berg (1):
wifi: nl80211: reject iftype change with mesh ID change
Kai-Heng Feng (1):
Bluetooth: Enforce validation on max value of connection interval
Luiz Augusto von Dentz (1):
Bluetooth: hci_event: Fix handling of HCI_EV_IO_CAPA_REQUEST
Oleksij Rempel (1):
lan78xx: enable auto speed configuration for LAN7850 if no EEPROM is detected
Ryosuke Yasuoka (1):
netlink: Fix kernel-infoleak-after-free in __skb_datagram_iter
Takashi Iwai (1):
ALSA: Drop leftover snd-rtctimer stuff from Makefile
Ying Hsu (1):
Bluetooth: Avoid potential use-after-free in hci_error_reset
Yunjian Wang (1):
tun: Fix xdp_rxq_info's queue_index when detaching
There was previously a theoretical window where swapoff() could run and
tear down a swap_info_struct while a call to free_swap_and_cache() was
running in another thread. This could cause, amongst other bad
possibilities, swap_page_trans_huge_swapped() (called by
free_swap_and_cache()) to access the freed memory for swap_map.
This is a theoretical problem and I haven't been able to provoke it from
a test case. But there has been agreement based on code review that this
is possible (see link below).
Fix it by using get_swap_device()/put_swap_device(), which will stall
swapoff(). There was an extra check in _swap_info_get() to confirm that
the swap entry was not free. This isn't present in get_swap_device()
because it doesn't make sense in general due to the race between getting
the reference and swapoff. So I've added an equivalent check directly in
free_swap_and_cache().
Details of how to provoke one possible issue (thanks to David
Hildenbrand for deriving this):
--8<-----
__swap_entry_free() might be the last user and result in
"count == SWAP_HAS_CACHE".
swapoff->try_to_unuse() will stop as soon as si->inuse_pages==0.
So the question is: could someone reclaim the folio and turn
si->inuse_pages==0, before we completed swap_page_trans_huge_swapped().
Imagine the following: 2 MiB folio in the swapcache. Only 2 subpages are
still referenced by swap entries.
Process 1 still references subpage 0 via swap entry.
Process 2 still references subpage 1 via swap entry.
Process 1 quits. Calls free_swap_and_cache().
-> count == SWAP_HAS_CACHE
[then, preempted in the hypervisor etc.]
Process 2 quits. Calls free_swap_and_cache().
-> count == SWAP_HAS_CACHE
Process 2 goes ahead, passes swap_page_trans_huge_swapped(), and calls
__try_to_reclaim_swap().
__try_to_reclaim_swap()->folio_free_swap()->delete_from_swap_cache()->
put_swap_folio()->free_swap_slot()->swapcache_free_entries()->
swap_entry_free()->swap_range_free()->
...
WRITE_ONCE(si->inuse_pages, si->inuse_pages - nr_entries);
What stops swapoff from succeeding after process 2 reclaimed the swap cache
but before process 1 finished its call to swap_page_trans_huge_swapped()?
--8<-----
Fixes: 7c00bafee87c ("mm/swap: free swap slots in batch")
Closes: https://lore.kernel.org/linux-mm/65a66eb9-41f8-4790-8db2-0c70ea15979f@redha…
Cc: stable(a)vger.kernel.org
Signed-off-by: Ryan Roberts <ryan.roberts(a)arm.com>
---
Hi Andrew,
Please replace v1 of this patch in mm-unstable with this version.
Changes since v1:
- Added comments for get_swap_device() as suggested by David
- Moved check that swap entry is not free from get_swap_device() to
free_swap_and_cache() since there are some paths that legitimately call with
a free offset.
I haven't addressed the recommendation by Huang Ying [1] to also revert commit
23b230ba8ac3 ("mm/swap: print bad swap offset entry in get_swap_device"). It
should be done separately to this, and we need to conclude the discussion
first.
[1] https://lore.kernel.org/all/875xy0842q.fsf@yhuang6-desk2.ccr.corp.intel.com/
Thanks,
Ryan
mm/swapfile.c | 13 ++++++++++++-
1 file changed, 12 insertions(+), 1 deletion(-)
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 2b3a2d85e350..1155a6304119 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -1232,6 +1232,11 @@ static unsigned char __swap_entry_free_locked(struct swap_info_struct *p,
* with get_swap_device() and put_swap_device(), unless the swap
* functions call get/put_swap_device() by themselves.
*
+ * Note that when only holding the PTL, swapoff might succeed immediately
+ * after freeing a swap entry. Therefore, immediately after
+ * __swap_entry_free(), the swap info might become stale and should not
+ * be touched without a prior get_swap_device().
+ *
* Check whether swap entry is valid in the swap device. If so,
* return pointer to swap_info_struct, and keep the swap entry valid
* via preventing the swap device from being swapoff, until
@@ -1609,13 +1614,19 @@ int free_swap_and_cache(swp_entry_t entry)
if (non_swap_entry(entry))
return 1;
- p = _swap_info_get(entry);
+ p = get_swap_device(entry);
if (p) {
+ if (WARN_ON(data_race(!p->swap_map[swp_offset(entry)]))) {
+ put_swap_device(p);
+ return 0;
+ }
+
count = __swap_entry_free(p, entry);
if (count == SWAP_HAS_CACHE &&
!swap_page_trans_huge_swapped(p, entry))
__try_to_reclaim_swap(p, swp_offset(entry),
TTRS_UNMAPPED | TTRS_FULL);
+ put_swap_device(p);
}
return p != NULL;
}
--
2.25.1
This is the start of the stable review cycle for the 5.10.212 release.
There are 41 patches in this series, all will be posted as a response
to this one. If anyone has any issues with these being applied, please
let me know.
Responses should be made by Thu, 07 Mar 2024 11:31:02 +0000.
Anything received after that time might be too late.
The whole patch series can be found in one patch at:
https://www.kernel.org/pub/linux/kernel/v5.x/stable-review/patch-5.10.212-r…
or in the git tree and branch at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git linux-5.10.y
and the diffstat can be found below.
thanks,
greg k-h
-------------
Pseudo-Shortlog of commits:
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Linux 5.10.212-rc2
Davide Caratti <dcaratti(a)redhat.com>
mptcp: fix double-free on socket dismantle
Chuanhong Guo <gch981213(a)gmail.com>
mtd: spinand: gigadevice: fix Quad IO for GD5F1GQ5UExxG
Bartosz Golaszewski <bartosz.golaszewski(a)linaro.org>
gpio: fix resource unwinding order in error path
Andy Shevchenko <andriy.shevchenko(a)linux.intel.com>
gpiolib: Fix the error path order in gpiochip_add_data_with_key()
Arturas Moskvinas <arturas.moskvinas(a)gmail.com>
gpio: 74x164: Enable output pins after registers are reset
Oscar Salvador <osalvador(a)suse.de>
fs,hugetlb: fix NULL pointer dereference in hugetlbs_fill_super
Baokun Li <libaokun1(a)huawei.com>
cachefiles: fix memory leak in cachefiles_add_cache()
Baokun Li <libaokun1(a)huawei.com>
ext4: avoid bb_free and bb_fragments inconsistency in mb_free_blocks()
Paolo Abeni <pabeni(a)redhat.com>
mptcp: fix possible deadlock in subflow diag
Paolo Bonzini <pbonzini(a)redhat.com>
x86/cpu/intel: Detect TME keyid bits before setting MTRR mask registers
Bjorn Andersson <quic_bjorande(a)quicinc.com>
pmdomain: qcom: rpmhpd: Fix enabled_corner aggregation
Elad Nachman <enachman(a)marvell.com>
mmc: sdhci-xenon: fix PHY init clock stability
Elad Nachman <enachman(a)marvell.com>
mmc: sdhci-xenon: add timeout for PHY init complete
Ivan Semenov <ivan(a)semenov.dev>
mmc: core: Fix eMMC initialization with 1-bit bus connection
Curtis Klein <curtis.klein(a)hpe.com>
dmaengine: fsl-qdma: init irq after reg initialization
Peng Ma <peng.ma(a)nxp.com>
dmaengine: fsl-qdma: fix SoC may hang on 16 byte unaligned read
David Sterba <dsterba(a)suse.com>
btrfs: dev-replace: properly validate device names
Johannes Berg <johannes.berg(a)intel.com>
wifi: nl80211: reject iftype change with mesh ID change
Alexander Ofitserov <oficerovas(a)altlinux.org>
gtp: fix use-after-free and null-ptr-deref in gtp_newlink()
Tetsuo Handa <penguin-kernel(a)I-love.SAKURA.ne.jp>
tomoyo: fix UAF write bug in tomoyo_write_control()
Dimitris Vlachos <dvlachos(a)ics.forth.gr>
riscv: Sparse-Memory/vmemmap out-of-bounds fix
David Howells <dhowells(a)redhat.com>
afs: Fix endless loop in directory parsing
Takashi Iwai <tiwai(a)suse.de>
ALSA: Drop leftover snd-rtctimer stuff from Makefile
Hans de Goede <hdegoede(a)redhat.com>
power: supply: bq27xxx-i2c: Do not free non existing IRQ
Arnd Bergmann <arnd(a)arndb.de>
efi/capsule-loader: fix incorrect allocation size
Lin Ma <linma(a)zju.edu.cn>
rtnetlink: fix error logic of IFLA_BRIDGE_FLAGS writing back
Ignat Korchagin <ignat(a)cloudflare.com>
netfilter: nf_tables: allow NFPROTO_INET in nft_(match/target)_validate()
Kai-Heng Feng <kai.heng.feng(a)canonical.com>
Bluetooth: Enforce validation on max value of connection interval
Luiz Augusto von Dentz <luiz.von.dentz(a)intel.com>
Bluetooth: hci_event: Fix handling of HCI_EV_IO_CAPA_REQUEST
Zijun Hu <quic_zijuhu(a)quicinc.com>
Bluetooth: hci_event: Fix wrongly recorded wakeup BD_ADDR
Ying Hsu <yinghsu(a)chromium.org>
Bluetooth: Avoid potential use-after-free in hci_error_reset
Javier Carrasco <javier.carrasco.cruz(a)gmail.com>
net: usb: dm9601: fix wrong return value in dm9601_mdio_read
Oleksij Rempel <linux(a)rempel-privat.de>
lan78xx: enable auto speed configuration for LAN7850 if no EEPROM is detected
Eric Dumazet <edumazet(a)google.com>
ipv6: fix potential "struct net" leak in inet6_rtm_getaddr()
Yunjian Wang <wangyunjian(a)huawei.com>
tun: Fix xdp_rxq_info's queue_index when detaching
Florian Westphal <fw(a)strlen.de>
net: ip_tunnel: prevent perpetual headroom growth
Ryosuke Yasuoka <ryasuoka(a)redhat.com>
netlink: Fix kernel-infoleak-after-free in __skb_datagram_iter
Han Xu <han.xu(a)nxp.com>
mtd: spinand: gigadevice: Fix the get ecc status issue
Reto Schneider <reto.schneider(a)husqvarnagroup.com>
mtd: spinand: gigadevice: Support GD5F1GQ5UExxG
zhenwei pi <pizhenwei(a)bytedance.com>
crypto: virtio/akcipher - Fix stack overflow on memcpy
Hans de Goede <hdegoede(a)redhat.com>
platform/x86: touchscreen_dmi: Allow partial (prefix) matches for ACPI names
-------------
Diffstat:
Makefile | 4 +-
arch/riscv/include/asm/pgtable.h | 2 +-
arch/x86/kernel/cpu/intel.c | 178 +++++++++++----------
.../crypto/virtio/virtio_crypto_akcipher_algs.c | 5 +-
drivers/dma/fsl-qdma.c | 25 +--
drivers/firmware/efi/capsule-loader.c | 2 +-
drivers/gpio/gpio-74x164.c | 4 +-
drivers/gpio/gpiolib.c | 12 +-
drivers/mmc/core/mmc.c | 2 +
drivers/mmc/host/sdhci-xenon-phy.c | 48 ++++--
drivers/mtd/nand/spi/gigadevice.c | 81 ++++++++--
drivers/net/gtp.c | 12 +-
drivers/net/tun.c | 1 +
drivers/net/usb/dm9601.c | 2 +-
drivers/net/usb/lan78xx.c | 3 +-
drivers/platform/x86/touchscreen_dmi.c | 4 +-
drivers/power/supply/bq27xxx_battery_i2c.c | 4 +-
drivers/soc/qcom/rpmhpd.c | 7 +-
fs/afs/dir.c | 4 +-
fs/btrfs/dev-replace.c | 24 ++-
fs/cachefiles/bind.c | 3 +
fs/ext4/mballoc.c | 39 ++---
fs/hugetlbfs/inode.c | 6 +-
net/bluetooth/hci_core.c | 7 +-
net/bluetooth/hci_event.c | 13 +-
net/bluetooth/l2cap_core.c | 8 +-
net/core/rtnetlink.c | 11 +-
net/ipv4/ip_tunnel.c | 28 +++-
net/ipv6/addrconf.c | 7 +-
net/mptcp/diag.c | 3 +
net/mptcp/protocol.c | 49 ++++++
net/netfilter/nft_compat.c | 20 +++
net/netlink/af_netlink.c | 2 +-
net/wireless/nl80211.c | 2 +
security/tomoyo/common.c | 3 +-
sound/core/Makefile | 1 -
36 files changed, 430 insertions(+), 196 deletions(-)
From: Zi Yan <ziy(a)nvidia.com>
The tail pages in a THP can have swap entry information stored in their
private field. When migrating to a new page, all tail pages of the new
page need to update ->private to avoid future data corruption.
Signed-off-by: Zi Yan <ziy(a)nvidia.com>
---
mm/migrate.c | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/mm/migrate.c b/mm/migrate.c
index c93dd6a31c31..c5968021fde0 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -423,8 +423,12 @@ int folio_migrate_mapping(struct address_space *mapping,
if (folio_test_swapbacked(folio)) {
__folio_set_swapbacked(newfolio);
if (folio_test_swapcache(folio)) {
+ int i;
+
folio_set_swapcache(newfolio);
- newfolio->private = folio_get_private(folio);
+ for (i = 0; i < nr; i++)
+ set_page_private(folio_page(newfolio, i),
+ page_private(folio_page(folio, i)));
}
entries = nr;
} else {
--
2.43.0
Fuzzing of 5.10 stable branch reports a slab-out-of-bounds error in
ata_scsi_pass_thru.
The error is fixed in 5.18 by commit
ce70fd9a551af7424a7dace2a1ba05a7de8eae27.
Backporting this commit would require significant changes to the code so
it is better to use a simple fix for that particular error.
The problem is that the length of the received SCSI command is not
validated if scsi_op == VARIABLE_LENGTH_CMD. It can lead to out-of-bounds
reading if the user sends a request with SCSI command of length less than
32.
Found by Linux Verification Center (linuxtesting.org) with Syzkaller.
Signed-off-by: Artem Sadovnikov <ancowi69(a)gmail.com>
Signed-off-by: Mikhail Ivanov <iwanov-23(a)bk.ru>
---
drivers/ata/libata-scsi.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index dfa090ccd21c..77589e911d3d 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -4065,6 +4065,9 @@ int __ata_scsi_queuecmd(struct scsi_cmnd *scmd, struct ata_device *dev)
if (unlikely(!scmd->cmd_len))
goto bad_cdb_len;
+
+ if (scsi_op == VARIABLE_LENGTH_CMD && scmd->cmd_len < 32)
+ goto bad_cdb_len;
if (dev->class == ATA_DEV_ATA || dev->class == ATA_DEV_ZAC) {
if (unlikely(scmd->cmd_len > dev->cdb_len))
--
2.25.1
From: Ville Syrjälä <ville.syrjala(a)linux.intel.com>
Looks like the undelayed vblank gets signalled exactly when
the active period ends. That is a problem for DSB+VRR when
we are already in vblank and expect DSB to start executing
as soon as we send the push. Instead of starting, the DSB
just keeps on waiting for the undelayed vblank which won't
signal until the end of the next frame's active period,
which is far too late.
The end result is that DSB won't have even started
executing by the time the flips/etc. have completed.
We then wait for an extra 1ms, after which we terminate
the DSB and report a timeout:
[drm] *ERROR* [CRTC:80:pipe A] DSB 0 timed out waiting for idle (current head=0xfedf4000, head=0x0, tail=0x1080)
To fix this let's configure DSB to use the so called VRR
"safe window" instead of the undelayed vblank to trigger
the DSB vblank logic, when VRR is enabled.
Cc: stable(a)vger.kernel.org
Fixes: 34d8311f4a1c ("drm/i915/dsb: Re-instate DSB for LUT updates")
Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/9927
Signed-off-by: Ville Syrjälä <ville.syrjala(a)linux.intel.com>
---
drivers/gpu/drm/i915/display/intel_dsb.c | 14 ++++++++++++++
1 file changed, 14 insertions(+)
diff --git a/drivers/gpu/drm/i915/display/intel_dsb.c b/drivers/gpu/drm/i915/display/intel_dsb.c
index d62e050185e7..e4515bf92038 100644
--- a/drivers/gpu/drm/i915/display/intel_dsb.c
+++ b/drivers/gpu/drm/i915/display/intel_dsb.c
@@ -340,6 +340,17 @@ static int intel_dsb_dewake_scanline(const struct intel_crtc_state *crtc_state)
return max(0, vblank_start - intel_usecs_to_scanlines(adjusted_mode, latency));
}
+static u32 dsb_chicken(struct intel_crtc *crtc)
+{
+ if (crtc->mode_flags & I915_MODE_FLAG_VRR)
+ return DSB_CTRL_WAIT_SAFE_WINDOW |
+ DSB_CTRL_NO_WAIT_VBLANK |
+ DSB_INST_WAIT_SAFE_WINDOW |
+ DSB_INST_NO_WAIT_VBLANK;
+ else
+ return 0;
+}
+
static void _intel_dsb_commit(struct intel_dsb *dsb, u32 ctrl,
int dewake_scanline)
{
@@ -361,6 +372,9 @@ static void _intel_dsb_commit(struct intel_dsb *dsb, u32 ctrl,
intel_de_write_fw(dev_priv, DSB_CTRL(pipe, dsb->id),
ctrl | DSB_ENABLE);
+ intel_de_write_fw(dev_priv, DSB_CHICKEN(pipe, dsb->id),
+ dsb_chicken(crtc));
+
intel_de_write_fw(dev_priv, DSB_HEAD(pipe, dsb->id),
intel_dsb_buffer_ggtt_offset(&dsb->dsb_buf));
--
2.43.0
From: Ville Syrjälä <ville.syrjala(a)linux.intel.com>
Looks like TRANS_CHICKEN bit 31 means something totally different
depending on the platform:
TGL: generate VRR "safe window" for DSB
ADL/DG2: make TRANS_SET_CONTEXT_LATENCY effective with VRR
So far we've only set this on ADL/DG2, but when using DSB+VRR
we also need to set it on TGL.
And a quick test on MTL says it doesn't need this bit for either
of those purposes, even though it's still documented as valid
in bspec.
Cc: stable(a)vger.kernel.org
Fixes: 34d8311f4a1c ("drm/i915/dsb: Re-instate DSB for LUT updates")
Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/9927
Signed-off-by: Ville Syrjälä <ville.syrjala(a)linux.intel.com>
---
drivers/gpu/drm/i915/display/intel_vrr.c | 7 ++++---
drivers/gpu/drm/i915/i915_reg.h | 2 +-
2 files changed, 5 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/i915/display/intel_vrr.c b/drivers/gpu/drm/i915/display/intel_vrr.c
index 5d905f932cb4..eb5bd0743902 100644
--- a/drivers/gpu/drm/i915/display/intel_vrr.c
+++ b/drivers/gpu/drm/i915/display/intel_vrr.c
@@ -187,10 +187,11 @@ void intel_vrr_set_transcoder_timings(const struct intel_crtc_state *crtc_state)
enum transcoder cpu_transcoder = crtc_state->cpu_transcoder;
/*
- * TRANS_SET_CONTEXT_LATENCY with VRR enabled
- * requires this chicken bit on ADL/DG2.
+ * This bit seems to have two meanings depending on the platform:
+ * TGL: generate VRR "safe window" for DSB vblank waits
+ * ADL/DG2: make TRANS_SET_CONTEXT_LATENCY effective with VRR
*/
- if (DISPLAY_VER(dev_priv) == 13)
+ if (IS_DISPLAY_VER(dev_priv, 12, 13))
intel_de_rmw(dev_priv, CHICKEN_TRANS(cpu_transcoder),
0, PIPE_VBLANK_WITH_DELAY);
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index e00557e1a57f..3b2e49ce29ba 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -4599,7 +4599,7 @@
#define MTL_CHICKEN_TRANS(trans) _MMIO_TRANS((trans), \
_MTL_CHICKEN_TRANS_A, \
_MTL_CHICKEN_TRANS_B)
-#define PIPE_VBLANK_WITH_DELAY REG_BIT(31) /* ADL/DG2 */
+#define PIPE_VBLANK_WITH_DELAY REG_BIT(31) /* tgl+ */
#define SKL_UNMASK_VBL_TO_PIPE_IN_SRD REG_BIT(30) /* skl+ */
#define HSW_FRAME_START_DELAY_MASK REG_GENMASK(28, 27)
#define HSW_FRAME_START_DELAY(x) REG_FIELD_PREP(HSW_FRAME_START_DELAY_MASK, x)
--
2.43.0
Hello There,
Would you be interested in building your website?
We are a professional Web Design & Development or Mobile Apps development
company based in India.
Our aim is to be the best in service and as such we offer a premium service
at very competitive prices.
*We specialize in:-*
1. Website Design
2. Web Development
3. Responsive Websites
4. PHP Development
5. E-Commerce Solutions
6. Mobile Apps Development
We operate 24 x7. I will be happy to send you links to price list, money
back guarantee, client rankings, client testimonials, “How we are different
from others?”, and “Why should you choose us?” on receiving a response from
you.
Drop me a line if you need any assistance.
*Kind Regards*
Anjali
This is the start of the stable review cycle for the 5.4.271 release.
There are 25 patches in this series, all will be posted as a response
to this one. If anyone has any issues with these being applied, please
let me know.
Responses should be made by Wed, 06 Mar 2024 21:15:26 +0000.
Anything received after that time might be too late.
The whole patch series can be found in one patch at:
https://www.kernel.org/pub/linux/kernel/v5.x/stable-review/patch-5.4.271-rc…
or in the git tree and branch at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git linux-5.4.y
and the diffstat can be found below.
thanks,
greg k-h
-------------
Pseudo-Shortlog of commits:
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Linux 5.4.271-rc1
Arturas Moskvinas <arturas.moskvinas(a)gmail.com>
gpio: 74x164: Enable output pins after registers are reset
Oscar Salvador <osalvador(a)suse.de>
fs,hugetlb: fix NULL pointer dereference in hugetlbs_fill_super
Baokun Li <libaokun1(a)huawei.com>
cachefiles: fix memory leak in cachefiles_add_cache()
Paolo Bonzini <pbonzini(a)redhat.com>
x86/cpu/intel: Detect TME keyid bits before setting MTRR mask registers
Ivan Semenov <ivan(a)semenov.dev>
mmc: core: Fix eMMC initialization with 1-bit bus connection
Curtis Klein <curtis.klein(a)hpe.com>
dmaengine: fsl-qdma: init irq after reg initialization
Peng Ma <peng.ma(a)nxp.com>
dmaengine: fsl-qdma: fix SoC may hang on 16 byte unaligned read
David Sterba <dsterba(a)suse.com>
btrfs: dev-replace: properly validate device names
Johannes Berg <johannes.berg(a)intel.com>
wifi: nl80211: reject iftype change with mesh ID change
Alexander Ofitserov <oficerovas(a)altlinux.org>
gtp: fix use-after-free and null-ptr-deref in gtp_newlink()
David Howells <dhowells(a)redhat.com>
afs: Fix endless loop in directory parsing
Takashi Iwai <tiwai(a)suse.de>
ALSA: Drop leftover snd-rtctimer stuff from Makefile
Hans de Goede <hdegoede(a)redhat.com>
power: supply: bq27xxx-i2c: Do not free non existing IRQ
Arnd Bergmann <arnd(a)arndb.de>
efi/capsule-loader: fix incorrect allocation size
Lin Ma <linma(a)zju.edu.cn>
rtnetlink: fix error logic of IFLA_BRIDGE_FLAGS writing back
Ignat Korchagin <ignat(a)cloudflare.com>
netfilter: nf_tables: allow NFPROTO_INET in nft_(match/target)_validate()
Kai-Heng Feng <kai.heng.feng(a)canonical.com>
Bluetooth: Enforce validation on max value of connection interval
Luiz Augusto von Dentz <luiz.von.dentz(a)intel.com>
Bluetooth: hci_event: Fix handling of HCI_EV_IO_CAPA_REQUEST
Ying Hsu <yinghsu(a)chromium.org>
Bluetooth: Avoid potential use-after-free in hci_error_reset
Javier Carrasco <javier.carrasco.cruz(a)gmail.com>
net: usb: dm9601: fix wrong return value in dm9601_mdio_read
Oleksij Rempel <linux(a)rempel-privat.de>
lan78xx: enable auto speed configuration for LAN7850 if no EEPROM is detected
Eric Dumazet <edumazet(a)google.com>
ipv6: fix potential "struct net" leak in inet6_rtm_getaddr()
Yunjian Wang <wangyunjian(a)huawei.com>
tun: Fix xdp_rxq_info's queue_index when detaching
Florian Westphal <fw(a)strlen.de>
net: ip_tunnel: prevent perpetual headroom growth
Ryosuke Yasuoka <ryasuoka(a)redhat.com>
netlink: Fix kernel-infoleak-after-free in __skb_datagram_iter
-------------
Diffstat:
Makefile | 4 +-
arch/x86/kernel/cpu/intel.c | 178 +++++++++++++++--------------
drivers/dma/fsl-qdma.c | 25 ++--
drivers/firmware/efi/capsule-loader.c | 2 +-
drivers/gpio/gpio-74x164.c | 4 +-
drivers/mmc/core/mmc.c | 2 +
drivers/net/gtp.c | 12 +-
drivers/net/tun.c | 1 +
drivers/net/usb/dm9601.c | 2 +-
drivers/net/usb/lan78xx.c | 3 +-
drivers/power/supply/bq27xxx_battery_i2c.c | 4 +-
fs/afs/dir.c | 4 +-
fs/btrfs/dev-replace.c | 24 +++-
fs/cachefiles/bind.c | 3 +
fs/hugetlbfs/inode.c | 6 +-
net/bluetooth/hci_core.c | 7 +-
net/bluetooth/hci_event.c | 9 +-
net/bluetooth/l2cap_core.c | 8 +-
net/core/rtnetlink.c | 11 +-
net/ipv4/ip_tunnel.c | 28 +++--
net/ipv6/addrconf.c | 7 +-
net/netfilter/nft_compat.c | 20 ++++
net/netlink/af_netlink.c | 2 +-
net/wireless/nl80211.c | 2 +
sound/core/Makefile | 1 -
25 files changed, 226 insertions(+), 143 deletions(-)
Programming PMU events in the host that count during guest execution is
a feature supported by perf, e.g.
perf stat -e cpu_cycles:G ./lkvm run
While this works for VHE, the guest/host event bitmaps are not carried
through to the hypervisor in the nVHE configuration. Make
kvm_pmu_update_vcpu_events() conditional on whether or not _hardware_
supports PMUv3 rather than if the vCPU has vPMU enabled.
Cc: stable(a)vger.kernel.org
Fixes: 84d751a019a9 ("KVM: arm64: Pass pmu events to hyp via vcpu")
Signed-off-by: Oliver Upton <oliver.upton(a)linux.dev>
---
include/kvm/arm_pmu.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h
index 4b9d8fb393a8..df32355e3e38 100644
--- a/include/kvm/arm_pmu.h
+++ b/include/kvm/arm_pmu.h
@@ -86,7 +86,7 @@ void kvm_vcpu_pmu_resync_el0(void);
*/
#define kvm_pmu_update_vcpu_events(vcpu) \
do { \
- if (!has_vhe() && kvm_vcpu_has_pmu(vcpu)) \
+ if (!has_vhe() && kvm_arm_support_pmu_v3()) \
vcpu->arch.pmu.events = *kvm_get_pmu_events(); \
} while (0)
--
2.44.0.278.ge034bb2e1d-goog
This is the start of the stable review cycle for the 4.19.309 release.
There are 16 patches in this series, all will be posted as a response
to this one. If anyone has any issues with these being applied, please
let me know.
Responses should be made by Wed, 06 Mar 2024 21:15:26 +0000.
Anything received after that time might be too late.
The whole patch series can be found in one patch at:
https://www.kernel.org/pub/linux/kernel/v4.x/stable-review/patch-4.19.309-r…
or in the git tree and branch at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git linux-4.19.y
and the diffstat can be found below.
thanks,
greg k-h
-------------
Pseudo-Shortlog of commits:
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Linux 4.19.309-rc1
Arturas Moskvinas <arturas.moskvinas(a)gmail.com>
gpio: 74x164: Enable output pins after registers are reset
Baokun Li <libaokun1(a)huawei.com>
cachefiles: fix memory leak in cachefiles_add_cache()
Ivan Semenov <ivan(a)semenov.dev>
mmc: core: Fix eMMC initialization with 1-bit bus connection
David Sterba <dsterba(a)suse.com>
btrfs: dev-replace: properly validate device names
Johannes Berg <johannes.berg(a)intel.com>
wifi: nl80211: reject iftype change with mesh ID change
Alexander Ofitserov <oficerovas(a)altlinux.org>
gtp: fix use-after-free and null-ptr-deref in gtp_newlink()
Takashi Iwai <tiwai(a)suse.de>
ALSA: Drop leftover snd-rtctimer stuff from Makefile
Hans de Goede <hdegoede(a)redhat.com>
power: supply: bq27xxx-i2c: Do not free non existing IRQ
Arnd Bergmann <arnd(a)arndb.de>
efi/capsule-loader: fix incorrect allocation size
Kai-Heng Feng <kai.heng.feng(a)canonical.com>
Bluetooth: Enforce validation on max value of connection interval
Luiz Augusto von Dentz <luiz.von.dentz(a)intel.com>
Bluetooth: hci_event: Fix handling of HCI_EV_IO_CAPA_REQUEST
Ying Hsu <yinghsu(a)chromium.org>
Bluetooth: Avoid potential use-after-free in hci_error_reset
Javier Carrasco <javier.carrasco.cruz(a)gmail.com>
net: usb: dm9601: fix wrong return value in dm9601_mdio_read
Oleksij Rempel <o.rempel(a)pengutronix.de>
lan78xx: enable auto speed configuration for LAN7850 if no EEPROM is detected
Yunjian Wang <wangyunjian(a)huawei.com>
tun: Fix xdp_rxq_info's queue_index when detaching
Ryosuke Yasuoka <ryasuoka(a)redhat.com>
netlink: Fix kernel-infoleak-after-free in __skb_datagram_iter
-------------
Diffstat:
Makefile | 4 ++--
drivers/firmware/efi/capsule-loader.c | 2 +-
drivers/gpio/gpio-74x164.c | 4 ++--
drivers/mmc/core/mmc.c | 2 ++
drivers/net/gtp.c | 12 ++++++------
drivers/net/tun.c | 1 +
drivers/net/usb/dm9601.c | 2 +-
drivers/net/usb/lan78xx.c | 3 ++-
drivers/power/supply/bq27xxx_battery_i2c.c | 4 +++-
fs/btrfs/dev-replace.c | 24 ++++++++++++++++++++----
fs/cachefiles/bind.c | 3 +++
net/bluetooth/hci_core.c | 7 ++++---
net/bluetooth/hci_event.c | 9 ++++++++-
net/bluetooth/l2cap_core.c | 8 +++++++-
net/netlink/af_netlink.c | 2 +-
net/wireless/nl80211.c | 2 ++
sound/core/Makefile | 1 -
17 files changed, 65 insertions(+), 25 deletions(-)
In the following sequence:
1) of_platform_depopulate()
2) of_overlay_remove()
During the step 1, devices are destroyed and devlinks are removed.
During the step 2, OF nodes are destroyed but
__of_changeset_entry_destroy() can raise warnings related to missing
of_node_put():
ERROR: memory leak, expected refcount 1 instead of 2 ...
Indeed, during the devlink removals performed at step 1, the removal
itself releasing the device (and the attached of_node) is done by a job
queued in a workqueue and so, it is done asynchronously with respect to
function calls.
When the warning is present, of_node_put() will be called but wrongly
too late from the workqueue job.
In order to be sure that any ongoing devlink removals are done before
the of_node destruction, synchronize the of_overlay_remove() with the
devlink removals.
Fixes: 80dd33cf72d1 ("drivers: base: Fix device link removal")
Cc: stable(a)vger.kernel.org
Signed-off-by: Herve Codina <herve.codina(a)bootlin.com>
---
drivers/of/overlay.c | 10 +++++++++-
1 file changed, 9 insertions(+), 1 deletion(-)
diff --git a/drivers/of/overlay.c b/drivers/of/overlay.c
index 2ae7e9d24a64..7a010a62b9d8 100644
--- a/drivers/of/overlay.c
+++ b/drivers/of/overlay.c
@@ -8,6 +8,7 @@
#define pr_fmt(fmt) "OF: overlay: " fmt
+#include <linux/device.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/of.h>
@@ -853,6 +854,14 @@ static void free_overlay_changeset(struct overlay_changeset *ovcs)
{
int i;
+ /*
+ * Wait for any ongoing device link removals before removing some of
+ * nodes. Drop the global lock while waiting
+ */
+ mutex_unlock(&of_mutex);
+ device_link_wait_removal();
+ mutex_lock(&of_mutex);
+
if (ovcs->cset.entries.next)
of_changeset_destroy(&ovcs->cset);
@@ -862,7 +871,6 @@ static void free_overlay_changeset(struct overlay_changeset *ovcs)
ovcs->id = 0;
}
-
for (i = 0; i < ovcs->count; i++) {
of_node_put(ovcs->fragments[i].target);
of_node_put(ovcs->fragments[i].overlay);
--
2.43.0
Since commit b2cb2ae22278f1918f7526b89760ee00b4a81393 ("mfd: axp20x:
Generalise handling without interrupt"), interrupt info part for the
AXP15060 PMIC is not needed anymore for the StarFive VisionFive 2 board.
And this would cause the kernel to try to enable interrupt line 0, which is
not expected. So delete this part from device tree.
Cc: stable(a)vger.kernel.org
Fixes: b2cb2ae22278 ("mfd: axp20x: Generalise handling without interrupt")
Reported-by: Bo Gan <ganboing(a)gmail.com>
Signed-off-by: Shengyu Qu <wiagn233(a)outlook.com>
---
arch/riscv/boot/dts/starfive/jh7110-starfive-visionfive-2.dtsi | 3 ---
1 file changed, 3 deletions(-)
diff --git a/arch/riscv/boot/dts/starfive/jh7110-starfive-visionfive-2.dtsi b/arch/riscv/boot/dts/starfive/jh7110-starfive-visionfive-2.dtsi
index b89e9791efa7..6bebabe3fa37 100644
--- a/arch/riscv/boot/dts/starfive/jh7110-starfive-visionfive-2.dtsi
+++ b/arch/riscv/boot/dts/starfive/jh7110-starfive-visionfive-2.dtsi
@@ -189,9 +189,6 @@ &i2c5 {
axp15060: pmic@36 {
compatible = "x-powers,axp15060";
reg = <0x36>;
- interrupts = <0>;
- interrupt-controller;
- #interrupt-cells = <1>;
regulators {
vcc_3v3: dcdc1 {
--
2.39.2
Clang enables -Wenum-enum-conversion and -Wenum-compare-conditional
under -Wenum-conversion. A recent change in Clang strengthened these
warnings and they appear frequently in common builds, primarily due to
several instances in common headers but there are quite a few drivers
that have individual instances as well.
include/linux/vmstat.h:508:43: warning: arithmetic between different enumeration types ('enum zone_stat_item' and 'enum numa_stat_item') [-Wenum-enum-conversion]
508 | return vmstat_text[NR_VM_ZONE_STAT_ITEMS +
| ~~~~~~~~~~~~~~~~~~~~~ ^
509 | item];
| ~~~~
drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c:955:24: warning: conditional expression between different enumeration types ('enum iwl_mac_beacon_flags' and 'enum iwl_mac_beacon_flags_v1') [-Wenum-compare-conditional]
955 | flags |= is_new_rate ? IWL_MAC_BEACON_CCK
| ^ ~~~~~~~~~~~~~~~~~~
956 | : IWL_MAC_BEACON_CCK_V1;
| ~~~~~~~~~~~~~~~~~~~~~
drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c:1120:21: warning: conditional expression between different enumeration types ('enum iwl_mac_beacon_flags' and 'enum iwl_mac_beacon_flags_v1') [-Wenum-compare-conditional]
1120 | 0) > 10 ?
| ^
1121 | IWL_MAC_BEACON_FILS :
| ~~~~~~~~~~~~~~~~~~~
1122 | IWL_MAC_BEACON_FILS_V1;
| ~~~~~~~~~~~~~~~~~~~~~~
While doing arithmetic with different types of enums may be potentially
problematic, inspecting several instances of the warning does not reveal
any obvious problems. To silence the warnings at the source level, an
integral cast must be added to each mismatched enum (which is incredibly
ugly when done frequently) or the value must be moved out of the enum to a
macro, which can remove the type safety offered by enums in other
places, such as assignments that would trigger -Wenum-conversion.
As the warnings do not appear to have a high signal to noise ratio and
the source level silencing options are not sustainable, disable the
warnings unconditionally, as they will be enabled with -Wenum-conversion
and are supported in all versions of clang that can build the kernel.
Cc: stable(a)vger.kernel.org
Closes: https://github.com/ClangBuiltLinux/linux/issues/2002
Link: https://github.com/llvm/llvm-project/commit/8c2ae42b3e1c6aa7c18f873edcebff7…
Signed-off-by: Nathan Chancellor <nathan(a)kernel.org>
---
scripts/Makefile.extrawarn | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/scripts/Makefile.extrawarn b/scripts/Makefile.extrawarn
index a9e552a1e910..6053aa22b8f5 100644
--- a/scripts/Makefile.extrawarn
+++ b/scripts/Makefile.extrawarn
@@ -81,6 +81,14 @@ KBUILD_CFLAGS += $(call cc-option,-Werror=designated-init)
# Warn if there is an enum types mismatch
KBUILD_CFLAGS += $(call cc-option,-Wenum-conversion)
+ifdef CONFIG_CC_IS_CLANG
+# Clang enables these extra warnings under -Wenum-conversion but the kernel
+# performs arithmetic using or has conditionals returning enums of different
+# types in several different places, which is rarely a bug in the kernel's
+# case, so disable the warnings.
+KBUILD_CFLAGS += -Wno-enum-compare-conditional
+KBUILD_CFLAGS += -Wno-enum-enum-conversion
+endif
#
# W=1 - warnings which may be relevant and do not occur too often
---
base-commit: 90d35da658da8cff0d4ecbb5113f5fac9d00eb72
change-id: 20240304-disable-extra-clang-enum-warnings-bf574c7c99fd
Best regards,
--
Nathan Chancellor <nathan(a)kernel.org>
From: Zi Yan <ziy(a)nvidia.com>
The tail pages in a THP can have swap entry information stored in their
private field. When migrating to a new page, all tail pages of the new
page need to update ->private to avoid future data corruption.
Corresponding swapcache entries need to be updated as well.
e71769ae5260 ("mm: enable thp migration for shmem thp") fixed it already.
Closes: https://lore.kernel.org/linux-mm/1707814102-22682-1-git-send-email-quic_cha…
Fixes: 616b8371539a ("mm: thp: enable thp migration in generic path")
Signed-off-by: Zi Yan <ziy(a)nvidia.com>
---
mm/migrate.c | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/mm/migrate.c b/mm/migrate.c
index 171573613c39..893ea04498f7 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -514,8 +514,12 @@ int migrate_page_move_mapping(struct address_space *mapping,
if (PageSwapBacked(page)) {
__SetPageSwapBacked(newpage);
if (PageSwapCache(page)) {
+ int i;
+
SetPageSwapCache(newpage);
- set_page_private(newpage, page_private(page));
+ for (i = 0; i < (1 << compound_order(page)); i++)
+ set_page_private(newpage + i,
+ page_private(page + i));
}
} else {
VM_BUG_ON_PAGE(PageSwapCache(page), page);
--
2.43.0
From: Zi Yan <ziy(a)nvidia.com>
The tail pages in a THP can have swap entry information stored in their
private field. When migrating to a new page, all tail pages of the new
page need to update ->private to avoid future data corruption.
Corresponding swapcache entries need to be updated as well.
e71769ae5260 ("mm: enable thp migration for shmem thp") fixed it already.
Closes: https://lore.kernel.org/linux-mm/1707814102-22682-1-git-send-email-quic_cha…
Fixes: 616b8371539a ("mm: thp: enable thp migration in generic path")
Signed-off-by: Zi Yan <ziy(a)nvidia.com>
---
mm/migrate.c | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/mm/migrate.c b/mm/migrate.c
index 034b0662fd3b..9cfd53eaeb4e 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -441,8 +441,12 @@ int migrate_page_move_mapping(struct address_space *mapping,
if (PageSwapBacked(page)) {
__SetPageSwapBacked(newpage);
if (PageSwapCache(page)) {
+ int i;
+
SetPageSwapCache(newpage);
- set_page_private(newpage, page_private(page));
+ for (i = 0; i < (1 << compound_order(page)); i++)
+ set_page_private(newpage + i,
+ page_private(page + i));
}
} else {
VM_BUG_ON_PAGE(PageSwapCache(page), page);
--
2.43.0
From: Zi Yan <ziy(a)nvidia.com>
The tail pages in a THP can have swap entry information stored in their
private field. When migrating to a new page, all tail pages of the new
page need to update ->private to avoid future data corruption.
Corresponding swapcache entries need to be updated as well.
e71769ae5260 ("mm: enable thp migration for shmem thp") fixed it already.
Closes: https://lore.kernel.org/linux-mm/1707814102-22682-1-git-send-email-quic_cha…
Fixes: 616b8371539a ("mm: thp: enable thp migration in generic path")
Signed-off-by: Zi Yan <ziy(a)nvidia.com>
---
mm/migrate.c | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/mm/migrate.c b/mm/migrate.c
index fcb7eb6a6eca..c0a8f3c9e256 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -447,8 +447,12 @@ int migrate_page_move_mapping(struct address_space *mapping,
if (PageSwapBacked(page)) {
__SetPageSwapBacked(newpage);
if (PageSwapCache(page)) {
+ int i;
+
SetPageSwapCache(newpage);
- set_page_private(newpage, page_private(page));
+ for (i = 0; i < (1 << compound_order(page)); i++)
+ set_page_private(newpage + i,
+ page_private(page + i));
}
} else {
VM_BUG_ON_PAGE(PageSwapCache(page), page);
--
2.43.0
This is the backport of recently upstreamed series that moves VERW
execution to a later point in the exit-to-user path. This is needed because
in some cases data accessed after VERW execution may end up in
MDS-affected CPU buffers. Moving VERW closer to the ring
transition reduces the attack surface.
- The series includes a dependency commit f87bc8dc7a7c ("x86/asm: Add
_ASM_RIP() macro for x86-64 (%rip) suffix").
- Patch 2 includes a change that adds runtime patching for jmp (instead
of verw in original series) due to lack of rip-relative relocation
support in kernels <v6.5.
- Fixed warning:
arch/x86/entry/entry.o: warning: objtool: mds_verw_sel+0x0: unreachable instruction.
- Resolved merge conflicts in:
syscall_return_via_sysret in entry_64.S
swapgs_restore_regs_and_return_to_usermode in entry_64.S.
__vmx_vcpu_run in vmenter.S.
vmx_update_fb_clear_dis in vmx.c.
- Boot tested with KASLR and KPTI enabled.
- Verified VERW being executed with mitigation ON.
To: stable(a)vger.kernel.org
Signed-off-by: Pawan Gupta <pawan.kumar.gupta(a)linux.intel.com>
---
H. Peter Anvin (Intel) (1):
x86/asm: Add _ASM_RIP() macro for x86-64 (%rip) suffix
Pawan Gupta (5):
x86/bugs: Add asm helpers for executing VERW
x86/entry_64: Add VERW just before userspace transition
x86/entry_32: Add VERW just before userspace transition
x86/bugs: Use ALTERNATIVE() instead of mds_user_clear static key
KVM/VMX: Move VERW closer to VMentry for MDS mitigation
Sean Christopherson (1):
KVM/VMX: Use BT+JNC, i.e. EFLAGS.CF to select VMRESUME vs. VMLAUNCH
Documentation/x86/mds.rst | 38 +++++++++++++++++++++++++-----------
arch/x86/entry/entry.S | 23 ++++++++++++++++++++++
arch/x86/entry/entry_32.S | 3 +++
arch/x86/entry/entry_64.S | 10 ++++++++++
arch/x86/entry/entry_64_compat.S | 1 +
arch/x86/include/asm/asm.h | 5 +++++
arch/x86/include/asm/cpufeatures.h | 1 +
arch/x86/include/asm/entry-common.h | 1 -
arch/x86/include/asm/irqflags.h | 1 +
arch/x86/include/asm/nospec-branch.h | 27 +++++++++++++------------
arch/x86/kernel/cpu/bugs.c | 15 ++++++--------
arch/x86/kernel/nmi.c | 3 ---
arch/x86/kvm/vmx/run_flags.h | 7 +++++--
arch/x86/kvm/vmx/vmenter.S | 9 ++++++---
arch/x86/kvm/vmx/vmx.c | 12 ++++++++----
15 files changed, 111 insertions(+), 45 deletions(-)
---
base-commit: 9985c44f239fa0db0f3b4a1aee80794f113c135c
change-id: 20240304-delay-verw-backport-5-10-y-00aad69432f4
Best regards,
--
Thanks,
Pawan
Hi,
this series does basically two things:
1. Disables automatic load balancing as advised by the hardware
workaround.
2. Assigns all the CCS slices to one single user engine. The user
will then be able to query only one CCS engine
I'm using here the "Requires: " tag, but I'm not sure the commit
id will be valid, on the other hand, I don't know what commit id
I should use.
Thanks Tvrtko, Matt and John for your reviews!
Andi
Changelog
=========
v2 -> v3
- Simplified the algorithm for creating the list of the exported
uabi engines. (Patch 1) (Thanks, Tvrtko)
- Consider the fused engines when creating the uabi engine list
(Patch 2) (Thanks, Matt)
- Patch 4 now uses the refactoring from patch 1, resulting in a cleaner
outcome.
v1 -> v2
- In Patch 1 use the correct workaround number (thanks Matt).
- In Patch 2 do not add the extra CCS engines to the exposed UABI
engine list and adapt the engine counting accordingly (thanks
Tvrtko).
- Reword the commit of Patch 2 (thanks John).
Andi Shyti (4):
drm/i915/gt: Refactor uabi engine class/instance list creation
drm/i915/gt: Do not exposed fused off engines.
drm/i915/gt: Disable HW load balancing for CCS
drm/i915/gt: Enable only one CCS for compute workload
drivers/gpu/drm/i915/gt/intel_engine_user.c | 52 ++++++++++++++++-----
drivers/gpu/drm/i915/gt/intel_gt.c | 11 +++++
drivers/gpu/drm/i915/gt/intel_gt_regs.h | 3 ++
drivers/gpu/drm/i915/gt/intel_workarounds.c | 6 +++
4 files changed, 60 insertions(+), 12 deletions(-)
--
2.43.0
The first kiocb_set_cancel_fn() argument may point at a struct kiocb
that is not embedded inside struct aio_kiocb. With the current code,
depending on the compiler, the req->ki_ctx read happens either before
the IOCB_AIO_RW test or after that test. Move the req->ki_ctx read such
that it is guaranteed that the IOCB_AIO_RW test happens first.
Reported-by: Eric Biggers <ebiggers(a)kernel.org>
Cc: Benjamin LaHaise <ben(a)communityfibre.ca>
Cc: Eric Biggers <ebiggers(a)google.com>
Cc: Christoph Hellwig <hch(a)lst.de>
Cc: Avi Kivity <avi(a)scylladb.com>
Cc: Sandeep Dhavale <dhavale(a)google.com>
Cc: Jens Axboe <axboe(a)kernel.dk>
Cc: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Cc: Kent Overstreet <kent.overstreet(a)linux.dev>
Cc: stable(a)vger.kernel.org
Fixes: b820de741ae4 ("fs/aio: Restrict kiocb_set_cancel_fn() to I/O submitted via libaio")
Signed-off-by: Bart Van Assche <bvanassche(a)acm.org>
---
fs/aio.c | 8 ++++++--
1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/fs/aio.c b/fs/aio.c
index da18dbcfcb22..9cdaa2faa536 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -589,8 +589,8 @@ static int aio_setup_ring(struct kioctx *ctx, unsigned int nr_events)
void kiocb_set_cancel_fn(struct kiocb *iocb, kiocb_cancel_fn *cancel)
{
- struct aio_kiocb *req = container_of(iocb, struct aio_kiocb, rw);
- struct kioctx *ctx = req->ki_ctx;
+ struct aio_kiocb *req;
+ struct kioctx *ctx;
unsigned long flags;
/*
@@ -600,9 +600,13 @@ void kiocb_set_cancel_fn(struct kiocb *iocb, kiocb_cancel_fn *cancel)
if (!(iocb->ki_flags & IOCB_AIO_RW))
return;
+ req = container_of(iocb, struct aio_kiocb, rw);
+
if (WARN_ON_ONCE(!list_empty(&req->ki_list)))
return;
+ ctx = req->ki_ctx;
+
spin_lock_irqsave(&ctx->ctx_lock, flags);
list_add_tail(&req->ki_list, &ctx->active_reqs);
req->ki_cancel = cancel;
Currently arm64's switch_mm() doesn't always have an smp_mb()
which the core scheduler code has depended upon since commit:
commit 223baf9d17f25 ("sched: Fix performance regression introduced by mm_cid")
If switch_mm() doesn't call smp_mb(), sched_mm_cid_remote_clear()
can unset the actively used cid when it fails to observe the active task
after it sets lazy_put.
By adding an smp_mb() in arm64's check_and_switch_context(),
guarantee that the active task is observed after
sched_mm_cid_remote_clear() succeeds in setting lazy_put.
Signed-off-by: levi.yun <yeoreum.yun(a)arm.com>
Fixes: 223baf9d17f2 ("sched: Fix performance regression introduced by mm_cid")
Cc: <stable(a)vger.kernel.org> # 6.4.x
Cc: Mathieu Desnoyers <mathieu.desnoyers(a)efficios.com>
Cc: Catalin Marinas <catalin.marinas(a)arm.com>
Cc: Mark Rutland <mark.rutland(a)arm.com>
Cc: Will Deacon <will(a)kernel.org>
Cc: Peter Zijlstra <peterz(a)infradead.org>
Cc: Aaron Lu <aaron.lu(a)intel.com>
---
I'm really sorry if you got this multiple times.
I had some problems with the SMTP server...
arch/arm64/mm/context.c | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index 188197590fc9..7a9e8e6647a0 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -268,6 +268,11 @@ void check_and_switch_context(struct mm_struct *mm)
*/
if (!system_uses_ttbr0_pan())
cpu_switch_mm(mm->pgd, mm);
+
+ /*
+ * See the comments on switch_mm_cid describing user -> user transition.
+ */
+ smp_mb();
}
unsigned long arm64_mm_context_get(struct mm_struct *mm)
--
LEVI:{C3F47F37-75D8-414A-A8BA-3980EC8A46D7}
Patch "fs/aio: Make io_cancel() generate completions again" is based on the
assumption that calling kiocb->ki_cancel() does not complete R/W requests.
This is incorrect: the two drivers that call kiocb_set_cancel_fn()
set a cancellation function that calls usb_ep_dequeue(). According to its
documentation, usb_ep_dequeue() calls the completion routine with status
-ECONNRESET. Hence this revert.
Cc: Benjamin LaHaise <ben(a)communityfibre.ca>
Cc: Eric Biggers <ebiggers(a)google.com>
Cc: Christoph Hellwig <hch(a)lst.de>
Cc: Avi Kivity <avi(a)scylladb.com>
Cc: Sandeep Dhavale <dhavale(a)google.com>
Cc: Jens Axboe <axboe(a)kernel.dk>
Cc: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Cc: Kent Overstreet <kent.overstreet(a)linux.dev>
Cc: stable(a)vger.kernel.org
Reported-by: syzbot+b91eb2ed18f599dd3c31(a)syzkaller.appspotmail.com
Fixes: 54cbc058d86b ("fs/aio: Make io_cancel() generate completions again")
Signed-off-by: Bart Van Assche <bvanassche(a)acm.org>
---
fs/aio.c | 27 ++++++++++++++++-----------
1 file changed, 16 insertions(+), 11 deletions(-)
diff --git a/fs/aio.c b/fs/aio.c
index 28223f511931..da18dbcfcb22 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -2165,11 +2165,14 @@ COMPAT_SYSCALL_DEFINE3(io_submit, compat_aio_context_t, ctx_id,
#endif
/* sys_io_cancel:
- * Attempts to cancel an iocb previously passed to io_submit(). If the
- * operation is successfully cancelled 0 is returned. May fail with
- * -EFAULT if any of the data structures pointed to are invalid. May
- * fail with -EINVAL if aio_context specified by ctx_id is invalid. Will
- * fail with -ENOSYS if not implemented.
+ * Attempts to cancel an iocb previously passed to io_submit. If
+ * the operation is successfully cancelled, the resulting event is
+ * copied into the memory pointed to by result without being placed
+ * into the completion queue and 0 is returned. May fail with
+ * -EFAULT if any of the data structures pointed to are invalid.
+ * May fail with -EINVAL if aio_context specified by ctx_id is
+ * invalid. May fail with -EAGAIN if the iocb specified was not
+ * cancelled. Will fail with -ENOSYS if not implemented.
*/
SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb,
struct io_event __user *, result)
@@ -2200,12 +2203,14 @@ SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb,
}
spin_unlock_irq(&ctx->ctx_lock);
- /*
- * The result argument is no longer used - the io_event is always
- * delivered via the ring buffer.
- */
- if (ret == 0 && kiocb->rw.ki_flags & IOCB_AIO_RW)
- aio_complete_rw(&kiocb->rw, -EINTR);
+ if (!ret) {
+ /*
+ * The result argument is no longer used - the io_event is
+ * always delivered via the ring buffer. -EINPROGRESS indicates
+ * cancellation is progress:
+ */
+ ret = -EINPROGRESS;
+ }
percpu_ref_put(&ctx->users);
This is the start of the stable review cycle for the 5.15.151 release.
There are 84 patches in this series, all will be posted as a response
to this one. If anyone has any issues with these being applied, please
let me know.
Responses should be made by Wed, 06 Mar 2024 21:15:26 +0000.
Anything received after that time might be too late.
The whole patch series can be found in one patch at:
https://www.kernel.org/pub/linux/kernel/v5.x/stable-review/patch-5.15.151-r…
or in the git tree and branch at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git linux-5.15.y
and the diffstat can be found below.
thanks,
greg k-h
-------------
Pseudo-Shortlog of commits:
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Linux 5.15.151-rc1
Davide Caratti <dcaratti(a)redhat.com>
mptcp: fix double-free on socket dismantle
Gal Pressman <gal(a)nvidia.com>
Revert "tls: rx: move counting TlsDecryptErrors for sync"
Jakub Kicinski <kuba(a)kernel.org>
net: tls: fix async vs NIC crypto offload
Martynas Pumputis <m(a)lambda.lt>
bpf: Derive source IP addr via bpf_*_fib_lookup()
Louis DeLosSantos <louis.delos.devel(a)gmail.com>
bpf: Add table ID to bpf_fib_lookup BPF helper
Martin KaFai Lau <martin.lau(a)kernel.org>
bpf: Add BPF_FIB_LOOKUP_SKIP_NEIGH for bpf_fib_lookup
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Revert "interconnect: Teach lockdep about icc_bw_lock order"
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Revert "interconnect: Fix locking for runpm vs reclaim"
Bartosz Golaszewski <bartosz.golaszewski(a)linaro.org>
gpio: fix resource unwinding order in error path
Andy Shevchenko <andriy.shevchenko(a)linux.intel.com>
gpiolib: Fix the error path order in gpiochip_add_data_with_key()
Arturas Moskvinas <arturas.moskvinas(a)gmail.com>
gpio: 74x164: Enable output pins after registers are reset
Kuniyuki Iwashima <kuniyu(a)amazon.com>
af_unix: Drop oob_skb ref before purging queue in GC.
Max Krummenacher <max.krummenacher(a)toradex.com>
Revert "drm/bridge: lt8912b: Register and attach our DSI device at probe"
Oscar Salvador <osalvador(a)suse.de>
fs,hugetlb: fix NULL pointer dereference in hugetlbs_fill_super
Baokun Li <libaokun1(a)huawei.com>
cachefiles: fix memory leak in cachefiles_add_cache()
Paolo Abeni <pabeni(a)redhat.com>
mptcp: fix possible deadlock in subflow diag
Paolo Abeni <pabeni(a)redhat.com>
mptcp: push at DSS boundaries
Geliang Tang <tanggeliang(a)kylinos.cn>
mptcp: add needs_id for netlink appending addr
Jean Sacren <sakiwit(a)gmail.com>
mptcp: clean up harmless false expressions
Matthieu Baerts (NGI0) <matttbe(a)kernel.org>
selftests: mptcp: add missing kconfig for NF Filter in v6
Matthieu Baerts (NGI0) <matttbe(a)kernel.org>
selftests: mptcp: add missing kconfig for NF Filter
Paolo Abeni <pabeni(a)redhat.com>
mptcp: rename timer related helper to less confusing names
Paolo Abeni <pabeni(a)redhat.com>
mptcp: process pending subflow error on close
Paolo Abeni <pabeni(a)redhat.com>
mptcp: move __mptcp_error_report in protocol.c
Paolo Bonzini <pbonzini(a)redhat.com>
x86/cpu/intel: Detect TME keyid bits before setting MTRR mask registers
Bjorn Andersson <quic_bjorande(a)quicinc.com>
pmdomain: qcom: rpmhpd: Fix enabled_corner aggregation
Zong Li <zong.li(a)sifive.com>
riscv: add CALLER_ADDRx support
Elad Nachman <enachman(a)marvell.com>
mmc: sdhci-xenon: fix PHY init clock stability
Elad Nachman <enachman(a)marvell.com>
mmc: sdhci-xenon: add timeout for PHY init complete
Ivan Semenov <ivan(a)semenov.dev>
mmc: core: Fix eMMC initialization with 1-bit bus connection
Curtis Klein <curtis.klein(a)hpe.com>
dmaengine: fsl-qdma: init irq after reg initialization
Tadeusz Struk <tstruk(a)gigaio.com>
dmaengine: ptdma: use consistent DMA masks
Peng Ma <peng.ma(a)nxp.com>
dmaengine: fsl-qdma: fix SoC may hang on 16 byte unaligned read
David Sterba <dsterba(a)suse.com>
btrfs: dev-replace: properly validate device names
Johannes Berg <johannes.berg(a)intel.com>
wifi: nl80211: reject iftype change with mesh ID change
Alexander Ofitserov <oficerovas(a)altlinux.org>
gtp: fix use-after-free and null-ptr-deref in gtp_newlink()
Takashi Sakamoto <o-takashi(a)sakamocchi.jp>
ALSA: firewire-lib: fix to check cycle continuity
Tetsuo Handa <penguin-kernel(a)I-love.SAKURA.ne.jp>
tomoyo: fix UAF write bug in tomoyo_write_control()
Dimitris Vlachos <dvlachos(a)ics.forth.gr>
riscv: Sparse-Memory/vmemmap out-of-bounds fix
David Howells <dhowells(a)redhat.com>
afs: Fix endless loop in directory parsing
Jiri Slaby (SUSE) <jirislaby(a)kernel.org>
fbcon: always restore the old font data in fbcon_do_set_font()
Takashi Iwai <tiwai(a)suse.de>
ALSA: Drop leftover snd-rtctimer stuff from Makefile
Hans de Goede <hdegoede(a)redhat.com>
power: supply: bq27xxx-i2c: Do not free non existing IRQ
Arnd Bergmann <arnd(a)arndb.de>
efi/capsule-loader: fix incorrect allocation size
Sabrina Dubroca <sd(a)queasysnail.net>
tls: decrement decrypt_pending if no async completion will be called
Jakub Kicinski <kuba(a)kernel.org>
tls: rx: use async as an in-out argument
Jakub Kicinski <kuba(a)kernel.org>
tls: rx: assume crypto always calls our callback
Jakub Kicinski <kuba(a)kernel.org>
tls: rx: move counting TlsDecryptErrors for sync
Jakub Kicinski <kuba(a)kernel.org>
tls: rx: don't track the async count
Jakub Kicinski <kuba(a)kernel.org>
tls: rx: factor out writing ContentType to cmsg
Jakub Kicinski <kuba(a)kernel.org>
tls: rx: wrap decryption arguments in a structure
Jakub Kicinski <kuba(a)kernel.org>
tls: rx: don't report text length from the bowels of decrypt
Jakub Kicinski <kuba(a)kernel.org>
tls: rx: drop unnecessary arguments from tls_setup_from_iter()
Jakub Kicinski <kuba(a)kernel.org>
tls: hw: rx: use return value of tls_device_decrypted() to carry status
Jakub Kicinski <kuba(a)kernel.org>
tls: rx: refactor decrypt_skb_update()
Jakub Kicinski <kuba(a)kernel.org>
tls: rx: don't issue wake ups when data is decrypted
Jakub Kicinski <kuba(a)kernel.org>
tls: rx: don't store the decryption status in socket context
Jakub Kicinski <kuba(a)kernel.org>
tls: rx: don't store the record type in socket context
Oleksij Rempel <linux(a)rempel-privat.de>
igb: extend PTP timestamp adjustments to i211
Lin Ma <linma(a)zju.edu.cn>
rtnetlink: fix error logic of IFLA_BRIDGE_FLAGS writing back
Florian Westphal <fw(a)strlen.de>
netfilter: bridge: confirm multicast packets before passing them up the stack
Florian Westphal <fw(a)strlen.de>
netfilter: let reset rules clean out conntrack entries
Florian Westphal <fw(a)strlen.de>
netfilter: make function op structures const
Florian Westphal <fw(a)strlen.de>
netfilter: core: move ip_ct_attach indirection to struct nf_ct_hook
Florian Westphal <fw(a)strlen.de>
netfilter: nfnetlink_queue: silence bogus compiler warning
Ignat Korchagin <ignat(a)cloudflare.com>
netfilter: nf_tables: allow NFPROTO_INET in nft_(match/target)_validate()
Kai-Heng Feng <kai.heng.feng(a)canonical.com>
Bluetooth: Enforce validation on max value of connection interval
Luiz Augusto von Dentz <luiz.von.dentz(a)intel.com>
Bluetooth: hci_event: Fix handling of HCI_EV_IO_CAPA_REQUEST
Zijun Hu <quic_zijuhu(a)quicinc.com>
Bluetooth: hci_event: Fix wrongly recorded wakeup BD_ADDR
Ying Hsu <yinghsu(a)chromium.org>
Bluetooth: Avoid potential use-after-free in hci_error_reset
Jakub Raczynski <j.raczynski(a)samsung.com>
stmmac: Clear variable when destroying workqueue
Justin Iurman <justin.iurman(a)uliege.be>
uapi: in6: replace temporary label with rfc9486
Javier Carrasco <javier.carrasco.cruz(a)gmail.com>
net: usb: dm9601: fix wrong return value in dm9601_mdio_read
Jakub Kicinski <kuba(a)kernel.org>
veth: try harder when allocating queue memory
Vasily Averin <vvs(a)openvz.org>
net: enable memcg accounting for veth queues
Oleksij Rempel <linux(a)rempel-privat.de>
lan78xx: enable auto speed configuration for LAN7850 if no EEPROM is detected
Eric Dumazet <edumazet(a)google.com>
ipv6: fix potential "struct net" leak in inet6_rtm_getaddr()
Jakub Kicinski <kuba(a)kernel.org>
net: veth: clear GRO when clearing XDP even when down
Doug Smythies <dsmythies(a)telus.net>
cpufreq: intel_pstate: fix pstate limits enforcement for adjust_perf call back
Yunjian Wang <wangyunjian(a)huawei.com>
tun: Fix xdp_rxq_info's queue_index when detaching
Florian Westphal <fw(a)strlen.de>
net: ip_tunnel: prevent perpetual headroom growth
Ryosuke Yasuoka <ryasuoka(a)redhat.com>
netlink: Fix kernel-infoleak-after-free in __skb_datagram_iter
Han Xu <han.xu(a)nxp.com>
mtd: spinand: gigadevice: Fix the get ecc status issue
Pablo Neira Ayuso <pablo(a)netfilter.org>
netfilter: nf_tables: disallow timeout for anonymous sets
-------------
Diffstat:
Makefile | 4 +-
arch/riscv/include/asm/ftrace.h | 5 +
arch/riscv/include/asm/pgtable.h | 2 +-
arch/riscv/kernel/Makefile | 2 +
arch/riscv/kernel/return_address.c | 48 ++++
arch/x86/kernel/cpu/intel.c | 178 ++++++------
drivers/cpufreq/intel_pstate.c | 3 +
drivers/dma/fsl-qdma.c | 25 +-
drivers/dma/ptdma/ptdma-dmaengine.c | 2 -
drivers/firmware/efi/capsule-loader.c | 2 +-
drivers/gpio/gpio-74x164.c | 4 +-
drivers/gpio/gpiolib.c | 12 +-
drivers/gpu/drm/bridge/lontium-lt8912b.c | 11 +-
drivers/interconnect/core.c | 18 +-
drivers/mmc/core/mmc.c | 2 +
drivers/mmc/host/sdhci-xenon-phy.c | 48 +++-
drivers/mtd/nand/spi/gigadevice.c | 6 +-
drivers/net/ethernet/intel/igb/igb_ptp.c | 5 +-
drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 4 +-
drivers/net/gtp.c | 12 +-
drivers/net/tun.c | 1 +
drivers/net/usb/dm9601.c | 2 +-
drivers/net/usb/lan78xx.c | 3 +-
drivers/net/veth.c | 40 +--
drivers/power/supply/bq27xxx_battery_i2c.c | 4 +-
drivers/soc/qcom/rpmhpd.c | 7 +-
drivers/video/fbdev/core/fbcon.c | 8 +-
fs/afs/dir.c | 4 +-
fs/btrfs/dev-replace.c | 24 +-
fs/cachefiles/bind.c | 3 +
fs/hugetlbfs/inode.c | 6 +-
include/linux/netfilter.h | 14 +-
include/net/ipv6_stubs.h | 5 +
include/net/netfilter/nf_conntrack.h | 8 +
include/net/strparser.h | 4 +
include/net/tls.h | 11 +-
include/uapi/linux/bpf.h | 37 ++-
include/uapi/linux/in6.h | 2 +-
net/bluetooth/hci_core.c | 7 +-
net/bluetooth/hci_event.c | 13 +-
net/bluetooth/l2cap_core.c | 8 +-
net/bridge/br_netfilter_hooks.c | 96 +++++++
net/bridge/netfilter/nf_conntrack_bridge.c | 30 ++
net/core/filter.c | 67 ++++-
net/core/rtnetlink.c | 11 +-
net/ipv4/ip_tunnel.c | 28 +-
net/ipv4/netfilter/nf_reject_ipv4.c | 1 +
net/ipv6/addrconf.c | 7 +-
net/ipv6/af_inet6.c | 1 +
net/ipv6/netfilter/nf_reject_ipv6.c | 1 +
net/mptcp/diag.c | 3 +
net/mptcp/pm_netlink.c | 30 +-
net/mptcp/protocol.c | 123 +++++++--
net/mptcp/subflow.c | 36 ---
net/netfilter/core.c | 45 +--
net/netfilter/nf_conntrack_core.c | 21 +-
net/netfilter/nf_conntrack_netlink.c | 4 +-
net/netfilter/nf_conntrack_proto_tcp.c | 35 +++
net/netfilter/nf_nat_core.c | 2 +-
net/netfilter/nf_tables_api.c | 7 +
net/netfilter/nfnetlink_queue.c | 10 +-
net/netfilter/nft_compat.c | 20 ++
net/netlink/af_netlink.c | 2 +-
net/tls/tls_device.c | 6 +-
net/tls/tls_sw.c | 316 ++++++++++------------
net/unix/garbage.c | 22 +-
net/wireless/nl80211.c | 2 +
security/tomoyo/common.c | 3 +-
sound/core/Makefile | 1 -
sound/firewire/amdtp-stream.c | 2 +-
tools/include/uapi/linux/bpf.h | 37 ++-
tools/testing/selftests/net/mptcp/config | 2 +
72 files changed, 1046 insertions(+), 529 deletions(-)
Larry Finger <Larry.Finger(a)gmail.com> wrote:
> From: Nick Morrow <morrownr(a)gmail.com>
>
> Add VID/PIDs that are known to be missing for this driver.
>
> Removed /* 8811CU */ and /* 8821CU */ as they are redundant
> since the file is specific to those chips.
>
> Removed /* TOTOLINK A650UA v3 */ as the manufacturer. It has a REALTEK
> VID so it may not be specific to this adapter.
>
> Verified and tested.
>
> Cc: stable(a)vger.kernel.org
> Signed-off-by: Nick Morrow <morrownr(a)gmail.com>
> Signed-off-by: Larry Finger <Larry.Finger(a)lwfinger.net>
> Acked-by: Ping-Ke Shih <pkshih(a)realtek.com>
Patch applied to wireless-next.git, thanks.
b8a62478f3b1 wifi: rtw88: Add missing VID/PIDs for 8811CU and 8821CU
--
https://patchwork.kernel.org/project/linux-wireless/patch/4ume7mjw63u7.XlMU…
https://wireless.wiki.kernel.org/en/developers/documentation/submittingpatc…
The patch titled
Subject: mm: swap: fix race between free_swap_and_cache() and swapoff()
has been added to the -mm mm-unstable branch. Its filename is
mm-swap-fix-race-between-free_swap_and_cache-and-swapoff.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Ryan Roberts <ryan.roberts(a)arm.com>
Subject: mm: swap: fix race between free_swap_and_cache() and swapoff()
Date: Tue, 5 Mar 2024 15:13:49 +0000
There was previously a theoretical window where swapoff() could run and
tear down a swap_info_struct while a call to free_swap_and_cache() was
running in another thread. This could cause, amongst other bad
possibilities, swap_page_trans_huge_swapped() (called by
free_swap_and_cache()) to access the freed memory for swap_map.
This is a theoretical problem and I haven't been able to provoke it from a
test case. But there has been agreement based on code review that this is
possible (see link below).
Fix it by using get_swap_device()/put_swap_device(), which will stall
swapoff(). There was an extra check in _swap_info_get() to confirm that
the swap entry was valid. This wasn't present in get_swap_device() so
I've added it. I couldn't find any existing get_swap_device() call sites
where this extra check would cause any false alarms.
Details of how to provoke one possible issue (thanks to David Hildenbrand
for deriving this):
--8<-----
__swap_entry_free() might be the last user and result in
"count == SWAP_HAS_CACHE".
swapoff->try_to_unuse() will stop as soon as si->inuse_pages==0.
So the question is: could someone reclaim the folio and turn
si->inuse_pages==0, before we completed swap_page_trans_huge_swapped().
Imagine the following: 2 MiB folio in the swapcache. Only 2 subpages are
still referenced by swap entries.
Process 1 still references subpage 0 via swap entry.
Process 2 still references subpage 1 via swap entry.
Process 1 quits. Calls free_swap_and_cache().
-> count == SWAP_HAS_CACHE
[then, preempted in the hypervisor etc.]
Process 2 quits. Calls free_swap_and_cache().
-> count == SWAP_HAS_CACHE
Process 2 goes ahead, passes swap_page_trans_huge_swapped(), and calls
__try_to_reclaim_swap().
__try_to_reclaim_swap()->folio_free_swap()->delete_from_swap_cache()->
put_swap_folio()->free_swap_slot()->swapcache_free_entries()->
swap_entry_free()->swap_range_free()->
...
WRITE_ONCE(si->inuse_pages, si->inuse_pages - nr_entries);
What stops swapoff from succeeding after process 2 reclaimed the swap cache
but before process 1 finished its call to swap_page_trans_huge_swapped()?
--8<-----
Link: https://lkml.kernel.org/r/20240305151349.3781428-1-ryan.roberts@arm.com
Fixes: 7c00bafee87c ("mm/swap: free swap slots in batch")
Closes: https://lore.kernel.org/linux-mm/65a66eb9-41f8-4790-8db2-0c70ea15979f@redha…
Signed-off-by: Ryan Roberts <ryan.roberts(a)arm.com>
Cc: David Hildenbrand <david(a)redhat.com>
Cc: "Huang, Ying" <ying.huang(a)intel.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/swapfile.c | 14 +++++++++++---
1 file changed, 11 insertions(+), 3 deletions(-)
--- a/mm/swapfile.c~mm-swap-fix-race-between-free_swap_and_cache-and-swapoff
+++ a/mm/swapfile.c
@@ -1281,7 +1281,9 @@ struct swap_info_struct *get_swap_device
smp_rmb();
offset = swp_offset(entry);
if (offset >= si->max)
- goto put_out;
+ goto bad_offset;
+ if (data_race(!si->swap_map[swp_offset(entry)]))
+ goto bad_free;
return si;
bad_nofile:
@@ -1289,9 +1291,14 @@ bad_nofile:
out:
return NULL;
put_out:
- pr_err("%s: %s%08lx\n", __func__, Bad_offset, entry.val);
percpu_ref_put(&si->users);
return NULL;
+bad_offset:
+ pr_err("%s: %s%08lx\n", __func__, Bad_offset, entry.val);
+ goto put_out;
+bad_free:
+ pr_err("%s: %s%08lx\n", __func__, Unused_offset, entry.val);
+ goto put_out;
}
static unsigned char __swap_entry_free(struct swap_info_struct *p,
@@ -1609,13 +1616,14 @@ int free_swap_and_cache(swp_entry_t entr
if (non_swap_entry(entry))
return 1;
- p = _swap_info_get(entry);
+ p = get_swap_device(entry);
if (p) {
count = __swap_entry_free(p, entry);
if (count == SWAP_HAS_CACHE &&
!swap_page_trans_huge_swapped(p, entry))
__try_to_reclaim_swap(p, swp_offset(entry),
TTRS_UNMAPPED | TTRS_FULL);
+ put_swap_device(p);
}
return p != NULL;
}
_
Patches currently in -mm which might be from ryan.roberts(a)arm.com are
mm-swap-fix-race-between-free_swap_and_cache-and-swapoff.patch
From: Zi Yan <ziy(a)nvidia.com>
The tail pages in a THP can have swap entry information stored in their
private field. When migrating to a new page, all tail pages of the new
page need to update ->private to avoid future data corruption.
Corresponding swapcache entries need to be updated as well.
e71769ae5260 ("mm: enable thp migration for shmem thp") fixed it already.
Fixes: 616b8371539a ("mm: thp: enable thp migration in generic path")
Signed-off-by: Zi Yan <ziy(a)nvidia.com>
---
mm/migrate.c | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/mm/migrate.c b/mm/migrate.c
index c7d5566623ad..c37af50f312d 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -424,8 +424,12 @@ int migrate_page_move_mapping(struct address_space *mapping,
if (PageSwapBacked(page)) {
__SetPageSwapBacked(newpage);
if (PageSwapCache(page)) {
+ int i;
+
SetPageSwapCache(newpage);
- set_page_private(newpage, page_private(page));
+ for (i = 0; i < (1 << compound_order(page)); i++)
+ set_page_private(newpage + i,
+ page_private(page + i));
}
} else {
VM_BUG_ON_PAGE(PageSwapCache(page), page);
--
2.43.0
Commit fb24ea52f78e0d595852e ("drivers: Remove explicit invocations of
mmiowb()") removed all mmiowb() calls in drivers, but it says:
"NOTE: mmiowb() has only ever guaranteed ordering in conjunction with
spin_unlock(). However, pairing each mmiowb() removal in this patch with
the corresponding call to spin_unlock() is not at all trivial, so there
is a small chance that this change may regress any drivers incorrectly
relying on mmiowb() to order MMIO writes between CPUs using lock-free
synchronisation."
The mmio in radeon_ring_commit() is protected by a mutex rather than a
spinlock, but in the mutex fastpath it behaves similarly to a spinlock. We
can add mmiowb() calls in the radeon driver but the maintainer says he
doesn't like such a workaround, and radeon is not the only example of
mutex protected mmio.
So we should extend the mmiowb tracking system from spinlock to mutex,
and maybe other locking primitives. This is not easy and is error prone, so
we solve it in the architectural code, by simply defining the __io_aw()
hook as mmiowb(). And we no longer need to override queued_spin_unlock()
so use the generic definition.
Without this, we get errors such as the following when running 'glxgears' on
weakly ordered architectures such as LoongArch:
radeon 0000:04:00.0: ring 0 stalled for more than 10324msec
radeon 0000:04:00.0: ring 3 stalled for more than 10240msec
radeon 0000:04:00.0: GPU lockup (current fence id 0x000000000001f412 last fence id 0x000000000001f414 on ring 3)
radeon 0000:04:00.0: GPU lockup (current fence id 0x000000000000f940 last fence id 0x000000000000f941 on ring 0)
radeon 0000:04:00.0: scheduling IB failed (-35).
[drm:radeon_gem_va_ioctl [radeon]] *ERROR* Couldn't update BO_VA (-35)
radeon 0000:04:00.0: scheduling IB failed (-35).
[drm:radeon_gem_va_ioctl [radeon]] *ERROR* Couldn't update BO_VA (-35)
radeon 0000:04:00.0: scheduling IB failed (-35).
[drm:radeon_gem_va_ioctl [radeon]] *ERROR* Couldn't update BO_VA (-35)
radeon 0000:04:00.0: scheduling IB failed (-35).
[drm:radeon_gem_va_ioctl [radeon]] *ERROR* Couldn't update BO_VA (-35)
radeon 0000:04:00.0: scheduling IB failed (-35).
[drm:radeon_gem_va_ioctl [radeon]] *ERROR* Couldn't update BO_VA (-35)
radeon 0000:04:00.0: scheduling IB failed (-35).
[drm:radeon_gem_va_ioctl [radeon]] *ERROR* Couldn't update BO_VA (-35)
radeon 0000:04:00.0: scheduling IB failed (-35).
[drm:radeon_gem_va_ioctl [radeon]] *ERROR* Couldn't update BO_VA (-35)
Link: https://lore.kernel.org/dri-devel/29df7e26-d7a8-4f67-b988-44353c4270ac@amd.…
Link: https://lore.kernel.org/linux-arch/20240301130532.3953167-1-chenhuacai@loon…
Cc: stable(a)vger.kernel.org
Signed-off-by: Huacai Chen <chenhuacai(a)loongson.cn>
---
arch/loongarch/include/asm/Kbuild | 1 +
arch/loongarch/include/asm/io.h | 2 ++
arch/loongarch/include/asm/qspinlock.h | 18 ------------------
3 files changed, 3 insertions(+), 18 deletions(-)
delete mode 100644 arch/loongarch/include/asm/qspinlock.h
diff --git a/arch/loongarch/include/asm/Kbuild b/arch/loongarch/include/asm/Kbuild
index a97c0edbb866..2dbec7853ae8 100644
--- a/arch/loongarch/include/asm/Kbuild
+++ b/arch/loongarch/include/asm/Kbuild
@@ -6,6 +6,7 @@ generic-y += mcs_spinlock.h
generic-y += parport.h
generic-y += early_ioremap.h
generic-y += qrwlock.h
+generic-y += qspinlock.h
generic-y += rwsem.h
generic-y += segment.h
generic-y += user.h
diff --git a/arch/loongarch/include/asm/io.h b/arch/loongarch/include/asm/io.h
index c486c2341b66..4a8adcca329b 100644
--- a/arch/loongarch/include/asm/io.h
+++ b/arch/loongarch/include/asm/io.h
@@ -71,6 +71,8 @@ extern void __memcpy_fromio(void *to, const volatile void __iomem *from, size_t
#define memcpy_fromio(a, c, l) __memcpy_fromio((a), (c), (l))
#define memcpy_toio(c, a, l) __memcpy_toio((c), (a), (l))
+#define __io_aw() mmiowb()
+
#include <asm-generic/io.h>
#define ARCH_HAS_VALID_PHYS_ADDR_RANGE
diff --git a/arch/loongarch/include/asm/qspinlock.h b/arch/loongarch/include/asm/qspinlock.h
deleted file mode 100644
index 34f43f8ad591..000000000000
--- a/arch/loongarch/include/asm/qspinlock.h
+++ /dev/null
@@ -1,18 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_QSPINLOCK_H
-#define _ASM_QSPINLOCK_H
-
-#include <asm-generic/qspinlock_types.h>
-
-#define queued_spin_unlock queued_spin_unlock
-
-static inline void queued_spin_unlock(struct qspinlock *lock)
-{
- compiletime_assert_atomic_type(lock->locked);
- c_sync();
- WRITE_ONCE(lock->locked, 0);
-}
-
-#include <asm-generic/qspinlock.h>
-
-#endif /* _ASM_QSPINLOCK_H */
--
2.43.0
From: Ville Syrjälä <ville.syrjala(a)linux.intel.com>
Reinstate commit 88b065943cb5 ("drm/i915/dsi: Do display on
sequence later on icl+"), for the most part. Turns out some
machines (eg. Chuwi Minibook X) really do need that updated order.
It is also the order the Windows driver uses.
However we can't just undo the revert since that would again
break Lenovo 82TQ. After staring at the VBT sequences for both
machines I've concluded that the Lenovo 82TQ sequences look
somewhat broken:
- INIT_OTP is not present at all
- what should be in INIT_OTP is found in DISPLAY_ON
- what should be in DISPLAY_ON is found in BACKLIGHT_ON
(along with the actual backlight stuff)
The Chuwi Minibook X on the other hand has a full complement
of sequences in its VBT.
So let's try to deal with the broken sequences in the
Lenovo 82TQ VBT by simply swapping the (non-existent)
INIT_OTP sequence with the DISPLAY_ON sequence. Thus we
execute DISPLAY_ON when intending to execute INIT_OTP,
and execute nothing at all when intending to execute
DISPLAY_ON. That should be 100% equivalent to the
revert, for such broken VBTs.
Cc: stable(a)vger.kernel.org
Fixes: dc524d05974f ("Revert "drm/i915/dsi: Do display on sequence later on icl+"")
References: https://gitlab.freedesktop.org/drm/intel/-/issues/10071
Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/10334
Signed-off-by: Ville Syrjälä <ville.syrjala(a)linux.intel.com>
---
drivers/gpu/drm/i915/display/icl_dsi.c | 3 +-
drivers/gpu/drm/i915/display/intel_bios.c | 43 +++++++++++++++++++----
2 files changed, 39 insertions(+), 7 deletions(-)
diff --git a/drivers/gpu/drm/i915/display/icl_dsi.c b/drivers/gpu/drm/i915/display/icl_dsi.c
index eda4a8b88590..ac456a2275db 100644
--- a/drivers/gpu/drm/i915/display/icl_dsi.c
+++ b/drivers/gpu/drm/i915/display/icl_dsi.c
@@ -1155,7 +1155,6 @@ static void gen11_dsi_powerup_panel(struct intel_encoder *encoder)
}
intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_INIT_OTP);
- intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_DISPLAY_ON);
/* ensure all panel commands dispatched before enabling transcoder */
wait_for_cmds_dispatched_to_panel(encoder);
@@ -1256,6 +1255,8 @@ static void gen11_dsi_enable(struct intel_atomic_state *state,
/* step6d: enable dsi transcoder */
gen11_dsi_enable_transcoder(encoder);
+ intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_DISPLAY_ON);
+
/* step7: enable backlight */
intel_backlight_enable(crtc_state, conn_state);
intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_BACKLIGHT_ON);
diff --git a/drivers/gpu/drm/i915/display/intel_bios.c b/drivers/gpu/drm/i915/display/intel_bios.c
index 343726de9aa7..373291d10af9 100644
--- a/drivers/gpu/drm/i915/display/intel_bios.c
+++ b/drivers/gpu/drm/i915/display/intel_bios.c
@@ -1955,16 +1955,12 @@ static int get_init_otp_deassert_fragment_len(struct drm_i915_private *i915,
* these devices we split the init OTP sequence into a deassert sequence and
* the actual init OTP part.
*/
-static void fixup_mipi_sequences(struct drm_i915_private *i915,
- struct intel_panel *panel)
+static void vlv_fixup_mipi_sequences(struct drm_i915_private *i915,
+ struct intel_panel *panel)
{
u8 *init_otp;
int len;
- /* Limit this to VLV for now. */
- if (!IS_VALLEYVIEW(i915))
- return;
-
/* Limit this to v1 vid-mode sequences */
if (panel->vbt.dsi.config->is_cmd_mode ||
panel->vbt.dsi.seq_version != 1)
@@ -2000,6 +1996,41 @@ static void fixup_mipi_sequences(struct drm_i915_private *i915,
panel->vbt.dsi.sequence[MIPI_SEQ_INIT_OTP] = init_otp + len - 1;
}
+/*
+ * Some machines (eg. Lenovo 82TQ) appear to have broken
+ * VBT sequences:
+ * - INIT_OTP is not present at all
+ * - what should be in INIT_OTP is in DISPLAY_ON
+ * - what should be in DISPLAY_ON is in BACKLIGHT_ON
+ * (along with the actual backlight stuff)
+ *
+ * To make those work we simply swap DISPLAY_ON and INIT_OTP.
+ *
+ * TODO: Do we need to limit this to specific machines,
+ * or examine the contents of the sequences to
+ * avoid false positives?
+ */
+static void icl_fixup_mipi_sequences(struct drm_i915_private *i915,
+ struct intel_panel *panel)
+{
+ if (!panel->vbt.dsi.sequence[MIPI_SEQ_INIT_OTP] &&
+ panel->vbt.dsi.sequence[MIPI_SEQ_DISPLAY_ON]) {
+ drm_dbg_kms(&i915->drm, "Broken VBT: Swapping INIT_OTP and DISPLAY_ON sequences\n");
+
+ swap(panel->vbt.dsi.sequence[MIPI_SEQ_INIT_OTP],
+ panel->vbt.dsi.sequence[MIPI_SEQ_DISPLAY_ON]);
+ }
+}
+
+static void fixup_mipi_sequences(struct drm_i915_private *i915,
+ struct intel_panel *panel)
+{
+ if (DISPLAY_VER(i915) >= 11)
+ icl_fixup_mipi_sequences(i915, panel);
+ else if (IS_VALLEYVIEW(i915))
+ vlv_fixup_mipi_sequences(i915, panel);
+}
+
static void
parse_mipi_sequence(struct drm_i915_private *i915,
struct intel_panel *panel)
--
2.43.0
With the to-be-fixed commit, the reset_work handler cleared 'host->mrq'
outside of the spinlock protected critical section. That leaves a small
race window during execution of 'tmio_mmc_reset()' where the done_work
handler could grab a pointer to the now invalid 'host->mrq'. Both would
use it to call mmc_request_done() causing problems (see link below).
However, 'host->mrq' cannot simply be cleared earlier inside the
critical section. That would allow new mrqs to come in asynchronously
while the actual reset of the controller still needs to be done. So,
like 'tmio_mmc_set_ios()', an ERR_PTR is used to prevent new mrqs from
coming in but still avoiding concurrency between work handlers.
Reported-by: Dirk Behme <dirk.behme(a)de.bosch.com>
Closes: https://lore.kernel.org/all/20240220061356.3001761-1-dirk.behme@de.bosch.co…
Fixes: df3ef2d3c92c ("mmc: protect the tmio_mmc driver against a theoretical race")
Signed-off-by: Wolfram Sang <wsa+renesas(a)sang-engineering.com>
Tested-by: Dirk Behme <dirk.behme(a)de.bosch.com>
Reviewed-by: Dirk Behme <dirk.behme(a)de.bosch.com>
Cc: stable(a)vger.kernel.org # 3.0+
---
Change since v1/RFT: added Dirk's tags and stable tag
@Ulf: this is nasty, subtle stuff. Would be awesome to have it in 6.8
already!
drivers/mmc/host/tmio_mmc_core.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/drivers/mmc/host/tmio_mmc_core.c b/drivers/mmc/host/tmio_mmc_core.c
index be7f18fd4836..c253d176db69 100644
--- a/drivers/mmc/host/tmio_mmc_core.c
+++ b/drivers/mmc/host/tmio_mmc_core.c
@@ -259,6 +259,8 @@ static void tmio_mmc_reset_work(struct work_struct *work)
else
mrq->cmd->error = -ETIMEDOUT;
+ /* No new calls yet, but disallow concurrent tmio_mmc_done_work() */
+ host->mrq = ERR_PTR(-EBUSY);
host->cmd = NULL;
host->data = NULL;
--
2.43.0
If VM_BIND is enabled on the client the legacy submission ioctl can't be
used, however if a client tries to do so regardless it will return an
error. In this case the client's mutex remained locked, leading to a
deadlock inside nouveau_drm_postclose or any other nouveau ioctl call.
Fixes: b88baab82871 ("drm/nouveau: implement new VM_BIND uAPI")
Cc: Danilo Krummrich <dakr(a)redhat.com>
Cc: <stable(a)vger.kernel.org> # v6.6+
Signed-off-by: Karol Herbst <kherbst(a)redhat.com>
Reviewed-by: Lyude Paul <lyude(a)redhat.com>
Reviewed-by: Danilo Krummrich <dakr(a)redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240304183157.1587152-1-kher…
---
drivers/gpu/drm/nouveau/nouveau_gem.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c
index 49c2bcbef1299..5a887d67dc0e8 100644
--- a/drivers/gpu/drm/nouveau/nouveau_gem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_gem.c
@@ -764,7 +764,7 @@ nouveau_gem_ioctl_pushbuf(struct drm_device *dev, void *data,
return -ENOMEM;
if (unlikely(nouveau_cli_uvmm(cli)))
- return -ENOSYS;
+ return nouveau_abi16_put(abi16, -ENOSYS);
list_for_each_entry(temp, &abi16->channels, head) {
if (temp->chan->chid == req->channel) {
--
2.44.0
Ring expansion checker may incorrectly assume a completely full ring
is empty, missing the need for expansion.
This is due to a special empty ring case where the dequeue ends up
ahead of the enqueue pointer. This is seen when enqueued TRBs fill up
exactly a segment, with enqueue then pointing to the end link TRB.
Once those TRBs are handled the dequeue pointer will follow the link
TRB and end up pointing to the first entry on the next segment, past
the enqueue.
This same enqueue - dequeue condition can be true if a ring is full,
with enqueue ending on that last link TRB before the dequeue pointer
on the next segment.
This can be seen when queuing several ~510 small URBs via usbfs in
one go before a single one is handled (i.e. dequeue not moved from first
entry in segment).
Expand the ring already when enqueue reaches the link TRB before the
dequeue segment, instead of expanding it when enqueue moves into the
dequeue segment.
Reported-by: Chris Yokum <linux-usb(a)mail.totalphase.com>
Closes: https://lore.kernel.org/all/949223224.833962.1709339266739.JavaMail.zimbra@…
Tested-by: Chris Yokum <linux-usb(a)mail.totalphase.com>
Fixes: f5af638f0609 ("xhci: Fix transfer ring expansion size calculation")
Cc: stable(a)vger.kernel.org # v6.5+
Signed-off-by: Mathias Nyman <mathias.nyman(a)linux.intel.com>
---
drivers/usb/host/xhci-ring.c | 8 +++++++-
1 file changed, 7 insertions(+), 1 deletion(-)
diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index f0d8a607ff21..4f64b814d4aa 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -326,7 +326,13 @@ static unsigned int xhci_ring_expansion_needed(struct xhci_hcd *xhci, struct xhc
/* how many trbs will be queued past the enqueue segment? */
trbs_past_seg = enq_used + num_trbs - (TRBS_PER_SEGMENT - 1);
- if (trbs_past_seg <= 0)
+ /*
+ * Consider expanding the ring already if num_trbs fills the current
+ * segment (i.e. trbs_past_seg == 0), not only when num_trbs goes into
+ * the next segment. Avoids confusing full ring with special empty ring
+ * case below
+ */
+ if (trbs_past_seg < 0)
return 0;
/* Empty ring special case, enqueue stuck on link trb while dequeue advanced */
--
2.25.1