The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable@vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x d1adb25df7111de83b64655a80b5a135adbded61
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable@vger.kernel.org>' --in-reply-to '2024110617-blame-taekwondo-ba51@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From d1adb25df7111de83b64655a80b5a135adbded61 Mon Sep 17 00:00:00 2001
From: Baolin Wang <baolin.wang@linux.alibaba.com>
Date: Fri, 15 Dec 2023 20:07:52 +0800
Subject: [PATCH] mm: migrate: fix getting incorrect page mapping during page
migration
When running stress-ng testing, we found below kernel crash after a few hours:
Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000
pc : dentry_name+0xd8/0x224
lr : pointer+0x22c/0x370
sp : ffff800025f134c0
......
Call trace:
dentry_name+0xd8/0x224
pointer+0x22c/0x370
vsnprintf+0x1ec/0x730
vscnprintf+0x2c/0x60
vprintk_store+0x70/0x234
vprintk_emit+0xe0/0x24c
vprintk_default+0x3c/0x44
vprintk_func+0x84/0x2d0
printk+0x64/0x88
__dump_page+0x52c/0x530
dump_page+0x14/0x20
set_migratetype_isolate+0x110/0x224
start_isolate_page_range+0xc4/0x20c
offline_pages+0x124/0x474
memory_block_offline+0x44/0xf4
memory_subsys_offline+0x3c/0x70
device_offline+0xf0/0x120
......
After analyzing the vmcore, I found this issue is caused by page migration.
The scenario is that one thread is doing page migration: between page unmap and
page move, the target page's ->mapping field is used to save the 'anon_vma'
pointer, and at this point the target page is locked with a refcount of 1.
Meanwhile, another stress-ng thread performing memory hotplug attempts to
offline the target page that is being migrated. It discovers that the refcount
of this target page is 1, which prevents the offline operation, so it proceeds
to dump the page. However, page_mapping() of the target page may return an
incorrect file mapping and crash the system in dump_mapping(), since the target
page->mapping only holds the 'anon_vma' pointer without the PAGE_MAPPING_ANON
flag set.
There are several ways to fix this issue:
(1) Set the PAGE_MAPPING_ANON flag in the target page's ->mapping when saving
'anon_vma'; however, this can confuse PageAnon() for PFN walkers, since the
target page has not had its mappings built yet.
(2) Take the page lock before calling page_mapping() in __dump_page() to avoid
crashing the system; however, some PFN walkers, such as compaction, still call
page_mapping() without holding the page lock.
(3) Use the target page->private field to save the 'anon_vma' pointer plus 2
bits of page state, just as page->mapping records an anonymous page. This
removes the page_mapping() impact for PFN walkers and is also the simplest
approach.
So I chose option 3 to fix this issue; this also fixes other potential issues
for PFN walkers, such as compaction.
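For reference, here is a minimal userspace sketch of the pointer-packing idea
behind option (3); the names are illustrative and this is not the kernel code,
it only assumes the anon_vma pointer is at least 4-byte aligned so its two low
bits are free to carry the state:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Two state bits kept in the low bits of an aligned pointer. */
#define WAS_MAPPED	0x1UL
#define WAS_MLOCKED	0x2UL
#define OLD_STATES	(WAS_MAPPED | WAS_MLOCKED)

struct anon_vma_stub { long dummy; };	/* stand-in for struct anon_vma */

/* Pack: the pointer's low two bits are zero, so they can carry the state. */
static void *migrate_record(struct anon_vma_stub *anon_vma, unsigned long state)
{
	assert(((uintptr_t)anon_vma & OLD_STATES) == 0);
	return (void *)((uintptr_t)anon_vma | state);
}

/* Unpack: mask the state bits off to recover the original pointer. */
static struct anon_vma_stub *migrate_extract(void *private, unsigned long *state)
{
	*state = (uintptr_t)private & OLD_STATES;
	return (struct anon_vma_stub *)((uintptr_t)private & ~OLD_STATES);
}

int main(void)
{
	struct anon_vma_stub av;
	unsigned long state;
	void *packed = migrate_record(&av, WAS_MAPPED | WAS_MLOCKED);
	struct anon_vma_stub *out = migrate_extract(packed, &state);

	printf("pointer ok: %d, state: 0x%lx\n", out == &av, state);
	return 0;
}

The patch below does the same packing directly on the destination folio's
private field, with PAGE_OLD_STATES as the mask.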
Link: https://lkml.kernel.org/r/e60b17a88afc38cb32f84c3e30837ec70b343d2b.17026417…
Fixes: 64c8902ed441 ("migrate_pages: split unmap_and_move() to _unmap() and _move()")
Signed-off-by: Baolin Wang <baolin.wang@linux.alibaba.com>
Reviewed-by: "Huang, Ying" <ying.huang@intel.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: David Hildenbrand <david@redhat.com>
Cc: Xu Yu <xuyu@linux.alibaba.com>
Cc: Zi Yan <ziy@nvidia.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
diff --git a/mm/migrate.c b/mm/migrate.c
index 397f2a6e34cb..bad3039d165e 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1025,38 +1025,31 @@ static int move_to_new_folio(struct folio *dst, struct folio *src,
}
/*
- * To record some information during migration, we use some unused
- * fields (mapping and private) of struct folio of the newly allocated
- * destination folio. This is safe because nobody is using them
- * except us.
+ * To record some information during migration, we use unused private
+ * field of struct folio of the newly allocated destination folio.
+ * This is safe because nobody is using it except us.
*/
-union migration_ptr {
- struct anon_vma *anon_vma;
- struct address_space *mapping;
-};
-
enum {
PAGE_WAS_MAPPED = BIT(0),
PAGE_WAS_MLOCKED = BIT(1),
+ PAGE_OLD_STATES = PAGE_WAS_MAPPED | PAGE_WAS_MLOCKED,
};
static void __migrate_folio_record(struct folio *dst,
- unsigned long old_page_state,
+ int old_page_state,
struct anon_vma *anon_vma)
{
- union migration_ptr ptr = { .anon_vma = anon_vma };
- dst->mapping = ptr.mapping;
- dst->private = (void *)old_page_state;
+ dst->private = (void *)anon_vma + old_page_state;
}
static void __migrate_folio_extract(struct folio *dst,
int *old_page_state,
struct anon_vma **anon_vmap)
{
- union migration_ptr ptr = { .mapping = dst->mapping };
- *anon_vmap = ptr.anon_vma;
- *old_page_state = (unsigned long)dst->private;
- dst->mapping = NULL;
+ unsigned long private = (unsigned long)dst->private;
+
+ *anon_vmap = (struct anon_vma *)(private & ~PAGE_OLD_STATES);
+ *old_page_state = private & PAGE_OLD_STATES;
dst->private = NULL;
}
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable@vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x be0e822bb3f5259c7f9424ba97e8175211288813
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable@vger.kernel.org>' --in-reply-to '2024110618-voicing-yield-48ff@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From be0e822bb3f5259c7f9424ba97e8175211288813 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 28 Oct 2024 10:07:48 +0100
Subject: [PATCH] block: fix queue limits checks in blk_rq_map_user_bvec for
real
blk_rq_map_user_bvec currently only has ad-hoc checks for queue limits,
and the last fix to it enabled valid NVMe I/O to pass, but also allowed
invalid I/O for drivers that set a max_segment_size or seg_boundary
limit.
Fix it once and for all by using the bio_split_rw_at helper from the I/O
path, which indicates if and where a bio would have to be split to
adhere to the queue limits. If it returns a positive value, turn that
into -EREMOTEIO to retry using the copy path.
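As a rough standalone model of the new check (not the kernel helper itself): a
routine decides whether the data layout would have to be split to fit the
limits, and because a passthrough mapping cannot be split, a "would split"
answer is translated into falling back to the copy path, which the real code
signals with -EREMOTEIO:

#include <stdio.h>

/* Simplified queue limits: a maximum request size and segment count. */
struct limits {
	unsigned long max_bytes;
	unsigned int max_segs;
};

/*
 * Model of the decision: would this data layout have to be split to fit
 * the limits?  The real helper (bio_split_rw_at) also honours
 * max_segment_size and the segment boundary mask; this only shows the
 * shape of the check.
 */
static int needs_split(const unsigned long *seg_len, unsigned int nsegs,
		       const struct limits *lim)
{
	unsigned long bytes = 0;
	unsigned int i;

	if (nsegs > lim->max_segs)
		return 1;
	for (i = 0; i < nsegs; i++)
		bytes += seg_len[i];
	return bytes > lim->max_bytes;
}

int main(void)
{
	const struct limits lim = { .max_bytes = 4096, .max_segs = 4 };
	const unsigned long segs[] = { 2048, 2048, 512 };

	/* 4608 bytes exceed max_bytes: passthrough cannot split, so copy. */
	if (needs_split(segs, 3, &lim))
		printf("would split: fall back to copy (-EREMOTEIO)\n");
	else
		printf("fits: map the bvecs directly\n");
	return 0;
}

In the actual hunk below, the return value of bio_split_rw_at() is wired
straight into that fallback.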
Fixes: 2ff949441802 ("block: fix sanity checks in blk_rq_map_user_bvec")
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: John Garry <john.g.garry@oracle.com>
Link: https://lore.kernel.org/r/20241028090840.446180-1-hch@lst.de
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
diff --git a/block/blk-map.c b/block/blk-map.c
index 6ef2ec1f7d78..b5fd1d857461 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -561,55 +561,33 @@ EXPORT_SYMBOL(blk_rq_append_bio);
/* Prepare bio for passthrough IO given ITER_BVEC iter */
static int blk_rq_map_user_bvec(struct request *rq, const struct iov_iter *iter)
{
- struct request_queue *q = rq->q;
- size_t nr_iter = iov_iter_count(iter);
- size_t nr_segs = iter->nr_segs;
- struct bio_vec *bvecs, *bvprvp = NULL;
- const struct queue_limits *lim = &q->limits;
- unsigned int nsegs = 0, bytes = 0;
+ const struct queue_limits *lim = &rq->q->limits;
+ unsigned int max_bytes = lim->max_hw_sectors << SECTOR_SHIFT;
+ unsigned int nsegs;
struct bio *bio;
- size_t i;
+ int ret;
- if (!nr_iter || (nr_iter >> SECTOR_SHIFT) > queue_max_hw_sectors(q))
- return -EINVAL;
- if (nr_segs > queue_max_segments(q))
+ if (!iov_iter_count(iter) || iov_iter_count(iter) > max_bytes)
return -EINVAL;
- /* no iovecs to alloc, as we already have a BVEC iterator */
+ /* reuse the bvecs from the iterator instead of allocating new ones */
bio = blk_rq_map_bio_alloc(rq, 0, GFP_KERNEL);
- if (bio == NULL)
+ if (!bio)
return -ENOMEM;
-
bio_iov_bvec_set(bio, (struct iov_iter *)iter);
- blk_rq_bio_prep(rq, bio, nr_segs);
- /* loop to perform a bunch of sanity checks */
- bvecs = (struct bio_vec *)iter->bvec;
- for (i = 0; i < nr_segs; i++) {
- struct bio_vec *bv = &bvecs[i];
-
- /*
- * If the queue doesn't support SG gaps and adding this
- * offset would create a gap, fallback to copy.
- */
- if (bvprvp && bvec_gap_to_prev(lim, bvprvp, bv->bv_offset)) {
- blk_mq_map_bio_put(bio);
- return -EREMOTEIO;
- }
- /* check full condition */
- if (nsegs >= nr_segs || bytes > UINT_MAX - bv->bv_len)
- goto put_bio;
- if (bytes + bv->bv_len > nr_iter)
- break;
-
- nsegs++;
- bytes += bv->bv_len;
- bvprvp = bv;
+ /* check that the data layout matches the hardware restrictions */
+ ret = bio_split_rw_at(bio, lim, &nsegs, max_bytes);
+ if (ret) {
+ /* if we would have to split the bio, copy instead */
+ if (ret > 0)
+ ret = -EREMOTEIO;
+ blk_mq_map_bio_put(bio);
+ return ret;
}
+
+ blk_rq_bio_prep(rq, bio, nsegs);
return 0;
-put_bio:
- blk_mq_map_bio_put(bio);
- return -EINVAL;
}
/**
The following commit has been merged into the perf/core branch of tip:
Commit-ID: 5b590160d2cf776b304eb054afafea2bd55e3620
Gitweb: https://git.kernel.org/tip/5b590160d2cf776b304eb054afafea2bd55e3620
Author: Adrian Hunter <adrian.hunter@intel.com>
AuthorDate: Tue, 22 Oct 2024 18:59:07 +03:00
Committer: Peter Zijlstra <peterz@infradead.org>
CommitterDate: Tue, 05 Nov 2024 12:55:43 +01:00
perf/x86/intel/pt: Fix buffer full but size is 0 case
If the trace data buffer becomes full, a truncated flag [T] is reported
in PERF_RECORD_AUX. In some cases, the size reported is 0, even though
data must have been added to make the buffer full.
That happens when the buffer fills up from empty to full before the
Intel PT driver has updated the buffer position. Then the driver
calculates the new buffer position before calculating the data size.
If the old and new positions are the same, the data size is reported
as 0, even though it is really the whole buffer size.
Fix this by detecting when the buffer position has wrapped, and adjusting the
data size calculation accordingly.
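The arithmetic behind the fix fits in a few lines; the following is an
illustrative standalone sketch rather than the driver code. Both head offsets
are reduced modulo the buffer size, so a buffer that filled exactly once ends
up with base == old, and only the wrapped flag distinguishes "no new data"
from "one full buffer":

#include <stdbool.h>
#include <stdio.h>

#define BUF_SIZE 8192UL	/* AUX buffer size in bytes (8K as in the example below) */

/*
 * New data size given the old and new head offsets (both already reduced
 * modulo BUF_SIZE) and whether the position wrapped back to the start.
 */
static unsigned long data_size(unsigned long old, unsigned long base, bool wrapped)
{
	if (base < old || (base == old && wrapped))
		base += BUF_SIZE;
	return base - old;
}

int main(void)
{
	/* Buffer went from empty to exactly full: old == base == 0. */
	printf("without wrap flag: %lu\n", data_size(0, 0, false));	/* 0, the bug */
	printf("with wrap flag:    %lu\n", data_size(0, 0, true));	/* 8192, fixed */
	return 0;
}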
Example
Use a very small buffer size (8K) and observe the size of truncated [T]
data. Before the fix, it is possible to see records of 0 size.
Before:
$ perf record -m,8K -e intel_pt// uname
Linux
[ perf record: Woken up 2 times to write data ]
[ perf record: Captured and wrote 0.105 MB perf.data ]
$ perf script -D --no-itrace | grep AUX | grep -F '[T]'
Warning:
AUX data lost 2 times out of 3!
5 19462712368111 0x19710 [0x40]: PERF_RECORD_AUX offset: 0 size: 0 flags: 0x1 [T]
5 19462712700046 0x19ba8 [0x40]: PERF_RECORD_AUX offset: 0x170 size: 0xe90 flags: 0x1 [T]
After:
$ perf record -m,8K -e intel_pt// uname
Linux
[ perf record: Woken up 3 times to write data ]
[ perf record: Captured and wrote 0.040 MB perf.data ]
$ perf script -D --no-itrace | grep AUX | grep -F '[T]'
Warning:
AUX data lost 2 times out of 3!
1 113720802995 0x4948 [0x40]: PERF_RECORD_AUX offset: 0 size: 0x2000 flags: 0x1 [T]
1 113720979812 0x6b10 [0x40]: PERF_RECORD_AUX offset: 0x2000 size: 0x2000 flags: 0x1 [T]
Fixes: 52ca9ced3f70 ("perf/x86/intel/pt: Add Intel PT PMU driver")
Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/20241022155920.17511-2-adrian.hunter@intel.com
---
arch/x86/events/intel/pt.c | 11 ++++++++---
arch/x86/events/intel/pt.h | 2 ++
2 files changed, 10 insertions(+), 3 deletions(-)
diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c
index fd4670a..a087bc0 100644
--- a/arch/x86/events/intel/pt.c
+++ b/arch/x86/events/intel/pt.c
@@ -828,11 +828,13 @@ static void pt_buffer_advance(struct pt_buffer *buf)
buf->cur_idx++;
if (buf->cur_idx == buf->cur->last) {
- if (buf->cur == buf->last)
+ if (buf->cur == buf->last) {
buf->cur = buf->first;
- else
+ buf->wrapped = true;
+ } else {
buf->cur = list_entry(buf->cur->list.next, struct topa,
list);
+ }
buf->cur_idx = 0;
}
}
@@ -846,8 +848,11 @@ static void pt_buffer_advance(struct pt_buffer *buf)
static void pt_update_head(struct pt *pt)
{
struct pt_buffer *buf = perf_get_aux(&pt->handle);
+ bool wrapped = buf->wrapped;
u64 topa_idx, base, old;
+ buf->wrapped = false;
+
if (buf->single) {
local_set(&buf->data_size, buf->output_off);
return;
@@ -865,7 +870,7 @@ static void pt_update_head(struct pt *pt)
} else {
old = (local64_xchg(&buf->head, base) &
((buf->nr_pages << PAGE_SHIFT) - 1));
- if (base < old)
+ if (base < old || (base == old && wrapped))
base += buf->nr_pages << PAGE_SHIFT;
local_add(base - old, &buf->data_size);
diff --git a/arch/x86/events/intel/pt.h b/arch/x86/events/intel/pt.h
index f5e46c0..a1b6c04 100644
--- a/arch/x86/events/intel/pt.h
+++ b/arch/x86/events/intel/pt.h
@@ -65,6 +65,7 @@ struct pt_pmu {
* @head: logical write offset inside the buffer
* @snapshot: if this is for a snapshot/overwrite counter
* @single: use Single Range Output instead of ToPA
+ * @wrapped: buffer advance wrapped back to the first topa table
* @stop_pos: STOP topa entry index
* @intr_pos: INT topa entry index
* @stop_te: STOP topa entry pointer
@@ -82,6 +83,7 @@ struct pt_buffer {
local64_t head;
bool snapshot;
bool single;
+ bool wrapped;
long stop_pos, intr_pos;
struct topa_entry *stop_te, *intr_te;
void **data_pages;
From: Michal Pecio <michal.pecio@gmail.com>
Stop Endpoint command on an already stopped endpoint fails and may be
misinterpreted as a known hardware bug by the completion handler. This
results in an unnecessary delay with repeated retries of the command.
Avoid queuing this command when endpoint state flags indicate that it's
stopped or halted and the command will fail. If commands are pending on
the endpoint, their completion handlers will process the cancelled TDs, so
nothing more needs to be done. When the endpoint is waiting for an external
operation such as clearing the TT buffer, it is already stopped and the
cancelled TDs can be processed right away.
This eliminates practically all unnecessary retries because an endpoint
with pending URBs is maintained in Running state by the driver, unless
aforementioned commands or other operations are pending on it. This is
guaranteed by xhci_ring_ep_doorbell() and by the fact that it is called
every time any of those operations completes.
The only known exceptions are hardware bugs (the endpoint never starts
at all) and Stream Protocol errors not associated with any TRB, which
cause an endpoint reset not followed by restart. Sounds like a bug.
Generally, these retries are only expected to happen when the endpoint
fails to start for unknown/no reason, which is a worse problem itself,
and fixing the bug eliminates the retries too.
All cases were tested and found to work as expected. SET_DEQ_PENDING
was produced by patching uvcvideo to unlink URBs in 100us intervals,
which then runs into this case very often. EP_HALTED was produced by
restarting 'cat /dev/ttyUSB0' on a serial dongle with broken cable.
EP_CLEARING_TT by the same, with the dongle on an external hub.
Fixes: fd9d55d190c0 ("xhci: retry Stop Endpoint on buggy NEC controllers")
CC: stable@vger.kernel.org
Signed-off-by: Michal Pecio <michal.pecio@gmail.com>
Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
---
drivers/usb/host/xhci-ring.c | 13 +++++++++++++
drivers/usb/host/xhci.c | 19 +++++++++++++++----
drivers/usb/host/xhci.h | 1 +
3 files changed, 29 insertions(+), 4 deletions(-)
diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index 55be03be2374..4cf5363875c7 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -1076,6 +1076,19 @@ static int xhci_invalidate_cancelled_tds(struct xhci_virt_ep *ep)
return 0;
}
+/*
+ * Erase queued TDs from transfer ring(s) and give back those the xHC didn't
+ * stop on. If necessary, queue commands to move the xHC off cancelled TDs it
+ * stopped on. Those will be given back later when the commands complete.
+ *
+ * Call under xhci->lock on a stopped endpoint.
+ */
+void xhci_process_cancelled_tds(struct xhci_virt_ep *ep)
+{
+ xhci_invalidate_cancelled_tds(ep);
+ xhci_giveback_invalidated_tds(ep);
+}
+
/*
* Returns the TD the endpoint ring halted on.
* Only call for non-running rings without streams.
diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c
index 4977ada0a19e..5ebde8cae4fc 100644
--- a/drivers/usb/host/xhci.c
+++ b/drivers/usb/host/xhci.c
@@ -1756,10 +1756,21 @@ static int xhci_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status)
}
}
- /* Queue a stop endpoint command, but only if this is
- * the first cancellation to be handled.
- */
- if (!(ep->ep_state & EP_STOP_CMD_PENDING)) {
+ /* These completion handlers will sort out cancelled TDs for us */
+ if (ep->ep_state & (EP_STOP_CMD_PENDING | EP_HALTED | SET_DEQ_PENDING)) {
+ xhci_dbg(xhci, "Not queuing Stop Endpoint on slot %d ep %d in state 0x%x\n",
+ urb->dev->slot_id, ep_index, ep->ep_state);
+ goto done;
+ }
+
+ /* In this case no commands are pending but the endpoint is stopped */
+ if (ep->ep_state & EP_CLEARING_TT) {
+ /* and cancelled TDs can be given back right away */
+ xhci_dbg(xhci, "Invalidating TDs instantly on slot %d ep %d in state 0x%x\n",
+ urb->dev->slot_id, ep_index, ep->ep_state);
+ xhci_process_cancelled_tds(ep);
+ } else {
+ /* Otherwise, queue a new Stop Endpoint command */
command = xhci_alloc_command(xhci, false, GFP_ATOMIC);
if (!command) {
ret = -ENOMEM;
diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h
index 6dd3138b2380..4914f0a10cff 100644
--- a/drivers/usb/host/xhci.h
+++ b/drivers/usb/host/xhci.h
@@ -1922,6 +1922,7 @@ void inc_deq(struct xhci_hcd *xhci, struct xhci_ring *ring);
unsigned int count_trbs(u64 addr, u64 len);
int xhci_stop_endpoint_sync(struct xhci_hcd *xhci, struct xhci_virt_ep *ep,
int suspend, gfp_t gfp_flags);
+void xhci_process_cancelled_tds(struct xhci_virt_ep *ep);
/* xHCI roothub code */
void xhci_set_link_state(struct xhci_hcd *xhci, struct xhci_port *port,
--
2.25.1
From: Michal Pecio <michal.pecio@gmail.com>
xhci_invalidate_cancelled_tds() may not work correctly if the hardware
is modifying endpoint or stream contexts at the same time by executing
a Set TR Dequeue command. And even if it worked, it would be unable to
queue Set TR Dequeue for the next stream, failing to clear xHC cache.
On stream endpoints, a chain of Set TR Dequeue commands may take some
time to execute and we may want to cancel more TDs during this time.
Currently this leads to the Stop Endpoint completion handler calling this
function without testing for SET_DEQ_PENDING, which triggers the
aforementioned problems when it happens.
On all endpoints, a halt condition causes Reset Endpoint to be queued
and an error status given to the class driver, which may unlink more
URBs in response. Stop Endpoint is queued and its handler may execute
concurrently with Set TR Dequeue queued by Reset Endpoint handler.
(Reset Endpoint handler calls this function too, but there seems to
be no possibility of it running concurrently with Set TR Dequeue).
Fix xhci_invalidate_cancelled_tds() to work correctly under a pending
Set TR Dequeue. Bail out of the function when SET_DEQ_PENDING is set,
then make the completion handler call the function again and also call
xhci_giveback_invalidated_tds(), which needs to be called next.
This seems to fix another potential bug, where the handler would call
xhci_invalidate_cancelled_tds(), which may clear some deferred TDs if
a sanity check fails, and the TDs wouldn't be given back promptly.
Said sanity check seems to be wrong and prone to false positives when
the endpoint halts, but fixing it is beyond the scope of this change,
besides ensuring that cleared TDs are given back properly.
Fixes: 5ceac4402f5d ("xhci: Handle TD clearing for multiple streams case")
CC: stable@vger.kernel.org
Signed-off-by: Michal Pecio <michal.pecio@gmail.com>
Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
---
drivers/usb/host/xhci-ring.c | 18 +++++++++++++-----
1 file changed, 13 insertions(+), 5 deletions(-)
diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index dd23596ccd84..55be03be2374 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -980,6 +980,13 @@ static int xhci_invalidate_cancelled_tds(struct xhci_virt_ep *ep)
unsigned int slot_id = ep->vdev->slot_id;
int err;
+ /*
+ * This is not going to work if the hardware is changing its dequeue
+ * pointers as we look at them. Completion handler will call us later.
+ */
+ if (ep->ep_state & SET_DEQ_PENDING)
+ return 0;
+
xhci = ep->xhci;
list_for_each_entry_safe(td, tmp_td, &ep->cancelled_td_list, cancelled_td_list) {
@@ -1367,7 +1374,6 @@ static void xhci_handle_cmd_set_deq(struct xhci_hcd *xhci, int slot_id,
struct xhci_slot_ctx *slot_ctx;
struct xhci_stream_ctx *stream_ctx;
struct xhci_td *td, *tmp_td;
- bool deferred = false;
ep_index = TRB_TO_EP_INDEX(le32_to_cpu(trb->generic.field[3]));
stream_id = TRB_TO_STREAM_ID(le32_to_cpu(trb->generic.field[2]));
@@ -1471,8 +1477,6 @@ static void xhci_handle_cmd_set_deq(struct xhci_hcd *xhci, int slot_id,
xhci_dbg(ep->xhci, "%s: Giveback cancelled URB %p TD\n",
__func__, td->urb);
xhci_td_cleanup(ep->xhci, td, ep_ring, td->status);
- } else if (td->cancel_status == TD_CLEARING_CACHE_DEFERRED) {
- deferred = true;
} else {
xhci_dbg(ep->xhci, "%s: Keep cancelled URB %p TD as cancel_status is %d\n",
__func__, td->urb, td->cancel_status);
@@ -1483,11 +1487,15 @@ static void xhci_handle_cmd_set_deq(struct xhci_hcd *xhci, int slot_id,
ep->queued_deq_seg = NULL;
ep->queued_deq_ptr = NULL;
- if (deferred) {
- /* We have more streams to clear */
+ /* Check for deferred or newly cancelled TDs */
+ if (!list_empty(&ep->cancelled_td_list)) {
xhci_dbg(ep->xhci, "%s: Pending TDs to clear, continuing with invalidation\n",
__func__);
xhci_invalidate_cancelled_tds(ep);
+ /* Try to restart the endpoint if all is done */
+ ring_doorbell_for_active_rings(xhci, slot_id, ep_index);
+ /* Start giving back any TDs invalidated above */
+ xhci_giveback_invalidated_tds(ep);
} else {
/* Restart any rings with pending URBs */
xhci_dbg(ep->xhci, "%s: All TDs cleared, ring doorbell\n", __func__);
--
2.25.1
From: Michal Pecio <michal.pecio@gmail.com>
Some host controllers fail to atomically transition an endpoint to the
Running state on a doorbell ring and enter a hidden "Restarting" state,
which looks very much like Stopped, with the important difference that
it will spontaneously transition to Running anytime soon.
A Stop Endpoint command queued in the Restarting state typically fails
with Context State Error and the completion handler sees the Endpoint
Context State as either still Stopped or already Running. Even a case
of Halted was observed, when an error occurred right after the restart.
The Halted state is already recovered from by resetting the endpoint.
The Running state is handled by retrying Stop Endpoint.
The Stopped state was recognized as a problem on NEC controllers and
worked around also by retrying, because the endpoint soon restarts and
then stops for good. But there is a risk: the command may fail if the
endpoint is "stopped for good" already, and retries will fail forever.
The possibility of this was not realized at the time, but a number of
cases were discovered later and reproduced. Some proved difficult to
deal with, and it is outright impossible to predict if an endpoint may
fail to ever start at all due to a hardware bug. One such bug (albeit
on ASM3142, not on NEC) was found to be reliably triggered simply by
toggling an AX88179 NIC up/down in a tight loop for a few seconds.
An endless retry storm is quite nasty. Besides putting needless load
on the xHC and CPU, it causes URBs never to be given back, paralyzing
the device and connection/disconnection logic for the whole bus if the
device is unplugged. User processes waiting for URBs become unkillable,
drivers and kworker threads lock up and xhci_hcd cannot be reloaded.
For peace of mind, impose a timeout on Stop Endpoint retries in this
case. If they don't succeed in 100ms, consider the endpoint stopped
permanently for some reason and just give back the unlinked URBs. This
failure case is rare already and work is under way to make it rarer.
Start this work today by also handling one simple case of race with
Reset Endpoint, because it costs just two lines to implement.
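The 100ms cap itself is the familiar record-a-start-time pattern: the driver
records jiffies when the command is first queued and checks
time_is_before_jiffies() on retry. The sketch below is a userspace-flavoured
illustration of the same idea, not the xHCI code:

#include <stdbool.h>
#include <stdio.h>
#include <time.h>
#include <unistd.h>

/* Monotonic time in milliseconds. */
static long now_ms(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return ts.tv_sec * 1000L + ts.tv_nsec / 1000000L;
}

/* Allow retries only within a 100ms budget measured from stop_time. */
static bool may_retry(long stop_time)
{
	return now_ms() - stop_time < 100;
}

int main(void)
{
	long stop_time = now_ms();	/* recorded when the command was first queued */
	int retries = 0;

	while (may_retry(stop_time)) {
		retries++;		/* stand-in for re-queuing Stop Endpoint */
		usleep(1000);		/* pretend each retry takes about 1ms */
	}
	printf("gave up after ~100ms and %d retries; give back unlinked URBs\n",
	       retries);
	return 0;
}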
Fixes: fd9d55d190c0 ("xhci: retry Stop Endpoint on buggy NEC controllers")
CC: stable@vger.kernel.org
Signed-off-by: Michal Pecio <michal.pecio@gmail.com>
Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
---
drivers/usb/host/xhci-ring.c | 28 ++++++++++++++++++++++++----
drivers/usb/host/xhci.c | 2 ++
drivers/usb/host/xhci.h | 1 +
3 files changed, 27 insertions(+), 4 deletions(-)
diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index c9c0c4a7588a..dd23596ccd84 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -52,6 +52,7 @@
* endpoint rings; it generates events on the event ring for these.
*/
+#include <linux/jiffies.h>
#include <linux/scatterlist.h>
#include <linux/slab.h>
#include <linux/dma-mapping.h>
@@ -1158,16 +1159,35 @@ static void xhci_handle_cmd_stop_ep(struct xhci_hcd *xhci, int slot_id,
return;
case EP_STATE_STOPPED:
/*
- * NEC uPD720200 sometimes sets this state and fails with
- * Context Error while continuing to process TRBs.
- * Be conservative and trust EP_CTX_STATE on other chips.
+ * Per xHCI 4.6.9, Stop Endpoint command on a Stopped
+ * EP is a Context State Error, and EP stays Stopped.
+ *
+ * But maybe it failed on Halted, and somebody ran Reset
+ * Endpoint later. EP state is now Stopped and EP_HALTED
+ * still set because Reset EP handler will run after us.
+ */
+ if (ep->ep_state & EP_HALTED)
+ break;
+ /*
+ * On some HCs EP state remains Stopped for some tens of
+ * us to a few ms or more after a doorbell ring, and any
+ * new Stop Endpoint fails without aborting the restart.
+ * This handler may run quickly enough to still see this
+ * Stopped state, but it will soon change to Running.
+ *
+ * Assume this bug on unexpected Stop Endpoint failures.
+ * Keep retrying until the EP starts and stops again, on
+ * chips where this is known to help. Wait for 100ms.
*/
if (!(xhci->quirks & XHCI_NEC_HOST))
break;
+ if (time_is_before_jiffies(ep->stop_time + msecs_to_jiffies(100)))
+ break;
fallthrough;
case EP_STATE_RUNNING:
/* Race, HW handled stop ep cmd before ep was running */
- xhci_dbg(xhci, "Stop ep completion ctx error, ep is running\n");
+ xhci_dbg(xhci, "Stop ep completion ctx error, ctx_state %d\n",
+ GET_EP_CTX_STATE(ep_ctx));
command = xhci_alloc_command(xhci, false, GFP_ATOMIC);
if (!command) {
diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c
index bc477cf99805..4977ada0a19e 100644
--- a/drivers/usb/host/xhci.c
+++ b/drivers/usb/host/xhci.c
@@ -8,6 +8,7 @@
* Some code borrowed from the Linux EHCI driver.
*/
+#include <linux/jiffies.h>
#include <linux/pci.h>
#include <linux/iommu.h>
#include <linux/iopoll.h>
@@ -1764,6 +1765,7 @@ static int xhci_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status)
ret = -ENOMEM;
goto done;
}
+ ep->stop_time = jiffies;
ep->ep_state |= EP_STOP_CMD_PENDING;
xhci_queue_stop_endpoint(xhci, command, urb->dev->slot_id,
ep_index, 0);
diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h
index a0e992c3db0d..6dd3138b2380 100644
--- a/drivers/usb/host/xhci.h
+++ b/drivers/usb/host/xhci.h
@@ -691,6 +691,7 @@ struct xhci_virt_ep {
/* Bandwidth checking storage */
struct xhci_bw_info bw_info;
struct list_head bw_endpoint_list;
+ unsigned long stop_time;
/* Isoch Frame ID checking storage */
int next_frame_id;
/* Use new Isoch TRB layout needed for extended TBC support */
--
2.25.1
From: Kuangyi Chiang <ki.chiang65@gmail.com>
Performing a stability stress test on a USB3.0 2.5G ethernet adapter
results in errors like this:
[ 91.441469] r8152 2-3:1.0 eth3: get_registers -71
[ 91.458659] r8152 2-3:1.0 eth3: get_registers -71
[ 91.475911] r8152 2-3:1.0 eth3: get_registers -71
[ 91.493203] r8152 2-3:1.0 eth3: get_registers -71
[ 91.510421] r8152 2-3:1.0 eth3: get_registers -71
The r8152 driver will periodically issue lots of control-IN requests
to access the status of ethernet adapter hardware registers during
the test.
This happens when the xHCI driver enqueues a control TD (which crosses
over the Link TRB between two ring segments, as shown) on endpoint
zero's transfer ring. It seems the Etron xHCI host cannot process such
a TD correctly, causing the USB transfer errors. Retrying the
control-IN request in the upper driver might work around the problem,
but not all drivers do this.
| |
-------
| TRB | Setup Stage
-------
| TRB | Link
-------
-------
| TRB | Data Stage
-------
| TRB | Status Stage
-------
| |
To work around this, the xHCI driver should enqueue a No Op TRB if the
next available TRB is the Link TRB in the ring segment; this prevents
the Setup and Data Stage TRBs from being separated by the Link TRB.
Check whether the XHCI_ETRON_HOST quirk flag is set before invoking the
workaround in xhci_queue_ctrl_tx().
Fixes: d0e96f5a71a0 ("USB: xhci: Control transfer support.")
Cc: <stable@vger.kernel.org>
Signed-off-by: Kuangyi Chiang <ki.chiang65@gmail.com>
Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
---
drivers/usb/host/xhci-ring.c | 14 ++++++++++++++
1 file changed, 14 insertions(+)
diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index f62b243d0fc4..517df97ef496 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -3733,6 +3733,20 @@ int xhci_queue_ctrl_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
if (!urb->setup_packet)
return -EINVAL;
+ if ((xhci->quirks & XHCI_ETRON_HOST) &&
+ urb->dev->speed >= USB_SPEED_SUPER) {
+ /*
+ * If next available TRB is the Link TRB in the ring segment then
+ * enqueue a No Op TRB, this can prevent the Setup and Data Stage
+ * TRB to be breaked by the Link TRB.
+ */
+ if (trb_is_link(ep_ring->enqueue + 1)) {
+ field = TRB_TYPE(TRB_TR_NOOP) | ep_ring->cycle_state;
+ queue_trb(xhci, ep_ring, false, 0, 0,
+ TRB_INTR_TARGET(0), field);
+ }
+ }
+
/* 1 TRB for setup, 1 for status */
num_trbs = 2;
/*
--
2.25.1
From: Kuangyi Chiang <ki.chiang65@gmail.com>
Sometimes the hub driver does not recognize the USB device connected
to the external USB2.0 hub when the system resumes from S4.
After the SetPortFeature(PORT_RESET) request is completed, the hub
driver calls the HCD reset_device callback, which will issue a Reset
Device command and free all structures associated with endpoints
that were disabled.
This happens when the xHCI driver issues a Reset Device command to
inform the Etron xHCI host that the USB device associated with a
device slot has been reset. It seems that the Etron xHCI host cannot
perform this command correctly, which affects the USB device.
To work around this, the xHCI driver should obtain a new device slot
with reference to commit 651aaf36a7d7 ("usb: xhci: Handle USB transaction
error on address command"), which is another way to inform the Etron
xHCI host that the USB device has been reset.
Add a new XHCI_ETRON_HOST quirk flag to invoke the workaround in
xhci_discover_or_reset_device().
Fixes: 2a8f82c4ceaf ("USB: xhci: Notify the xHC when a device is reset.")
Cc: <stable@vger.kernel.org>
Signed-off-by: Kuangyi Chiang <ki.chiang65@gmail.com>
Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
---
drivers/usb/host/xhci-pci.c | 1 +
drivers/usb/host/xhci.c | 19 +++++++++++++++++++
drivers/usb/host/xhci.h | 1 +
3 files changed, 21 insertions(+)
diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c
index db3c7e738213..4b8c93e59d6d 100644
--- a/drivers/usb/host/xhci-pci.c
+++ b/drivers/usb/host/xhci-pci.c
@@ -396,6 +396,7 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci)
if (pdev->vendor == PCI_VENDOR_ID_ETRON &&
(pdev->device == PCI_DEVICE_ID_EJ168 ||
pdev->device == PCI_DEVICE_ID_EJ188)) {
+ xhci->quirks |= XHCI_ETRON_HOST;
xhci->quirks |= XHCI_RESET_ON_RESUME;
xhci->quirks |= XHCI_BROKEN_STREAMS;
}
diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c
index aa8c877f47ac..ae16253b53fb 100644
--- a/drivers/usb/host/xhci.c
+++ b/drivers/usb/host/xhci.c
@@ -3733,6 +3733,8 @@ void xhci_free_device_endpoint_resources(struct xhci_hcd *xhci,
xhci->num_active_eps);
}
+static void xhci_free_dev(struct usb_hcd *hcd, struct usb_device *udev);
+
/*
* This submits a Reset Device Command, which will set the device state to 0,
* set the device address to 0, and disable all the endpoints except the default
@@ -3803,6 +3805,23 @@ static int xhci_discover_or_reset_device(struct usb_hcd *hcd,
SLOT_STATE_DISABLED)
return 0;
+ if (xhci->quirks & XHCI_ETRON_HOST) {
+ /*
+ * Obtaining a new device slot to inform the xHCI host that
+ * the USB device has been reset.
+ */
+ ret = xhci_disable_slot(xhci, udev->slot_id);
+ xhci_free_virt_device(xhci, udev->slot_id);
+ if (!ret) {
+ ret = xhci_alloc_dev(hcd, udev);
+ if (ret == 1)
+ ret = 0;
+ else
+ ret = -EINVAL;
+ }
+ return ret;
+ }
+
trace_xhci_discover_or_reset_device(slot_ctx);
xhci_dbg(xhci, "Resetting device with slot ID %u\n", slot_id);
diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h
index d3b250c736b8..a0204e10486d 100644
--- a/drivers/usb/host/xhci.h
+++ b/drivers/usb/host/xhci.h
@@ -1631,6 +1631,7 @@ struct xhci_hcd {
#define XHCI_ZHAOXIN_HOST BIT_ULL(46)
#define XHCI_WRITE_64_HI_LO BIT_ULL(47)
#define XHCI_CDNS_SCTX_QUIRK BIT_ULL(48)
+#define XHCI_ETRON_HOST BIT_ULL(49)
unsigned int num_active_eps;
unsigned int limit_active_eps;
--
2.25.1