From: Michael J. Ruhl <michael.j.ruhl(a)intel.com>
commit a0e0cb82804a6a21d9067022c2dfdf80d11da429 upstream
pq_update() can only be called in two places: from the completion
function when the complete (npkts) sequence of packets has been
submitted and processed, or from the setup function if a subset of the
packets were submitted (i.e. the error path).
Currently both paths can call pq_update() if an error occurs. This
race will cause the n_req value to go negative, hanging file_close(),
or cause a crash by freeing the txlist more than once.
Several variables are used to determine SDMA send state. Most of
these are unnecessary, and have races, visible by code inspection,
between the setup function and the completion function, in both the
send path and the error path.
The request 'status' value can be set by either the setup or the
completion function. This is racy by inspection. Since the status
is not needed by the completion code or by the caller, it has been
removed.
The request 'done' value races between usage by the setup and the
completion function. The completion function does not need it: when
the number of processed packets matches npkts, the request is done.
The 'has_error' value races between usage by the setup and the
completion function. This can cause incorrect error handling and leave
n_req with an incorrect (i.e. negative) value.
Simplify the code by removing all of the unneeded state checks and
variables.
Clean up iovs node when it is freed.
Eliminate race conditions in the error path:
If all packets are submitted, the completion handler will set the
completion status correctly (ok or aborted).
If not all packets are submitted, the caller must wait until the
submitted packets have completed, and then set the completion status.
These two changes eliminate the race condition in the error path.
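As an aside, the ownership rule this enforces can be shown with a small
userspace analogue (illustrative only; apart from n_reqs, seqsubmitted
and npkts, every name below is invented for the sketch): exactly one
path, setup or completion, releases a request, so the count can never
go negative.

#include <assert.h>
#include <stdatomic.h>
#include <stdio.h>

struct pkt_q { atomic_int n_reqs; };
struct req { unsigned int seqsubmitted, npkts; };

static void pq_update(struct pkt_q *pq)
{
	/* Must run exactly once per request, or n_reqs goes negative. */
	assert(atomic_fetch_sub(&pq->n_reqs, 1) > 0);
}

static void setup(struct pkt_q *pq, struct req *r, unsigned int submitted)
{
	atomic_fetch_add(&pq->n_reqs, 1);	/* count it once initialized */
	r->seqsubmitted = submitted;
	if (r->seqsubmitted == r->npkts)
		return;	/* fully submitted: the completion path owns the request */
	/* Error path: after waiting for in-flight packets, clean up here. */
	pq_update(pq);
}

static void completion(struct pkt_q *pq, struct req *r)
{
	if (r->seqsubmitted == r->npkts)	/* full sequence was submitted */
		pq_update(pq);
}

int main(void)
{
	struct pkt_q pq = { 0 };
	struct req full = { 0, 4 }, partial = { 0, 4 };

	setup(&pq, &full, 4);		/* completion path owns this one */
	completion(&pq, &full);
	setup(&pq, &partial, 2);	/* error path already released it */
	completion(&pq, &partial);	/* stays hands-off: 2 != 4 */
	printf("n_reqs = %d\n", atomic_load(&pq.n_reqs));	/* 0, never negative */
	return 0;
}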
Cc: <stable(a)vger.kernel.org> # 4.9.0
Reviewed-by: Mitko Haralanov <mitko.haralanov(a)intel.com>
Reviewed-by: Mike Marciniszyn <mike.marciniszyn(a)intel.com>
Signed-off-by: Michael J. Ruhl <michael.j.ruhl(a)intel.com>
---
drivers/infiniband/hw/hfi1/user_sdma.c | 106 ++++++++++++++------------------
1 file changed, 45 insertions(+), 61 deletions(-)
diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c
index 018a415..619475c 100644
--- a/drivers/infiniband/hw/hfi1/user_sdma.c
+++ b/drivers/infiniband/hw/hfi1/user_sdma.c
@@ -148,11 +148,8 @@
#define TXREQ_FLAGS_REQ_LAST_PKT BIT(0)
/* SDMA request flag bits */
-#define SDMA_REQ_FOR_THREAD 1
-#define SDMA_REQ_SEND_DONE 2
-#define SDMA_REQ_HAVE_AHG 3
-#define SDMA_REQ_HAS_ERROR 4
-#define SDMA_REQ_DONE_ERROR 5
+#define SDMA_REQ_HAVE_AHG 1
+#define SDMA_REQ_HAS_ERROR 2
#define SDMA_PKT_Q_INACTIVE BIT(0)
#define SDMA_PKT_Q_ACTIVE BIT(1)
@@ -252,8 +249,6 @@ struct user_sdma_request {
u64 seqsubmitted;
struct list_head txps;
unsigned long flags;
- /* status of the last txreq completed */
- int status;
};
/*
@@ -546,7 +541,6 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec,
struct sdma_req_info info;
struct user_sdma_request *req;
u8 opcode, sc, vl;
- int req_queued = 0;
u16 dlid;
u32 selector;
@@ -611,11 +605,13 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec,
req->data_iovs = req_iovcnt(info.ctrl) - 1; /* subtract header vector */
req->pq = pq;
req->cq = cq;
- req->status = -1;
INIT_LIST_HEAD(&req->txps);
memcpy(&req->info, &info, sizeof(info));
+ /* The request is initialized, count it */
+ atomic_inc(&pq->n_reqs);
+
if (req_opcode(info.ctrl) == EXPECTED) {
/* expected must have a TID info and at least one data vector */
if (req->data_iovs < 2) {
@@ -704,7 +700,7 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec,
memcpy(&req->iovs[i].iov, iovec + idx++, sizeof(struct iovec));
ret = pin_vector_pages(req, &req->iovs[i]);
if (ret) {
- req->status = ret;
+ req->data_iovs = i;
goto free_req;
}
req->data_len += req->iovs[i].iov.iov_len;
@@ -772,14 +768,10 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec,
}
set_comp_state(pq, cq, info.comp_idx, QUEUED, 0);
- atomic_inc(&pq->n_reqs);
- req_queued = 1;
/* Send the first N packets in the request to buy us some time */
ret = user_sdma_send_pkts(req, pcount);
- if (unlikely(ret < 0 && ret != -EBUSY)) {
- req->status = ret;
+ if (unlikely(ret < 0 && ret != -EBUSY))
goto free_req;
- }
/*
* It is possible that the SDMA engine would have processed all the
@@ -796,17 +788,11 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec,
* request have been submitted to the SDMA engine. However, it
* will not wait for send completions.
*/
- while (!test_bit(SDMA_REQ_SEND_DONE, &req->flags)) {
+ while (req->seqsubmitted != req->info.npkts) {
ret = user_sdma_send_pkts(req, pcount);
if (ret < 0) {
- if (ret != -EBUSY) {
- req->status = ret;
- set_bit(SDMA_REQ_DONE_ERROR, &req->flags);
- if (ACCESS_ONCE(req->seqcomp) ==
- req->seqsubmitted - 1)
- goto free_req;
- return ret;
- }
+ if (ret != -EBUSY)
+ goto free_req;
wait_event_interruptible_timeout(
pq->busy.wait_dma,
(pq->state == SDMA_PKT_Q_ACTIVE),
@@ -817,10 +803,19 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec,
*count += idx;
return 0;
free_req:
- user_sdma_free_request(req, true);
- if (req_queued)
+ /*
+ * If seqsubmitted == npkts, the completion routine controls the
+ * final state. If seqsubmitted < npkts, wait for any
+ * outstanding packets to finish before cleaning up.
+ */
+ if (req->seqsubmitted < req->info.npkts) {
+ if (req->seqsubmitted)
+ wait_event(pq->busy.wait_dma,
+ (req->seqcomp == req->seqsubmitted - 1));
+ user_sdma_free_request(req, true);
pq_update(pq);
- set_comp_state(pq, cq, info.comp_idx, ERROR, req->status);
+ set_comp_state(pq, cq, info.comp_idx, ERROR, ret);
+ }
return ret;
}
@@ -903,10 +898,8 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
pq = req->pq;
/* If tx completion has reported an error, we are done. */
- if (test_bit(SDMA_REQ_HAS_ERROR, &req->flags)) {
- set_bit(SDMA_REQ_DONE_ERROR, &req->flags);
+ if (test_bit(SDMA_REQ_HAS_ERROR, &req->flags))
return -EFAULT;
- }
/*
* Check if we might have sent the entire request already
@@ -929,10 +922,8 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
* with errors. If so, we are not going to process any
* more packets from this request.
*/
- if (test_bit(SDMA_REQ_HAS_ERROR, &req->flags)) {
- set_bit(SDMA_REQ_DONE_ERROR, &req->flags);
+ if (test_bit(SDMA_REQ_HAS_ERROR, &req->flags))
return -EFAULT;
- }
tx = kmem_cache_alloc(pq->txreq_cache, GFP_KERNEL);
if (!tx)
@@ -1090,7 +1081,6 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
ret = sdma_send_txlist(req->sde, &pq->busy, &req->txps, &count);
req->seqsubmitted += count;
if (req->seqsubmitted == req->info.npkts) {
- set_bit(SDMA_REQ_SEND_DONE, &req->flags);
/*
* The txreq has already been submitted to the HW queue
* so we can free the AHG entry now. Corruption will not
@@ -1489,11 +1479,15 @@ static int set_txreq_header_ahg(struct user_sdma_request *req,
return diff;
}
-/*
- * SDMA tx request completion callback. Called when the SDMA progress
- * state machine gets notification that the SDMA descriptors for this
- * tx request have been processed by the DMA engine. Called in
- * interrupt context.
+/**
+ * user_sdma_txreq_cb() - SDMA tx request completion callback.
+ * @txreq: valid sdma tx request
+ * @status: success/failure of request
+ *
+ * Called when the SDMA progress state machine gets notification that
+ * the SDMA descriptors for this tx request have been processed by the
+ * DMA engine. Called in interrupt context.
+ * Only do work on completed sequences.
*/
static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status)
{
@@ -1502,7 +1496,7 @@ static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status)
struct user_sdma_request *req;
struct hfi1_user_sdma_pkt_q *pq;
struct hfi1_user_sdma_comp_q *cq;
- u16 idx;
+ enum hfi1_sdma_comp_state state = COMPLETE;
if (!tx->req)
return;
@@ -1515,31 +1509,19 @@ static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status)
SDMA_DBG(req, "SDMA completion with error %d",
status);
set_bit(SDMA_REQ_HAS_ERROR, &req->flags);
+ state = ERROR;
}
req->seqcomp = tx->seqnum;
kmem_cache_free(pq->txreq_cache, tx);
- tx = NULL;
-
- idx = req->info.comp_idx;
- if (req->status == -1 && status == SDMA_TXREQ_S_OK) {
- if (req->seqcomp == req->info.npkts - 1) {
- req->status = 0;
- user_sdma_free_request(req, false);
- pq_update(pq);
- set_comp_state(pq, cq, idx, COMPLETE, 0);
- }
- } else {
- if (status != SDMA_TXREQ_S_OK)
- req->status = status;
- if (req->seqcomp == (ACCESS_ONCE(req->seqsubmitted) - 1) &&
- (test_bit(SDMA_REQ_SEND_DONE, &req->flags) ||
- test_bit(SDMA_REQ_DONE_ERROR, &req->flags))) {
- user_sdma_free_request(req, false);
- pq_update(pq);
- set_comp_state(pq, cq, idx, ERROR, req->status);
- }
- }
+
+ /* sequence isn't complete? We are done */
+ if (req->seqcomp != req->info.npkts - 1)
+ return;
+
+ user_sdma_free_request(req, false);
+ set_comp_state(pq, cq, req->info.comp_idx, state, status);
+ pq_update(pq);
}
static inline void pq_update(struct hfi1_user_sdma_pkt_q *pq)
@@ -1572,6 +1554,8 @@ static void user_sdma_free_request(struct user_sdma_request *req, bool unpin)
if (!node)
continue;
+ req->iovs[i].node = NULL;
+
if (unpin)
hfi1_mmu_rb_remove(req->pq->handler,
&node->rb);
From: Michael J. Ruhl <michael.j.ruhl(a)intel.com>
commit a0e0cb82804a6a21d9067022c2dfdf80d11da429 upstream
pq_update() can only be called in two places: from the completion
function when the complete (npkts) sequence of packets has been
submitted and processed, or from the setup function if a subset of the
packets were submitted (i.e. the error path).
Currently both paths can call pq_update() if an error occurs. This
race will cause the n_req value to go negative, hanging file_close(),
or cause a crash by freeing the txlist more than once.
Several variables are used to determine SDMA send state. Most of
these are unnecessary, and have races, visible by code inspection,
between the setup function and the completion function, in both the
send path and the error path.
The request 'status' value can be set by either the setup or the
completion function. This is racy by inspection. Since the status
is not needed by the completion code or by the caller, it has been
removed.
The request 'done' value races between usage by the setup and the
completion function. The completion function does not need it: when
the number of processed packets matches npkts, the request is done.
The 'has_error' value races between usage by the setup and the
completion function. This can cause incorrect error handling and leave
n_req with an incorrect (i.e. negative) value.
Simplify the code by removing all of the unneeded state checks and
variables.
Clean up iovs node when it is freed.
Eliminate race conditions in the error path:
If all packets are submitted, the completion handler will set the
completion status correctly (ok or aborted).
If not all packets are submitted, the caller must wait until the
submitted packets have completed, and then set the completion status.
These two changes eliminate the race condition in the error path.
Cc: <stable(a)vger.kernel.org> # 4.18.x+
Reviewed-by: Mitko Haralanov <mitko.haralanov(a)intel.com>
Reviewed-by: Mike Marciniszyn <mike.marciniszyn(a)intel.com>
Signed-off-by: Michael J. Ruhl <michael.j.ruhl(a)intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro(a)intel.com>
Signed-off-by: Jason Gunthorpe <jgg(a)mellanox.com>
---
drivers/infiniband/hw/hfi1/user_sdma.c | 87 ++++++++++++++------------------
drivers/infiniband/hw/hfi1/user_sdma.h | 3 -
2 files changed, 39 insertions(+), 51 deletions(-)
diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c
index 5c88706..39134dd 100644
--- a/drivers/infiniband/hw/hfi1/user_sdma.c
+++ b/drivers/infiniband/hw/hfi1/user_sdma.c
@@ -328,7 +328,6 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
u8 opcode, sc, vl;
u16 pkey;
u32 slid;
- int req_queued = 0;
u16 dlid;
u32 selector;
@@ -392,7 +391,6 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
req->data_len = 0;
req->pq = pq;
req->cq = cq;
- req->status = -1;
req->ahg_idx = -1;
req->iov_idx = 0;
req->sent = 0;
@@ -400,12 +398,14 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
req->seqcomp = 0;
req->seqsubmitted = 0;
req->tids = NULL;
- req->done = 0;
req->has_error = 0;
INIT_LIST_HEAD(&req->txps);
memcpy(&req->info, &info, sizeof(info));
+ /* The request is initialized, count it */
+ atomic_inc(&pq->n_reqs);
+
if (req_opcode(info.ctrl) == EXPECTED) {
/* expected must have a TID info and at least one data vector */
if (req->data_iovs < 2) {
@@ -500,7 +500,6 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
ret = pin_vector_pages(req, &req->iovs[i]);
if (ret) {
req->data_iovs = i;
- req->status = ret;
goto free_req;
}
req->data_len += req->iovs[i].iov.iov_len;
@@ -561,14 +560,10 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
req->ahg_idx = sdma_ahg_alloc(req->sde);
set_comp_state(pq, cq, info.comp_idx, QUEUED, 0);
- atomic_inc(&pq->n_reqs);
- req_queued = 1;
/* Send the first N packets in the request to buy us some time */
ret = user_sdma_send_pkts(req, pcount);
- if (unlikely(ret < 0 && ret != -EBUSY)) {
- req->status = ret;
+ if (unlikely(ret < 0 && ret != -EBUSY))
goto free_req;
- }
/*
* It is possible that the SDMA engine would have processed all the
@@ -588,14 +583,8 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
while (req->seqsubmitted != req->info.npkts) {
ret = user_sdma_send_pkts(req, pcount);
if (ret < 0) {
- if (ret != -EBUSY) {
- req->status = ret;
- WRITE_ONCE(req->has_error, 1);
- if (READ_ONCE(req->seqcomp) ==
- req->seqsubmitted - 1)
- goto free_req;
- return ret;
- }
+ if (ret != -EBUSY)
+ goto free_req;
wait_event_interruptible_timeout(
pq->busy.wait_dma,
(pq->state == SDMA_PKT_Q_ACTIVE),
@@ -606,10 +595,19 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
*count += idx;
return 0;
free_req:
- user_sdma_free_request(req, true);
- if (req_queued)
+ /*
+ * If seqsubmitted == npkts, the completion routine controls the
+ * final state. If seqsubmitted < npkts, wait for any
+ * outstanding packets to finish before cleaning up.
+ */
+ if (req->seqsubmitted < req->info.npkts) {
+ if (req->seqsubmitted)
+ wait_event(pq->busy.wait_dma,
+ (req->seqcomp == req->seqsubmitted - 1));
+ user_sdma_free_request(req, true);
pq_update(pq);
- set_comp_state(pq, cq, info.comp_idx, ERROR, req->status);
+ set_comp_state(pq, cq, info.comp_idx, ERROR, ret);
+ }
return ret;
}
@@ -917,7 +915,6 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
ret = sdma_send_txlist(req->sde, &pq->busy, &req->txps, &count);
req->seqsubmitted += count;
if (req->seqsubmitted == req->info.npkts) {
- WRITE_ONCE(req->done, 1);
/*
* The txreq has already been submitted to the HW queue
* so we can free the AHG entry now. Corruption will not
@@ -1365,11 +1362,15 @@ static int set_txreq_header_ahg(struct user_sdma_request *req,
return idx;
}
-/*
- * SDMA tx request completion callback. Called when the SDMA progress
- * state machine gets notification that the SDMA descriptors for this
- * tx request have been processed by the DMA engine. Called in
- * interrupt context.
+/**
+ * user_sdma_txreq_cb() - SDMA tx request completion callback.
+ * @txreq: valid sdma tx request
+ * @status: success/failure of request
+ *
+ * Called when the SDMA progress state machine gets notification that
+ * the SDMA descriptors for this tx request have been processed by the
+ * DMA engine. Called in interrupt context.
+ * Only do work on completed sequences.
*/
static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status)
{
@@ -1378,7 +1379,7 @@ static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status)
struct user_sdma_request *req;
struct hfi1_user_sdma_pkt_q *pq;
struct hfi1_user_sdma_comp_q *cq;
- u16 idx;
+ enum hfi1_sdma_comp_state state = COMPLETE;
if (!tx->req)
return;
@@ -1391,31 +1392,19 @@ static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status)
SDMA_DBG(req, "SDMA completion with error %d",
status);
WRITE_ONCE(req->has_error, 1);
+ state = ERROR;
}
req->seqcomp = tx->seqnum;
kmem_cache_free(pq->txreq_cache, tx);
- tx = NULL;
-
- idx = req->info.comp_idx;
- if (req->status == -1 && status == SDMA_TXREQ_S_OK) {
- if (req->seqcomp == req->info.npkts - 1) {
- req->status = 0;
- user_sdma_free_request(req, false);
- pq_update(pq);
- set_comp_state(pq, cq, idx, COMPLETE, 0);
- }
- } else {
- if (status != SDMA_TXREQ_S_OK)
- req->status = status;
- if (req->seqcomp == (READ_ONCE(req->seqsubmitted) - 1) &&
- (READ_ONCE(req->done) ||
- READ_ONCE(req->has_error))) {
- user_sdma_free_request(req, false);
- pq_update(pq);
- set_comp_state(pq, cq, idx, ERROR, req->status);
- }
- }
+
+ /* sequence isn't complete? We are done */
+ if (req->seqcomp != req->info.npkts - 1)
+ return;
+
+ user_sdma_free_request(req, false);
+ set_comp_state(pq, cq, req->info.comp_idx, state, status);
+ pq_update(pq);
}
static inline void pq_update(struct hfi1_user_sdma_pkt_q *pq)
@@ -1448,6 +1437,8 @@ static void user_sdma_free_request(struct user_sdma_request *req, bool unpin)
if (!node)
continue;
+ req->iovs[i].node = NULL;
+
if (unpin)
hfi1_mmu_rb_remove(req->pq->handler,
&node->rb);
diff --git a/drivers/infiniband/hw/hfi1/user_sdma.h b/drivers/infiniband/hw/hfi1/user_sdma.h
index d2bc77f..0ae0645 100644
--- a/drivers/infiniband/hw/hfi1/user_sdma.h
+++ b/drivers/infiniband/hw/hfi1/user_sdma.h
@@ -205,8 +205,6 @@ struct user_sdma_request {
/* Writeable fields shared with interrupt */
u64 seqcomp ____cacheline_aligned_in_smp;
u64 seqsubmitted;
- /* status of the last txreq completed */
- int status;
/* Send side fields */
struct list_head txps ____cacheline_aligned_in_smp;
@@ -228,7 +226,6 @@ struct user_sdma_request {
u16 tididx;
/* progress index moving along the iovs array */
u8 iov_idx;
- u8 done;
u8 has_error;
struct user_sdma_iovec iovs[MAX_VECTORS_PER_REQ];
Snip
> > > >
> > > > On Tue, Nov 20, 2018 at 10:03:59AM +0100, Jean Delvare wrote:
> > > > > On Tue, 20 Nov 2018 09:54:19 +0100, Greg KH wrote:
> > > > > > Ok, I'll go revert this, but shouldn't it also be reverted in
> > > > > > Linus's tree as well?
> > > > >
Hi Jean and Greg,
> > > > > No. As I understand it (with my limited knowledge of ACPICA),
> > > > > the change itself is correct. The problem is that it will detect
> > > > > resource conflicts which were unnoticed before, and that will
> > > > > prevent drivers from loading. Some of them may be addressed with
> > > > > driver fixes or new drivers. Others are false positives (due to
> > > > > bogus BIOS) which users will have to work around with
> > > > > acpi_enforce_resources=lax. We have been through this before,
> > > > > nothing new really, but it takes years to address such problems.
> > > > > This just
> > > > > can't be done in a stable kernel series.
> > >
> > > I would like to give you more context.
> > >
> > > There was a fairly complicated change that occurred in 4.17 and we
> > > caused a regression by forgetting to add region addresses in a
> > > global list during operation region initialization. We found the
> > > regression when bug reporters tried to boot their MacBook Pro and
> > > Asus laptops and saw that there was a difference in behavior when
> > > drivers were being loaded.
> >
> > Commit 4abb951b73ff0a8a979113ef185651aa3c8da19b has no Fixes tag.
> > Which commit introduced the regression? Can you point me to the
> > associated bug reports?
>
> Sorry about the missing Fixes tag. It's supposed to fix
> 5a8361f7ecceaed64b4064000d16cb703462be49
> ACPICA: Integrate package handling with module-level code
>
> >
> > > So what I am trying to say is that we have been emitting these
> > > errors for a while before we caused the regression. The goal with
> > > this patch is to keep the behavior the same as kernels older than
> > > 4.17 where warnings are printed to dmesg due to resource conflicts.
> >
> > Fine with me for upstream, but I still need to be convinced that it
> > belongs to stable series. For now, the only 2 related bugs I know of
> > are
> > #200011 (which is NOT fixed by commit
>
> This bug is kind of messy. It's like 2 bugs in 1 report. This patch fixes a part of
> their problem. Another part has to do with ECDT load order.
>
> Would it help to open a separate bug report for this particular issue and close
> it?
> I think this might be a little confusing to someone who reads the commit
> message.
>
> > 4abb951b73ff0a8a979113ef185651aa3c8da19b) and #201721 (which is
> > caused
>
> I did not know about this bug report.
>
> > by that commit). 1 vs 0, revert wins. If you want me to change my
> > mind, you must provide additional data points proving that commit
> > 4abb951b73ff0a8a979113ef185651aa3c8da19b solves more functional
> > regressions than it causes.
Here's an additional data point from the discussion on bz201721.
The reporter started using fancontrol during the period when resource
conflict checking was unintentionally removed. On 4.18.13 they report
that fancontrol does not work. So apart from that short window, the
reporter has basically always needed acpi_enforce_resources=lax.
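For reference, that workaround is set on the kernel command line; a
typical way to make it persistent (GRUB shown as one example, the exact
paths and commands vary by distribution) looks like:

	# /etc/default/grub
	GRUB_CMDLINE_LINUX="quiet acpi_enforce_resources=lax"

	# then regenerate the config and reboot, e.g.:
	#   update-grub                                (Debian/Ubuntu)
	#   grub2-mkconfig -o /boot/grub2/grub.cfg     (Fedora/SUSE)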
I understand that this "breaks" this machine but we shouldn't be reverting
resource conflict checking for all other drivers just because of fancontrol. By
removing this check, we are suppressing warnings and changing the loading behavior
of other drivers as well.
Thanks,
Erik
>
> If we do not include this commit, then we are in a state where resource
> conflicts are not properly detected. These detections help the kernel decide
> which drivers to load. However, I do see your point about this breaking the
> reporter's machine and it's possible that I may have missed something.
>
> We should get the reporter's dmesg for older kernels.
>
> It would be ideal to resolve this bug AND add region addresses in the global
> address list.
> I've asked the bug reporter in #201721 for more information. We can delay
> this patch for now and look at the reporter's issue more carefully.
>
> Thanks for all of the information!
>
> Erik
> >
> > Thanks,
> > --
> > Jean Delvare
> > SUSE L3 Support
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From cbffaf7aa09edbaea2bc7dc440c945297095e2fd Mon Sep 17 00:00:00 2001
From: Alexander Stein <alexander.stein(a)systec-electronic.com>
Date: Thu, 11 Oct 2018 17:01:25 +0200
Subject: [PATCH] can: flexcan: Always use last mailbox for TX
Essentially this patch moves the TX mailbox to position 63, regardless
of timestamp-based offloading or RX FIFO. So mainly the iflag register
usage regarding TX has changed. The rest is consolidating RX FIFO and
timestamp offloading, as they both now use the same TX mailbox.
The reason is a very annoying behavior when sending RTR frames while
_not_ using the RX FIFO:
If a TX mailbox sends an RTR frame, it becomes an RX mailbox. For that
reason flexcan_irq disables the TX mailbox again. But if a new CAN
frame is received between the time the RTR is sent and the TX mailbox
is disabled, it is lost without notice. The reason is that the
so-called "Move-in" process starts from the lowest mailbox, which
happens to be a TX mailbox set to EMPTY.
Steps to reproduce (I used an imx7d):
1. generate regular bursts of messages
2. send an RTR from flexcan with higher priority than burst messages every
1ms, e.g. cangen -I 0x100 -L 0 -g 1 -R can0
3. notice a lost message without notification after some seconds
When running an iperf in parallel this problem is occurring even more
frequently. Using filters is not possible as at least one single CAN-ID
is allowed. Handling the TX MB during RX is also not possible as there
is no race-free disable of RX MB.
There is still a slight window when the described problem can occur.
But for that, all RX MBs must be in use, which is essentially next to
an overrun. Even then, there will be no indication if it ever occurs.
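To see why the iflag bookkeeping changes (a standalone sketch; only the
two macros are taken from the diff below, the rest is scaffolding for
the example): mailbox 63 lives in the second 32-bit IFLAG register, so
the flag macro masks the index down to a bit position within one
register.

#include <stdio.h>

#define BIT(n)              (1u << (n))
#define FLEXCAN_TX_MB       63
#define FLEXCAN_IFLAG_MB(x) BIT((x) & 0x1f) /* bit within one IFLAG register */

int main(void)
{
	/* Mailboxes 0..31 raise bits in IFLAG1, 32..63 in IFLAG2. */
	int mb = FLEXCAN_TX_MB;

	printf("TX mailbox %d -> IFLAG%d bit %d (mask 0x%08x)\n",
	       mb, mb < 32 ? 1 : 2, mb & 0x1f, FLEXCAN_IFLAG_MB(mb));
	return 0;
}

This prints "TX mailbox 63 -> IFLAG2 bit 31 (mask 0x80000000)", which
is why the interrupt handler now reads iflag2 instead of iflag1 for TX
completion.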
Signed-off-by: Alexander Stein <alexander.stein(a)systec-electronic.com>
Cc: linux-stable <stable(a)vger.kernel.org>
Signed-off-by: Marc Kleine-Budde <mkl(a)pengutronix.de>
diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c
index 0431f8d05518..677c41701cf3 100644
--- a/drivers/net/can/flexcan.c
+++ b/drivers/net/can/flexcan.c
@@ -135,13 +135,12 @@
/* FLEXCAN interrupt flag register (IFLAG) bits */
/* Errata ERR005829 step7: Reserve first valid MB */
-#define FLEXCAN_TX_MB_RESERVED_OFF_FIFO 8
-#define FLEXCAN_TX_MB_OFF_FIFO 9
+#define FLEXCAN_TX_MB_RESERVED_OFF_FIFO 8
#define FLEXCAN_TX_MB_RESERVED_OFF_TIMESTAMP 0
-#define FLEXCAN_TX_MB_OFF_TIMESTAMP 1
-#define FLEXCAN_RX_MB_OFF_TIMESTAMP_FIRST (FLEXCAN_TX_MB_OFF_TIMESTAMP + 1)
-#define FLEXCAN_RX_MB_OFF_TIMESTAMP_LAST 63
-#define FLEXCAN_IFLAG_MB(x) BIT(x)
+#define FLEXCAN_TX_MB 63
+#define FLEXCAN_RX_MB_OFF_TIMESTAMP_FIRST (FLEXCAN_TX_MB_RESERVED_OFF_TIMESTAMP + 1)
+#define FLEXCAN_RX_MB_OFF_TIMESTAMP_LAST (FLEXCAN_TX_MB - 1)
+#define FLEXCAN_IFLAG_MB(x) BIT(x & 0x1f)
#define FLEXCAN_IFLAG_RX_FIFO_OVERFLOW BIT(7)
#define FLEXCAN_IFLAG_RX_FIFO_WARN BIT(6)
#define FLEXCAN_IFLAG_RX_FIFO_AVAILABLE BIT(5)
@@ -737,9 +736,9 @@ static inline u64 flexcan_read_reg_iflag_rx(struct flexcan_priv *priv)
struct flexcan_regs __iomem *regs = priv->regs;
u32 iflag1, iflag2;
- iflag2 = priv->read(&regs->iflag2) & priv->reg_imask2_default;
- iflag1 = priv->read(&regs->iflag1) & priv->reg_imask1_default &
+ iflag2 = priv->read(&regs->iflag2) & priv->reg_imask2_default &
~FLEXCAN_IFLAG_MB(priv->tx_mb_idx);
+ iflag1 = priv->read(&regs->iflag1) & priv->reg_imask1_default;
return (u64)iflag2 << 32 | iflag1;
}
@@ -751,11 +750,9 @@ static irqreturn_t flexcan_irq(int irq, void *dev_id)
struct flexcan_priv *priv = netdev_priv(dev);
struct flexcan_regs __iomem *regs = priv->regs;
irqreturn_t handled = IRQ_NONE;
- u32 reg_iflag1, reg_esr;
+ u32 reg_iflag2, reg_esr;
enum can_state last_state = priv->can.state;
- reg_iflag1 = priv->read(&regs->iflag1);
-
/* reception interrupt */
if (priv->devtype_data->quirks & FLEXCAN_QUIRK_USE_OFF_TIMESTAMP) {
u64 reg_iflag;
@@ -769,6 +766,9 @@ static irqreturn_t flexcan_irq(int irq, void *dev_id)
break;
}
} else {
+ u32 reg_iflag1;
+
+ reg_iflag1 = priv->read(&regs->iflag1);
if (reg_iflag1 & FLEXCAN_IFLAG_RX_FIFO_AVAILABLE) {
handled = IRQ_HANDLED;
can_rx_offload_irq_offload_fifo(&priv->offload);
@@ -784,8 +784,10 @@ static irqreturn_t flexcan_irq(int irq, void *dev_id)
}
}
+ reg_iflag2 = priv->read(&regs->iflag2);
+
/* transmission complete interrupt */
- if (reg_iflag1 & FLEXCAN_IFLAG_MB(priv->tx_mb_idx)) {
+ if (reg_iflag2 & FLEXCAN_IFLAG_MB(priv->tx_mb_idx)) {
handled = IRQ_HANDLED;
stats->tx_bytes += can_get_echo_skb(dev, 0);
stats->tx_packets++;
@@ -794,7 +796,7 @@ static irqreturn_t flexcan_irq(int irq, void *dev_id)
/* after sending a RTR frame MB is in RX mode */
priv->write(FLEXCAN_MB_CODE_TX_INACTIVE,
&priv->tx_mb->can_ctrl);
- priv->write(FLEXCAN_IFLAG_MB(priv->tx_mb_idx), &regs->iflag1);
+ priv->write(FLEXCAN_IFLAG_MB(priv->tx_mb_idx), &regs->iflag2);
netif_wake_queue(dev);
}
@@ -936,15 +938,13 @@ static int flexcan_chip_start(struct net_device *dev)
reg_mcr &= ~FLEXCAN_MCR_MAXMB(0xff);
reg_mcr |= FLEXCAN_MCR_FRZ | FLEXCAN_MCR_HALT | FLEXCAN_MCR_SUPV |
FLEXCAN_MCR_WRN_EN | FLEXCAN_MCR_SRX_DIS | FLEXCAN_MCR_IRMQ |
- FLEXCAN_MCR_IDAM_C;
+ FLEXCAN_MCR_IDAM_C | FLEXCAN_MCR_MAXMB(priv->tx_mb_idx);
- if (priv->devtype_data->quirks & FLEXCAN_QUIRK_USE_OFF_TIMESTAMP) {
+ if (priv->devtype_data->quirks & FLEXCAN_QUIRK_USE_OFF_TIMESTAMP)
reg_mcr &= ~FLEXCAN_MCR_FEN;
- reg_mcr |= FLEXCAN_MCR_MAXMB(priv->offload.mb_last);
- } else {
- reg_mcr |= FLEXCAN_MCR_FEN |
- FLEXCAN_MCR_MAXMB(priv->tx_mb_idx);
- }
+ else
+ reg_mcr |= FLEXCAN_MCR_FEN;
+
netdev_dbg(dev, "%s: writing mcr=0x%08x", __func__, reg_mcr);
priv->write(reg_mcr, &regs->mcr);
@@ -987,16 +987,17 @@ static int flexcan_chip_start(struct net_device *dev)
priv->write(reg_ctrl2, &regs->ctrl2);
}
- /* clear and invalidate all mailboxes first */
- for (i = priv->tx_mb_idx; i < ARRAY_SIZE(regs->mb); i++) {
- priv->write(FLEXCAN_MB_CODE_RX_INACTIVE,
- &regs->mb[i].can_ctrl);
- }
-
if (priv->devtype_data->quirks & FLEXCAN_QUIRK_USE_OFF_TIMESTAMP) {
- for (i = priv->offload.mb_first; i <= priv->offload.mb_last; i++)
+ for (i = priv->offload.mb_first; i <= priv->offload.mb_last; i++) {
priv->write(FLEXCAN_MB_CODE_RX_EMPTY,
&regs->mb[i].can_ctrl);
+ }
+ } else {
+ /* clear and invalidate unused mailboxes first */
+ for (i = FLEXCAN_TX_MB_RESERVED_OFF_FIFO; i <= ARRAY_SIZE(regs->mb); i++) {
+ priv->write(FLEXCAN_MB_CODE_RX_INACTIVE,
+ &regs->mb[i].can_ctrl);
+ }
}
/* Errata ERR005829: mark first TX mailbox as INACTIVE */
@@ -1360,17 +1361,15 @@ static int flexcan_probe(struct platform_device *pdev)
priv->devtype_data = devtype_data;
priv->reg_xceiver = reg_xceiver;
- if (priv->devtype_data->quirks & FLEXCAN_QUIRK_USE_OFF_TIMESTAMP) {
- priv->tx_mb_idx = FLEXCAN_TX_MB_OFF_TIMESTAMP;
+ if (priv->devtype_data->quirks & FLEXCAN_QUIRK_USE_OFF_TIMESTAMP)
priv->tx_mb_reserved = &regs->mb[FLEXCAN_TX_MB_RESERVED_OFF_TIMESTAMP];
- } else {
- priv->tx_mb_idx = FLEXCAN_TX_MB_OFF_FIFO;
+ else
priv->tx_mb_reserved = &regs->mb[FLEXCAN_TX_MB_RESERVED_OFF_FIFO];
- }
+ priv->tx_mb_idx = FLEXCAN_TX_MB;
priv->tx_mb = &regs->mb[priv->tx_mb_idx];
- priv->reg_imask1_default = FLEXCAN_IFLAG_MB(priv->tx_mb_idx);
- priv->reg_imask2_default = 0;
+ priv->reg_imask1_default = 0;
+ priv->reg_imask2_default = FLEXCAN_IFLAG_MB(priv->tx_mb_idx);
priv->offload.mailbox_read = flexcan_mailbox_read;
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From e01ad46d53b59720c6ae69963ee1756506954c85 Mon Sep 17 00:00:00 2001
From: Jianchao Wang <jianchao.w.wang(a)oracle.com>
Date: Fri, 12 Oct 2018 18:07:28 +0800
Subject: [PATCH] blk-mq: fallback to previous nr_hw_queues when updating fails
When we try to increase nr_hw_queues, we may fail due to a shortage of
memory or some other reason; blk_mq_realloc_hw_ctxs then stops and some
entries in q->queue_hw_ctx are left NULL. However, because the queue
map has been updated with the new nr_hw_queues, some CPUs have been
mapped to a hw queue that just encountered an allocation failure, so
blk_mq_map_queue() can return NULL. This will cause a panic in the
subsequent blk_mq_map_swqueue().
To fix it, when increasing nr_hw_queues fails, fall back to the
previous nr_hw_queues and post a warning. At the same time, a driver's
.map_queues usually uses completion IRQ affinity to map hw queues to
CPUs; after falling back, some CPUs would be left without a hw queue
mapping, so use the default blk_mq_map_queues() to redo the map.
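The control flow of the fix reduces to a compare-and-retry loop (a
self-contained sketch of that shape only; the fake allocator and all
names are invented stand-ins for blk_mq_realloc_hw_ctxs() and friends):

#include <stdio.h>

/* Pretend allocation succeeds for at most 4 hw queues. */
static void realloc_hw_ctxs(int *q_nr, int want)
{
	*q_nr = want > 4 ? 4 : want;
}

static void update_nr_hw_queues(int *set_nr, int want)
{
	int prev = *set_nr, q_nr;

	*set_nr = want;
	for (;;) {
		realloc_hw_ctxs(&q_nr, *set_nr);
		if (q_nr == *set_nr)
			break;	/* queue count matches: done */
		fprintf(stderr,
			"Increasing nr_hw_queues to %d fails, fallback to %d\n",
			want, prev);
		*set_nr = prev;	/* redo the map with the known-good value */
	}
}

int main(void)
{
	int nr = 2;

	update_nr_hw_queues(&nr, 8);	/* fails past 4, falls back to 2 */
	printf("nr_hw_queues=%d\n", nr);
	return 0;
}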
Reported-by: syzbot+83e8cbe702263932d9d4(a)syzkaller.appspotmail.com
Signed-off-by: Jianchao Wang <jianchao.w.wang(a)oracle.com>
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 941f51380077..c2ecd64a2403 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2557,7 +2557,7 @@ static struct blk_mq_hw_ctx *blk_mq_alloc_and_init_hctx(
static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
struct request_queue *q)
{
- int i, j;
+ int i, j, end;
struct blk_mq_hw_ctx **hctxs = q->queue_hw_ctx;
/* protect against switching io scheduler */
@@ -2591,8 +2591,20 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
break;
}
}
+ /*
+ * Increasing nr_hw_queues fails. Free the newly allocated
+ * hctxs and keep the previous q->nr_hw_queues.
+ */
+ if (i != set->nr_hw_queues) {
+ j = q->nr_hw_queues;
+ end = i;
+ } else {
+ j = i;
+ end = q->nr_hw_queues;
+ q->nr_hw_queues = set->nr_hw_queues;
+ }
- for (j = i; j < q->nr_hw_queues; j++) {
+ for (; j < end; j++) {
struct blk_mq_hw_ctx *hctx = hctxs[j];
if (hctx) {
@@ -2604,7 +2616,6 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
}
}
- q->nr_hw_queues = i;
mutex_unlock(&q->sysfs_lock);
}
@@ -2989,6 +3000,7 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
{
struct request_queue *q;
LIST_HEAD(head);
+ int prev_nr_hw_queues;
lockdep_assert_held(&set->tag_list_lock);
@@ -3017,10 +3029,19 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
blk_mq_sysfs_unregister(q);
}
+ prev_nr_hw_queues = set->nr_hw_queues;
set->nr_hw_queues = nr_hw_queues;
blk_mq_update_queue_map(set);
+fallback:
list_for_each_entry(q, &set->tag_list, tag_set_list) {
blk_mq_realloc_hw_ctxs(set, q);
+ if (q->nr_hw_queues != set->nr_hw_queues) {
+ pr_warn("Increasing nr_hw_queues to %d fails, fallback to %d\n",
+ nr_hw_queues, prev_nr_hw_queues);
+ set->nr_hw_queues = prev_nr_hw_queues;
+ blk_mq_map_queues(set);
+ goto fallback;
+ }
blk_mq_map_swqueue(q);
}
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 2f31a67f01a8beb22cae754c53522cb61a005750 Mon Sep 17 00:00:00 2001
From: Mathias Nyman <mathias.nyman(a)linux.intel.com>
Date: Thu, 15 Nov 2018 11:38:41 +0200
Subject: [PATCH] usb: xhci: Prevent bus suspend if a port connect change or
polling state is detected
A USB3 roothub might autosuspend before a plugged-in USB3 device is
detected, causing USB3 device enumeration failure.
USB3 devices don't show up as connected and enabled until USB3 link
training completes. On a fast-booting platform with slow USB3 link
training, the link might reach the connected and enabled state just as
the bus is suspending.
If this device is discovered for the first time by the
xhci_bus_suspend() routine, it will be put into the U3 suspended state
like the other ports which failed to suspend earlier.
The hub thread will notice the connect change and resume the bus,
moving the port back to U0.
This U0 -> U3 -> U0 transition right after being connected seems to be
too much for some devices, causing them to first go to the SS.Inactive
state and finally end up stuck in a polling state with reset asserted.
Fix this by failing the bus suspend if a port has a connect change or
is in a polling state in xhci_bus_suspend().
Don't do any port changes until all ports are checked; buffer all port
changes and only write them at the end if the suspend can proceed.
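The ordering constraint is the classic check-then-commit pattern (a
generic sketch of the two-pass idea only; the port-state encoding below
is a placeholder, not the real PORTSC layout):

#include <stdbool.h>
#include <stdio.h>

#define MAX_PORTS 4

/* Pass 1: validate every port and buffer intended writes; touch nothing. */
static bool prepare_bus_suspend(const int portsc[], int portsc_buf[])
{
	for (int i = 0; i < MAX_PORTS; i++) {
		if (portsc[i] < 0)		/* stands in for POLLING or a
						 * pending connect change */
			return false;		/* -EBUSY: nothing was written */
		portsc_buf[i] = portsc[i] | 0x100;	/* stands in for U3 */
	}
	return true;
}

int main(void)
{
	int portsc[MAX_PORTS] = { 1, 2, -1, 3 };
	int portsc_buf[MAX_PORTS];

	if (!prepare_bus_suspend(portsc, portsc_buf)) {
		puts("Bus suspend bailout, no port was modified");
		return 0;
	}
	/* Pass 2 (reached only on success): write portsc_buf[] back. */
	return 0;
}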
Cc: stable(a)vger.kernel.org
Signed-off-by: Mathias Nyman <mathias.nyman(a)linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c
index da98a11244e2..94aca1b5ac8a 100644
--- a/drivers/usb/host/xhci-hub.c
+++ b/drivers/usb/host/xhci-hub.c
@@ -1474,15 +1474,18 @@ int xhci_bus_suspend(struct usb_hcd *hcd)
unsigned long flags;
struct xhci_hub *rhub;
struct xhci_port **ports;
+ u32 portsc_buf[USB_MAXCHILDREN];
+ bool wake_enabled;
rhub = xhci_get_rhub(hcd);
ports = rhub->ports;
max_ports = rhub->num_ports;
bus_state = &xhci->bus_state[hcd_index(hcd)];
+ wake_enabled = hcd->self.root_hub->do_remote_wakeup;
spin_lock_irqsave(&xhci->lock, flags);
- if (hcd->self.root_hub->do_remote_wakeup) {
+ if (wake_enabled) {
if (bus_state->resuming_ports || /* USB2 */
bus_state->port_remote_wakeup) { /* USB3 */
spin_unlock_irqrestore(&xhci->lock, flags);
@@ -1490,26 +1493,36 @@ int xhci_bus_suspend(struct usb_hcd *hcd)
return -EBUSY;
}
}
-
- port_index = max_ports;
+ /*
+ * Prepare ports for suspend, but don't write anything before all ports
+ * are checked and we know bus suspend can proceed
+ */
bus_state->bus_suspended = 0;
+ port_index = max_ports;
while (port_index--) {
- /* suspend the port if the port is not suspended */
u32 t1, t2;
- int slot_id;
t1 = readl(ports[port_index]->addr);
t2 = xhci_port_state_to_neutral(t1);
+ portsc_buf[port_index] = 0;
- if ((t1 & PORT_PE) && !(t1 & PORT_PLS_MASK)) {
- xhci_dbg(xhci, "port %d not suspended\n", port_index);
- slot_id = xhci_find_slot_id_by_port(hcd, xhci,
- port_index + 1);
- if (slot_id) {
+ /* Bail out if a USB3 port has a new device in link training */
+ if ((t1 & PORT_PLS_MASK) == XDEV_POLLING) {
+ bus_state->bus_suspended = 0;
+ spin_unlock_irqrestore(&xhci->lock, flags);
+ xhci_dbg(xhci, "Bus suspend bailout, port in polling\n");
+ return -EBUSY;
+ }
+
+ /* suspend ports in U0, or bail out for new connect changes */
+ if ((t1 & PORT_PE) && (t1 & PORT_PLS_MASK) == XDEV_U0) {
+ if ((t1 & PORT_CSC) && wake_enabled) {
+ bus_state->bus_suspended = 0;
spin_unlock_irqrestore(&xhci->lock, flags);
- xhci_stop_device(xhci, slot_id, 1);
- spin_lock_irqsave(&xhci->lock, flags);
+ xhci_dbg(xhci, "Bus suspend bailout, port connect change\n");
+ return -EBUSY;
}
+ xhci_dbg(xhci, "port %d not suspended\n", port_index);
t2 &= ~PORT_PLS_MASK;
t2 |= PORT_LINK_STROBE | XDEV_U3;
set_bit(port_index, &bus_state->bus_suspended);
@@ -1518,7 +1531,7 @@ int xhci_bus_suspend(struct usb_hcd *hcd)
* including the USB 3.0 roothub, but only if CONFIG_PM
* is enabled, so also enable remote wake here.
*/
- if (hcd->self.root_hub->do_remote_wakeup) {
+ if (wake_enabled) {
if (t1 & PORT_CONNECT) {
t2 |= PORT_WKOC_E | PORT_WKDISC_E;
t2 &= ~PORT_WKCONN_E;
@@ -1538,7 +1551,26 @@ int xhci_bus_suspend(struct usb_hcd *hcd)
t1 = xhci_port_state_to_neutral(t1);
if (t1 != t2)
- writel(t2, ports[port_index]->addr);
+ portsc_buf[port_index] = t2;
+ }
+
+ /* write port settings, stopping and suspending ports if needed */
+ port_index = max_ports;
+ while (port_index--) {
+ if (!portsc_buf[port_index])
+ continue;
+ if (test_bit(port_index, &bus_state->bus_suspended)) {
+ int slot_id;
+
+ slot_id = xhci_find_slot_id_by_port(hcd, xhci,
+ port_index + 1);
+ if (slot_id) {
+ spin_unlock_irqrestore(&xhci->lock, flags);
+ xhci_stop_device(xhci, slot_id, 1);
+ spin_lock_irqsave(&xhci->lock, flags);
+ }
+ }
+ writel(portsc_buf[port_index], ports[port_index]->addr);
}
hcd->state = HC_STATE_SUSPENDED;
bus_state->next_statechange = jiffies + msecs_to_jiffies(10);
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 11644a7659529730eaf2f166efaabe7c3dc7af8c Mon Sep 17 00:00:00 2001
From: "Cherian, George" <George.Cherian(a)cavium.com>
Date: Fri, 9 Nov 2018 17:21:22 +0200
Subject: [PATCH] xhci: Add quirk to workaround the errata seen on Cavium
Thunder-X2 Soc
Implement a workaround for ThunderX2 Errata-129 (documented in
"CN99XX Known Issues", available at the Cavium support site).
As per ThunderX2 Errata-129, a USB 2 device may come up as USB 1:
if a connection to a USB 1 device is followed by another connection to
a USB 2 device, the link will come up as USB 1 for the USB 2 device.
Resolution: Reset the PHY after the USB 1 device is disconnected.
The PHY reset sequence is done using private registers in the XHCI
register space. After the PHY is reset we check the PLL lock status and
retry the operation if it fails. From our tests, retrying 4 times is
sufficient.
Add a new quirk flag XHCI_RESET_PLL_ON_DISCONNECT to invoke the
workaround in handle_port_status().
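Condensed, the workaround is a bounded reset-and-poll loop (sketch
only; the register accesses are stubbed out, and the offsets in the
comments are the private ones named in the diff below):

#include <stdbool.h>
#include <stdio.h>

static void phy_reset_pulse(void)
{
	/* writel(0x6F, regs + 0x1048); udelay(10);
	 * writel(0x7F, regs + 0x1048); udelay(200); */
}

static bool pll_locked(void)
{
	return true;	/* readl(regs + 0x1070) & 0x1 */
}

static void reset_phy_quirk(void)
{
	int retry_count = 4;

	do {
		phy_reset_pulse();
	} while (!pll_locked() && --retry_count);

	if (!retry_count)
		puts("PLL did not lock after 4 attempts");
}

int main(void)
{
	reset_phy_quirk();
	return 0;
}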
Cc: stable(a)vger.kernel.org
Signed-off-by: George Cherian <george.cherian(a)cavium.com>
Signed-off-by: Mathias Nyman <mathias.nyman(a)linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c
index 1fb448cd2667..a9515265db4d 100644
--- a/drivers/usb/host/xhci-pci.c
+++ b/drivers/usb/host/xhci-pci.c
@@ -248,6 +248,11 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci)
if (pdev->vendor == PCI_VENDOR_ID_TI && pdev->device == 0x8241)
xhci->quirks |= XHCI_LIMIT_ENDPOINT_INTERVAL_7;
+ if ((pdev->vendor == PCI_VENDOR_ID_BROADCOM ||
+ pdev->vendor == PCI_VENDOR_ID_CAVIUM) &&
+ pdev->device == 0x9026)
+ xhci->quirks |= XHCI_RESET_PLL_ON_DISCONNECT;
+
if (xhci->quirks & XHCI_RESET_ON_RESUME)
xhci_dbg_trace(xhci, trace_xhci_dbg_quirks,
"QUIRK: Resetting on resume");
diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index 250b758efe9f..65750582133f 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -1521,6 +1521,35 @@ static void handle_device_notification(struct xhci_hcd *xhci,
usb_wakeup_notification(udev->parent, udev->portnum);
}
+/*
+ * Quirk handler for errata seen on Cavium ThunderX2 processor XHCI
+ * Controller.
+ * As per ThunderX2 Errata-129, a USB 2 device may come up as USB 1
+ * if a connection to a USB 1 device is followed by another connection
+ * to a USB 2 device.
+ *
+ * Reset the PHY after the USB device is disconnected if device speed
+ * is less than HCD_USB3.
+ * Retry the reset sequence max of 4 times checking the PLL lock status.
+ *
+ */
+static void xhci_cavium_reset_phy_quirk(struct xhci_hcd *xhci)
+{
+ struct usb_hcd *hcd = xhci_to_hcd(xhci);
+ u32 pll_lock_check;
+ u32 retry_count = 4;
+
+ do {
+ /* Assert PHY reset */
+ writel(0x6F, hcd->regs + 0x1048);
+ udelay(10);
+ /* De-assert the PHY reset */
+ writel(0x7F, hcd->regs + 0x1048);
+ udelay(200);
+ pll_lock_check = readl(hcd->regs + 0x1070);
+ } while (!(pll_lock_check & 0x1) && --retry_count);
+}
+
static void handle_port_status(struct xhci_hcd *xhci,
union xhci_trb *event)
{
@@ -1654,8 +1683,12 @@ static void handle_port_status(struct xhci_hcd *xhci,
goto cleanup;
}
- if (hcd->speed < HCD_USB3)
+ if (hcd->speed < HCD_USB3) {
xhci_test_and_clear_bit(xhci, port, PORT_PLC);
+ if ((xhci->quirks & XHCI_RESET_PLL_ON_DISCONNECT) &&
+ (portsc & PORT_CSC) && !(portsc & PORT_CONNECT))
+ xhci_cavium_reset_phy_quirk(xhci);
+ }
cleanup:
/* Update event ring dequeue pointer before dropping the lock */
diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h
index 5f0c4f197f13..260b259b72bc 100644
--- a/drivers/usb/host/xhci.h
+++ b/drivers/usb/host/xhci.h
@@ -1849,6 +1849,7 @@ struct xhci_hcd {
#define XHCI_INTEL_USB_ROLE_SW BIT_ULL(31)
#define XHCI_ZERO_64B_REGS BIT_ULL(32)
#define XHCI_DEFAULT_PM_RUNTIME_ALLOW BIT_ULL(33)
+#define XHCI_RESET_PLL_ON_DISCONNECT BIT_ULL(34)
unsigned int num_active_eps;
unsigned int limit_active_eps;
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 958c0bd86075d4ef1c936998deefe1947e539240 Mon Sep 17 00:00:00 2001
From: Aaron Ma <aaron.ma(a)canonical.com>
Date: Fri, 9 Nov 2018 17:21:20 +0200
Subject: [PATCH] usb: xhci: fix uninitialized completion when USB3 port got
wrong status
Realtek USB3.0 Card Reader [0bda:0328] reports a wrong port status on
the Cannon Lake PCH USB3.1 xHCI [8086:a36d] after resume from S3;
after clearing the port reset it works fine.
Since this device is registered on the USB3 roothub at boot, when the
port status reports not superspeed, xhci_get_port_status() will
complete an uninitialized completion in bus_state[0].
The kernel will hang because of the NULL pointer.
Restrict the USB2 resume status check to the USB2 roothub to fix the
hang.
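Schematically, the fix is a speed guard so USB3 roothub events can
never reach the USB2-only completion bookkeeping (a runnable sketch;
the HCD_* values are illustrative orderings, not copied from hcd.h):

#include <stdbool.h>
#include <stdio.h>

enum { HCD_USB2 = 2, HCD_USB3 = 3 };	/* illustrative ordering only */

static void port_event(int hcd_speed, bool superspeed_dev)
{
	/* Only the USB2 roothub owns the resume/rexit completions. */
	if (!superspeed_dev && hcd_speed < HCD_USB3)
		puts("complete rexit_done (USB2 roothub)");
	else
		puts("skip USB2 bookkeeping (USB3 roothub)");
}

int main(void)
{
	port_event(HCD_USB2, false);	/* allowed */
	port_event(HCD_USB3, false);	/* the previously crashing case */
	return 0;
}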
Cc: stable(a)vger.kernel.org
Signed-off-by: Aaron Ma <aaron.ma(a)canonical.com>
Signed-off-by: Mathias Nyman <mathias.nyman(a)linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c
index 12eea73d9f20..60e4ac7ae4f8 100644
--- a/drivers/usb/host/xhci-hub.c
+++ b/drivers/usb/host/xhci-hub.c
@@ -876,7 +876,7 @@ static u32 xhci_get_port_status(struct usb_hcd *hcd,
status |= USB_PORT_STAT_SUSPEND;
}
if ((raw_port_status & PORT_PLS_MASK) == XDEV_RESUME &&
- !DEV_SUPERSPEED_ANY(raw_port_status)) {
+ !DEV_SUPERSPEED_ANY(raw_port_status) && hcd->speed < HCD_USB3) {
if ((raw_port_status & PORT_RESET) ||
!(raw_port_status & PORT_PE))
return 0xffffffff;
diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index 730a6ecd85fc..250b758efe9f 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -1646,7 +1646,7 @@ static void handle_port_status(struct xhci_hcd *xhci,
* RExit to a disconnect state). If so, let the the driver know it's
* out of the RExit state.
*/
- if (!DEV_SUPERSPEED_ANY(portsc) &&
+ if (!DEV_SUPERSPEED_ANY(portsc) && hcd->speed < HCD_USB3 &&
test_and_clear_bit(hcd_portnum,
&bus_state->rexit_ports)) {
complete(&bus_state->rexit_done[hcd_portnum]);
Greg,
Please see the attached mbox of 5 patches. The first required a slight
backport as noted in the commit message, but the others applied
cleanly. Here's a build+boot of them in clang:
https://travis-ci.com/ClangBuiltLinux/continuous-integration/builds/92778054
(obviously we need to backport warning cleanups, but this set gets it
booting at least)
--
Thanks,
~Nick Desaulniers