Re: [PATCH] block: Fix blk_mq_try_issue_directly()

4 Apr 2019

On Thu, 2019-04-04 at 08:28 -0700, Bart Van Assche wrote:
...
On Thu, 2019-04-04 at 11:09 -0400, Laurence Oberman wrote:
...
When I bisected and got to that commit I was unable to revert it to
test without it.
I showed that in an earlier update, had merge issues.
loberman@lobewsrhel linux_torvalds]$ git revert 7f556a44e61d
error: could not revert 7f556a4... blk-mq: refactor the code of
issue
request directly
hint: after resolving the conflicts, mark the corrected paths
hint: with 'git add <paths>' or 'git rm <paths>'
hint: and commit the result with 'git commit'
Hi Laurence,
On my setup the following commits revert cleanly if I revert them in
the
following order:

d6a51a97c0b2 ("blk-mq: replace and kill

blk_mq_request_issue_directly") # v5.0.

5b7a6f128aad ("blk-mq: issue directly with bypass 'false' in

blk_mq_sched_insert_requests") # v5.0.

7f556a44e61d ("blk-mq: refactor the code of issue request

directly") # v5.0.
The result of these three reverts is the patch below. Test feedback
for
this patch would be appreciated.
Thanks,
Bart.
Subject: [PATCH] block: Revert recent blk_mq_request_issue_directly()
changes
blk_mq_try_issue_directly() can return BLK_STS*_RESOURCE for requests
that
have been queued. If that happens when blk_mq_try_issue_directly() is
called
by the dm-mpath driver then the dm-mpath will try to resubmit a
request that
is already queued and a kernel crash follows. Since it is nontrivial
to fix
blk_mq_request_issue_directly(), revert the most recent
blk_mq_request_issue_directly() changes.
This patch reverts the following commits:

d6a51a97c0b2 ("blk-mq: replace and kill

blk_mq_request_issue_directly") # v5.0.

5b7a6f128aad ("blk-mq: issue directly with bypass 'false' in

blk_mq_sched_insert_requests") # v5.0.

7f556a44e61d ("blk-mq: refactor the code of issue request

directly") # v5.0.
Cc: Christoph Hellwig hch@infradead.org
Cc: Hannes Reinecke hare@suse.com
Cc: James Smart james.smart@broadcom.com
Cc: Ming Lei ming.lei@redhat.com
Cc: Jianchao Wang jianchao.w.wang@oracle.com
Cc: Keith Busch keith.busch@intel.com
Cc: Dongli Zhang dongli.zhang@oracle.com
Cc: Laurence Oberman loberman@redhat.com
Cc: stable@vger.kernel.org
Reported-by: Laurence Oberman loberman@redhat.com
Fixes: 7f556a44e61d ("blk-mq: refactor the code of issue request
directly") # v5.0.
Signed-off-by: Bart Van Assche bvanassche@acm.org

block/blk-core.c     |   4 +-
 block/blk-mq-sched.c |   8 +--
 block/blk-mq.c       | 122 ++++++++++++++++++++++-------------------
--
 block/blk-mq.h       |   6 +--
 4 files changed, 71 insertions(+), 69 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index 2921af6f8d33..5bca56016575 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1232,8 +1232,6 @@ static int blk_cloned_rq_check_limits(struct
request_queue *q,
  */
 blk_status_t blk_insert_cloned_request(struct request_queue *q,
struct request *rq)
 {

blk_qc_t unused;
if (blk_cloned_rq_check_limits(q, rq))
  return BLK_STS_IOERR;

@@ -1249,7 +1247,7 @@ blk_status_t blk_insert_cloned_request(struct
request_queue *q, struct request *
    * bypass a potential scheduler on the bottom device for
    * insert.
    */

return blk_mq_try_issue_directly(rq->mq_hctx, rq, &unused,

true, true);

return blk_mq_request_issue_directly(rq, true);

}
 EXPORT_SYMBOL_GPL(blk_insert_cloned_request);
 
diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index 40905539afed..aa6bc5c02643 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -423,10 +423,12 @@ void blk_mq_sched_insert_requests(struct
blk_mq_hw_ctx *hctx,
   	 * busy in case of 'none' scheduler, and this way may
save
   	 * us one extra enqueue & dequeue to sw queue.
   	 */

if (!hctx->dispatch_busy && !e && !run_queue_async)




if (!hctx->dispatch_busy && !e && !run_queue_async) {
blk_mq_try_issue_list_directly(hctx, list);




else


	blk_mq_insert_requests(hctx, ctx, list);




	if (list_empty(list))


		return;


}


blk_mq_insert_requests(hctx, ctx, list);

}

blk_mq_run_hw_queue(hctx, run_queue_async);
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 652d0c6d5945..403984a557bb 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1808,74 +1808,76 @@ static blk_status_t
__blk_mq_issue_directly(struct blk_mq_hw_ctx *hctx,
   return ret;
 }
 
-blk_status_t blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
+static blk_status_t __blk_mq_try_issue_directly(struct blk_mq_hw_ctx
*hctx,
   					struct request *rq,
   					blk_qc_t *cookie,

				bool bypass, bool last)




				bool bypass_insert,



bool last)
 {
   struct request_queue *q = rq->q;
   bool run_queue = true;

blk_status_t ret = BLK_STS_RESOURCE;
int srcu_idx;
bool force = false;


hctx_lock(hctx, &srcu_idx);
/*
* hctx_lock is needed before checking quiesced flag.




* RCU or SRCU read lock is needed before checking quiesced



flag.
    *

* When queue is stopped or quiesced, ignore 'bypass', insert


* and return BLK_STS_OK to caller, and avoid driver to try to


* dispatch again.




* When queue is stopped or quiesced, ignore 'bypass_insert'



from

* blk_mq_request_issue_directly(), and return BLK_STS_OK to



caller,

* and avoid driver to try to dispatch again.

*/


if (unlikely(blk_mq_hctx_stopped(hctx) ||

blk_queue_quiesced(q))) {

if (blk_mq_hctx_stopped(hctx) || blk_queue_quiesced(q)) {
run_queue = false;


bypass = false;


goto out_unlock;




bypass_insert = false;


goto insert;

}


if (unlikely(q->elevator && !bypass))
goto out_unlock;




if (q->elevator && !bypass_insert)
goto insert;



if (!blk_mq_get_dispatch_budget(hctx))

goto out_unlock;




goto insert;



if (!blk_mq_get_driver_tag(rq)) {
   	blk_mq_put_dispatch_budget(hctx);

goto out_unlock;




goto insert;

}


/*
* Always add a request that has been through


*.queue_rq() to the hardware dispatch list.


*/


force = true;
ret = __blk_mq_issue_directly(hctx, rq, cookie, last);

-out_unlock:

return __blk_mq_issue_directly(hctx, rq, cookie, last);

+insert:

if (bypass_insert)
return BLK_STS_RESOURCE;



blk_mq_request_bypass_insert(rq, run_queue);
return BLK_STS_OK;

+}



+static void blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,

struct request *rq, blk_qc_t *cookie)



+{

blk_status_t ret;
int srcu_idx;

might_sleep_if(hctx->flags & BLK_MQ_F_BLOCKING);

hctx_lock(hctx, &srcu_idx);

ret = __blk_mq_try_issue_directly(hctx, rq, cookie, false,

true);

if (ret == BLK_STS_RESOURCE || ret == BLK_STS_DEV_RESOURCE)
blk_mq_request_bypass_insert(rq, true);


else if (ret != BLK_STS_OK)
blk_mq_end_request(rq, ret);



hctx_unlock(hctx, srcu_idx);

+}



+blk_status_t blk_mq_request_issue_directly(struct request *rq, bool
last)
+{

blk_status_t ret;
int srcu_idx;
blk_qc_t unused_cookie;
struct blk_mq_hw_ctx *hctx = rq->mq_hctx;

hctx_lock(hctx, &srcu_idx);
ret = __blk_mq_try_issue_directly(hctx, rq, &unused_cookie,

true, last);
   hctx_unlock(hctx, srcu_idx);

switch (ret) {
case BLK_STS_OK:
break;


case BLK_STS_DEV_RESOURCE:
case BLK_STS_RESOURCE:
if (force) {


	blk_mq_request_bypass_insert(rq, run_queue);


	/*


	 * We have to return BLK_STS_OK for the DM


	 * to avoid livelock. Otherwise, we return


	 * the real result to indicate whether the


	 * request is direct-issued successfully.


	 */


	ret = bypass ? BLK_STS_OK : ret;


} else if (!bypass) {


	blk_mq_sched_insert_request(rq, false,


				    run_queue, false);


}


break;


default:
if (!bypass)


	blk_mq_end_request(rq, ret);


break;


}

return ret;
 }
@@ -1883,20 +1885,22 @@ blk_status_t blk_mq_try_issue_directly(struct
blk_mq_hw_ctx *hctx,
 void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx,
   	struct list_head *list)
 {

blk_qc_t unused;
blk_status_t ret = BLK_STS_OK;
while (!list_empty(list)) {


blk_status_t ret;

struct request *rq = list_first_entry(list, struct

request,
   			queuelist);
 
   	list_del_init(&rq->queuelist);

if (ret == BLK_STS_OK)


	ret = blk_mq_try_issue_directly(hctx, rq,



&unused,

					false,




ret = blk_mq_request_issue_directly(rq,



list_empty(list));

if (ret != BLK_STS_OK) {


	if (ret == BLK_STS_RESOURCE ||


			ret == BLK_STS_DEV_RESOURCE) {


		blk_mq_request_bypass_insert(rq,
				list_empty(list



));

else


	blk_mq_sched_insert_request(rq, false, true,



false);

		break;


	}


	blk_mq_end_request(rq, ret);


}

}

/*
@@ -1904,7 +1908,7 @@ void blk_mq_try_issue_list_directly(struct
blk_mq_hw_ctx *hctx,
    * the driver there was more coming, but that turned out to
    * be a lie.
    */

if (ret != BLK_STS_OK && hctx->queue->mq_ops->commit_rqs)


if (!list_empty(list) && hctx->queue->mq_ops->commit_rqs)
hctx->queue->mq_ops->commit_rqs(hctx);

}
 
@@ -2017,13 +2021,13 @@ static blk_qc_t blk_mq_make_request(struct
request_queue *q, struct bio *bio)
   	if (same_queue_rq) {
   		data.hctx = same_queue_rq->mq_hctx;
   		blk_mq_try_issue_directly(data.hctx,
same_queue_rq,

			&cookie, false, true);




			&cookie);

}
} else if ((q->nr_hw_queues > 1 && is_sync) || (!q->elevator &&
	!data.hctx->dispatch_busy)) {
blk_mq_put_ctx(data.ctx);
blk_mq_bio_to_request(rq, bio);


blk_mq_try_issue_directly(data.hctx, rq, &cookie,



false, true);

blk_mq_try_issue_directly(data.hctx, rq, &cookie);

} else {
blk_mq_put_ctx(data.ctx);
blk_mq_bio_to_request(rq, bio);

diff --git a/block/blk-mq.h b/block/blk-mq.h
index d704fc7766f4..423ea88ab6fb 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -70,10 +70,8 @@ void blk_mq_request_bypass_insert(struct request
*rq, bool run_queue);
 void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx, struct
blk_mq_ctx *ctx,
   			struct list_head *list);
 
-blk_status_t blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,

				struct request *rq,


				blk_qc_t *cookie,


				bool bypass, bool



last);
+/* Used by blk_insert_cloned_request() to issue request directly */
+blk_status_t blk_mq_request_issue_directly(struct request *rq, bool
last);
 void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx,
   			    struct list_head *list);
Hello Bart
I can conform, reverting those 3 in order also resolves the panic I was
seeing. I have 3 reboot tests of the srpt server allowing the client ot
remain stable and try reconnect.
For the above patch:
Tested-by: Laurence Oberman loberman@redhat.com
Too many changes in code I am not familiar enough to review and
comnment why reverting helps.
Thanks
Laurence

    

2025

2024

2023

2022

2021

2020

2019

2018

2017

Re: [PATCH] block: Fix blk_mq_try_issue_directly()