Hi Greg,
The mq-deadline 'async_depth' sysfs attribute doesn't behave as intended. This patch series fixes the implementation of that attribute.
The original patch series is available here: https://lore.kernel.org/linux-block/20240509170149.7639-1-bvanassche@acm.org...
Thanks,
Bart.
Bart Van Assche (2): block: Call .limit_depth() after .hctx has been set block/mq-deadline: Fix the tag reservation code
block/blk-mq.c | 6 +++++- block/mq-deadline.c | 20 +++++++++++++++++--- 2 files changed, 22 insertions(+), 4 deletions(-)
commit 6259151c04d4e0085e00d2dcb471ebdd1778e72e upstream.
Call .limit_depth() after data->hctx has been set such that data->hctx can be used in .limit_depth() implementations.
Cc: Christoph Hellwig hch@lst.de Cc: Damien Le Moal dlemoal@kernel.org Cc: Zhiguo Niu zhiguo.niu@unisoc.com Fixes: 07757588e507 ("block/mq-deadline: Reserve 25% of scheduler tags for synchronous requests") Signed-off-by: Bart Van Assche bvanassche@acm.org Tested-by: Zhiguo Niu zhiguo.niu@unisoc.com Reviewed-by: Christoph Hellwig hch@lst.de Link: https://lore.kernel.org/r/20240509170149.7639-2-bvanassche@acm.org Signed-off-by: Jens Axboe axboe@kernel.dk --- block/blk-mq.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/block/blk-mq.c b/block/blk-mq.c index 3afa5c8d165b..daf0e4f3444e 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -439,6 +439,7 @@ __blk_mq_alloc_requests_batch(struct blk_mq_alloc_data *data,
static struct request *__blk_mq_alloc_requests(struct blk_mq_alloc_data *data) { + void (*limit_depth)(blk_opf_t, struct blk_mq_alloc_data *) = NULL; struct request_queue *q = data->q; u64 alloc_time_ns = 0; struct request *rq; @@ -465,7 +466,7 @@ static struct request *__blk_mq_alloc_requests(struct blk_mq_alloc_data *data) !blk_op_is_passthrough(data->cmd_flags) && e->type->ops.limit_depth && !(data->flags & BLK_MQ_REQ_RESERVED)) - e->type->ops.limit_depth(data->cmd_flags, data); + limit_depth = e->type->ops.limit_depth; }
retry: @@ -477,6 +478,9 @@ static struct request *__blk_mq_alloc_requests(struct blk_mq_alloc_data *data) if (data->flags & BLK_MQ_REQ_RESERVED) data->rq_flags |= RQF_RESV;
+ if (limit_depth) + limit_depth(data->cmd_flags, data); + /* * Try batched alloc if we want more than 1 tag. */
commit 39823b47bbd40502632ffba90ebb34fff7c8b5e8 upstream.
The current tag reservation code is based on a misunderstanding of the meaning of data->shallow_depth. Fix the tag reservation code as follows: * By default, do not reserve any tags for synchronous requests because for certain use cases reserving tags reduces performance. See also Harshit Mogalapalli, [bug-report] Performance regression with fio sequential-write on a multipath setup, 2024-03-07 (https://lore.kernel.org/linux-block/5ce2ae5d-61e2-4ede-ad55-551112602401@ora...) * Reduce min_shallow_depth to one because min_shallow_depth must be less than or equal any shallow_depth value. * Scale dd->async_depth from the range [1, nr_requests] to [1, bits_per_sbitmap_word].
Cc: Christoph Hellwig hch@lst.de Cc: Damien Le Moal dlemoal@kernel.org Cc: Zhiguo Niu zhiguo.niu@unisoc.com Fixes: 07757588e507 ("block/mq-deadline: Reserve 25% of scheduler tags for synchronous requests") Signed-off-by: Bart Van Assche bvanassche@acm.org Reviewed-by: Christoph Hellwig hch@lst.de Link: https://lore.kernel.org/r/20240509170149.7639-3-bvanassche@acm.org Signed-off-by: Jens Axboe axboe@kernel.dk --- block/mq-deadline.c | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-)
diff --git a/block/mq-deadline.c b/block/mq-deadline.c index f10c2a0d18d4..ff029e1acc4d 100644 --- a/block/mq-deadline.c +++ b/block/mq-deadline.c @@ -597,6 +597,20 @@ static struct request *dd_dispatch_request(struct blk_mq_hw_ctx *hctx) return rq; }
+/* + * 'depth' is a number in the range 1..INT_MAX representing a number of + * requests. Scale it with a factor (1 << bt->sb.shift) / q->nr_requests since + * 1..(1 << bt->sb.shift) is the range expected by sbitmap_get_shallow(). + * Values larger than q->nr_requests have the same effect as q->nr_requests. + */ +static int dd_to_word_depth(struct blk_mq_hw_ctx *hctx, unsigned int qdepth) +{ + struct sbitmap_queue *bt = &hctx->sched_tags->bitmap_tags; + const unsigned int nrr = hctx->queue->nr_requests; + + return ((qdepth << bt->sb.shift) + nrr - 1) / nrr; +} + /* * Called by __blk_mq_alloc_request(). The shallow_depth value set by this * function is used by __blk_mq_get_tag(). @@ -613,7 +627,7 @@ static void dd_limit_depth(blk_opf_t opf, struct blk_mq_alloc_data *data) * Throttle asynchronous requests and writes such that these requests * do not block the allocation of synchronous requests. */ - data->shallow_depth = dd->async_depth; + data->shallow_depth = dd_to_word_depth(data->hctx, dd->async_depth); }
/* Called by blk_mq_update_nr_requests(). */ @@ -623,9 +637,9 @@ static void dd_depth_updated(struct blk_mq_hw_ctx *hctx) struct deadline_data *dd = q->elevator->elevator_data; struct blk_mq_tags *tags = hctx->sched_tags;
- dd->async_depth = max(1UL, 3 * q->nr_requests / 4); + dd->async_depth = q->nr_requests;
- sbitmap_queue_min_shallow_depth(&tags->bitmap_tags, dd->async_depth); + sbitmap_queue_min_shallow_depth(&tags->bitmap_tags, 1); }
/* Called by blk_mq_init_hctx() and blk_mq_init_sched(). */
On Thu, Aug 08, 2024 at 03:54:57PM -0700, Bart Van Assche wrote:
Hi Greg,
The mq-deadline 'async_depth' sysfs attribute doesn't behave as intended. This patch series fixes the implementation of that attribute.
The original patch series is available here: https://lore.kernel.org/linux-block/20240509170149.7639-1-bvanassche@acm.org...
Thanks,
Bart.
Bart Van Assche (2): block: Call .limit_depth() after .hctx has been set block/mq-deadline: Fix the tag reservation code
block/blk-mq.c | 6 +++++- block/mq-deadline.c | 20 +++++++++++++++++--- 2 files changed, 22 insertions(+), 4 deletions(-)
Both now queued up, thanks.
greg k-h
linux-stable-mirror@lists.linaro.org