On Wed, Mar 9, 2022 at 11:23 PM Song Liu song@kernel.org wrote:
On Wed, Mar 9, 2022 at 10:48 PM Christoph Hellwig hch@infradead.org wrote:
On Tue, Mar 08, 2022 at 10:42:09PM -0800, Song Liu wrote:
RAID arrays check/repair operations benefit a lot from merging requests. If we only check the previous entry for merge attempt, many merge will be missed. As a result, significant regression is observed for RAID check and repair.
Fix this by checking more than just the previous entry when plug->multiple_queues == true.
But this also means really significant CPU overhead for all other workloads.
Would the following check help with these workloads?
if (!plug->multiple_queues) break;
This improves the check/repair speed of a 20-HDD raid6 from 19 MB/s to 103 MB/s.
What driver uses multiple queues for HDDs?
Can you explain the workload submitted by a md a bit better? I wonder if we can easily do the right thing straight in the md driver.
It is the md sync_thread doing check and repair. Basically, the md thread reads all the disks and computes parity from data.
Maybe we should add a new flag to struct blk_plug for this special case?
I meant something like:
diff --git c/block/blk-core.c w/block/blk-core.c index 1039515c99d6..4fb09243e908 100644 --- c/block/blk-core.c +++ w/block/blk-core.c @@ -1303,6 +1303,12 @@ void blk_finish_plug(struct blk_plug *plug) } EXPORT_SYMBOL(blk_finish_plug);
+void blk_plug_merge_aggressively(struct blk_plug *plug) +{ + plug->aggresive_merge = true; +} +EXPORT_SYMBOL(blk_plug_merge_aggressively); + void blk_io_schedule(void) { /* Prevent hang_check timer from firing at us during very long I/O */ diff --git c/block/blk-merge.c w/block/blk-merge.c index 4de34a332c9f..8b673288bc5f 100644 --- c/block/blk-merge.c +++ w/block/blk-merge.c @@ -1089,12 +1089,14 @@ bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio, if (!plug || rq_list_empty(plug->mq_list)) return false;
- /* check the previously added entry for a quick merge attempt */ - rq = rq_list_peek(&plug->mq_list); - if (rq->q == q) { - if (blk_attempt_bio_merge(q, rq, bio, nr_segs, false) == - BIO_MERGE_OK) - return true; + rq_list_for_each(&plug->mq_list, rq) { + if (rq->q == q) { + if (blk_attempt_bio_merge(q, rq, bio, nr_segs, false) == + BIO_MERGE_OK) + return true; + } + if (!plug->aggresive_merge) + break; } return false; } diff --git c/drivers/md/md.c w/drivers/md/md.c index 4d38bd7dadd6..6be56632a412 100644 --- c/drivers/md/md.c +++ w/drivers/md/md.c @@ -8901,6 +8901,7 @@ void md_do_sync(struct md_thread *thread) update_time = jiffies;
blk_start_plug(&plug); + blk_plug_merge_aggressively(&plug); while (j < max_sectors) { sector_t sectors;
diff --git c/drivers/md/raid1.c w/drivers/md/raid1.c index e2d8acb1e988..501d15532170 100644 --- c/drivers/md/raid1.c +++ w/drivers/md/raid1.c @@ -838,6 +838,7 @@ static void flush_pending_writes(struct r1conf *conf) */ __set_current_state(TASK_RUNNING); blk_start_plug(&plug); + blk_plug_merge_aggressively(&plug); flush_bio_list(conf, bio); blk_finish_plug(&plug); } else @@ -2591,6 +2592,7 @@ static void raid1d(struct md_thread *thread) }
blk_start_plug(&plug); + blk_plug_merge_aggressively(&plug); for (;;) {
flush_pending_writes(conf); diff --git c/drivers/md/raid10.c w/drivers/md/raid10.c index 2b969f70a31f..0a594613a075 100644 --- c/drivers/md/raid10.c +++ w/drivers/md/raid10.c @@ -876,6 +876,7 @@ static void flush_pending_writes(struct r10conf *conf) __set_current_state(TASK_RUNNING);
blk_start_plug(&plug); + blk_plug_merge_aggressively(&plug); /* flush any pending bitmap writes to disk * before proceeding w/ I/O */ md_bitmap_unplug(conf->mddev->bitmap); @@ -3088,6 +3089,7 @@ static void raid10d(struct md_thread *thread) }
blk_start_plug(&plug); + blk_plug_merge_aggressively(&plug); for (;;) {
flush_pending_writes(conf); diff --git c/drivers/md/raid5.c w/drivers/md/raid5.c index ffe720c73b0a..a96884ca5f08 100644 --- c/drivers/md/raid5.c +++ w/drivers/md/raid5.c @@ -6447,6 +6447,7 @@ static void raid5_do_work(struct work_struct *work) pr_debug("+++ raid5worker active\n");
blk_start_plug(&plug); + blk_plug_merge_aggressively(&plug); handled = 0; spin_lock_irq(&conf->device_lock); while (1) { @@ -6497,6 +6498,7 @@ static void raid5d(struct md_thread *thread) md_check_recovery(mddev);
blk_start_plug(&plug); + blk_plug_merge_aggressively(&plug); handled = 0; spin_lock_irq(&conf->device_lock); while (1) { diff --git c/include/linux/blkdev.h w/include/linux/blkdev.h index 16b47035e4b0..45b0da416302 100644 --- c/include/linux/blkdev.h +++ w/include/linux/blkdev.h @@ -775,6 +775,7 @@ struct blk_plug { bool multiple_queues; bool has_elevator; bool nowait; + bool aggresive_merge;
struct list_head cb_list; /* md requires an unplug callback */ }; @@ -791,6 +792,7 @@ extern struct blk_plug_cb *blk_check_plugged(blk_plug_cb_fn unplug, extern void blk_start_plug(struct blk_plug *); extern void blk_start_plug_nr_ios(struct blk_plug *, unsigned short); extern void blk_finish_plug(struct blk_plug *); +void blk_plug_merge_aggressively(struct blk_plug *plug);
void blk_flush_plug(struct blk_plug *plug, bool from_schedule);