From: Mike Snitzer snitzer@redhat.com
commit 9f6dc633761006f974701d4c88da71ab68670749 upstream.
Commit d208b89401e0 ("dm: fix mempool NULL pointer race when completing IO") didn't go far enough.
When bio_end_io_acct ends the count of in-flight I/Os may reach zero and the DM device may be suspended. There is a possibility that the suspend races with dm_stats_account_io.
Fix this by adding percpu "pending_io" counters to track outstanding dm_io. Move kicking of suspend queue to dm_io_dec_pending(). Also, rename md_in_flight_bios() to dm_in_flight_bios() and update it to iterate all pending_io counters.
Fixes: d208b89401e0 ("dm: fix mempool NULL pointer race when completing IO") Cc: stable@vger.kernel.org Co-developed-by: Mikulas Patocka mpatocka@redhat.com Signed-off-by: Mikulas Patocka mpatocka@redhat.com Signed-off-by: Mike Snitzer snitzer@redhat.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org --- drivers/md/dm-core.h | 2 ++ drivers/md/dm.c | 35 +++++++++++++++++++++++------------ 2 files changed, 25 insertions(+), 12 deletions(-)
--- a/drivers/md/dm-core.h +++ b/drivers/md/dm-core.h @@ -65,6 +65,8 @@ struct mapped_device { struct gendisk *disk; struct dax_device *dax_dev;
+ unsigned long __percpu *pending_io; + /* * A list of ios that arrived while we were suspended. */ --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -507,10 +507,6 @@ static void end_io_acct(struct mapped_de dm_stats_account_io(&md->stats, bio_data_dir(bio), bio->bi_iter.bi_sector, bio_sectors(bio), true, duration, stats_aux); - - /* nudge anyone waiting on suspend queue */ - if (unlikely(wq_has_sleeper(&md->wait))) - wake_up(&md->wait); }
static struct dm_io *alloc_io(struct mapped_device *md, struct bio *bio) @@ -531,6 +527,7 @@ static struct dm_io *alloc_io(struct map io->magic = DM_IO_MAGIC; io->status = 0; atomic_set(&io->io_count, 1); + this_cpu_inc(*md->pending_io); io->orig_bio = bio; io->md = md; spin_lock_init(&io->endio_lock); @@ -828,6 +825,12 @@ void dm_io_dec_pending(struct dm_io *io, stats_aux = io->stats_aux; free_io(md, io); end_io_acct(md, bio, start_time, &stats_aux); + smp_wmb(); + this_cpu_dec(*md->pending_io); + + /* nudge anyone waiting on suspend queue */ + if (unlikely(wq_has_sleeper(&md->wait))) + wake_up(&md->wait);
if (io_error == BLK_STS_DM_REQUEUE) return; @@ -1697,6 +1700,11 @@ static void cleanup_mapped_device(struct blk_cleanup_disk(md->disk); }
+ if (md->pending_io) { + free_percpu(md->pending_io); + md->pending_io = NULL; + } + cleanup_srcu_struct(&md->io_barrier);
mutex_destroy(&md->suspend_lock); @@ -1794,6 +1802,10 @@ static struct mapped_device *alloc_dev(i if (!md->wq) goto bad;
+ md->pending_io = alloc_percpu(unsigned long); + if (!md->pending_io) + goto bad; + dm_stats_init(&md->stats);
/* Populate the mapping, nobody knows we exist yet */ @@ -2209,16 +2221,13 @@ void dm_put(struct mapped_device *md) } EXPORT_SYMBOL_GPL(dm_put);
-static bool md_in_flight_bios(struct mapped_device *md) +static bool dm_in_flight_bios(struct mapped_device *md) { int cpu; - struct block_device *part = dm_disk(md)->part0; - long sum = 0; + unsigned long sum = 0;
- for_each_possible_cpu(cpu) { - sum += part_stat_local_read_cpu(part, in_flight[0], cpu); - sum += part_stat_local_read_cpu(part, in_flight[1], cpu); - } + for_each_possible_cpu(cpu) + sum += *per_cpu_ptr(md->pending_io, cpu);
return sum != 0; } @@ -2231,7 +2240,7 @@ static int dm_wait_for_bios_completion(s while (true) { prepare_to_wait(&md->wait, &wait, task_state);
- if (!md_in_flight_bios(md)) + if (!dm_in_flight_bios(md)) break;
if (signal_pending_state(task_state, current)) { @@ -2243,6 +2252,8 @@ static int dm_wait_for_bios_completion(s } finish_wait(&md->wait, &wait);
+ smp_rmb(); + return r; }