Commit 39d42fa96ba1b7d2544db3f8ed5da8fb0d5cb877 ("dm crypt: add flags to optionally bypass kcryptd workqueues") made it possible for some code paths in dm-crypt to be executed in softirq context when the underlying driver processes IO requests in interrupt/softirq context.

In this case, allocating a new crypto request may sometimes trigger a "sleeping function called from invalid context" splat like the one below:
[  210.103008][    C0] BUG: sleeping function called from invalid context at mm/mempool.c:381
[  210.104746][    C0] in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 2602, name: fio
[  210.106599][    C0] CPU: 0 PID: 2602 Comm: fio Tainted: G W 5.10.0+ #50
[  210.108331][    C0] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 0.0.0 02/06/2015
[  210.110212][    C0] Call Trace:
[  210.110921][    C0]  <IRQ>
[  210.111527][    C0]  dump_stack+0x7d/0xa3
[  210.112411][    C0]  ___might_sleep.cold+0x122/0x151
[  210.113527][    C0]  mempool_alloc+0x16b/0x2f0
[  210.114524][    C0]  ? __queue_work+0x515/0xde0
[  210.115553][    C0]  ? mempool_resize+0x700/0x700
[  210.116586][    C0]  ? crypt_endio+0x91/0x180
[  210.117479][    C0]  ? blk_update_request+0x757/0x1150
[  210.118513][    C0]  ? blk_mq_end_request+0x4b/0x480
[  210.119572][    C0]  ? blk_done_softirq+0x21d/0x340
[  210.120628][    C0]  ? __do_softirq+0x190/0x611
[  210.121626][    C0]  crypt_convert+0x29f9/0x4c00
[  210.122668][    C0]  ? _raw_spin_lock_irqsave+0x87/0xe0
[  210.123824][    C0]  ? kasan_set_track+0x1c/0x30
[  210.124858][    C0]  ? crypt_iv_tcw_ctr+0x4a0/0x4a0
[  210.125930][    C0]  ? kmem_cache_free+0x104/0x470
[  210.126973][    C0]  ? crypt_endio+0x91/0x180
[  210.127947][    C0]  kcryptd_crypt_read_convert+0x30e/0x420
[  210.129165][    C0]  blk_update_request+0x757/0x1150
[  210.130231][    C0]  blk_mq_end_request+0x4b/0x480
[  210.131294][    C0]  blk_done_softirq+0x21d/0x340
[  210.132332][    C0]  ? _raw_spin_lock+0x81/0xd0
[  210.133289][    C0]  ? blk_mq_stop_hw_queue+0x30/0x30
[  210.134399][    C0]  ? _raw_read_lock_irq+0x40/0x40
[  210.135458][    C0]  __do_softirq+0x190/0x611
[  210.136409][    C0]  ? handle_edge_irq+0x221/0xb60
[  210.137447][    C0]  asm_call_irq_on_stack+0x12/0x20
[  210.138507][    C0]  </IRQ>
[  210.139118][    C0]  do_softirq_own_stack+0x37/0x40
[  210.140191][    C0]  irq_exit_rcu+0x110/0x1b0
[  210.141151][    C0]  common_interrupt+0x74/0x120
[  210.142171][    C0]  asm_common_interrupt+0x1e/0x40
[  210.143206][    C0] RIP: 0010:_aesni_enc1+0x65/0xb0
[  210.144313][    C0] Code: 38 dc c2 41 0f 28 52 d0 66 0f 38 dc c2 41 0f 28 52 e0 66 0f 38 dc c2 41 0f 28 52 f0 66 0f 38 dc c2 41 0f 28 12 66 0f 38 dc c2 <41> 0f 28 52 10 66 0f 38 dc c2 41 0f 28 52 20 66 0f 38 dc c2 41 0f
[  210.148542][    C0] RSP: 0018:ffff88810dbe6db0 EFLAGS: 00000286
[  210.149842][    C0] RAX: ffffffff9a90cdc0 RBX: 0000000000000000 RCX: 0000000000000200
[  210.151576][    C0] RDX: ffff888101e5f240 RSI: ffff888101e5f240 RDI: ffff8881111b5020
[  210.153339][    C0] RBP: ffff88810dbe6e20 R08: 0000000000000000 R09: 0000000000000020
[  210.155063][    C0] R10: ffff8881111b5090 R11: 1ffff11021b7cdcc R12: ffffffff9e87cd40
[  210.156791][    C0] R13: ffff8881111b5210 R14: ffff888101e5f0d8 R15: 0000000000000000
[  210.158497][    C0]  ? aesni_set_key+0x1e0/0x1e0
[  210.159523][    C0]  ? aesni_enc+0xf/0x20
[  210.160408][    C0]  ? glue_xts_req_128bit+0x181/0x6f0
[  210.161571][    C0]  ? aesni_set_key+0x1e0/0x1e0
[  210.162560][    C0]  ? glue_ctr_req_128bit+0x630/0x630
[  210.163706][    C0]  ? kasan_save_stack+0x37/0x50
[  210.164761][    C0]  ? kasan_save_stack+0x20/0x50
[  210.165786][    C0]  ? get_page_from_freelist+0x2052/0x36a0
[  210.167024][    C0]  ? __blkdev_direct_IO_simple+0x43b/0x7e0
[  210.168288][    C0]  ? blkdev_direct_IO+0xd16/0x1020
[  210.169396][    C0]  ? generic_file_direct_write+0x1a3/0x480
[  210.170648][    C0]  ? __generic_file_write_iter+0x1d9/0x530
[  210.171882][    C0]  ? blkdev_write_iter+0x20d/0x3e0
[  210.172954][    C0]  ? vfs_write+0x524/0x770
[  210.173889][    C0]  ? do_syscall_64+0x33/0x40
[  210.174859][    C0]  ? __zone_watermark_ok+0x340/0x340
[  210.175977][    C0]  ? crypt_convert+0x28b6/0x4c00
[  210.177079][    C0]  ? mempool_alloc+0x107/0x2f0
[  210.178096][    C0]  ? crypt_iv_tcw_ctr+0x4a0/0x4a0
[  210.179193][    C0]  ? bio_add_page+0x111/0x170
[  210.180251][    C0]  ? __bio_try_merge_page+0x480/0x480
[  210.181446][    C0]  ? bio_associate_blkg+0x6d/0x100
[  210.182558][    C0]  ? kcryptd_crypt_write_convert+0x5ea/0x980
[  210.183852][    C0]  ? crypt_map+0x5bf/0xc80
[  210.184838][    C0]  ? bio_clone_blkg_association+0x10e/0x2c0
[  210.186125][    C0]  ? __map_bio.isra.0+0x109/0x3f0
[  210.187204][    C0]  ? __split_and_process_non_flush+0x7f9/0xc50
[  210.188560][    C0]  ? __send_empty_flush+0x2d0/0x2d0
[  210.189697][    C0]  ? __part_start_io_acct+0x70/0x2d0
[  210.190842][    C0]  ? dm_submit_bio+0x4d8/0xe40
[  210.191845][    C0]  ? __split_and_process_non_flush+0xc50/0xc50
[  210.193201][    C0]  ? submit_bio_noacct+0x2b9/0xe50
[  210.194313][    C0]  ? blk_queue_enter+0x6d0/0x6d0
[  210.195372][    C0]  ? __bio_add_page+0x246/0x3d0
[  210.196418][    C0]  ? bio_iov_iter_get_pages+0x7dd/0xbe0
[  210.197611][    C0]  ? submit_bio+0xe2/0x460
[  210.198481][    C0]  ? submit_bio_noacct+0xe50/0xe50
[  210.199496][    C0]  ? free_unref_page_commit.constprop.0+0x130/0x330
[  210.200825][    C0]  ? __blkdev_direct_IO_simple+0x43b/0x7e0
[  210.202050][    C0]  ? bd_link_disk_holder+0x690/0x690
[  210.203239][    C0]  ? put_pages_list+0x210/0x210
[  210.204341][    C0]  ? scan_shadow_nodes+0xb0/0xb0
[  210.205472][    C0]  ? _raw_write_lock_irqsave+0xe0/0xe0
[  210.206698][    C0]  ? bd_may_claim+0xc0/0xc0
[  210.207715][    C0]  ? zero_user_segments.constprop.0+0x2e0/0x2e0
[  210.209092][    C0]  ? blkdev_direct_IO+0xd16/0x1020
[  210.210200][    C0]  ? pagevec_lookup_range_tag+0x28/0x60
[  210.211416][    C0]  ? __filemap_fdatawait_range+0xc4/0x1f0
[  210.212669][    C0]  ? page_cache_next_miss+0x1e0/0x1e0
[  210.213842][    C0]  ? generic_file_buffered_read+0x520/0x9e0
[  210.215128][    C0]  ? delete_from_page_cache_batch+0x850/0x850
[  210.216470][    C0]  ? bd_abort_claiming+0xd0/0xd0
[  210.217531][    C0]  ? file_remove_privs+0x74/0x430
[  210.218589][    C0]  ? filemap_check_errors+0x50/0xe0
[  210.219705][    C0]  ? generic_file_direct_write+0x1a3/0x480
[  210.220979][    C0]  ? __generic_file_write_iter+0x1d9/0x530
[  210.222238][    C0]  ? blkdev_write_iter+0x20d/0x3e0
[  210.223328][    C0]  ? bd_unlink_disk_holder+0x360/0x360
[  210.224464][    C0]  ? new_sync_write+0x37b/0x620
[  210.225511][    C0]  ? new_sync_read+0x610/0x610
[  210.226539][    C0]  ? _cond_resched+0x17/0x80
[  210.227539][    C0]  ? inode_security+0x58/0x100
[  210.228582][    C0]  ? security_file_permission+0x54/0x450
[  210.229796][    C0]  ? vfs_write+0x524/0x770
[  210.230758][    C0]  ? __x64_sys_pwrite64+0x197/0x1f0
[  210.231890][    C0]  ? vfs_write+0x770/0x770
[  210.232869][    C0]  ? do_syscall_64+0x33/0x40
[  210.233839][    C0]  ? entry_SYSCALL_64_after_hwframe+0x44/0xa9
Fix this by allocating crypto requests with the GFP_ATOMIC mask when in interrupt context.
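In short, the allocation site becomes context-aware. A minimal sketch of the idea (the full diff follows below):

	/* Sketch: use a non-sleeping allocation when called from softirq. */
	gfp_t gfp = in_interrupt() ? GFP_ATOMIC : GFP_NOIO;

	ctx->r.req = mempool_alloc(&cc->req_pool, gfp);
	if (!ctx->r.req)
		return -ENOMEM;	/* GFP_ATOMIC may fail; the caller must handle this */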
Fixes: 39d42fa96ba1 ("dm crypt: add flags to optionally bypass kcryptd workqueues")
Reported-by: Maciej S. Szmigiero <mail@maciej.szmigiero.name>
Cc: stable@vger.kernel.org # v5.9+
Signed-off-by: Ignat Korchagin <ignat@cloudflare.com>
---
 drivers/md/dm-crypt.c | 33 +++++++++++++++++++++++----------
 1 file changed, 23 insertions(+), 10 deletions(-)
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 53791138d78b..e4fd690c70e1 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -1454,13 +1454,16 @@ static int crypt_convert_block_skcipher(struct crypt_config *cc,
 static void kcryptd_async_done(struct crypto_async_request *async_req,
 			       int error);
 
-static void crypt_alloc_req_skcipher(struct crypt_config *cc,
+static int crypt_alloc_req_skcipher(struct crypt_config *cc,
 				     struct convert_context *ctx)
 {
 	unsigned key_index = ctx->cc_sector & (cc->tfms_count - 1);
 
-	if (!ctx->r.req)
-		ctx->r.req = mempool_alloc(&cc->req_pool, GFP_NOIO);
+	if (!ctx->r.req) {
+		ctx->r.req = mempool_alloc(&cc->req_pool, in_interrupt() ? GFP_ATOMIC : GFP_NOIO);
+		if (!ctx->r.req)
+			return -ENOMEM;
+	}
 
 	skcipher_request_set_tfm(ctx->r.req, cc->cipher_tfm.tfms[key_index]);
 
@@ -1471,13 +1474,18 @@ static void crypt_alloc_req_skcipher(struct crypt_config *cc,
 	skcipher_request_set_callback(ctx->r.req,
 	    CRYPTO_TFM_REQ_MAY_BACKLOG,
 	    kcryptd_async_done, dmreq_of_req(cc, ctx->r.req));
+
+	return 0;
 }
 
-static void crypt_alloc_req_aead(struct crypt_config *cc,
+static int crypt_alloc_req_aead(struct crypt_config *cc,
 				 struct convert_context *ctx)
 {
-	if (!ctx->r.req_aead)
-		ctx->r.req_aead = mempool_alloc(&cc->req_pool, GFP_NOIO);
+	if (!ctx->r.req_aead) {
+		ctx->r.req_aead = mempool_alloc(&cc->req_pool, in_interrupt() ? GFP_ATOMIC : GFP_NOIO);
+		if (!ctx->r.req_aead)
+			return -ENOMEM;
+	}
 
 	aead_request_set_tfm(ctx->r.req_aead, cc->cipher_tfm.tfms_aead[0]);
 
@@ -1488,15 +1496,17 @@ static void crypt_alloc_req_aead(struct crypt_config *cc,
 	aead_request_set_callback(ctx->r.req_aead,
 	    CRYPTO_TFM_REQ_MAY_BACKLOG,
 	    kcryptd_async_done, dmreq_of_req(cc, ctx->r.req_aead));
+
+	return 0;
 }
 
-static void crypt_alloc_req(struct crypt_config *cc,
+static int crypt_alloc_req(struct crypt_config *cc,
 			    struct convert_context *ctx)
 {
 	if (crypt_integrity_aead(cc))
-		crypt_alloc_req_aead(cc, ctx);
+		return crypt_alloc_req_aead(cc, ctx);
 	else
-		crypt_alloc_req_skcipher(cc, ctx);
+		return crypt_alloc_req_skcipher(cc, ctx);
 }
 
 static void crypt_free_req_skcipher(struct crypt_config *cc,
@@ -1539,7 +1549,10 @@ static blk_status_t crypt_convert(struct crypt_config *cc,
 
 	while (ctx->iter_in.bi_size && ctx->iter_out.bi_size) {
 
-		crypt_alloc_req(cc, ctx);
+		r = crypt_alloc_req(cc, ctx);
+		if (r)
+			return BLK_STS_RESOURCE;
+
 		atomic_inc(&ctx->cc_pending);
 
 		if (crypt_integrity_aead(cc))
-- 
2.20.1
On Wed, 30 Dec 2020, Ignat Korchagin wrote:
> diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
> index 53791138d78b..e4fd690c70e1 100644
> --- a/drivers/md/dm-crypt.c
> +++ b/drivers/md/dm-crypt.c
> @@ -1539,7 +1549,10 @@ static blk_status_t crypt_convert(struct crypt_config *cc,
>  
>  	while (ctx->iter_in.bi_size && ctx->iter_out.bi_size) {
>  
> -		crypt_alloc_req(cc, ctx);
> +		r = crypt_alloc_req(cc, ctx);
> +		if (r)
> +			return BLK_STS_RESOURCE;
> +
>  		atomic_inc(&ctx->cc_pending);
>  
>  		if (crypt_integrity_aead(cc))
> -- 
> 2.20.1
I'm not quite convinced that returning BLK_STS_RESOURCE will help. The block layer will convert this value back to -ENOMEM and return it to the caller, resulting in an I/O error.
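For reference, this conversion goes through blk_status_to_errno(), which indexes the blk_errors[] table in block/blk-core.c. An abridged sketch of that table (entries elided, not guaranteed verbatim for every kernel version):

	/* Abridged sketch of blk_errors[] in block/blk-core.c (v5.10-era). */
	static const struct {
		int		errno;
		const char	*name;
	} blk_errors[] = {
		[BLK_STS_OK]		= { 0,		"" },
		/* ... */
		[BLK_STS_RESOURCE]	= { -ENOMEM,	"kernel resource" },
		[BLK_STS_DEV_RESOURCE]	= { -EBUSY,	"device resource" },
		/* ... */
	};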
Note that GFP_ATOMIC allocations may fail at any time and you must handle allocation failure gracefully, i.e. process the request without any error.
An acceptable solution would be to punt the request to a workqueue and do GFP_NOIO allocation from the workqueue. Or add the request to some list and process the list when some other request completes.
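The first suggestion, in pattern form (a generic sketch, not dm-crypt code; io, wq and process_io are illustrative names):

	/*
	 * Sketch: never fail the bio on a GFP_ATOMIC failure; defer the
	 * work to process context and retry there with a sleeping mask.
	 */
	req = mempool_alloc(pool, GFP_ATOMIC);
	if (!req) {
		INIT_WORK(&io->work, process_io);	/* worker allocates with GFP_NOIO */
		queue_work(wq, &io->work);
		return;
	}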
You should write a test that simulates allocation failure and verify that the kernel handles it gracefully without any I/O error.
Mikulas
On Fri, Jan 1, 2021 at 10:00 AM Mikulas Patocka <mpatocka@redhat.com> wrote:
>
> On Wed, 30 Dec 2020, Ignat Korchagin wrote:
>
> > diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
> > index 53791138d78b..e4fd690c70e1 100644
> > --- a/drivers/md/dm-crypt.c
> > +++ b/drivers/md/dm-crypt.c
> > @@ -1539,7 +1549,10 @@ static blk_status_t crypt_convert(struct crypt_config *cc,
> >
> >  	while (ctx->iter_in.bi_size && ctx->iter_out.bi_size) {
> >
> > -		crypt_alloc_req(cc, ctx);
> > +		r = crypt_alloc_req(cc, ctx);
> > +		if (r)
> > +			return BLK_STS_RESOURCE;
> > +
> >  		atomic_inc(&ctx->cc_pending);
> >
> >  		if (crypt_integrity_aead(cc))
> > --
> > 2.20.1
>
> I'm not quite convinced that returning BLK_STS_RESOURCE will help. The
> block layer will convert this value back to -ENOMEM and return it to the
> caller, resulting in an I/O error.
>
> Note that GFP_ATOMIC allocations may fail at any time and you must handle
> allocation failure gracefully, i.e. process the request without any error.
>
> An acceptable solution would be to punt the request to a workqueue and do
> GFP_NOIO allocation from the workqueue. Or add the request to some list
> and process the list when some other request completes.
We can do the workqueue, if that's the desired behaviour. The second patch of this patchset already adds the code to retry the request from a workqueue when crypt_convert returns BLK_STS_DEV_RESOURCE, so all that is needed is to change the BLK_STS_RESOURCE return here to BLK_STS_DEV_RESOURCE. Does that sound reasonable?
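The retry shape in question, as a hedged sketch (kcryptd_crypt_read_continue is the continuation worker the second patch introduces; this shows the rough shape, not the literal patch):

	/* In kcryptd_crypt_read_convert(), after crypt_convert() returns r: */
	if (r == BLK_STS_DEV_RESOURCE) {
		/*
		 * A transient resource failure in atomic context: continue
		 * the conversion from the crypt workqueue, where GFP_NOIO
		 * can sleep, instead of erroring the bio.
		 */
		INIT_WORK(&io->work, kcryptd_crypt_read_continue);
		queue_work(cc->crypt_queue, &io->work);
		return;
	}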
> You should write a test that simulates allocation failure and verify that
> the kernel handles it gracefully without any I/O error.
I already have the test for the second patch in the set, but will adapt it to make sure the allocation failure codepath is covered as well.
> Mikulas