- Linux-stable-mirror - lists.linaro.org

FAILED: patch "[PATCH] irqdomain: Look for existing mapping only once" failed to apply to 5.10-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 5.10-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y git checkout FETCH_HEAD git cherry-pick -x 6e6f75c9c98d2d246d90411ff2b6f0cd271f4cba # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '1678128480246217(a)kroah.com' --subject-prefix 'PATCH 5.10.y' HEAD^.. Possible dependencies: 6e6f75c9c98d ("irqdomain: Look for existing mapping only once") a359f757965a ("irq: Fix typos in comments") thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 6e6f75c9c98d2d246d90411ff2b6f0cd271f4cba Mon Sep 17 00:00:00 2001 From: Johan Hovold <johan+linaro(a)kernel.org> Date: Mon, 13 Feb 2023 11:42:46 +0100 Subject: [PATCH] irqdomain: Look for existing mapping only once Avoid looking for an existing mapping twice when creating a new mapping using irq_create_fwspec_mapping() by factoring out the actual allocation which is shared with irq_create_mapping_affinity(). The new helper function will also be used to fix a shared-interrupt mapping race, hence the Fixes tag. Fixes: b62b2cf5759b ("irqdomain: Fix handling of type settings for existing mappings") Cc: stable(a)vger.kernel.org # 4.8 Tested-by: Hsin-Yi Wang <hsinyi(a)chromium.org> Tested-by: Mark-PK Tsai <mark-pk.tsai(a)mediatek.com> Signed-off-by: Johan Hovold <johan+linaro(a)kernel.org> Signed-off-by: Marc Zyngier <maz(a)kernel.org> Link: https://lore.kernel.org/r/20230213104302.17307-5-johan+linaro@kernel.org diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index 9f5b96cf6c5c..9f95047e4bc7 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -682,6 +682,34 @@ unsigned int irq_create_direct_mapping(struct irq_domain *domain) EXPORT_SYMBOL_GPL(irq_create_direct_mapping); #endif +static unsigned int __irq_create_mapping_affinity(struct irq_domain *domain, + irq_hw_number_t hwirq, + const struct irq_affinity_desc *affinity) +{ + struct device_node *of_node = irq_domain_get_of_node(domain); + int virq; + + pr_debug("irq_create_mapping(0x%p, 0x%lx)\n", domain, hwirq); + + /* Allocate a virtual interrupt number */ + virq = irq_domain_alloc_descs(-1, 1, hwirq, of_node_to_nid(of_node), + affinity); + if (virq <= 0) { + pr_debug("-> virq allocation failed\n"); + return 0; + } + + if (irq_domain_associate(domain, virq, hwirq)) { + irq_free_desc(virq); + return 0; + } + + pr_debug("irq %lu on domain %s mapped to virtual irq %u\n", + hwirq, of_node_full_name(of_node), virq); + + return virq; +} + /** * irq_create_mapping_affinity() - Map a hardware interrupt into linux irq space * @domain: domain owning this hardware interrupt or NULL for default domain @@ -694,14 +722,11 @@ EXPORT_SYMBOL_GPL(irq_create_direct_mapping); * on the number returned from that call. */ unsigned int irq_create_mapping_affinity(struct irq_domain *domain, - irq_hw_number_t hwirq, - const struct irq_affinity_desc *affinity) + irq_hw_number_t hwirq, + const struct irq_affinity_desc *affinity) { - struct device_node *of_node; int virq; - pr_debug("irq_create_mapping(0x%p, 0x%lx)\n", domain, hwirq); - /* Look for default domain if necessary */ if (domain == NULL) domain = irq_default_domain; @@ -709,34 +734,15 @@ unsigned int irq_create_mapping_affinity(struct irq_domain *domain, WARN(1, "%s(, %lx) called with NULL domain\n", __func__, hwirq); return 0; } - pr_debug("-> using domain @%p\n", domain); - - of_node = irq_domain_get_of_node(domain); /* Check if mapping already exists */ virq = irq_find_mapping(domain, hwirq); if (virq) { - pr_debug("-> existing mapping on virq %d\n", virq); + pr_debug("existing mapping on virq %d\n", virq); return virq; } - /* Allocate a virtual interrupt number */ - virq = irq_domain_alloc_descs(-1, 1, hwirq, of_node_to_nid(of_node), - affinity); - if (virq <= 0) { - pr_debug("-> virq allocation failed\n"); - return 0; - } - - if (irq_domain_associate(domain, virq, hwirq)) { - irq_free_desc(virq); - return 0; - } - - pr_debug("irq %lu on domain %s mapped to virtual irq %u\n", - hwirq, of_node_full_name(of_node), virq); - - return virq; + return __irq_create_mapping_affinity(domain, hwirq, affinity); } EXPORT_SYMBOL_GPL(irq_create_mapping_affinity); @@ -841,7 +847,7 @@ unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec) return 0; } else { /* Create mapping */ - virq = irq_create_mapping(domain, hwirq); + virq = __irq_create_mapping_affinity(domain, hwirq, NULL); if (!virq) return virq; }

2 years, 8 months

1
0
0 0

FAILED: patch "[PATCH] brd: check for REQ_NOWAIT and set correct page allocation" failed to apply to 5.10-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 5.10-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y git checkout FETCH_HEAD git cherry-pick -x 6ded703c56c21bfb259725d4f1831a5feb563e9b # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '167812503219822(a)kroah.com' --subject-prefix 'PATCH 5.10.y' HEAD^.. Possible dependencies: 6ded703c56c2 ("brd: check for REQ_NOWAIT and set correct page allocation mask") db0ccc44a20b ("brd: return 0/-error from brd_insert_page()") ba91fd01aad2 ("block/brd: Use the enum req_op type") 3e08773c3841 ("block: switch polling to be bio based") 6ce913fe3eee ("block: rename REQ_HIPRI to REQ_POLLED") d729cf9acb93 ("io_uring: don't sleep when polling for I/O") ef99b2d37666 ("block: replace the spin argument to blk_iopoll with a flags argument") 28a1ae6b9dab ("blk-mq: remove blk_qc_t_valid") efbabbe121f9 ("blk-mq: remove blk_qc_t_to_tag and blk_qc_t_is_internal") c6699d6fe0ff ("blk-mq: factor out a "classic" poll helper") f70299f0d58e ("blk-mq: factor out a blk_qc_to_hctx helper") 71fc3f5e2c00 ("block: don't try to poll multi-bio I/Os in __blkdev_direct_IO") 0f38d7664615 ("blk-mq: cleanup blk_mq_submit_bio") 47c122e35d7e ("block: pre-allocate requests if plug is started and is a batch") 24b83deb29b7 ("block: move struct request to blk-mq.h") badf7f643787 ("block: move a few merge helpers out of <linux/blkdev.h>") 2e9bc3465ac5 ("block: move elevator.h to block/") 9778ac77c202 ("block: remove the struct blk_queue_ctx forward declaration") 90138237a562 ("block: remove the unused blk_queue_state enum") cc9c884dd7f4 ("block: call submit_bio_checks under q_usage_counter") thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 6ded703c56c21bfb259725d4f1831a5feb563e9b Mon Sep 17 00:00:00 2001 From: Jens Axboe <axboe(a)kernel.dk> Date: Thu, 16 Feb 2023 08:01:08 -0700 Subject: [PATCH] brd: check for REQ_NOWAIT and set correct page allocation mask If REQ_NOWAIT is set, then do a non-blocking allocation if the operation is a write and we need to insert a new page. Currently REQ_NOWAIT cannot be set as the queue isn't marked as supporting nowait, this change is in preparation for allowing that. radix_tree_preload() warns on attempting to call it with an allocation mask that doesn't allow blocking. While that warning could arguably be removed, we need to handle radix insertion failures anyway as they are more likely if we cannot block to get memory. Remove legacy BUG_ON()'s and turn them into proper errors instead, one for the allocation failure and one for finding a page that doesn't match the correct index. Cc: stable(a)vger.kernel.org # 5.10+ Reviewed-by: Christoph Hellwig <hch(a)lst.de> Signed-off-by: Jens Axboe <axboe(a)kernel.dk> diff --git a/drivers/block/brd.c b/drivers/block/brd.c index 15a148d5aad9..00f3c5b51a01 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c @@ -80,26 +80,21 @@ static struct page *brd_lookup_page(struct brd_device *brd, sector_t sector) /* * Insert a new page for a given sector, if one does not already exist. */ -static int brd_insert_page(struct brd_device *brd, sector_t sector) +static int brd_insert_page(struct brd_device *brd, sector_t sector, gfp_t gfp) { pgoff_t idx; struct page *page; - gfp_t gfp_flags; + int ret = 0; page = brd_lookup_page(brd, sector); if (page) return 0; - /* - * Must use NOIO because we don't want to recurse back into the - * block or filesystem layers from page reclaim. - */ - gfp_flags = GFP_NOIO | __GFP_ZERO | __GFP_HIGHMEM; - page = alloc_page(gfp_flags); + page = alloc_page(gfp | __GFP_ZERO | __GFP_HIGHMEM); if (!page) return -ENOMEM; - if (radix_tree_preload(GFP_NOIO)) { + if (gfpflags_allow_blocking(gfp) && radix_tree_preload(gfp)) { __free_page(page); return -ENOMEM; } @@ -110,15 +105,17 @@ static int brd_insert_page(struct brd_device *brd, sector_t sector) if (radix_tree_insert(&brd->brd_pages, idx, page)) { __free_page(page); page = radix_tree_lookup(&brd->brd_pages, idx); - BUG_ON(!page); - BUG_ON(page->index != idx); + if (!page) + ret = -ENOMEM; + else if (page->index != idx) + ret = -EIO; } else { brd->brd_nr_pages++; } spin_unlock(&brd->brd_lock); radix_tree_preload_end(); - return 0; + return ret; } /* @@ -167,19 +164,20 @@ static void brd_free_pages(struct brd_device *brd) /* * copy_to_brd_setup must be called before copy_to_brd. It may sleep. */ -static int copy_to_brd_setup(struct brd_device *brd, sector_t sector, size_t n) +static int copy_to_brd_setup(struct brd_device *brd, sector_t sector, size_t n, + gfp_t gfp) { unsigned int offset = (sector & (PAGE_SECTORS-1)) << SECTOR_SHIFT; size_t copy; int ret; copy = min_t(size_t, n, PAGE_SIZE - offset); - ret = brd_insert_page(brd, sector); + ret = brd_insert_page(brd, sector, gfp); if (ret) return ret; if (copy < n) { sector += copy >> SECTOR_SHIFT; - ret = brd_insert_page(brd, sector); + ret = brd_insert_page(brd, sector, gfp); } return ret; } @@ -254,20 +252,26 @@ static void copy_from_brd(void *dst, struct brd_device *brd, * Process a single bvec of a bio. */ static int brd_do_bvec(struct brd_device *brd, struct page *page, - unsigned int len, unsigned int off, enum req_op op, + unsigned int len, unsigned int off, blk_opf_t opf, sector_t sector) { void *mem; int err = 0; - if (op_is_write(op)) { - err = copy_to_brd_setup(brd, sector, len); + if (op_is_write(opf)) { + /* + * Must use NOIO because we don't want to recurse back into the + * block or filesystem layers from page reclaim. + */ + gfp_t gfp = opf & REQ_NOWAIT ? GFP_NOWAIT : GFP_NOIO; + + err = copy_to_brd_setup(brd, sector, len, gfp); if (err) goto out; } mem = kmap_atomic(page); - if (!op_is_write(op)) { + if (!op_is_write(opf)) { copy_from_brd(mem + off, brd, sector, len); flush_dcache_page(page); } else { @@ -296,8 +300,12 @@ static void brd_submit_bio(struct bio *bio) (len & (SECTOR_SIZE - 1))); err = brd_do_bvec(brd, bvec.bv_page, len, bvec.bv_offset, - bio_op(bio), sector); + bio->bi_opf, sector); if (err) { + if (err == -ENOMEM && bio->bi_opf & REQ_NOWAIT) { + bio_wouldblock_error(bio); + return; + } bio_io_error(bio); return; }

2 years, 8 months

1
0
0 0

FAILED: patch "[PATCH] brd: check for REQ_NOWAIT and set correct page allocation" failed to apply to 5.15-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 5.15-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y git checkout FETCH_HEAD git cherry-pick -x 6ded703c56c21bfb259725d4f1831a5feb563e9b # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '1678125031200144(a)kroah.com' --subject-prefix 'PATCH 5.15.y' HEAD^.. Possible dependencies: 6ded703c56c2 ("brd: check for REQ_NOWAIT and set correct page allocation mask") db0ccc44a20b ("brd: return 0/-error from brd_insert_page()") ba91fd01aad2 ("block/brd: Use the enum req_op type") 3e08773c3841 ("block: switch polling to be bio based") 6ce913fe3eee ("block: rename REQ_HIPRI to REQ_POLLED") d729cf9acb93 ("io_uring: don't sleep when polling for I/O") ef99b2d37666 ("block: replace the spin argument to blk_iopoll with a flags argument") 28a1ae6b9dab ("blk-mq: remove blk_qc_t_valid") efbabbe121f9 ("blk-mq: remove blk_qc_t_to_tag and blk_qc_t_is_internal") c6699d6fe0ff ("blk-mq: factor out a "classic" poll helper") f70299f0d58e ("blk-mq: factor out a blk_qc_to_hctx helper") 71fc3f5e2c00 ("block: don't try to poll multi-bio I/Os in __blkdev_direct_IO") 0f38d7664615 ("blk-mq: cleanup blk_mq_submit_bio") 47c122e35d7e ("block: pre-allocate requests if plug is started and is a batch") 24b83deb29b7 ("block: move struct request to blk-mq.h") badf7f643787 ("block: move a few merge helpers out of <linux/blkdev.h>") 2e9bc3465ac5 ("block: move elevator.h to block/") 9778ac77c202 ("block: remove the struct blk_queue_ctx forward declaration") 90138237a562 ("block: remove the unused blk_queue_state enum") thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 6ded703c56c21bfb259725d4f1831a5feb563e9b Mon Sep 17 00:00:00 2001 From: Jens Axboe <axboe(a)kernel.dk> Date: Thu, 16 Feb 2023 08:01:08 -0700 Subject: [PATCH] brd: check for REQ_NOWAIT and set correct page allocation mask If REQ_NOWAIT is set, then do a non-blocking allocation if the operation is a write and we need to insert a new page. Currently REQ_NOWAIT cannot be set as the queue isn't marked as supporting nowait, this change is in preparation for allowing that. radix_tree_preload() warns on attempting to call it with an allocation mask that doesn't allow blocking. While that warning could arguably be removed, we need to handle radix insertion failures anyway as they are more likely if we cannot block to get memory. Remove legacy BUG_ON()'s and turn them into proper errors instead, one for the allocation failure and one for finding a page that doesn't match the correct index. Cc: stable(a)vger.kernel.org # 5.10+ Reviewed-by: Christoph Hellwig <hch(a)lst.de> Signed-off-by: Jens Axboe <axboe(a)kernel.dk> diff --git a/drivers/block/brd.c b/drivers/block/brd.c index 15a148d5aad9..00f3c5b51a01 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c @@ -80,26 +80,21 @@ static struct page *brd_lookup_page(struct brd_device *brd, sector_t sector) /* * Insert a new page for a given sector, if one does not already exist. */ -static int brd_insert_page(struct brd_device *brd, sector_t sector) +static int brd_insert_page(struct brd_device *brd, sector_t sector, gfp_t gfp) { pgoff_t idx; struct page *page; - gfp_t gfp_flags; + int ret = 0; page = brd_lookup_page(brd, sector); if (page) return 0; - /* - * Must use NOIO because we don't want to recurse back into the - * block or filesystem layers from page reclaim. - */ - gfp_flags = GFP_NOIO | __GFP_ZERO | __GFP_HIGHMEM; - page = alloc_page(gfp_flags); + page = alloc_page(gfp | __GFP_ZERO | __GFP_HIGHMEM); if (!page) return -ENOMEM; - if (radix_tree_preload(GFP_NOIO)) { + if (gfpflags_allow_blocking(gfp) && radix_tree_preload(gfp)) { __free_page(page); return -ENOMEM; } @@ -110,15 +105,17 @@ static int brd_insert_page(struct brd_device *brd, sector_t sector) if (radix_tree_insert(&brd->brd_pages, idx, page)) { __free_page(page); page = radix_tree_lookup(&brd->brd_pages, idx); - BUG_ON(!page); - BUG_ON(page->index != idx); + if (!page) + ret = -ENOMEM; + else if (page->index != idx) + ret = -EIO; } else { brd->brd_nr_pages++; } spin_unlock(&brd->brd_lock); radix_tree_preload_end(); - return 0; + return ret; } /* @@ -167,19 +164,20 @@ static void brd_free_pages(struct brd_device *brd) /* * copy_to_brd_setup must be called before copy_to_brd. It may sleep. */ -static int copy_to_brd_setup(struct brd_device *brd, sector_t sector, size_t n) +static int copy_to_brd_setup(struct brd_device *brd, sector_t sector, size_t n, + gfp_t gfp) { unsigned int offset = (sector & (PAGE_SECTORS-1)) << SECTOR_SHIFT; size_t copy; int ret; copy = min_t(size_t, n, PAGE_SIZE - offset); - ret = brd_insert_page(brd, sector); + ret = brd_insert_page(brd, sector, gfp); if (ret) return ret; if (copy < n) { sector += copy >> SECTOR_SHIFT; - ret = brd_insert_page(brd, sector); + ret = brd_insert_page(brd, sector, gfp); } return ret; } @@ -254,20 +252,26 @@ static void copy_from_brd(void *dst, struct brd_device *brd, * Process a single bvec of a bio. */ static int brd_do_bvec(struct brd_device *brd, struct page *page, - unsigned int len, unsigned int off, enum req_op op, + unsigned int len, unsigned int off, blk_opf_t opf, sector_t sector) { void *mem; int err = 0; - if (op_is_write(op)) { - err = copy_to_brd_setup(brd, sector, len); + if (op_is_write(opf)) { + /* + * Must use NOIO because we don't want to recurse back into the + * block or filesystem layers from page reclaim. + */ + gfp_t gfp = opf & REQ_NOWAIT ? GFP_NOWAIT : GFP_NOIO; + + err = copy_to_brd_setup(brd, sector, len, gfp); if (err) goto out; } mem = kmap_atomic(page); - if (!op_is_write(op)) { + if (!op_is_write(opf)) { copy_from_brd(mem + off, brd, sector, len); flush_dcache_page(page); } else { @@ -296,8 +300,12 @@ static void brd_submit_bio(struct bio *bio) (len & (SECTOR_SIZE - 1))); err = brd_do_bvec(brd, bvec.bv_page, len, bvec.bv_offset, - bio_op(bio), sector); + bio->bi_opf, sector); if (err) { + if (err == -ENOMEM && bio->bi_opf & REQ_NOWAIT) { + bio_wouldblock_error(bio); + return; + } bio_io_error(bio); return; }

2 years, 8 months

1
0
0 0

FAILED: patch "[PATCH] brd: mark as nowait compatible" failed to apply to 5.10-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 5.10-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y git checkout FETCH_HEAD git cherry-pick -x 67205f80be9910207481406c47f7d85e703fb2e9 # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '167812500026148(a)kroah.com' --subject-prefix 'PATCH 5.10.y' HEAD^.. Possible dependencies: 67205f80be99 ("brd: mark as nowait compatible") e1528830bd4e ("block/brd: add error handling support for add_disk()") f7bf35862477 ("brd: reduce the brd_devices_mutex scope") 7f9b348cb5e9 ("brd: convert to blk_alloc_disk/blk_cleanup_disk") f4be591f1436 ("brd: expose number of allocated pages in debugfs") 7cc178a6b994 ("brd: use __register_blkdev to allocate devices on demand") thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 67205f80be9910207481406c47f7d85e703fb2e9 Mon Sep 17 00:00:00 2001 From: Jens Axboe <axboe(a)kernel.dk> Date: Wed, 15 Feb 2023 16:43:47 -0700 Subject: [PATCH] brd: mark as nowait compatible By default, non-mq drivers do not support nowait. This causes io_uring to use a slower path as the driver cannot be trust not to block. brd can safely set the nowait flag, as worst case all it does is a NOIO allocation. For io_uring, this makes a substantial difference. Before: submitter=0, tid=453, file=/dev/ram0, node=-1 polled=0, fixedbufs=1/0, register_files=1, buffered=0, QD=128 Engine=io_uring, sq_ring=128, cq_ring=128 IOPS=440.03K, BW=1718MiB/s, IOS/call=32/31 IOPS=428.96K, BW=1675MiB/s, IOS/call=32/32 IOPS=442.59K, BW=1728MiB/s, IOS/call=32/31 IOPS=419.65K, BW=1639MiB/s, IOS/call=32/32 IOPS=426.82K, BW=1667MiB/s, IOS/call=32/31 and after: submitter=0, tid=354, file=/dev/ram0, node=-1 polled=0, fixedbufs=1/0, register_files=1, buffered=0, QD=128 Engine=io_uring, sq_ring=128, cq_ring=128 IOPS=3.37M, BW=13.15GiB/s, IOS/call=32/31 IOPS=3.45M, BW=13.46GiB/s, IOS/call=32/31 IOPS=3.43M, BW=13.42GiB/s, IOS/call=32/32 IOPS=3.43M, BW=13.39GiB/s, IOS/call=32/31 IOPS=3.43M, BW=13.38GiB/s, IOS/call=32/31 or about an 8x in difference. Now that brd is prepared to deal with REQ_NOWAIT reads/writes, mark it as supporting that. Cc: stable(a)vger.kernel.org # 5.10+ Link: https://lore.kernel.org/linux-block/20230203103005.31290-1-p.raghav@samsung… Signed-off-by: Jens Axboe <axboe(a)kernel.dk> diff --git a/drivers/block/brd.c b/drivers/block/brd.c index 00f3c5b51a01..740631dcdd0e 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c @@ -418,6 +418,7 @@ static int brd_alloc(int i) /* Tell the block layer that this is not a rotational device */ blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue); blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, disk->queue); + blk_queue_flag_set(QUEUE_FLAG_NOWAIT, disk->queue); err = add_disk(disk); if (err) goto out_cleanup_disk;

2 years, 8 months

1
0
0 0

FAILED: patch "[PATCH] brd: mark as nowait compatible" failed to apply to 5.15-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 5.15-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y git checkout FETCH_HEAD git cherry-pick -x 67205f80be9910207481406c47f7d85e703fb2e9 # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '167812499941176(a)kroah.com' --subject-prefix 'PATCH 5.15.y' HEAD^.. Possible dependencies: 67205f80be99 ("brd: mark as nowait compatible") e1528830bd4e ("block/brd: add error handling support for add_disk()") thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 67205f80be9910207481406c47f7d85e703fb2e9 Mon Sep 17 00:00:00 2001 From: Jens Axboe <axboe(a)kernel.dk> Date: Wed, 15 Feb 2023 16:43:47 -0700 Subject: [PATCH] brd: mark as nowait compatible By default, non-mq drivers do not support nowait. This causes io_uring to use a slower path as the driver cannot be trust not to block. brd can safely set the nowait flag, as worst case all it does is a NOIO allocation. For io_uring, this makes a substantial difference. Before: submitter=0, tid=453, file=/dev/ram0, node=-1 polled=0, fixedbufs=1/0, register_files=1, buffered=0, QD=128 Engine=io_uring, sq_ring=128, cq_ring=128 IOPS=440.03K, BW=1718MiB/s, IOS/call=32/31 IOPS=428.96K, BW=1675MiB/s, IOS/call=32/32 IOPS=442.59K, BW=1728MiB/s, IOS/call=32/31 IOPS=419.65K, BW=1639MiB/s, IOS/call=32/32 IOPS=426.82K, BW=1667MiB/s, IOS/call=32/31 and after: submitter=0, tid=354, file=/dev/ram0, node=-1 polled=0, fixedbufs=1/0, register_files=1, buffered=0, QD=128 Engine=io_uring, sq_ring=128, cq_ring=128 IOPS=3.37M, BW=13.15GiB/s, IOS/call=32/31 IOPS=3.45M, BW=13.46GiB/s, IOS/call=32/31 IOPS=3.43M, BW=13.42GiB/s, IOS/call=32/32 IOPS=3.43M, BW=13.39GiB/s, IOS/call=32/31 IOPS=3.43M, BW=13.38GiB/s, IOS/call=32/31 or about an 8x in difference. Now that brd is prepared to deal with REQ_NOWAIT reads/writes, mark it as supporting that. Cc: stable(a)vger.kernel.org # 5.10+ Link: https://lore.kernel.org/linux-block/20230203103005.31290-1-p.raghav@samsung… Signed-off-by: Jens Axboe <axboe(a)kernel.dk> diff --git a/drivers/block/brd.c b/drivers/block/brd.c index 00f3c5b51a01..740631dcdd0e 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c @@ -418,6 +418,7 @@ static int brd_alloc(int i) /* Tell the block layer that this is not a rotational device */ blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue); blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, disk->queue); + blk_queue_flag_set(QUEUE_FLAG_NOWAIT, disk->queue); err = add_disk(disk); if (err) goto out_cleanup_disk;

2 years, 8 months

1
0
0 0

Request to backport "sysctl: fix proc_dobool() usability" to stable kernels

by Thomas Weißschuh

Hi -stable team, please backport the commit f1aa2eb5ea05 ("sysctl: fix proc_dobool() usability") to the stable kernels containing commit 83efeeeb3d04 ("tty: Allow TIOCSTI to be disabled"). (Which seems only to be the 6.2 branch only at the moment) Without this backport the sysctl dev.net.legacy_tiocsti to enable ioctl(TIOCSTI) is not functional. So on kernels that don't enable CONFIG_LEGACY_TIOCSTI, ioctl(TIOCSTI) is not usable at all. This ioctl is used for the copy-and-paste functionality of the screenreader "fenrir". ( https://github.com/chrys87/fenrir ) Reported-by: Storm Dragon <stormdragon2976(a)gmail.com> Link: https://lore.kernel.org/lkml/ZAOi9hDBTYqoAZuI@hotmail.com/

2 years, 8 months

2
3
0 0

FAILED: patch "[PATCH] x86/crash: Disable virt in core NMI crash handler to avoid" failed to apply to 4.14-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 4.14-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.14.y git checkout FETCH_HEAD git cherry-pick -x 26044aff37a5455b19a91785086914fd33053ef4 # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '16781236386157(a)kroah.com' --subject-prefix 'PATCH 4.14.y' HEAD^.. Possible dependencies: 26044aff37a5 ("x86/crash: Disable virt in core NMI crash handler to avoid double shootdown") ed72736183c4 ("x86/reboot: Force all cpus to exit VMX root if VMX is supported") 4d1d0977a215 ("x86: Fix a handful of typos") 22ca7ee933a3 ("x86/apic: Provide and use helper for send_IPI_allbutself()") 6a1cb5f5c641 ("x86/apic: Add static key to Control IPI shorthands") bdda3b93e660 ("x86/apic: Move no_ipi_broadcast() out of 32bit") 9c92374b631d ("x86/cpu: Move arch_smt_update() to a neutral place") c94f0718fb1c ("x86/apic: Consolidate the apic local headers") ba77b2a02e00 ("x86/apic: Move apic_flat_64 header into apic directory") 8b542da37287 ("x86/apic: Move ipi header into apic directory") 521b82fee98c ("x86/apic: Cleanup the include maze") cdc86c9d1f82 ("x86/apic: Move IPI inlines into ipi.c") 2591bc4e8d70 ("x86/kgbd: Use NMI_VECTOR not APIC_DM_NMI") 747d5a1bf293 ("x86/reboot: Always use NMI fallback when shutdown via reboot vector IPI fails") 7e300dabb7e7 ("treewide: Replace GPLv2 boilerplate/reference with SPDX - rule 223") fa4bff165070 ("Merge branch 'x86-mds-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip") thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 26044aff37a5455b19a91785086914fd33053ef4 Mon Sep 17 00:00:00 2001 From: Sean Christopherson <seanjc(a)google.com> Date: Wed, 30 Nov 2022 23:36:47 +0000 Subject: [PATCH] x86/crash: Disable virt in core NMI crash handler to avoid double shootdown Disable virtualization in crash_nmi_callback() and rework the emergency_vmx_disable_all() path to do an NMI shootdown if and only if a shootdown has not already occurred. NMI crash shootdown fundamentally can't support multiple invocations as responding CPUs are deliberately put into halt state without unblocking NMIs. But, the emergency reboot path doesn't have any work of its own, it simply cares about disabling virtualization, i.e. so long as a shootdown occurred, emergency reboot doesn't care who initiated the shootdown, or when. If "crash_kexec_post_notifiers" is specified on the kernel command line, panic() will invoke crash_smp_send_stop() and result in a second call to nmi_shootdown_cpus() during native_machine_emergency_restart(). Invoke the callback _before_ disabling virtualization, as the current VMCS needs to be cleared before doing VMXOFF. Note, this results in a subtle change in ordering between disabling virtualization and stopping Intel PT on the responding CPUs. While VMX and Intel PT do interact, VMXOFF and writes to MSR_IA32_RTIT_CTL do not induce faults between one another, which is all that matters when panicking. Harden nmi_shootdown_cpus() against multiple invocations to try and capture any such kernel bugs via a WARN instead of hanging the system during a crash/dump, e.g. prior to the recent hardening of register_nmi_handler(), re-registering the NMI handler would trigger a double list_add() and hang the system if CONFIG_BUG_ON_DATA_CORRUPTION=y. list_add double add: new=ffffffff82220800, prev=ffffffff8221cfe8, next=ffffffff82220800. WARNING: CPU: 2 PID: 1319 at lib/list_debug.c:29 __list_add_valid+0x67/0x70 Call Trace: __register_nmi_handler+0xcf/0x130 nmi_shootdown_cpus+0x39/0x90 native_machine_emergency_restart+0x1c9/0x1d0 panic+0x237/0x29b Extract the disabling logic to a common helper to deduplicate code, and to prepare for doing the shootdown in the emergency reboot path if SVM is supported. Note, prior to commit ed72736183c4 ("x86/reboot: Force all cpus to exit VMX root if VMX is supported"), nmi_shootdown_cpus() was subtly protected against a second invocation by a cpu_vmx_enabled() check as the kdump handler would disable VMX if it ran first. Fixes: ed72736183c4 ("x86/reboot: Force all cpus to exit VMX root if VMX is supported") Cc: stable(a)vger.kernel.org Reported-by: Guilherme G. Piccoli <gpiccoli(a)igalia.com> Cc: Vitaly Kuznetsov <vkuznets(a)redhat.com> Cc: Paolo Bonzini <pbonzini(a)redhat.com> Link: https://lore.kernel.org/all/20220427224924.592546-2-gpiccoli@igalia.com Tested-by: Guilherme G. Piccoli <gpiccoli(a)igalia.com> Reviewed-by: Thomas Gleixner <tglx(a)linutronix.de> Link: https://lore.kernel.org/r/20221130233650.1404148-2-seanjc@google.com Signed-off-by: Sean Christopherson <seanjc(a)google.com> diff --git a/arch/x86/include/asm/reboot.h b/arch/x86/include/asm/reboot.h index 04c17be9b5fd..bc5b4d788c08 100644 --- a/arch/x86/include/asm/reboot.h +++ b/arch/x86/include/asm/reboot.h @@ -25,6 +25,8 @@ void __noreturn machine_real_restart(unsigned int type); #define MRR_BIOS 0 #define MRR_APM 1 +void cpu_emergency_disable_virtualization(void); + typedef void (*nmi_shootdown_cb)(int, struct pt_regs*); void nmi_panic_self_stop(struct pt_regs *regs); void nmi_shootdown_cpus(nmi_shootdown_cb callback); diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index 305514431f26..cdd92ab43cda 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c @@ -37,7 +37,6 @@ #include <linux/kdebug.h> #include <asm/cpu.h> #include <asm/reboot.h> -#include <asm/virtext.h> #include <asm/intel_pt.h> #include <asm/crash.h> #include <asm/cmdline.h> @@ -81,15 +80,6 @@ static void kdump_nmi_callback(int cpu, struct pt_regs *regs) */ cpu_crash_vmclear_loaded_vmcss(); - /* Disable VMX or SVM if needed. - * - * We need to disable virtualization on all CPUs. - * Having VMX or SVM enabled on any CPU may break rebooting - * after the kdump kernel has finished its task. - */ - cpu_emergency_vmxoff(); - cpu_emergency_svm_disable(); - /* * Disable Intel PT to stop its logging */ @@ -148,12 +138,7 @@ void native_machine_crash_shutdown(struct pt_regs *regs) */ cpu_crash_vmclear_loaded_vmcss(); - /* Booting kdump kernel with VMX or SVM enabled won't work, - * because (among other limitations) we can't disable paging - * with the virt flags. - */ - cpu_emergency_vmxoff(); - cpu_emergency_svm_disable(); + cpu_emergency_disable_virtualization(); /* * Disable Intel PT to stop its logging diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index c3636ea4aa71..374c14b3a9bf 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -528,10 +528,7 @@ static inline void kb_wait(void) } } -static void vmxoff_nmi(int cpu, struct pt_regs *regs) -{ - cpu_emergency_vmxoff(); -} +static inline void nmi_shootdown_cpus_on_restart(void); /* Use NMIs as IPIs to tell all CPUs to disable virtualization */ static void emergency_vmx_disable_all(void) @@ -554,7 +551,7 @@ static void emergency_vmx_disable_all(void) __cpu_emergency_vmxoff(); /* Halt and exit VMX root operation on the other CPUs. */ - nmi_shootdown_cpus(vmxoff_nmi); + nmi_shootdown_cpus_on_restart(); } } @@ -795,6 +792,17 @@ void machine_crash_shutdown(struct pt_regs *regs) /* This is the CPU performing the emergency shutdown work. */ int crashing_cpu = -1; +/* + * Disable virtualization, i.e. VMX or SVM, to ensure INIT is recognized during + * reboot. VMX blocks INIT if the CPU is post-VMXON, and SVM blocks INIT if + * GIF=0, i.e. if the crash occurred between CLGI and STGI. + */ +void cpu_emergency_disable_virtualization(void) +{ + cpu_emergency_vmxoff(); + cpu_emergency_svm_disable(); +} + #if defined(CONFIG_SMP) static nmi_shootdown_cb shootdown_callback; @@ -817,7 +825,14 @@ static int crash_nmi_callback(unsigned int val, struct pt_regs *regs) return NMI_HANDLED; local_irq_disable(); - shootdown_callback(cpu, regs); + if (shootdown_callback) + shootdown_callback(cpu, regs); + + /* + * Prepare the CPU for reboot _after_ invoking the callback so that the + * callback can safely use virtualization instructions, e.g. VMCLEAR. + */ + cpu_emergency_disable_virtualization(); atomic_dec(&waiting_for_crash_ipi); /* Assume hlt works */ @@ -828,18 +843,32 @@ static int crash_nmi_callback(unsigned int val, struct pt_regs *regs) return NMI_HANDLED; } -/* - * Halt all other CPUs, calling the specified function on each of them +/** + * nmi_shootdown_cpus - Stop other CPUs via NMI + * @callback: Optional callback to be invoked from the NMI handler + * + * The NMI handler on the remote CPUs invokes @callback, if not + * NULL, first and then disables virtualization to ensure that + * INIT is recognized during reboot. * - * This function can be used to halt all other CPUs on crash - * or emergency reboot time. The function passed as parameter - * will be called inside a NMI handler on all CPUs. + * nmi_shootdown_cpus() can only be invoked once. After the first + * invocation all other CPUs are stuck in crash_nmi_callback() and + * cannot respond to a second NMI. */ void nmi_shootdown_cpus(nmi_shootdown_cb callback) { unsigned long msecs; + local_irq_disable(); + /* + * Avoid certain doom if a shootdown already occurred; re-registering + * the NMI handler will cause list corruption, modifying the callback + * will do who knows what, etc... + */ + if (WARN_ON_ONCE(crash_ipi_issued)) + return; + /* Make a note of crashing cpu. Will be used in NMI callback. */ crashing_cpu = safe_smp_processor_id(); @@ -867,7 +896,17 @@ void nmi_shootdown_cpus(nmi_shootdown_cb callback) msecs--; } - /* Leave the nmi callback set */ + /* + * Leave the nmi callback set, shootdown is a one-time thing. Clearing + * the callback could result in a NULL pointer dereference if a CPU + * (finally) responds after the timeout expires. + */ +} + +static inline void nmi_shootdown_cpus_on_restart(void) +{ + if (!crash_ipi_issued) + nmi_shootdown_cpus(NULL); } /* @@ -897,6 +936,8 @@ void nmi_shootdown_cpus(nmi_shootdown_cb callback) /* No other CPUs to shoot down */ } +static inline void nmi_shootdown_cpus_on_restart(void) { } + void run_crash_ipi_callback(struct pt_regs *regs) { }

2 years, 8 months

1
0
0 0

FAILED: patch "[PATCH] x86/crash: Disable virt in core NMI crash handler to avoid" failed to apply to 4.19-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 4.19-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.19.y git checkout FETCH_HEAD git cherry-pick -x 26044aff37a5455b19a91785086914fd33053ef4 # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '1678123636156122(a)kroah.com' --subject-prefix 'PATCH 4.19.y' HEAD^.. Possible dependencies: 26044aff37a5 ("x86/crash: Disable virt in core NMI crash handler to avoid double shootdown") ed72736183c4 ("x86/reboot: Force all cpus to exit VMX root if VMX is supported") 4d1d0977a215 ("x86: Fix a handful of typos") 22ca7ee933a3 ("x86/apic: Provide and use helper for send_IPI_allbutself()") 6a1cb5f5c641 ("x86/apic: Add static key to Control IPI shorthands") bdda3b93e660 ("x86/apic: Move no_ipi_broadcast() out of 32bit") 9c92374b631d ("x86/cpu: Move arch_smt_update() to a neutral place") c94f0718fb1c ("x86/apic: Consolidate the apic local headers") ba77b2a02e00 ("x86/apic: Move apic_flat_64 header into apic directory") 8b542da37287 ("x86/apic: Move ipi header into apic directory") 521b82fee98c ("x86/apic: Cleanup the include maze") cdc86c9d1f82 ("x86/apic: Move IPI inlines into ipi.c") 2591bc4e8d70 ("x86/kgbd: Use NMI_VECTOR not APIC_DM_NMI") 747d5a1bf293 ("x86/reboot: Always use NMI fallback when shutdown via reboot vector IPI fails") 7e300dabb7e7 ("treewide: Replace GPLv2 boilerplate/reference with SPDX - rule 223") fa4bff165070 ("Merge branch 'x86-mds-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip") thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 26044aff37a5455b19a91785086914fd33053ef4 Mon Sep 17 00:00:00 2001 From: Sean Christopherson <seanjc(a)google.com> Date: Wed, 30 Nov 2022 23:36:47 +0000 Subject: [PATCH] x86/crash: Disable virt in core NMI crash handler to avoid double shootdown Disable virtualization in crash_nmi_callback() and rework the emergency_vmx_disable_all() path to do an NMI shootdown if and only if a shootdown has not already occurred. NMI crash shootdown fundamentally can't support multiple invocations as responding CPUs are deliberately put into halt state without unblocking NMIs. But, the emergency reboot path doesn't have any work of its own, it simply cares about disabling virtualization, i.e. so long as a shootdown occurred, emergency reboot doesn't care who initiated the shootdown, or when. If "crash_kexec_post_notifiers" is specified on the kernel command line, panic() will invoke crash_smp_send_stop() and result in a second call to nmi_shootdown_cpus() during native_machine_emergency_restart(). Invoke the callback _before_ disabling virtualization, as the current VMCS needs to be cleared before doing VMXOFF. Note, this results in a subtle change in ordering between disabling virtualization and stopping Intel PT on the responding CPUs. While VMX and Intel PT do interact, VMXOFF and writes to MSR_IA32_RTIT_CTL do not induce faults between one another, which is all that matters when panicking. Harden nmi_shootdown_cpus() against multiple invocations to try and capture any such kernel bugs via a WARN instead of hanging the system during a crash/dump, e.g. prior to the recent hardening of register_nmi_handler(), re-registering the NMI handler would trigger a double list_add() and hang the system if CONFIG_BUG_ON_DATA_CORRUPTION=y. list_add double add: new=ffffffff82220800, prev=ffffffff8221cfe8, next=ffffffff82220800. WARNING: CPU: 2 PID: 1319 at lib/list_debug.c:29 __list_add_valid+0x67/0x70 Call Trace: __register_nmi_handler+0xcf/0x130 nmi_shootdown_cpus+0x39/0x90 native_machine_emergency_restart+0x1c9/0x1d0 panic+0x237/0x29b Extract the disabling logic to a common helper to deduplicate code, and to prepare for doing the shootdown in the emergency reboot path if SVM is supported. Note, prior to commit ed72736183c4 ("x86/reboot: Force all cpus to exit VMX root if VMX is supported"), nmi_shootdown_cpus() was subtly protected against a second invocation by a cpu_vmx_enabled() check as the kdump handler would disable VMX if it ran first. Fixes: ed72736183c4 ("x86/reboot: Force all cpus to exit VMX root if VMX is supported") Cc: stable(a)vger.kernel.org Reported-by: Guilherme G. Piccoli <gpiccoli(a)igalia.com> Cc: Vitaly Kuznetsov <vkuznets(a)redhat.com> Cc: Paolo Bonzini <pbonzini(a)redhat.com> Link: https://lore.kernel.org/all/20220427224924.592546-2-gpiccoli@igalia.com Tested-by: Guilherme G. Piccoli <gpiccoli(a)igalia.com> Reviewed-by: Thomas Gleixner <tglx(a)linutronix.de> Link: https://lore.kernel.org/r/20221130233650.1404148-2-seanjc@google.com Signed-off-by: Sean Christopherson <seanjc(a)google.com> diff --git a/arch/x86/include/asm/reboot.h b/arch/x86/include/asm/reboot.h index 04c17be9b5fd..bc5b4d788c08 100644 --- a/arch/x86/include/asm/reboot.h +++ b/arch/x86/include/asm/reboot.h @@ -25,6 +25,8 @@ void __noreturn machine_real_restart(unsigned int type); #define MRR_BIOS 0 #define MRR_APM 1 +void cpu_emergency_disable_virtualization(void); + typedef void (*nmi_shootdown_cb)(int, struct pt_regs*); void nmi_panic_self_stop(struct pt_regs *regs); void nmi_shootdown_cpus(nmi_shootdown_cb callback); diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index 305514431f26..cdd92ab43cda 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c @@ -37,7 +37,6 @@ #include <linux/kdebug.h> #include <asm/cpu.h> #include <asm/reboot.h> -#include <asm/virtext.h> #include <asm/intel_pt.h> #include <asm/crash.h> #include <asm/cmdline.h> @@ -81,15 +80,6 @@ static void kdump_nmi_callback(int cpu, struct pt_regs *regs) */ cpu_crash_vmclear_loaded_vmcss(); - /* Disable VMX or SVM if needed. - * - * We need to disable virtualization on all CPUs. - * Having VMX or SVM enabled on any CPU may break rebooting - * after the kdump kernel has finished its task. - */ - cpu_emergency_vmxoff(); - cpu_emergency_svm_disable(); - /* * Disable Intel PT to stop its logging */ @@ -148,12 +138,7 @@ void native_machine_crash_shutdown(struct pt_regs *regs) */ cpu_crash_vmclear_loaded_vmcss(); - /* Booting kdump kernel with VMX or SVM enabled won't work, - * because (among other limitations) we can't disable paging - * with the virt flags. - */ - cpu_emergency_vmxoff(); - cpu_emergency_svm_disable(); + cpu_emergency_disable_virtualization(); /* * Disable Intel PT to stop its logging diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index c3636ea4aa71..374c14b3a9bf 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -528,10 +528,7 @@ static inline void kb_wait(void) } } -static void vmxoff_nmi(int cpu, struct pt_regs *regs) -{ - cpu_emergency_vmxoff(); -} +static inline void nmi_shootdown_cpus_on_restart(void); /* Use NMIs as IPIs to tell all CPUs to disable virtualization */ static void emergency_vmx_disable_all(void) @@ -554,7 +551,7 @@ static void emergency_vmx_disable_all(void) __cpu_emergency_vmxoff(); /* Halt and exit VMX root operation on the other CPUs. */ - nmi_shootdown_cpus(vmxoff_nmi); + nmi_shootdown_cpus_on_restart(); } } @@ -795,6 +792,17 @@ void machine_crash_shutdown(struct pt_regs *regs) /* This is the CPU performing the emergency shutdown work. */ int crashing_cpu = -1; +/* + * Disable virtualization, i.e. VMX or SVM, to ensure INIT is recognized during + * reboot. VMX blocks INIT if the CPU is post-VMXON, and SVM blocks INIT if + * GIF=0, i.e. if the crash occurred between CLGI and STGI. + */ +void cpu_emergency_disable_virtualization(void) +{ + cpu_emergency_vmxoff(); + cpu_emergency_svm_disable(); +} + #if defined(CONFIG_SMP) static nmi_shootdown_cb shootdown_callback; @@ -817,7 +825,14 @@ static int crash_nmi_callback(unsigned int val, struct pt_regs *regs) return NMI_HANDLED; local_irq_disable(); - shootdown_callback(cpu, regs); + if (shootdown_callback) + shootdown_callback(cpu, regs); + + /* + * Prepare the CPU for reboot _after_ invoking the callback so that the + * callback can safely use virtualization instructions, e.g. VMCLEAR. + */ + cpu_emergency_disable_virtualization(); atomic_dec(&waiting_for_crash_ipi); /* Assume hlt works */ @@ -828,18 +843,32 @@ static int crash_nmi_callback(unsigned int val, struct pt_regs *regs) return NMI_HANDLED; } -/* - * Halt all other CPUs, calling the specified function on each of them +/** + * nmi_shootdown_cpus - Stop other CPUs via NMI + * @callback: Optional callback to be invoked from the NMI handler + * + * The NMI handler on the remote CPUs invokes @callback, if not + * NULL, first and then disables virtualization to ensure that + * INIT is recognized during reboot. * - * This function can be used to halt all other CPUs on crash - * or emergency reboot time. The function passed as parameter - * will be called inside a NMI handler on all CPUs. + * nmi_shootdown_cpus() can only be invoked once. After the first + * invocation all other CPUs are stuck in crash_nmi_callback() and + * cannot respond to a second NMI. */ void nmi_shootdown_cpus(nmi_shootdown_cb callback) { unsigned long msecs; + local_irq_disable(); + /* + * Avoid certain doom if a shootdown already occurred; re-registering + * the NMI handler will cause list corruption, modifying the callback + * will do who knows what, etc... + */ + if (WARN_ON_ONCE(crash_ipi_issued)) + return; + /* Make a note of crashing cpu. Will be used in NMI callback. */ crashing_cpu = safe_smp_processor_id(); @@ -867,7 +896,17 @@ void nmi_shootdown_cpus(nmi_shootdown_cb callback) msecs--; } - /* Leave the nmi callback set */ + /* + * Leave the nmi callback set, shootdown is a one-time thing. Clearing + * the callback could result in a NULL pointer dereference if a CPU + * (finally) responds after the timeout expires. + */ +} + +static inline void nmi_shootdown_cpus_on_restart(void) +{ + if (!crash_ipi_issued) + nmi_shootdown_cpus(NULL); } /* @@ -897,6 +936,8 @@ void nmi_shootdown_cpus(nmi_shootdown_cb callback) /* No other CPUs to shoot down */ } +static inline void nmi_shootdown_cpus_on_restart(void) { } + void run_crash_ipi_callback(struct pt_regs *regs) { }

2 years, 8 months

1
0
0 0

FAILED: patch "[PATCH] x86/crash: Disable virt in core NMI crash handler to avoid" failed to apply to 5.4-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 5.4-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y git checkout FETCH_HEAD git cherry-pick -x 26044aff37a5455b19a91785086914fd33053ef4 # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '1678123634056(a)kroah.com' --subject-prefix 'PATCH 5.4.y' HEAD^.. Possible dependencies: 26044aff37a5 ("x86/crash: Disable virt in core NMI crash handler to avoid double shootdown") ed72736183c4 ("x86/reboot: Force all cpus to exit VMX root if VMX is supported") 4d1d0977a215 ("x86: Fix a handful of typos") thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 26044aff37a5455b19a91785086914fd33053ef4 Mon Sep 17 00:00:00 2001 From: Sean Christopherson <seanjc(a)google.com> Date: Wed, 30 Nov 2022 23:36:47 +0000 Subject: [PATCH] x86/crash: Disable virt in core NMI crash handler to avoid double shootdown Disable virtualization in crash_nmi_callback() and rework the emergency_vmx_disable_all() path to do an NMI shootdown if and only if a shootdown has not already occurred. NMI crash shootdown fundamentally can't support multiple invocations as responding CPUs are deliberately put into halt state without unblocking NMIs. But, the emergency reboot path doesn't have any work of its own, it simply cares about disabling virtualization, i.e. so long as a shootdown occurred, emergency reboot doesn't care who initiated the shootdown, or when. If "crash_kexec_post_notifiers" is specified on the kernel command line, panic() will invoke crash_smp_send_stop() and result in a second call to nmi_shootdown_cpus() during native_machine_emergency_restart(). Invoke the callback _before_ disabling virtualization, as the current VMCS needs to be cleared before doing VMXOFF. Note, this results in a subtle change in ordering between disabling virtualization and stopping Intel PT on the responding CPUs. While VMX and Intel PT do interact, VMXOFF and writes to MSR_IA32_RTIT_CTL do not induce faults between one another, which is all that matters when panicking. Harden nmi_shootdown_cpus() against multiple invocations to try and capture any such kernel bugs via a WARN instead of hanging the system during a crash/dump, e.g. prior to the recent hardening of register_nmi_handler(), re-registering the NMI handler would trigger a double list_add() and hang the system if CONFIG_BUG_ON_DATA_CORRUPTION=y. list_add double add: new=ffffffff82220800, prev=ffffffff8221cfe8, next=ffffffff82220800. WARNING: CPU: 2 PID: 1319 at lib/list_debug.c:29 __list_add_valid+0x67/0x70 Call Trace: __register_nmi_handler+0xcf/0x130 nmi_shootdown_cpus+0x39/0x90 native_machine_emergency_restart+0x1c9/0x1d0 panic+0x237/0x29b Extract the disabling logic to a common helper to deduplicate code, and to prepare for doing the shootdown in the emergency reboot path if SVM is supported. Note, prior to commit ed72736183c4 ("x86/reboot: Force all cpus to exit VMX root if VMX is supported"), nmi_shootdown_cpus() was subtly protected against a second invocation by a cpu_vmx_enabled() check as the kdump handler would disable VMX if it ran first. Fixes: ed72736183c4 ("x86/reboot: Force all cpus to exit VMX root if VMX is supported") Cc: stable(a)vger.kernel.org Reported-by: Guilherme G. Piccoli <gpiccoli(a)igalia.com> Cc: Vitaly Kuznetsov <vkuznets(a)redhat.com> Cc: Paolo Bonzini <pbonzini(a)redhat.com> Link: https://lore.kernel.org/all/20220427224924.592546-2-gpiccoli@igalia.com Tested-by: Guilherme G. Piccoli <gpiccoli(a)igalia.com> Reviewed-by: Thomas Gleixner <tglx(a)linutronix.de> Link: https://lore.kernel.org/r/20221130233650.1404148-2-seanjc@google.com Signed-off-by: Sean Christopherson <seanjc(a)google.com> diff --git a/arch/x86/include/asm/reboot.h b/arch/x86/include/asm/reboot.h index 04c17be9b5fd..bc5b4d788c08 100644 --- a/arch/x86/include/asm/reboot.h +++ b/arch/x86/include/asm/reboot.h @@ -25,6 +25,8 @@ void __noreturn machine_real_restart(unsigned int type); #define MRR_BIOS 0 #define MRR_APM 1 +void cpu_emergency_disable_virtualization(void); + typedef void (*nmi_shootdown_cb)(int, struct pt_regs*); void nmi_panic_self_stop(struct pt_regs *regs); void nmi_shootdown_cpus(nmi_shootdown_cb callback); diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index 305514431f26..cdd92ab43cda 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c @@ -37,7 +37,6 @@ #include <linux/kdebug.h> #include <asm/cpu.h> #include <asm/reboot.h> -#include <asm/virtext.h> #include <asm/intel_pt.h> #include <asm/crash.h> #include <asm/cmdline.h> @@ -81,15 +80,6 @@ static void kdump_nmi_callback(int cpu, struct pt_regs *regs) */ cpu_crash_vmclear_loaded_vmcss(); - /* Disable VMX or SVM if needed. - * - * We need to disable virtualization on all CPUs. - * Having VMX or SVM enabled on any CPU may break rebooting - * after the kdump kernel has finished its task. - */ - cpu_emergency_vmxoff(); - cpu_emergency_svm_disable(); - /* * Disable Intel PT to stop its logging */ @@ -148,12 +138,7 @@ void native_machine_crash_shutdown(struct pt_regs *regs) */ cpu_crash_vmclear_loaded_vmcss(); - /* Booting kdump kernel with VMX or SVM enabled won't work, - * because (among other limitations) we can't disable paging - * with the virt flags. - */ - cpu_emergency_vmxoff(); - cpu_emergency_svm_disable(); + cpu_emergency_disable_virtualization(); /* * Disable Intel PT to stop its logging diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index c3636ea4aa71..374c14b3a9bf 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -528,10 +528,7 @@ static inline void kb_wait(void) } } -static void vmxoff_nmi(int cpu, struct pt_regs *regs) -{ - cpu_emergency_vmxoff(); -} +static inline void nmi_shootdown_cpus_on_restart(void); /* Use NMIs as IPIs to tell all CPUs to disable virtualization */ static void emergency_vmx_disable_all(void) @@ -554,7 +551,7 @@ static void emergency_vmx_disable_all(void) __cpu_emergency_vmxoff(); /* Halt and exit VMX root operation on the other CPUs. */ - nmi_shootdown_cpus(vmxoff_nmi); + nmi_shootdown_cpus_on_restart(); } } @@ -795,6 +792,17 @@ void machine_crash_shutdown(struct pt_regs *regs) /* This is the CPU performing the emergency shutdown work. */ int crashing_cpu = -1; +/* + * Disable virtualization, i.e. VMX or SVM, to ensure INIT is recognized during + * reboot. VMX blocks INIT if the CPU is post-VMXON, and SVM blocks INIT if + * GIF=0, i.e. if the crash occurred between CLGI and STGI. + */ +void cpu_emergency_disable_virtualization(void) +{ + cpu_emergency_vmxoff(); + cpu_emergency_svm_disable(); +} + #if defined(CONFIG_SMP) static nmi_shootdown_cb shootdown_callback; @@ -817,7 +825,14 @@ static int crash_nmi_callback(unsigned int val, struct pt_regs *regs) return NMI_HANDLED; local_irq_disable(); - shootdown_callback(cpu, regs); + if (shootdown_callback) + shootdown_callback(cpu, regs); + + /* + * Prepare the CPU for reboot _after_ invoking the callback so that the + * callback can safely use virtualization instructions, e.g. VMCLEAR. + */ + cpu_emergency_disable_virtualization(); atomic_dec(&waiting_for_crash_ipi); /* Assume hlt works */ @@ -828,18 +843,32 @@ static int crash_nmi_callback(unsigned int val, struct pt_regs *regs) return NMI_HANDLED; } -/* - * Halt all other CPUs, calling the specified function on each of them +/** + * nmi_shootdown_cpus - Stop other CPUs via NMI + * @callback: Optional callback to be invoked from the NMI handler + * + * The NMI handler on the remote CPUs invokes @callback, if not + * NULL, first and then disables virtualization to ensure that + * INIT is recognized during reboot. * - * This function can be used to halt all other CPUs on crash - * or emergency reboot time. The function passed as parameter - * will be called inside a NMI handler on all CPUs. + * nmi_shootdown_cpus() can only be invoked once. After the first + * invocation all other CPUs are stuck in crash_nmi_callback() and + * cannot respond to a second NMI. */ void nmi_shootdown_cpus(nmi_shootdown_cb callback) { unsigned long msecs; + local_irq_disable(); + /* + * Avoid certain doom if a shootdown already occurred; re-registering + * the NMI handler will cause list corruption, modifying the callback + * will do who knows what, etc... + */ + if (WARN_ON_ONCE(crash_ipi_issued)) + return; + /* Make a note of crashing cpu. Will be used in NMI callback. */ crashing_cpu = safe_smp_processor_id(); @@ -867,7 +896,17 @@ void nmi_shootdown_cpus(nmi_shootdown_cb callback) msecs--; } - /* Leave the nmi callback set */ + /* + * Leave the nmi callback set, shootdown is a one-time thing. Clearing + * the callback could result in a NULL pointer dereference if a CPU + * (finally) responds after the timeout expires. + */ +} + +static inline void nmi_shootdown_cpus_on_restart(void) +{ + if (!crash_ipi_issued) + nmi_shootdown_cpus(NULL); } /* @@ -897,6 +936,8 @@ void nmi_shootdown_cpus(nmi_shootdown_cb callback) /* No other CPUs to shoot down */ } +static inline void nmi_shootdown_cpus_on_restart(void) { } + void run_crash_ipi_callback(struct pt_regs *regs) { }

2 years, 8 months

1
0
0 0

FAILED: patch "[PATCH] KVM: s390: disable migration mode when dirty tracking is" failed to apply to 5.4-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 5.4-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y git checkout FETCH_HEAD git cherry-pick -x f2d3155e2a6bac44d16f04415a321e8707d895c6 # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '167810000636131(a)kroah.com' --subject-prefix 'PATCH 5.4.y' HEAD^.. Possible dependencies: f2d3155e2a6b ("KVM: s390: disable migration mode when dirty tracking is disabled") ec5c86976674 ("KVM: s390: Skip gfn/size sanity checks on memslot DELETE or FLAGS_ONLY") cf5b486922dc ("KVM: s390: Use "new" memslot instead of userspace memory region") 106ee47dc633 ("docs: kvm: Convert api.txt to ReST format") 6c972ba685d5 ("docs: kvm: convert devices/vm.txt to ReST") aff7aeea5483 ("docs: kvm: convert devices/vfio.txt to ReST") e777a5bd98c6 ("docs: kvm: convert devices/vcpu.txt to ReST") e94474300361 ("docs: kvm: convert devices/s390_flic.txt to ReST") 05c47036c62e ("docs: kvm: convert devices/mpic.txt to ReST") c0d1c8a0af59 ("docs: kvm: devices/arm-vgit-v3.txt to ReST") d371c011fc5e ("docs: kvm: devices/arm-vgic-its.txt to ReST format") 7bd460fc1dfa ("docs: kvm: add arm/pvtime.rst to index.rst") 290a6bb06de9 ("arm64: KVM: Add UAPI notes for swapped registers") 22945688acd4 ("KVM: PPC: Book3S HV: Support reset of secure guest") 008e359c76d8 ("KVM: PPC: Book3S HV: Radix changes for secure guest") 60f0a643aa44 ("KVM: PPC: Book3S HV: Shared pages support for secure guests") ca9f4942670c ("KVM: PPC: Book3S HV: Support for running secure guests") a4b28f5c6798 ("Merge remote-tracking branch 'kvmarm/kvm-arm64/stolen-time' into kvmarm-master/next") thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From f2d3155e2a6bac44d16f04415a321e8707d895c6 Mon Sep 17 00:00:00 2001 From: Nico Boehr <nrb(a)linux.ibm.com> Date: Fri, 27 Jan 2023 15:05:32 +0100 Subject: [PATCH] KVM: s390: disable migration mode when dirty tracking is disabled Migration mode is a VM attribute which enables tracking of changes in storage attributes (PGSTE). It assumes dirty tracking is enabled on all memslots to keep a dirty bitmap of pages with changed storage attributes. When enabling migration mode, we currently check that dirty tracking is enabled for all memslots. However, userspace can disable dirty tracking without disabling migration mode. Since migration mode is pointless with dirty tracking disabled, disable migration mode whenever userspace disables dirty tracking on any slot. Also update the documentation to clarify that dirty tracking must be enabled when enabling migration mode, which is already enforced by the code in kvm_s390_vm_start_migration(). Also highlight in the documentation for KVM_S390_GET_CMMA_BITS that it can now fail with -EINVAL when dirty tracking is disabled while migration mode is on. Move all the error codes to a table so this stays readable. To disable migration mode, slots_lock should be held, which is taken in kvm_set_memory_region() and thus held in kvm_arch_prepare_memory_region(). Restructure the prepare code a bit so all the sanity checking is done before disabling migration mode. This ensures migration mode isn't disabled when some sanity check fails. Cc: stable(a)vger.kernel.org Fixes: 190df4a212a7 ("KVM: s390: CMMA tracking, ESSA emulation, migration mode") Signed-off-by: Nico Boehr <nrb(a)linux.ibm.com> Reviewed-by: Janosch Frank <frankja(a)linux.ibm.com> Reviewed-by: Claudio Imbrenda <imbrenda(a)linux.ibm.com> Link: https://lore.kernel.org/r/20230127140532.230651-2-nrb@linux.ibm.com Message-Id: <20230127140532.230651-2-nrb(a)linux.ibm.com> [frankja(a)linux.ibm.com: fixed commit message typo, moved api.rst error table upwards] Signed-off-by: Janosch Frank <frankja(a)linux.ibm.com> diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index deb494f759ed..8cd7fd05d53b 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -4449,6 +4449,18 @@ not holding a previously reported uncorrected error). :Parameters: struct kvm_s390_cmma_log (in, out) :Returns: 0 on success, a negative value on error +Errors: + + ====== ============================================================= + ENOMEM not enough memory can be allocated to complete the task + ENXIO if CMMA is not enabled + EINVAL if KVM_S390_CMMA_PEEK is not set but migration mode was not enabled + EINVAL if KVM_S390_CMMA_PEEK is not set but dirty tracking has been + disabled (and thus migration mode was automatically disabled) + EFAULT if the userspace address is invalid or if no page table is + present for the addresses (e.g. when using hugepages). + ====== ============================================================= + This ioctl is used to get the values of the CMMA bits on the s390 architecture. It is meant to be used in two scenarios: @@ -4529,12 +4541,6 @@ mask is unused. values points to the userspace buffer where the result will be stored. -This ioctl can fail with -ENOMEM if not enough memory can be allocated to -complete the task, with -ENXIO if CMMA is not enabled, with -EINVAL if -KVM_S390_CMMA_PEEK is not set but migration mode was not enabled, with --EFAULT if the userspace address is invalid or if no page table is -present for the addresses (e.g. when using hugepages). - 4.108 KVM_S390_SET_CMMA_BITS ---------------------------- diff --git a/Documentation/virt/kvm/devices/vm.rst b/Documentation/virt/kvm/devices/vm.rst index 60acc39e0e93..147efec626e5 100644 --- a/Documentation/virt/kvm/devices/vm.rst +++ b/Documentation/virt/kvm/devices/vm.rst @@ -302,6 +302,10 @@ Allows userspace to start migration mode, needed for PGSTE migration. Setting this attribute when migration mode is already active will have no effects. +Dirty tracking must be enabled on all memslots, else -EINVAL is returned. When +dirty tracking is disabled on any memslot, migration mode is automatically +stopped. + :Parameters: none :Returns: -ENOMEM if there is not enough free memory to start migration mode; -EINVAL if the state of the VM is invalid (e.g. no memory defined); diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index e4890e04b210..cb72f9a09fb3 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -5633,23 +5633,40 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, if (kvm_s390_pv_get_handle(kvm)) return -EINVAL; - if (change == KVM_MR_DELETE || change == KVM_MR_FLAGS_ONLY) - return 0; + if (change != KVM_MR_DELETE && change != KVM_MR_FLAGS_ONLY) { + /* + * A few sanity checks. We can have memory slots which have to be + * located/ended at a segment boundary (1MB). The memory in userland is + * ok to be fragmented into various different vmas. It is okay to mmap() + * and munmap() stuff in this slot after doing this call at any time + */ - /* A few sanity checks. We can have memory slots which have to be - located/ended at a segment boundary (1MB). The memory in userland is - ok to be fragmented into various different vmas. It is okay to mmap() - and munmap() stuff in this slot after doing this call at any time */ + if (new->userspace_addr & 0xffffful) + return -EINVAL; - if (new->userspace_addr & 0xffffful) - return -EINVAL; + size = new->npages * PAGE_SIZE; + if (size & 0xffffful) + return -EINVAL; - size = new->npages * PAGE_SIZE; - if (size & 0xffffful) - return -EINVAL; + if ((new->base_gfn * PAGE_SIZE) + size > kvm->arch.mem_limit) + return -EINVAL; + } - if ((new->base_gfn * PAGE_SIZE) + size > kvm->arch.mem_limit) - return -EINVAL; + if (!kvm->arch.migration_mode) + return 0; + + /* + * Turn off migration mode when: + * - userspace creates a new memslot with dirty logging off, + * - userspace modifies an existing memslot (MOVE or FLAGS_ONLY) and + * dirty logging is turned off. + * Migration mode expects dirty page logging being enabled to store + * its dirty bitmap. + */ + if (change != KVM_MR_DELETE && + !(new->flags & KVM_MEM_LOG_DIRTY_PAGES)) + WARN(kvm_s390_vm_stop_migration(kvm), + "Failed to stop migration mode"); return 0; }

2 years, 8 months

3
2
0 0

2025

2024

2023

2022

2021

2020

2019

2018

2017

Linux-stable-mirror