July 2019 - Linux-stable-mirror

FAILED: patch "[PATCH] iommu/vt-d: Don't queue_iova() if there is no flush queue" failed to apply to 4.9-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 4.9-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. thanks, greg k-h ------------------ original commit in Linus's tree ------------------ >From effa467870c7612012885df4e246bdb8ffd8e44c Mon Sep 17 00:00:00 2001 From: Dmitry Safonov <dima(a)arista.com> Date: Tue, 16 Jul 2019 22:38:05 +0100 Subject: [PATCH] iommu/vt-d: Don't queue_iova() if there is no flush queue Intel VT-d driver was reworked to use common deferred flushing implementation. Previously there was one global per-cpu flush queue, afterwards - one per domain. Before deferring a flush, the queue should be allocated and initialized. Currently only domains with IOMMU_DOMAIN_DMA type initialize their flush queue. It's probably worth to init it for static or unmanaged domains too, but it may be arguable - I'm leaving it to iommu folks. Prevent queuing an iova flush if the domain doesn't have a queue. The defensive check seems to be worth to keep even if queue would be initialized for all kinds of domains. And is easy backportable. On 4.19.43 stable kernel it has a user-visible effect: previously for devices in si domain there were crashes, on sata devices: BUG: spinlock bad magic on CPU#6, swapper/0/1 lock: 0xffff88844f582008, .magic: 00000000, .owner: <none>/-1, .owner_cpu: 0 CPU: 6 PID: 1 Comm: swapper/0 Not tainted 4.19.43 #1 Call Trace: <IRQ> dump_stack+0x61/0x7e spin_bug+0x9d/0xa3 do_raw_spin_lock+0x22/0x8e _raw_spin_lock_irqsave+0x32/0x3a queue_iova+0x45/0x115 intel_unmap+0x107/0x113 intel_unmap_sg+0x6b/0x76 __ata_qc_complete+0x7f/0x103 ata_qc_complete+0x9b/0x26a ata_qc_complete_multiple+0xd0/0xe3 ahci_handle_port_interrupt+0x3ee/0x48a ahci_handle_port_intr+0x73/0xa9 ahci_single_level_irq_intr+0x40/0x60 __handle_irq_event_percpu+0x7f/0x19a handle_irq_event_percpu+0x32/0x72 handle_irq_event+0x38/0x56 handle_edge_irq+0x102/0x121 handle_irq+0x147/0x15c do_IRQ+0x66/0xf2 common_interrupt+0xf/0xf RIP: 0010:__do_softirq+0x8c/0x2df The same for usb devices that use ehci-pci: BUG: spinlock bad magic on CPU#0, swapper/0/1 lock: 0xffff88844f402008, .magic: 00000000, .owner: <none>/-1, .owner_cpu: 0 CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.19.43 #4 Call Trace: <IRQ> dump_stack+0x61/0x7e spin_bug+0x9d/0xa3 do_raw_spin_lock+0x22/0x8e _raw_spin_lock_irqsave+0x32/0x3a queue_iova+0x77/0x145 intel_unmap+0x107/0x113 intel_unmap_page+0xe/0x10 usb_hcd_unmap_urb_setup_for_dma+0x53/0x9d usb_hcd_unmap_urb_for_dma+0x17/0x100 unmap_urb_for_dma+0x22/0x24 __usb_hcd_giveback_urb+0x51/0xc3 usb_giveback_urb_bh+0x97/0xde tasklet_action_common.isra.4+0x5f/0xa1 tasklet_action+0x2d/0x30 __do_softirq+0x138/0x2df irq_exit+0x7d/0x8b smp_apic_timer_interrupt+0x10f/0x151 apic_timer_interrupt+0xf/0x20 </IRQ> RIP: 0010:_raw_spin_unlock_irqrestore+0x17/0x39 Cc: David Woodhouse <dwmw2(a)infradead.org> Cc: Joerg Roedel <joro(a)8bytes.org> Cc: Lu Baolu <baolu.lu(a)linux.intel.com> Cc: iommu(a)lists.linux-foundation.org Cc: <stable(a)vger.kernel.org> # 4.14+ Fixes: 13cf01744608 ("iommu/vt-d: Make use of iova deferred flushing") Signed-off-by: Dmitry Safonov <dima(a)arista.com> Reviewed-by: Lu Baolu <baolu.lu(a)linux.intel.com> Signed-off-by: Joerg Roedel <jroedel(a)suse.de> diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 9b1d62d03370..72c6d647bec9 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -3561,7 +3561,8 @@ static void intel_unmap(struct device *dev, dma_addr_t dev_addr, size_t size) freelist = domain_unmap(domain, start_pfn, last_pfn); - if (intel_iommu_strict || (pdev && pdev->untrusted)) { + if (intel_iommu_strict || (pdev && pdev->untrusted) || + !has_iova_flush_queue(&domain->iovad)) { iommu_flush_iotlb_psi(iommu, domain, start_pfn, nrpages, !freelist, 0); /* free iova */ diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index d499b2621239..8413ae54904a 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -54,9 +54,14 @@ init_iova_domain(struct iova_domain *iovad, unsigned long granule, } EXPORT_SYMBOL_GPL(init_iova_domain); +bool has_iova_flush_queue(struct iova_domain *iovad) +{ + return !!iovad->fq; +} + static void free_iova_flush_queue(struct iova_domain *iovad) { - if (!iovad->fq) + if (!has_iova_flush_queue(iovad)) return; if (timer_pending(&iovad->fq_timer)) @@ -74,13 +79,14 @@ static void free_iova_flush_queue(struct iova_domain *iovad) int init_iova_flush_queue(struct iova_domain *iovad, iova_flush_cb flush_cb, iova_entry_dtor entry_dtor) { + struct iova_fq __percpu *queue; int cpu; atomic64_set(&iovad->fq_flush_start_cnt, 0); atomic64_set(&iovad->fq_flush_finish_cnt, 0); - iovad->fq = alloc_percpu(struct iova_fq); - if (!iovad->fq) + queue = alloc_percpu(struct iova_fq); + if (!queue) return -ENOMEM; iovad->flush_cb = flush_cb; @@ -89,13 +95,17 @@ int init_iova_flush_queue(struct iova_domain *iovad, for_each_possible_cpu(cpu) { struct iova_fq *fq; - fq = per_cpu_ptr(iovad->fq, cpu); + fq = per_cpu_ptr(queue, cpu); fq->head = 0; fq->tail = 0; spin_lock_init(&fq->lock); } + smp_wmb(); + + iovad->fq = queue; + timer_setup(&iovad->fq_timer, fq_flush_timeout, 0); atomic_set(&iovad->fq_timer_on, 0); diff --git a/include/linux/iova.h b/include/linux/iova.h index 781b96ac706f..cd0f1de901a8 100644 --- a/include/linux/iova.h +++ b/include/linux/iova.h @@ -155,6 +155,7 @@ struct iova *reserve_iova(struct iova_domain *iovad, unsigned long pfn_lo, void copy_reserved_iova(struct iova_domain *from, struct iova_domain *to); void init_iova_domain(struct iova_domain *iovad, unsigned long granule, unsigned long start_pfn); +bool has_iova_flush_queue(struct iova_domain *iovad); int init_iova_flush_queue(struct iova_domain *iovad, iova_flush_cb flush_cb, iova_entry_dtor entry_dtor); struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn); @@ -235,6 +236,11 @@ static inline void init_iova_domain(struct iova_domain *iovad, { } +bool has_iova_flush_queue(struct iova_domain *iovad) +{ + return false; +} + static inline int init_iova_flush_queue(struct iova_domain *iovad, iova_flush_cb flush_cb, iova_entry_dtor entry_dtor)

5 years, 11 months

3
2
0 0

FAILED: patch "[PATCH] iommu/iova: Remove stale cached32_node" failed to apply to 4.19-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 4.19-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. thanks, greg k-h ------------------ original commit in Linus's tree ------------------ >From 9eed17d37c77171cf5ffb95c4257f87df3cd4c8f Mon Sep 17 00:00:00 2001 From: Chris Wilson <chris(a)chris-wilson.co.uk> Date: Sat, 20 Jul 2019 19:08:48 +0100 Subject: [PATCH] iommu/iova: Remove stale cached32_node Since the cached32_node is allowed to be advanced above dma_32bit_pfn (to provide a shortcut into the limited range), we need to be careful to remove the to be freed node if it is the cached32_node. [ 48.477773] BUG: KASAN: use-after-free in __cached_rbnode_delete_update+0x68/0x110 [ 48.477812] Read of size 8 at addr ffff88870fc19020 by task kworker/u8:1/37 [ 48.477843] [ 48.477879] CPU: 1 PID: 37 Comm: kworker/u8:1 Tainted: G U 5.2.0+ #735 [ 48.477915] Hardware name: Intel Corporation NUC7i5BNK/NUC7i5BNB, BIOS BNKBL357.86A.0052.2017.0918.1346 09/18/2017 [ 48.478047] Workqueue: i915 __i915_gem_free_work [i915] [ 48.478075] Call Trace: [ 48.478111] dump_stack+0x5b/0x90 [ 48.478137] print_address_description+0x67/0x237 [ 48.478178] ? __cached_rbnode_delete_update+0x68/0x110 [ 48.478212] __kasan_report.cold.3+0x1c/0x38 [ 48.478240] ? __cached_rbnode_delete_update+0x68/0x110 [ 48.478280] ? __cached_rbnode_delete_update+0x68/0x110 [ 48.478308] __cached_rbnode_delete_update+0x68/0x110 [ 48.478344] private_free_iova+0x2b/0x60 [ 48.478378] iova_magazine_free_pfns+0x46/0xa0 [ 48.478403] free_iova_fast+0x277/0x340 [ 48.478443] fq_ring_free+0x15a/0x1a0 [ 48.478473] queue_iova+0x19c/0x1f0 [ 48.478597] cleanup_page_dma.isra.64+0x62/0xb0 [i915] [ 48.478712] __gen8_ppgtt_cleanup+0x63/0x80 [i915] [ 48.478826] __gen8_ppgtt_cleanup+0x42/0x80 [i915] [ 48.478940] __gen8_ppgtt_clear+0x433/0x4b0 [i915] [ 48.479053] __gen8_ppgtt_clear+0x462/0x4b0 [i915] [ 48.479081] ? __sg_free_table+0x9e/0xf0 [ 48.479116] ? kfree+0x7f/0x150 [ 48.479234] i915_vma_unbind+0x1e2/0x240 [i915] [ 48.479352] i915_vma_destroy+0x3a/0x280 [i915] [ 48.479465] __i915_gem_free_objects+0xf0/0x2d0 [i915] [ 48.479579] __i915_gem_free_work+0x41/0xa0 [i915] [ 48.479607] process_one_work+0x495/0x710 [ 48.479642] worker_thread+0x4c7/0x6f0 [ 48.479687] ? process_one_work+0x710/0x710 [ 48.479724] kthread+0x1b2/0x1d0 [ 48.479774] ? kthread_create_worker_on_cpu+0xa0/0xa0 [ 48.479820] ret_from_fork+0x1f/0x30 [ 48.479864] [ 48.479907] Allocated by task 631: [ 48.479944] save_stack+0x19/0x80 [ 48.479994] __kasan_kmalloc.constprop.6+0xc1/0xd0 [ 48.480038] kmem_cache_alloc+0x91/0xf0 [ 48.480082] alloc_iova+0x2b/0x1e0 [ 48.480125] alloc_iova_fast+0x58/0x376 [ 48.480166] intel_alloc_iova+0x90/0xc0 [ 48.480214] intel_map_sg+0xde/0x1f0 [ 48.480343] i915_gem_gtt_prepare_pages+0xb8/0x170 [i915] [ 48.480465] huge_get_pages+0x232/0x2b0 [i915] [ 48.480590] ____i915_gem_object_get_pages+0x40/0xb0 [i915] [ 48.480712] __i915_gem_object_get_pages+0x90/0xa0 [i915] [ 48.480834] i915_gem_object_prepare_write+0x2d6/0x330 [i915] [ 48.480955] create_test_object.isra.54+0x1a9/0x3e0 [i915] [ 48.481075] igt_shared_ctx_exec+0x365/0x3c0 [i915] [ 48.481210] __i915_subtests.cold.4+0x30/0x92 [i915] [ 48.481341] __run_selftests.cold.3+0xa9/0x119 [i915] [ 48.481466] i915_live_selftests+0x3c/0x70 [i915] [ 48.481583] i915_pci_probe+0xe7/0x220 [i915] [ 48.481620] pci_device_probe+0xe0/0x180 [ 48.481665] really_probe+0x163/0x4e0 [ 48.481710] device_driver_attach+0x85/0x90 [ 48.481750] __driver_attach+0xa5/0x180 [ 48.481796] bus_for_each_dev+0xda/0x130 [ 48.481831] bus_add_driver+0x205/0x2e0 [ 48.481882] driver_register+0xca/0x140 [ 48.481927] do_one_initcall+0x6c/0x1af [ 48.481970] do_init_module+0x106/0x350 [ 48.482010] load_module+0x3d2c/0x3ea0 [ 48.482058] __do_sys_finit_module+0x110/0x180 [ 48.482102] do_syscall_64+0x62/0x1f0 [ 48.482147] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 48.482190] [ 48.482224] Freed by task 37: [ 48.482273] save_stack+0x19/0x80 [ 48.482318] __kasan_slab_free+0x12e/0x180 [ 48.482363] kmem_cache_free+0x70/0x140 [ 48.482406] __free_iova+0x1d/0x30 [ 48.482445] fq_ring_free+0x15a/0x1a0 [ 48.482490] queue_iova+0x19c/0x1f0 [ 48.482624] cleanup_page_dma.isra.64+0x62/0xb0 [i915] [ 48.482749] __gen8_ppgtt_cleanup+0x63/0x80 [i915] [ 48.482873] __gen8_ppgtt_cleanup+0x42/0x80 [i915] [ 48.482999] __gen8_ppgtt_clear+0x433/0x4b0 [i915] [ 48.483123] __gen8_ppgtt_clear+0x462/0x4b0 [i915] [ 48.483250] i915_vma_unbind+0x1e2/0x240 [i915] [ 48.483378] i915_vma_destroy+0x3a/0x280 [i915] [ 48.483500] __i915_gem_free_objects+0xf0/0x2d0 [i915] [ 48.483622] __i915_gem_free_work+0x41/0xa0 [i915] [ 48.483659] process_one_work+0x495/0x710 [ 48.483704] worker_thread+0x4c7/0x6f0 [ 48.483748] kthread+0x1b2/0x1d0 [ 48.483787] ret_from_fork+0x1f/0x30 [ 48.483831] [ 48.483868] The buggy address belongs to the object at ffff88870fc19000 [ 48.483868] which belongs to the cache iommu_iova of size 40 [ 48.483920] The buggy address is located 32 bytes inside of [ 48.483920] 40-byte region [ffff88870fc19000, ffff88870fc19028) [ 48.483964] The buggy address belongs to the page: [ 48.484006] page:ffffea001c3f0600 refcount:1 mapcount:0 mapping:ffff8888181a91c0 index:0x0 compound_mapcount: 0 [ 48.484045] flags: 0x8000000000010200(slab|head) [ 48.484096] raw: 8000000000010200 ffffea001c421a08 ffffea001c447e88 ffff8888181a91c0 [ 48.484141] raw: 0000000000000000 0000000000120012 00000001ffffffff 0000000000000000 [ 48.484188] page dumped because: kasan: bad access detected [ 48.484230] [ 48.484265] Memory state around the buggy address: [ 48.484314] ffff88870fc18f00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 48.484361] ffff88870fc18f80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 48.484406] >ffff88870fc19000: fb fb fb fb fb fc fc fc fc fc fc fc fc fc fc fc [ 48.484451] ^ [ 48.484494] ffff88870fc19080: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 48.484530] ffff88870fc19100: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=108602 Fixes: e60aa7b53845 ("iommu/iova: Extend rbtree node caching") Signed-off-by: Chris Wilson <chris(a)chris-wilson.co.uk> Cc: Robin Murphy <robin.murphy(a)arm.com> Cc: Joerg Roedel <jroedel(a)suse.de> Cc: Joerg Roedel <joro(a)8bytes.org> Cc: <stable(a)vger.kernel.org> # v4.15+ Reviewed-by: Robin Murphy <robin.murphy(a)arm.com> Signed-off-by: Joerg Roedel <jroedel(a)suse.de> diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index 8413ae54904a..3e1a8a675572 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -137,8 +137,9 @@ __cached_rbnode_delete_update(struct iova_domain *iovad, struct iova *free) struct iova *cached_iova; cached_iova = rb_entry(iovad->cached32_node, struct iova, node); - if (free->pfn_hi < iovad->dma_32bit_pfn && - free->pfn_lo >= cached_iova->pfn_lo) { + if (free == cached_iova || + (free->pfn_hi < iovad->dma_32bit_pfn && + free->pfn_lo >= cached_iova->pfn_lo)) { iovad->cached32_node = rb_next(&free->node); iovad->max32_alloc_size = iovad->dma_32bit_pfn; }

5 years, 11 months

1
0
0 0

FAILED: patch "[PATCH] iommu/vt-d: Don't queue_iova() if there is no flush queue" failed to apply to 4.14-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 4.14-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. thanks, greg k-h ------------------ original commit in Linus's tree ------------------ >From effa467870c7612012885df4e246bdb8ffd8e44c Mon Sep 17 00:00:00 2001 From: Dmitry Safonov <dima(a)arista.com> Date: Tue, 16 Jul 2019 22:38:05 +0100 Subject: [PATCH] iommu/vt-d: Don't queue_iova() if there is no flush queue Intel VT-d driver was reworked to use common deferred flushing implementation. Previously there was one global per-cpu flush queue, afterwards - one per domain. Before deferring a flush, the queue should be allocated and initialized. Currently only domains with IOMMU_DOMAIN_DMA type initialize their flush queue. It's probably worth to init it for static or unmanaged domains too, but it may be arguable - I'm leaving it to iommu folks. Prevent queuing an iova flush if the domain doesn't have a queue. The defensive check seems to be worth to keep even if queue would be initialized for all kinds of domains. And is easy backportable. On 4.19.43 stable kernel it has a user-visible effect: previously for devices in si domain there were crashes, on sata devices: BUG: spinlock bad magic on CPU#6, swapper/0/1 lock: 0xffff88844f582008, .magic: 00000000, .owner: <none>/-1, .owner_cpu: 0 CPU: 6 PID: 1 Comm: swapper/0 Not tainted 4.19.43 #1 Call Trace: <IRQ> dump_stack+0x61/0x7e spin_bug+0x9d/0xa3 do_raw_spin_lock+0x22/0x8e _raw_spin_lock_irqsave+0x32/0x3a queue_iova+0x45/0x115 intel_unmap+0x107/0x113 intel_unmap_sg+0x6b/0x76 __ata_qc_complete+0x7f/0x103 ata_qc_complete+0x9b/0x26a ata_qc_complete_multiple+0xd0/0xe3 ahci_handle_port_interrupt+0x3ee/0x48a ahci_handle_port_intr+0x73/0xa9 ahci_single_level_irq_intr+0x40/0x60 __handle_irq_event_percpu+0x7f/0x19a handle_irq_event_percpu+0x32/0x72 handle_irq_event+0x38/0x56 handle_edge_irq+0x102/0x121 handle_irq+0x147/0x15c do_IRQ+0x66/0xf2 common_interrupt+0xf/0xf RIP: 0010:__do_softirq+0x8c/0x2df The same for usb devices that use ehci-pci: BUG: spinlock bad magic on CPU#0, swapper/0/1 lock: 0xffff88844f402008, .magic: 00000000, .owner: <none>/-1, .owner_cpu: 0 CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.19.43 #4 Call Trace: <IRQ> dump_stack+0x61/0x7e spin_bug+0x9d/0xa3 do_raw_spin_lock+0x22/0x8e _raw_spin_lock_irqsave+0x32/0x3a queue_iova+0x77/0x145 intel_unmap+0x107/0x113 intel_unmap_page+0xe/0x10 usb_hcd_unmap_urb_setup_for_dma+0x53/0x9d usb_hcd_unmap_urb_for_dma+0x17/0x100 unmap_urb_for_dma+0x22/0x24 __usb_hcd_giveback_urb+0x51/0xc3 usb_giveback_urb_bh+0x97/0xde tasklet_action_common.isra.4+0x5f/0xa1 tasklet_action+0x2d/0x30 __do_softirq+0x138/0x2df irq_exit+0x7d/0x8b smp_apic_timer_interrupt+0x10f/0x151 apic_timer_interrupt+0xf/0x20 </IRQ> RIP: 0010:_raw_spin_unlock_irqrestore+0x17/0x39 Cc: David Woodhouse <dwmw2(a)infradead.org> Cc: Joerg Roedel <joro(a)8bytes.org> Cc: Lu Baolu <baolu.lu(a)linux.intel.com> Cc: iommu(a)lists.linux-foundation.org Cc: <stable(a)vger.kernel.org> # 4.14+ Fixes: 13cf01744608 ("iommu/vt-d: Make use of iova deferred flushing") Signed-off-by: Dmitry Safonov <dima(a)arista.com> Reviewed-by: Lu Baolu <baolu.lu(a)linux.intel.com> Signed-off-by: Joerg Roedel <jroedel(a)suse.de> diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 9b1d62d03370..72c6d647bec9 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -3561,7 +3561,8 @@ static void intel_unmap(struct device *dev, dma_addr_t dev_addr, size_t size) freelist = domain_unmap(domain, start_pfn, last_pfn); - if (intel_iommu_strict || (pdev && pdev->untrusted)) { + if (intel_iommu_strict || (pdev && pdev->untrusted) || + !has_iova_flush_queue(&domain->iovad)) { iommu_flush_iotlb_psi(iommu, domain, start_pfn, nrpages, !freelist, 0); /* free iova */ diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index d499b2621239..8413ae54904a 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -54,9 +54,14 @@ init_iova_domain(struct iova_domain *iovad, unsigned long granule, } EXPORT_SYMBOL_GPL(init_iova_domain); +bool has_iova_flush_queue(struct iova_domain *iovad) +{ + return !!iovad->fq; +} + static void free_iova_flush_queue(struct iova_domain *iovad) { - if (!iovad->fq) + if (!has_iova_flush_queue(iovad)) return; if (timer_pending(&iovad->fq_timer)) @@ -74,13 +79,14 @@ static void free_iova_flush_queue(struct iova_domain *iovad) int init_iova_flush_queue(struct iova_domain *iovad, iova_flush_cb flush_cb, iova_entry_dtor entry_dtor) { + struct iova_fq __percpu *queue; int cpu; atomic64_set(&iovad->fq_flush_start_cnt, 0); atomic64_set(&iovad->fq_flush_finish_cnt, 0); - iovad->fq = alloc_percpu(struct iova_fq); - if (!iovad->fq) + queue = alloc_percpu(struct iova_fq); + if (!queue) return -ENOMEM; iovad->flush_cb = flush_cb; @@ -89,13 +95,17 @@ int init_iova_flush_queue(struct iova_domain *iovad, for_each_possible_cpu(cpu) { struct iova_fq *fq; - fq = per_cpu_ptr(iovad->fq, cpu); + fq = per_cpu_ptr(queue, cpu); fq->head = 0; fq->tail = 0; spin_lock_init(&fq->lock); } + smp_wmb(); + + iovad->fq = queue; + timer_setup(&iovad->fq_timer, fq_flush_timeout, 0); atomic_set(&iovad->fq_timer_on, 0); diff --git a/include/linux/iova.h b/include/linux/iova.h index 781b96ac706f..cd0f1de901a8 100644 --- a/include/linux/iova.h +++ b/include/linux/iova.h @@ -155,6 +155,7 @@ struct iova *reserve_iova(struct iova_domain *iovad, unsigned long pfn_lo, void copy_reserved_iova(struct iova_domain *from, struct iova_domain *to); void init_iova_domain(struct iova_domain *iovad, unsigned long granule, unsigned long start_pfn); +bool has_iova_flush_queue(struct iova_domain *iovad); int init_iova_flush_queue(struct iova_domain *iovad, iova_flush_cb flush_cb, iova_entry_dtor entry_dtor); struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn); @@ -235,6 +236,11 @@ static inline void init_iova_domain(struct iova_domain *iovad, { } +bool has_iova_flush_queue(struct iova_domain *iovad) +{ + return false; +} + static inline int init_iova_flush_queue(struct iova_domain *iovad, iova_flush_cb flush_cb, iova_entry_dtor entry_dtor)

5 years, 11 months

1
0
0 0

FAILED: patch "[PATCH] iommu/vt-d: Don't queue_iova() if there is no flush queue" failed to apply to 4.19-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 4.19-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. thanks, greg k-h ------------------ original commit in Linus's tree ------------------ >From effa467870c7612012885df4e246bdb8ffd8e44c Mon Sep 17 00:00:00 2001 From: Dmitry Safonov <dima(a)arista.com> Date: Tue, 16 Jul 2019 22:38:05 +0100 Subject: [PATCH] iommu/vt-d: Don't queue_iova() if there is no flush queue Intel VT-d driver was reworked to use common deferred flushing implementation. Previously there was one global per-cpu flush queue, afterwards - one per domain. Before deferring a flush, the queue should be allocated and initialized. Currently only domains with IOMMU_DOMAIN_DMA type initialize their flush queue. It's probably worth to init it for static or unmanaged domains too, but it may be arguable - I'm leaving it to iommu folks. Prevent queuing an iova flush if the domain doesn't have a queue. The defensive check seems to be worth to keep even if queue would be initialized for all kinds of domains. And is easy backportable. On 4.19.43 stable kernel it has a user-visible effect: previously for devices in si domain there were crashes, on sata devices: BUG: spinlock bad magic on CPU#6, swapper/0/1 lock: 0xffff88844f582008, .magic: 00000000, .owner: <none>/-1, .owner_cpu: 0 CPU: 6 PID: 1 Comm: swapper/0 Not tainted 4.19.43 #1 Call Trace: <IRQ> dump_stack+0x61/0x7e spin_bug+0x9d/0xa3 do_raw_spin_lock+0x22/0x8e _raw_spin_lock_irqsave+0x32/0x3a queue_iova+0x45/0x115 intel_unmap+0x107/0x113 intel_unmap_sg+0x6b/0x76 __ata_qc_complete+0x7f/0x103 ata_qc_complete+0x9b/0x26a ata_qc_complete_multiple+0xd0/0xe3 ahci_handle_port_interrupt+0x3ee/0x48a ahci_handle_port_intr+0x73/0xa9 ahci_single_level_irq_intr+0x40/0x60 __handle_irq_event_percpu+0x7f/0x19a handle_irq_event_percpu+0x32/0x72 handle_irq_event+0x38/0x56 handle_edge_irq+0x102/0x121 handle_irq+0x147/0x15c do_IRQ+0x66/0xf2 common_interrupt+0xf/0xf RIP: 0010:__do_softirq+0x8c/0x2df The same for usb devices that use ehci-pci: BUG: spinlock bad magic on CPU#0, swapper/0/1 lock: 0xffff88844f402008, .magic: 00000000, .owner: <none>/-1, .owner_cpu: 0 CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.19.43 #4 Call Trace: <IRQ> dump_stack+0x61/0x7e spin_bug+0x9d/0xa3 do_raw_spin_lock+0x22/0x8e _raw_spin_lock_irqsave+0x32/0x3a queue_iova+0x77/0x145 intel_unmap+0x107/0x113 intel_unmap_page+0xe/0x10 usb_hcd_unmap_urb_setup_for_dma+0x53/0x9d usb_hcd_unmap_urb_for_dma+0x17/0x100 unmap_urb_for_dma+0x22/0x24 __usb_hcd_giveback_urb+0x51/0xc3 usb_giveback_urb_bh+0x97/0xde tasklet_action_common.isra.4+0x5f/0xa1 tasklet_action+0x2d/0x30 __do_softirq+0x138/0x2df irq_exit+0x7d/0x8b smp_apic_timer_interrupt+0x10f/0x151 apic_timer_interrupt+0xf/0x20 </IRQ> RIP: 0010:_raw_spin_unlock_irqrestore+0x17/0x39 Cc: David Woodhouse <dwmw2(a)infradead.org> Cc: Joerg Roedel <joro(a)8bytes.org> Cc: Lu Baolu <baolu.lu(a)linux.intel.com> Cc: iommu(a)lists.linux-foundation.org Cc: <stable(a)vger.kernel.org> # 4.14+ Fixes: 13cf01744608 ("iommu/vt-d: Make use of iova deferred flushing") Signed-off-by: Dmitry Safonov <dima(a)arista.com> Reviewed-by: Lu Baolu <baolu.lu(a)linux.intel.com> Signed-off-by: Joerg Roedel <jroedel(a)suse.de> diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 9b1d62d03370..72c6d647bec9 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -3561,7 +3561,8 @@ static void intel_unmap(struct device *dev, dma_addr_t dev_addr, size_t size) freelist = domain_unmap(domain, start_pfn, last_pfn); - if (intel_iommu_strict || (pdev && pdev->untrusted)) { + if (intel_iommu_strict || (pdev && pdev->untrusted) || + !has_iova_flush_queue(&domain->iovad)) { iommu_flush_iotlb_psi(iommu, domain, start_pfn, nrpages, !freelist, 0); /* free iova */ diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index d499b2621239..8413ae54904a 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -54,9 +54,14 @@ init_iova_domain(struct iova_domain *iovad, unsigned long granule, } EXPORT_SYMBOL_GPL(init_iova_domain); +bool has_iova_flush_queue(struct iova_domain *iovad) +{ + return !!iovad->fq; +} + static void free_iova_flush_queue(struct iova_domain *iovad) { - if (!iovad->fq) + if (!has_iova_flush_queue(iovad)) return; if (timer_pending(&iovad->fq_timer)) @@ -74,13 +79,14 @@ static void free_iova_flush_queue(struct iova_domain *iovad) int init_iova_flush_queue(struct iova_domain *iovad, iova_flush_cb flush_cb, iova_entry_dtor entry_dtor) { + struct iova_fq __percpu *queue; int cpu; atomic64_set(&iovad->fq_flush_start_cnt, 0); atomic64_set(&iovad->fq_flush_finish_cnt, 0); - iovad->fq = alloc_percpu(struct iova_fq); - if (!iovad->fq) + queue = alloc_percpu(struct iova_fq); + if (!queue) return -ENOMEM; iovad->flush_cb = flush_cb; @@ -89,13 +95,17 @@ int init_iova_flush_queue(struct iova_domain *iovad, for_each_possible_cpu(cpu) { struct iova_fq *fq; - fq = per_cpu_ptr(iovad->fq, cpu); + fq = per_cpu_ptr(queue, cpu); fq->head = 0; fq->tail = 0; spin_lock_init(&fq->lock); } + smp_wmb(); + + iovad->fq = queue; + timer_setup(&iovad->fq_timer, fq_flush_timeout, 0); atomic_set(&iovad->fq_timer_on, 0); diff --git a/include/linux/iova.h b/include/linux/iova.h index 781b96ac706f..cd0f1de901a8 100644 --- a/include/linux/iova.h +++ b/include/linux/iova.h @@ -155,6 +155,7 @@ struct iova *reserve_iova(struct iova_domain *iovad, unsigned long pfn_lo, void copy_reserved_iova(struct iova_domain *from, struct iova_domain *to); void init_iova_domain(struct iova_domain *iovad, unsigned long granule, unsigned long start_pfn); +bool has_iova_flush_queue(struct iova_domain *iovad); int init_iova_flush_queue(struct iova_domain *iovad, iova_flush_cb flush_cb, iova_entry_dtor entry_dtor); struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn); @@ -235,6 +236,11 @@ static inline void init_iova_domain(struct iova_domain *iovad, { } +bool has_iova_flush_queue(struct iova_domain *iovad) +{ + return false; +} + static inline int init_iova_flush_queue(struct iova_domain *iovad, iova_flush_cb flush_cb, iova_entry_dtor entry_dtor)

5 years, 11 months

1
0
0 0

FAILED: patch "[PATCH] powerpc/mm: Limit rma_size to 1TB when running without HV" failed to apply to 4.19-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 4.19-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. thanks, greg k-h ------------------ original commit in Linus's tree ------------------ >From da0ef93310e67ae6902efded60b6724dab27a5d1 Mon Sep 17 00:00:00 2001 From: Suraj Jitindar Singh <sjitindarsingh(a)gmail.com> Date: Wed, 10 Jul 2019 15:20:18 +1000 Subject: [PATCH] powerpc/mm: Limit rma_size to 1TB when running without HV mode The virtual real mode addressing (VRMA) mechanism is used when a partition is using HPT (Hash Page Table) translation and performs real mode accesses (MSR[IR|DR] = 0) in non-hypervisor mode. In this mode effective address bits 0:23 are treated as zero (i.e. the access is aliased to 0) and the access is performed using an implicit 1TB SLB entry. The size of the RMA (Real Memory Area) is communicated to the guest as the size of the first memory region in the device tree. And because of the mechanism described above can be expected to not exceed 1TB. In the event that the host erroneously represents the RMA as being larger than 1TB, guest accesses in real mode to memory addresses above 1TB will be aliased down to below 1TB. This means that a memory access performed in real mode may differ to one performed in virtual mode for the same memory address, which would likely have unintended consequences. To avoid this outcome have the guest explicitly limit the size of the RMA to the current maximum, which is 1TB. This means that even if the first memory block is larger than 1TB, only the first 1TB should be accessed in real mode. Fixes: c610d65c0ad0 ("powerpc/pseries: lift RTAS limit for hash") Cc: stable(a)vger.kernel.org # v4.16+ Signed-off-by: Suraj Jitindar Singh <sjitindarsingh(a)gmail.com> Tested-by: Satheesh Rajendran <sathnaga(a)linux.vnet.ibm.com> Reviewed-by: David Gibson <david(a)gibson.dropbear.id.au> Signed-off-by: Michael Ellerman <mpe(a)ellerman.id.au> Link: https://lore.kernel.org/r/20190710052018.14628-1-sjitindarsingh@gmail.com diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c index 9a5963e07a82..b8ad14bb1170 100644 --- a/arch/powerpc/mm/book3s64/hash_utils.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -1899,11 +1899,20 @@ void hash__setup_initial_memory_limit(phys_addr_t first_memblock_base, * * For guests on platforms before POWER9, we clamp the it limit to 1G * to avoid some funky things such as RTAS bugs etc... + * + * On POWER9 we limit to 1TB in case the host erroneously told us that + * the RMA was >1TB. Effective address bits 0:23 are treated as zero + * (meaning the access is aliased to zero i.e. addr = addr % 1TB) + * for virtual real mode addressing and so it doesn't make sense to + * have an area larger than 1TB as it can't be addressed. */ if (!early_cpu_has_feature(CPU_FTR_HVMODE)) { ppc64_rma_size = first_memblock_size; if (!early_cpu_has_feature(CPU_FTR_ARCH_300)) ppc64_rma_size = min_t(u64, ppc64_rma_size, 0x40000000); + else + ppc64_rma_size = min_t(u64, ppc64_rma_size, + 1UL << SID_SHIFT_1T); /* Finally limit subsequent allocations */ memblock_set_current_limit(ppc64_rma_size);

5 years, 11 months

1
0
0 0

FAILED: patch "[PATCH] btrfs: inode: Don't compress if NODATASUM or NODATACOW set" failed to apply to 4.9-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 4.9-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. thanks, greg k-h ------------------ original commit in Linus's tree ------------------ >From 42c16da6d684391db83788eb680accd84f6c2083 Mon Sep 17 00:00:00 2001 From: Qu Wenruo <wqu(a)suse.com> Date: Mon, 1 Jul 2019 05:12:46 +0000 Subject: [PATCH] btrfs: inode: Don't compress if NODATASUM or NODATACOW set As btrfs(5) specified: Note If nodatacow or nodatasum are enabled, compression is disabled. If NODATASUM or NODATACOW set, we should not compress the extent. Normally NODATACOW is detected properly in run_delalloc_range() so compression won't happen for NODATACOW. However for NODATASUM we don't have any check, and it can cause compressed extent without csum pretty easily, just by: mkfs.btrfs -f $dev mount $dev $mnt -o nodatasum touch $mnt/foobar mount -o remount,datasum,compress $mnt xfs_io -f -c "pwrite 0 128K" $mnt/foobar And in fact, we have a bug report about corrupted compressed extent without proper data checksum so even RAID1 can't recover the corruption. (https://bugzilla.kernel.org/show_bug.cgi?id=199707) Running compression without proper checksum could cause more damage when corruption happens, as compressed data could make the whole extent unreadable, so there is no need to allow compression for NODATACSUM. The fix will refactor the inode compression check into two parts: - inode_can_compress() As the hard requirement, checked at btrfs_run_delalloc_range(), so no compression will happen for NODATASUM inode at all. - inode_need_compress() As the soft requirement, checked at btrfs_run_delalloc_range() and compress_file_range(). Reported-by: James Harvey <jamespharvey20(a)gmail.com> CC: stable(a)vger.kernel.org # 4.4+ Signed-off-by: Qu Wenruo <wqu(a)suse.com> Reviewed-by: David Sterba <dsterba(a)suse.com> Signed-off-by: David Sterba <dsterba(a)suse.com> diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 1af069a9a0c7..ee582a36653d 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -395,10 +395,31 @@ static noinline int add_async_extent(struct async_chunk *cow, return 0; } +/* + * Check if the inode has flags compatible with compression + */ +static inline bool inode_can_compress(struct inode *inode) +{ + if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW || + BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM) + return false; + return true; +} + +/* + * Check if the inode needs to be submitted to compression, based on mount + * options, defragmentation, properties or heuristics. + */ static inline int inode_need_compress(struct inode *inode, u64 start, u64 end) { struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); + if (!inode_can_compress(inode)) { + WARN(IS_ENABLED(CONFIG_BTRFS_DEBUG), + KERN_ERR "BTRFS: unexpected compression for ino %llu\n", + btrfs_ino(BTRFS_I(inode))); + return 0; + } /* force compress */ if (btrfs_test_opt(fs_info, FORCE_COMPRESS)) return 1; @@ -1631,7 +1652,8 @@ int btrfs_run_delalloc_range(struct inode *inode, struct page *locked_page, } else if (BTRFS_I(inode)->flags & BTRFS_INODE_PREALLOC && !force_cow) { ret = run_delalloc_nocow(inode, locked_page, start, end, page_started, 0, nr_written); - } else if (!inode_need_compress(inode, start, end)) { + } else if (!inode_can_compress(inode) || + !inode_need_compress(inode, start, end)) { ret = cow_file_range(inode, locked_page, start, end, end, page_started, nr_written, 1, NULL); } else {

5 years, 11 months

1
0
0 0

FAILED: patch "[PATCH] btrfs: inode: Don't compress if NODATASUM or NODATACOW set" failed to apply to 4.4-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 4.4-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. thanks, greg k-h ------------------ original commit in Linus's tree ------------------ >From 42c16da6d684391db83788eb680accd84f6c2083 Mon Sep 17 00:00:00 2001 From: Qu Wenruo <wqu(a)suse.com> Date: Mon, 1 Jul 2019 05:12:46 +0000 Subject: [PATCH] btrfs: inode: Don't compress if NODATASUM or NODATACOW set As btrfs(5) specified: Note If nodatacow or nodatasum are enabled, compression is disabled. If NODATASUM or NODATACOW set, we should not compress the extent. Normally NODATACOW is detected properly in run_delalloc_range() so compression won't happen for NODATACOW. However for NODATASUM we don't have any check, and it can cause compressed extent without csum pretty easily, just by: mkfs.btrfs -f $dev mount $dev $mnt -o nodatasum touch $mnt/foobar mount -o remount,datasum,compress $mnt xfs_io -f -c "pwrite 0 128K" $mnt/foobar And in fact, we have a bug report about corrupted compressed extent without proper data checksum so even RAID1 can't recover the corruption. (https://bugzilla.kernel.org/show_bug.cgi?id=199707) Running compression without proper checksum could cause more damage when corruption happens, as compressed data could make the whole extent unreadable, so there is no need to allow compression for NODATACSUM. The fix will refactor the inode compression check into two parts: - inode_can_compress() As the hard requirement, checked at btrfs_run_delalloc_range(), so no compression will happen for NODATASUM inode at all. - inode_need_compress() As the soft requirement, checked at btrfs_run_delalloc_range() and compress_file_range(). Reported-by: James Harvey <jamespharvey20(a)gmail.com> CC: stable(a)vger.kernel.org # 4.4+ Signed-off-by: Qu Wenruo <wqu(a)suse.com> Reviewed-by: David Sterba <dsterba(a)suse.com> Signed-off-by: David Sterba <dsterba(a)suse.com> diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 1af069a9a0c7..ee582a36653d 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -395,10 +395,31 @@ static noinline int add_async_extent(struct async_chunk *cow, return 0; } +/* + * Check if the inode has flags compatible with compression + */ +static inline bool inode_can_compress(struct inode *inode) +{ + if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW || + BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM) + return false; + return true; +} + +/* + * Check if the inode needs to be submitted to compression, based on mount + * options, defragmentation, properties or heuristics. + */ static inline int inode_need_compress(struct inode *inode, u64 start, u64 end) { struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); + if (!inode_can_compress(inode)) { + WARN(IS_ENABLED(CONFIG_BTRFS_DEBUG), + KERN_ERR "BTRFS: unexpected compression for ino %llu\n", + btrfs_ino(BTRFS_I(inode))); + return 0; + } /* force compress */ if (btrfs_test_opt(fs_info, FORCE_COMPRESS)) return 1; @@ -1631,7 +1652,8 @@ int btrfs_run_delalloc_range(struct inode *inode, struct page *locked_page, } else if (BTRFS_I(inode)->flags & BTRFS_INODE_PREALLOC && !force_cow) { ret = run_delalloc_nocow(inode, locked_page, start, end, page_started, 0, nr_written); - } else if (!inode_need_compress(inode, start, end)) { + } else if (!inode_can_compress(inode) || + !inode_need_compress(inode, start, end)) { ret = cow_file_range(inode, locked_page, start, end, end, page_started, nr_written, 1, NULL); } else {

5 years, 11 months

1
0
0 0

[PATCH stable-4.19 0/2] KVM: nVMX: guest reset fixes

by Vitaly Kuznetsov

Few patches were recently marked for stable@ but commits are not backportable as-is and require a few tweaks. Here is 4.19 stable backport. Jan Kiszka (1): KVM: nVMX: Clear pending KVM_REQ_GET_VMCS12_PAGES when leaving nested Paolo Bonzini (1): KVM: nVMX: do not use dangling shadow VMCS after guest reset arch/x86/kvm/vmx.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) -- 2.20.1

5 years, 11 months

5
9
0 0

[PATCH] dax: Fix missed PMD wakeups

by Dan Williams

Ever since the conversion of DAX to the Xarray a RocksDB benchmark has been encountering intermittent lockups. In the failing case a thread that is taking a PMD-fault is awaiting a wakeup while holding the 'mmap_sem' for read. As soon as the next mmap() event occurs that tries to take the 'mmap_sem' for write it causes ps(1) and any new 'mmap_sem' reader to block. Debug shows that there are no outstanding Xarray entry-lock holders in the hang state which indicates that a PTE lock-holder thread caused a PMD thread to wait. When the PTE index-lock is released it may wake the wrong waitqueue depending on how the index hashes. Brute-force fix this by arranging for PTE-aligned indices within a PMD-span to hash to the same waitqueue as the PMD-index. This fix may increase waitqueue contention, but a fix for that is saved for a larger rework. In the meantime this fix is suitable for -stable backports. Link: https://lore.kernel.org/linux-fsdevel/CAPcyv4hwHpX-MkUEqxwdTj7wCCZCN4RV-L4j…> Fixes: b15cd800682f ("dax: Convert page fault handlers to XArray") Cc: Matthew Wilcox <willy(a)infradead.org> Cc: Jan Kara <jack(a)suse.cz> Cc: Boaz Harrosh <openosd(a)gmail.com> Cc: <stable(a)vger.kernel.org> Reported-by: Robert Barror <robert.barror(a)intel.com> Reported-by: Seema Pandit <seema.pandit(a)intel.com> Signed-off-by: Dan Williams <dan.j.williams(a)intel.com> --- fs/dax.c | 34 ++++++++++++---------------------- 1 file changed, 12 insertions(+), 22 deletions(-) diff --git a/fs/dax.c b/fs/dax.c index 9fd908f3df32..592944c522b8 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -144,19 +144,14 @@ struct wait_exceptional_entry_queue { struct exceptional_entry_key key; }; -static wait_queue_head_t *dax_entry_waitqueue(struct xa_state *xas, - void *entry, struct exceptional_entry_key *key) +static wait_queue_head_t *dax_index_waitqueue(struct xa_state *xas, + struct exceptional_entry_key *key) { unsigned long hash; unsigned long index = xas->xa_index; - /* - * If 'entry' is a PMD, align the 'index' that we use for the wait - * queue to the start of that PMD. This ensures that all offsets in - * the range covered by the PMD map to the same bit lock. - */ - if (dax_is_pmd_entry(entry)) - index &= ~PG_PMD_COLOUR; + /* PMD-align the index to ensure PTE events wakeup PMD waiters */ + index &= ~PG_PMD_COLOUR; key->xa = xas->xa; key->entry_start = index; @@ -177,17 +172,12 @@ static int wake_exceptional_entry_func(wait_queue_entry_t *wait, return autoremove_wake_function(wait, mode, sync, NULL); } -/* - * @entry may no longer be the entry at the index in the mapping. - * The important information it's conveying is whether the entry at - * this index used to be a PMD entry. - */ -static void dax_wake_entry(struct xa_state *xas, void *entry, bool wake_all) +static void dax_wake_index(struct xa_state *xas, bool wake_all) { struct exceptional_entry_key key; wait_queue_head_t *wq; - wq = dax_entry_waitqueue(xas, entry, &key); + wq = dax_index_waitqueue(xas, &key); /* * Checking for locked entry and prepare_to_wait_exclusive() happens @@ -222,7 +212,7 @@ static void *get_unlocked_entry(struct xa_state *xas) !dax_is_locked(entry)) return entry; - wq = dax_entry_waitqueue(xas, entry, &ewait.key); + wq = dax_index_waitqueue(xas, &ewait.key); prepare_to_wait_exclusive(wq, &ewait.wait, TASK_UNINTERRUPTIBLE); xas_unlock_irq(xas); @@ -246,7 +236,7 @@ static void wait_entry_unlocked(struct xa_state *xas, void *entry) init_wait(&ewait.wait); ewait.wait.func = wake_exceptional_entry_func; - wq = dax_entry_waitqueue(xas, entry, &ewait.key); + wq = dax_index_waitqueue(xas, &ewait.key); /* * Unlike get_unlocked_entry() there is no guarantee that this * path ever successfully retrieves an unlocked entry before an @@ -263,7 +253,7 @@ static void put_unlocked_entry(struct xa_state *xas, void *entry) { /* If we were the only waiter woken, wake the next one */ if (entry) - dax_wake_entry(xas, entry, false); + dax_wake_index(xas, false); } /* @@ -281,7 +271,7 @@ static void dax_unlock_entry(struct xa_state *xas, void *entry) old = xas_store(xas, entry); xas_unlock_irq(xas); BUG_ON(!dax_is_locked(old)); - dax_wake_entry(xas, entry, false); + dax_wake_index(xas, false); } /* @@ -522,7 +512,7 @@ static void *grab_mapping_entry(struct xa_state *xas, dax_disassociate_entry(entry, mapping, false); xas_store(xas, NULL); /* undo the PMD join */ - dax_wake_entry(xas, entry, true); + dax_wake_index(xas, true); mapping->nrexceptional--; entry = NULL; xas_set(xas, index); @@ -915,7 +905,7 @@ static int dax_writeback_one(struct xa_state *xas, struct dax_device *dax_dev, xas_lock_irq(xas); xas_store(xas, entry); xas_clear_mark(xas, PAGECACHE_TAG_DIRTY); - dax_wake_entry(xas, entry, false); + dax_wake_index(xas, false); trace_dax_writeback_one(mapping->host, index, count); return ret;

5 years, 11 months

4
27
0 0

[PATCH 5.2 00/66] 5.2.4-stable review

by Greg Kroah-Hartman

This is the start of the stable review cycle for the 5.2.4 release. There are 66 patches in this series, all will be posted as a response to this one. If anyone has any issues with these being applied, please let me know. Responses should be made by Sun 28 Jul 2019 03:21:13 PM UTC. Anything received after that time might be too late. The whole patch series can be found in one patch at: https://www.kernel.org/pub/linux/kernel/v5.x/stable-review/patch-5.2.4-rc1.… or in the git tree and branch at: git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git linux-5.2.y and the diffstat can be found below. thanks, greg k-h ------------- Pseudo-Shortlog of commits: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org> Linux 5.2.4-rc1 Damien Le Moal <damien.lemoal(a)wdc.com> block: Limit zone array allocation size Damien Le Moal <damien.lemoal(a)wdc.com> sd_zbc: Fix report zones buffer allocation Paolo Bonzini <pbonzini(a)redhat.com> Revert "kvm: x86: Use task structs fpu field for user" Jan Kiszka <jan.kiszka(a)siemens.com> KVM: nVMX: Clear pending KVM_REQ_GET_VMCS12_PAGES when leaving nested Paolo Bonzini <pbonzini(a)redhat.com> KVM: nVMX: do not use dangling shadow VMCS after guest reset Theodore Ts'o <tytso(a)mit.edu> ext4: allow directory holes Ross Zwisler <zwisler(a)chromium.org> ext4: use jbd2_inode dirty range scoping Ross Zwisler <zwisler(a)chromium.org> jbd2: introduce jbd2_inode dirty range scoping Ross Zwisler <zwisler(a)chromium.org> mm: add filemap_fdatawait_range_keep_errors() Theodore Ts'o <tytso(a)mit.edu> ext4: enforce the immutable flag on open files Darrick J. Wong <darrick.wong(a)oracle.com> ext4: don't allow any modifications to an immutable file Peter Zijlstra <peterz(a)infradead.org> perf/core: Fix race between close() and fork() Alexander Shishkin <alexander.shishkin(a)linux.intel.com> perf/core: Fix exclusive events' grouping Song Liu <songliubraving(a)fb.com> perf script: Assume native_arch for pipe mode Paul Cercueil <paul(a)crapouillou.net> MIPS: lb60: Fix pin mappings Keerthy <j-keerthy(a)ti.com> gpio: davinci: silence error prints in case of EPROBE_DEFER Nishka Dasgupta <nishkadg.linux(a)gmail.com> gpiolib: of: fix a memory leak in of_gpio_flags_quirks() Linus Walleij <linus.walleij(a)linaro.org> Revert "gpio/spi: Fix spi-gpio regression on active high CS" Chris Wilson <chris(a)chris-wilson.co.uk> dma-buf: Discard old fence_excl on retrying get_fences_rcu for realloc Jérôme Glisse <jglisse(a)redhat.com> dma-buf: balance refcount inbalance Ido Schimmel <idosch(a)mellanox.com> mlxsw: spectrum: Do not process learned records with a dummy FID Maor Gottlieb <maorg(a)mellanox.com> net/mlx5: E-Switch, Fix default encap mode Petr Machata <petrm(a)mellanox.com> mlxsw: spectrum_dcb: Configure DSCP map as the last rule is removed Michael Chan <michael.chan(a)broadcom.com> bnxt_en: Fix VNIC accounting when enabling aRFS on 57500 chips. Aya Levin <ayal(a)mellanox.com> net/mlx5e: Fix error flow in tx reporter diagnose Aya Levin <ayal(a)mellanox.com> net/mlx5e: Fix return value from timeout recover function Saeed Mahameed <saeedm(a)mellanox.com> net/mlx5e: Rx, Fix checksum calculation for new hardware Eli Britstein <elibr(a)mellanox.com> net/mlx5e: Fix port tunnel GRE entropy control Jakub Kicinski <jakub.kicinski(a)netronome.com> net/tls: reject offload of TLS 1.3 Jakub Kicinski <jakub.kicinski(a)netronome.com> net/tls: fix poll ignoring partially copied records Frank de Brabander <debrabander(a)gmail.com> selftests: txring_overwrite: fix incorrect test of mmap() return value Cong Wang <xiyou.wangcong(a)gmail.com> netrom: hold sock when setting skb->destructor Cong Wang <xiyou.wangcong(a)gmail.com> netrom: fix a memory leak in nr_rx_frame() Andreas Steinmetz <ast(a)domdv.de> macsec: fix checksumming after decryption Andreas Steinmetz <ast(a)domdv.de> macsec: fix use-after-free of skb during RX Nikolay Aleksandrov <nikolay(a)cumulusnetworks.com> net: bridge: stp: don't cache eth dest pointer before skb pull Nikolay Aleksandrov <nikolay(a)cumulusnetworks.com> net: bridge: don't cache ether dest pointer on input Nikolay Aleksandrov <nikolay(a)cumulusnetworks.com> net: bridge: mcast: fix stale ipv6 hdr pointer when handling v6 query Nikolay Aleksandrov <nikolay(a)cumulusnetworks.com> net: bridge: mcast: fix stale nsrcs pointer in igmp3/mld2 report handling Aya Levin <ayal(a)mellanox.com> net/mlx5e: IPoIB, Add error path in mlx5_rdma_setup_rn Peter Kosyh <p.kosyh(a)gmail.com> vrf: make sure skb->data contains ip header to make routing Christoph Paasch <cpaasch(a)apple.com> tcp: Reset bytes_acked and bytes_received when disconnecting Eric Dumazet <edumazet(a)google.com> tcp: fix tcp_set_congestion_control() use from bpf hook Eric Dumazet <edumazet(a)google.com> tcp: be more careful in tcp_fragment() Takashi Iwai <tiwai(a)suse.de> sky2: Disable MSI on ASUS P6T Xin Long <lucien.xin(a)gmail.com> sctp: not bind the socket in sctp_connect Marcelo Ricardo Leitner <marcelo.leitner(a)gmail.com> sctp: fix error handling on stream scheduler initialization David Howells <dhowells(a)redhat.com> rxrpc: Fix send on a connected, but unbound socket Heiner Kallweit <hkallweit1(a)gmail.com> r8169: fix issue with confused RX unit after PHY power-down on RTL8411b Yang Wei <albin_yang(a)163.com> nfc: fix potential illegal memory access Jakub Kicinski <jakub.kicinski(a)netronome.com> net/tls: make sure offload also gets the keys wiped Jose Abreu <Jose.Abreu(a)synopsys.com> net: stmmac: Re-work the queue selection for TSO packets Cong Wang <xiyou.wangcong(a)gmail.com> net_sched: unset TCQ_F_CAN_BYPASS when adding filters Andrew Lunn <andrew(a)lunn.ch> net: phy: sfp: hwmon: Fix scaling of RX power John Hurley <john.hurley(a)netronome.com> net: openvswitch: fix csum updates for MPLS actions Lorenzo Bianconi <lorenzo.bianconi(a)redhat.com> net: neigh: fix multiple neigh timer scheduling Florian Westphal <fw(a)strlen.de> net: make skb_dst_force return true when dst is refcounted Baruch Siach <baruch(a)tkos.co.il> net: dsa: mv88e6xxx: wait after reset deactivation Justin Chen <justinpopo6(a)gmail.com> net: bcmgenet: use promisc for unsupported filters Ido Schimmel <idosch(a)mellanox.com> ipv6: Unlink sibling route in case of failure David Ahern <dsahern(a)gmail.com> ipv6: rt6_check should return NULL if 'from' is NULL Matteo Croce <mcroce(a)redhat.com> ipv4: don't set IPv6 only flags to IPv4 addresses Eric Dumazet <edumazet(a)google.com> igmp: fix memory leak in igmpv3_del_delrec() Haiyang Zhang <haiyangz(a)microsoft.com> hv_netvsc: Fix extra rcu_read_unlock in netvsc_recv_callback() Taehee Yoo <ap420073(a)gmail.com> caif-hsi: fix possible deadlock in cfhsi_exit_module() Brian King <brking(a)linux.vnet.ibm.com> bnx2x: Prevent load reordering in tx completion processing ------------- Diffstat: Makefile | 4 +- arch/mips/jz4740/board-qi_lb60.c | 16 +-- arch/x86/include/asm/kvm_host.h | 7 +- arch/x86/kvm/vmx/nested.c | 10 +- arch/x86/kvm/x86.c | 4 +- block/blk-zoned.c | 46 ++++--- drivers/dma-buf/dma-buf.c | 1 + drivers/dma-buf/reservation.c | 4 + drivers/gpio/gpio-davinci.c | 5 +- drivers/gpio/gpiolib-of.c | 10 +- drivers/net/caif/caif_hsi.c | 2 +- drivers/net/dsa/mv88e6xxx/chip.c | 2 + drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c | 3 + drivers/net/ethernet/broadcom/bnxt/bnxt.c | 7 +- drivers/net/ethernet/broadcom/genet/bcmgenet.c | 57 ++++----- drivers/net/ethernet/marvell/sky2.c | 7 ++ drivers/net/ethernet/mellanox/mlx5/core/en.h | 1 + .../ethernet/mellanox/mlx5/core/en/reporter_tx.c | 10 +- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 3 + drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 7 +- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 5 - .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 7 ++ .../net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c | 9 +- .../net/ethernet/mellanox/mlx5/core/lib/port_tun.c | 23 +--- drivers/net/ethernet/mellanox/mlxsw/spectrum.h | 1 + drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c | 16 +-- drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c | 10 ++ .../ethernet/mellanox/mlxsw/spectrum_switchdev.c | 6 + drivers/net/ethernet/realtek/r8169.c | 137 +++++++++++++++++++++ drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 28 +++-- drivers/net/hyperv/netvsc_drv.c | 1 - drivers/net/macsec.c | 6 +- drivers/net/phy/sfp.c | 2 +- drivers/net/vrf.c | 58 +++++---- drivers/scsi/sd_zbc.c | 104 +++++++++++----- fs/ext4/dir.c | 19 ++- fs/ext4/ext4_jbd2.h | 12 +- fs/ext4/file.c | 4 + fs/ext4/inode.c | 24 +++- fs/ext4/ioctl.c | 46 ++++++- fs/ext4/move_extent.c | 3 +- fs/ext4/namei.c | 45 +++++-- fs/jbd2/commit.c | 23 +++- fs/jbd2/journal.c | 4 + fs/jbd2/transaction.c | 49 ++++---- include/linux/blkdev.h | 5 + include/linux/fs.h | 2 + include/linux/jbd2.h | 22 ++++ include/linux/mlx5/mlx5_ifc.h | 3 +- include/linux/perf_event.h | 5 + include/net/dst.h | 5 +- include/net/tcp.h | 8 +- include/net/tls.h | 1 + kernel/events/core.c | 83 ++++++++++--- mm/filemap.c | 22 ++++ net/bridge/br_input.c | 8 +- net/bridge/br_multicast.c | 23 ++-- net/bridge/br_stp_bpdu.c | 3 +- net/core/filter.c | 2 +- net/core/neighbour.c | 2 + net/ipv4/devinet.c | 8 ++ net/ipv4/igmp.c | 8 +- net/ipv4/tcp.c | 6 +- net/ipv4/tcp_cong.c | 6 +- net/ipv4/tcp_output.c | 13 +- net/ipv6/ip6_fib.c | 18 ++- net/ipv6/route.c | 2 +- net/netfilter/nf_queue.c | 6 +- net/netrom/af_netrom.c | 4 +- net/nfc/nci/data.c | 2 +- net/openvswitch/actions.c | 6 +- net/rxrpc/af_rxrpc.c | 4 +- net/sched/cls_api.c | 1 + net/sched/sch_fq_codel.c | 2 - net/sched/sch_sfq.c | 2 - net/sctp/socket.c | 24 +--- net/sctp/stream.c | 9 +- net/tls/tls_device.c | 10 +- net/tls/tls_main.c | 4 +- net/tls/tls_sw.c | 3 +- tools/perf/builtin-script.c | 3 +- tools/testing/selftests/net/txring_overwrite.c | 2 +- 82 files changed, 850 insertions(+), 335 deletions(-)

5 years, 11 months

7
77
0 0

2025

2024

2023

2022

2021

2020

2019

2018

2017

Linux-stable-mirror July 2019