Hi all,
This series of backports fixes the SWIOTLB library to maintain the page offset when mapping a DMA address. The bug that motivated this patch series manifested when running a 5.4 kernel as a SEV guest with an NVMe device. However, any device that infers information from the page offset and is accessed through the SWIOTLB will benefit from this bug fix.
Jianxiong Gao (7): driver core: add a min_align_mask field to struct device_dma_parameters swiotlb: add a io_tlb_offset helper swiotlb: factor out a nr_slots helper swiotlb: clean up swiotlb_tbl_unmap_single swiotlb: refactor swiotlb_tbl_map_single swiotlb: don't modify orig_addr in swiotlb_tbl_sync_single nvme-pci: set min_align_mask
Linus Torvalds (1): Linux 5.4
Makefile | 2 +- drivers/nvme/host/pci.c | 1 + include/linux/device.h | 1 + include/linux/dma-mapping.h | 16 +++ include/linux/swiotlb.h | 3 +- kernel/dma/swiotlb.c | 262 ++++++++++++++++++++---------------- 6 files changed, 165 insertions(+), 120 deletions(-)
'commit 36950f2da1ea ("driver core: add a min_align_mask field to struct device_dma_parameters")'
Some devices rely on the address offset in a page to function correctly (NVMe driver as an example). These devices may use a different page size than the Linux kernel. The address offset has to be preserved upon mapping, and in order to do so, we need to record the page_offset_mask first.
Signed-off-by: Jianxiong Gao jxgao@google.com Signed-off-by: Christoph Hellwig hch@lst.de --- include/linux/device.h | 1 + include/linux/dma-mapping.h | 16 ++++++++++++++++ 2 files changed, 17 insertions(+)
diff --git a/include/linux/device.h b/include/linux/device.h index 297239a08bb7..2d30a6d7249e 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -993,6 +993,7 @@ struct device_dma_parameters { * sg limitations. */ unsigned int max_segment_size; + unsigned int min_align_mask; unsigned long segment_boundary_mask; };
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 4a1c4fca475a..c996cd2e72f8 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -747,6 +747,22 @@ static inline int dma_set_seg_boundary(struct device *dev, unsigned long mask) return -EIO; }
+static inline unsigned int dma_get_min_align_mask(struct device *dev) +{ + if (dev->dma_parms) + return dev->dma_parms->min_align_mask; + return 0; +} + +static inline int dma_set_min_align_mask(struct device *dev, + unsigned int min_align_mask) +{ + if (WARN_ON_ONCE(!dev->dma_parms)) + return -EIO; + dev->dma_parms->min_align_mask = min_align_mask; + return 0; +} + static inline int dma_get_cache_alignment(void) { #ifdef ARCH_DMA_MINALIGN
From: Christoph Hellwig hch@lst.de 'commit c7fbeca757fe ("swiotlb: factor out an io_tlb_offset helper")'
Replace the very genericly named OFFSET macro with a little inline helper that hardcodes the alignment to the only value ever passed.
Signed-off-by: Christoph Hellwig hch@lst.de Signed-off-by: Jianxiong Gao jxgao@google.com --- kernel/dma/swiotlb.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-)
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index 673a2cdb2656..9fa71a91c235 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -49,9 +49,6 @@ #define CREATE_TRACE_POINTS #include <trace/events/swiotlb.h>
-#define OFFSET(val,align) ((unsigned long) \ - ( (val) & ( (align) - 1))) - #define SLABS_PER_PAGE (1 << (PAGE_SHIFT - IO_TLB_SHIFT))
/* @@ -177,6 +174,11 @@ void swiotlb_print_info(void) bytes >> 20); }
+static inline unsigned long io_tlb_offset(unsigned long val) +{ + return val & (IO_TLB_SEGSIZE - 1); +} + /* * Early SWIOTLB allocation may be too early to allow an architecture to * perform the desired operations. This function allows the architecture to @@ -226,7 +228,7 @@ int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose) __func__, alloc_size, PAGE_SIZE);
for (i = 0; i < io_tlb_nslabs; i++) { - io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE); + io_tlb_list[i] = IO_TLB_SEGSIZE - io_tlb_offset(i); io_tlb_orig_addr[i] = INVALID_PHYS_ADDR; } io_tlb_index = 0; @@ -357,7 +359,7 @@ swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs) goto cleanup4;
for (i = 0; i < io_tlb_nslabs; i++) { - io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE); + io_tlb_list[i] = IO_TLB_SEGSIZE - io_tlb_offset(i); io_tlb_orig_addr[i] = INVALID_PHYS_ADDR; } io_tlb_index = 0; @@ -530,7 +532,9 @@ phys_addr_t swiotlb_tbl_map_single(struct device *hwdev,
for (i = index; i < (int) (index + nslots); i++) io_tlb_list[i] = 0; - for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE - 1) && io_tlb_list[i]; i--) + for (i = index - 1; + io_tlb_offset(i) != IO_TLB_SEGSIZE - 1 && + io_tlb_list[i]; i--) io_tlb_list[i] = ++count; tlb_addr = io_tlb_start + (index << IO_TLB_SHIFT);
@@ -616,7 +620,9 @@ void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr, * Step 2: merge the returned slots with the preceding slots, * if available (non zero) */ - for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE -1) && io_tlb_list[i]; i--) + for (i = index - 1; + io_tlb_offset(i) != IO_TLB_SEGSIZE - 1 && + io_tlb_list[i]; i--) io_tlb_list[i] = ++count;
io_tlb_used -= nslots;
From: Christoph Hellwig hch@lst.de
'commit c32a77fd1878 ("swiotlb: factor out a nr_slots helper")'
Factor out a helper to find the number of slots for a given size.
Signed-off-by: Christoph Hellwig hch@lst.de Acked-by: Jianxiong Gao jxgao@google.com Tested-by: Jianxiong Gao jxgao@google.com Signed-off-by: Konrad Rzeszutek Wilk konrad.wilk@oracle.com Signed-off-by: Jianxiong Gao jxgao@google.com --- include/linux/swiotlb.h | 1 + kernel/dma/swiotlb.c | 13 +++++++++---- 2 files changed, 10 insertions(+), 4 deletions(-)
diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index cde3dc18e21a..ea8e2e28459d 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -28,6 +28,7 @@ enum swiotlb_force { * controllable. */ #define IO_TLB_SHIFT 11 +#define IO_TLB_SIZE (1 << IO_TLB_SHIFT)
extern void swiotlb_init(int verbose); int swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose); diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index 9fa71a91c235..200afa87d135 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -179,6 +179,11 @@ static inline unsigned long io_tlb_offset(unsigned long val) return val & (IO_TLB_SEGSIZE - 1); }
+static inline unsigned long nr_slots(u64 val) +{ + return DIV_ROUND_UP(val, IO_TLB_SIZE); +} + /* * Early SWIOTLB allocation may be too early to allow an architecture to * perform the desired operations. This function allows the architecture to @@ -477,20 +482,20 @@ phys_addr_t swiotlb_tbl_map_single(struct device *hwdev,
tbl_dma_addr &= mask;
- offset_slots = ALIGN(tbl_dma_addr, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT; + offset_slots = nr_slots(tbl_dma_addr);
/* * Carefully handle integer overflow which can occur when mask == ~0UL. */ max_slots = mask + 1 - ? ALIGN(mask + 1, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT + ? nr_slots(mask + 1) : 1UL << (BITS_PER_LONG - IO_TLB_SHIFT);
/* * For mappings greater than or equal to a page, we limit the stride * (and hence alignment) to a page size. */ - nslots = ALIGN(alloc_size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT; + nslots = nr_slots(alloc_size); if (alloc_size >= PAGE_SIZE) stride = (1 << (PAGE_SHIFT - IO_TLB_SHIFT)); else @@ -586,7 +591,7 @@ void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr, enum dma_data_direction dir, unsigned long attrs) { unsigned long flags; - int i, count, nslots = ALIGN(alloc_size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT; + int i, count, nslots = nr_slots(alloc_size); int index = (tlb_addr - io_tlb_start) >> IO_TLB_SHIFT; phys_addr_t orig_addr = io_tlb_orig_addr[index];
From: Christoph Hellwig hch@lst.de
'commit ca10d0f8e530 ("swiotlb: clean up swiotlb_tbl_unmap_single")'
Remove a layer of pointless indentation, replace a hard to follow ternary expression with a plain if/else.
Signed-off-by: Christoph Hellwig hch@lst.de Signed-off-by: Jianxiong Gao jxgao@google.com --- kernel/dma/swiotlb.c | 42 +++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 21 deletions(-)
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index 200afa87d135..5484469fa5f3 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -610,28 +610,28 @@ void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr, * with slots below and above the pool being returned. */ spin_lock_irqsave(&io_tlb_lock, flags); - { - count = ((index + nslots) < ALIGN(index + 1, IO_TLB_SEGSIZE) ? - io_tlb_list[index + nslots] : 0); - /* - * Step 1: return the slots to the free list, merging the - * slots with superceeding slots - */ - for (i = index + nslots - 1; i >= index; i--) { - io_tlb_list[i] = ++count; - io_tlb_orig_addr[i] = INVALID_PHYS_ADDR; - } - /* - * Step 2: merge the returned slots with the preceding slots, - * if available (non zero) - */ - for (i = index - 1; - io_tlb_offset(i) != IO_TLB_SEGSIZE - 1 && - io_tlb_list[i]; i--) - io_tlb_list[i] = ++count; - - io_tlb_used -= nslots; + if (index + nslots < ALIGN(index + 1, IO_TLB_SEGSIZE)) + count = io_tlb_list[index + nslots]; + else + count = 0; + /* + * Step 1: return the slots to the free list, merging the slots with + * superceeding slots + */ + for (i = index + nslots - 1; i >= index; i--) { + io_tlb_list[i] = ++count; + io_tlb_orig_addr[i] = INVALID_PHYS_ADDR; } + + /* + * Step 2: merge the returned slots with the preceding slots, if + * available (non zero) + */ + for (i = index - 1; + io_tlb_offset(i) != IO_TLB_SEGSIZE - 1 && io_tlb_list[i]; + i--) + io_tlb_list[i] = ++count; + io_tlb_used -= nslots; spin_unlock_irqrestore(&io_tlb_lock, flags); }
From: Christoph Hellwig hch@lst.de
'commit 26a7e094783d ("swiotlb: refactor swiotlb_tbl_map_single")'
Split out a bunch of a self-contained helpers to make the function easier to follow.
Signed-off-by: Christoph Hellwig hch@lst.de Acked-by: Jianxiong Gao jxgao@google.com Tested-by: Jianxiong Gao jxgao@google.com Signed-off-by: Konrad Rzeszutek Wilk konrad.wilk@oracle.com Signed-off-by: Jianxiong Gao jxgao@google.com --- include/linux/swiotlb.h | 2 +- kernel/dma/swiotlb.c | 183 +++++++++++++++++++--------------------- 2 files changed, 90 insertions(+), 95 deletions(-)
diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index ea8e2e28459d..66aceb588266 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -45,7 +45,7 @@ enum dma_sync_target { SYNC_FOR_DEVICE = 1, };
-extern phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, +extern phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, dma_addr_t tbl_dma_addr, phys_addr_t phys, size_t mapping_size, diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index 5484469fa5f3..dbd6cda6ace6 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -449,137 +449,132 @@ static void swiotlb_bounce(phys_addr_t orig_addr, phys_addr_t tlb_addr, } }
-phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, - dma_addr_t tbl_dma_addr, - phys_addr_t orig_addr, - size_t mapping_size, - size_t alloc_size, - enum dma_data_direction dir, - unsigned long attrs) +#define slot_addr(start, idx) ((start) + ((idx) << IO_TLB_SHIFT)) +/* + * Carefully handle integer overflow which can occur when boundary_mask == ~0UL. + */ +static inline unsigned long get_max_slots(unsigned long boundary_mask) { - unsigned long flags; - phys_addr_t tlb_addr; - unsigned int nslots, stride, index, wrap; - int i; - unsigned long mask; - unsigned long offset_slots; - unsigned long max_slots; - unsigned long tmp_io_tlb_used; - - if (no_iotlb_memory) - panic("Can not allocate SWIOTLB buffer earlier and can't now provide you with the DMA bounce buffer"); - - if (mem_encrypt_active()) - pr_warn_once("Memory encryption is active and system is using DMA bounce buffers\n"); - - if (mapping_size > alloc_size) { - dev_warn_once(hwdev, "Invalid sizes (mapping: %zd bytes, alloc: %zd bytes)", - mapping_size, alloc_size); - return (phys_addr_t)DMA_MAPPING_ERROR; - } - - mask = dma_get_seg_boundary(hwdev); + if (boundary_mask == ~0UL) + return 1UL << (BITS_PER_LONG - IO_TLB_SHIFT); + return nr_slots(boundary_mask + 1); +}
- tbl_dma_addr &= mask; +static unsigned int wrap_index(unsigned int index) +{ + if (index >= io_tlb_nslabs) + return 0; + return index; +}
- offset_slots = nr_slots(tbl_dma_addr); +/* + * Find a suitable number of IO TLB entries size that will fit this request and + * allocate a buffer from that IO TLB pool. + */ +static int find_slots(struct device *dev, + dma_addr_t tbl_dma_addr, size_t alloc_size) +{ + unsigned long boundary_mask = dma_get_seg_boundary(dev); + tbl_dma_addr = tbl_dma_addr & boundary_mask; + unsigned long max_slots = get_max_slots(boundary_mask); + unsigned int nslots = nr_slots(alloc_size), stride = 1; + unsigned int index, wrap, count = 0, i; + unsigned long flags;
- /* - * Carefully handle integer overflow which can occur when mask == ~0UL. - */ - max_slots = mask + 1 - ? nr_slots(mask + 1) - : 1UL << (BITS_PER_LONG - IO_TLB_SHIFT); + BUG_ON(!nslots);
/* * For mappings greater than or equal to a page, we limit the stride * (and hence alignment) to a page size. */ - nslots = nr_slots(alloc_size); if (alloc_size >= PAGE_SIZE) - stride = (1 << (PAGE_SHIFT - IO_TLB_SHIFT)); - else - stride = 1; - - BUG_ON(!nslots); + stride <<= (PAGE_SHIFT - IO_TLB_SHIFT);
- /* - * Find suitable number of IO TLB entries size that will fit this - * request and allocate a buffer from that IO TLB pool. - */ spin_lock_irqsave(&io_tlb_lock, flags); - if (unlikely(nslots > io_tlb_nslabs - io_tlb_used)) goto not_found;
- index = ALIGN(io_tlb_index, stride); - if (index >= io_tlb_nslabs) - index = 0; - wrap = index; - + index = wrap = wrap_index(ALIGN(io_tlb_index, stride)); do { - while (iommu_is_span_boundary(index, nslots, offset_slots, - max_slots)) { - index += stride; - if (index >= io_tlb_nslabs) - index = 0; - if (index == wrap) - goto not_found; - } - /* * If we find a slot that indicates we have 'nslots' number of * contiguous buffers, we allocate the buffers from that slot * and mark the entries as '0' indicating unavailable. */ - if (io_tlb_list[index] >= nslots) { - int count = 0; - - for (i = index; i < (int) (index + nslots); i++) - io_tlb_list[i] = 0; - for (i = index - 1; - io_tlb_offset(i) != IO_TLB_SEGSIZE - 1 && - io_tlb_list[i]; i--) - io_tlb_list[i] = ++count; - tlb_addr = io_tlb_start + (index << IO_TLB_SHIFT); - - /* - * Update the indices to avoid searching in the next - * round. - */ - io_tlb_index = ((index + nslots) < io_tlb_nslabs - ? (index + nslots) : 0); - - goto found; + if (!iommu_is_span_boundary(index, nslots, + nr_slots(tbl_dma_addr), + max_slots)) { + if (io_tlb_list[index] >= nslots) + goto found; } - index += stride; - if (index >= io_tlb_nslabs) - index = 0; + index = wrap_index(index + stride); } while (index != wrap);
not_found: - tmp_io_tlb_used = io_tlb_used; - spin_unlock_irqrestore(&io_tlb_lock, flags); - if (!(attrs & DMA_ATTR_NO_WARN) && printk_ratelimit()) - dev_warn(hwdev, "swiotlb buffer is full (sz: %zd bytes), total %lu (slots), used %lu (slots)\n", - alloc_size, io_tlb_nslabs, tmp_io_tlb_used); - return (phys_addr_t)DMA_MAPPING_ERROR; + return -1; + found: + for (i = index; i < index + nslots; i++) + io_tlb_list[i] = 0; + for (i = index - 1; + io_tlb_offset(i) != IO_TLB_SEGSIZE - 1 && + io_tlb_list[i]; i--) + io_tlb_list[i] = ++count; + + /* + * Update the indices to avoid searching in the next round. + */ + if (index + nslots < io_tlb_nslabs) + io_tlb_index = index + nslots; + else + io_tlb_index = 0; io_tlb_used += nslots; + spin_unlock_irqrestore(&io_tlb_lock, flags); + return index; +} + +phys_addr_t swiotlb_tbl_map_single(struct device *dev, + dma_addr_t tbl_dma_addr, phys_addr_t orig_addr, + size_t mapping_size, size_t alloc_size, + enum dma_data_direction dir, unsigned long attrs) +{ + unsigned int index, i; + phys_addr_t tlb_addr; + + if (no_iotlb_memory) + panic("Can not allocate SWIOTLB buffer earlier and can't now provide you with the DMA bounce buffer"); + + if (mem_encrypt_active()) + pr_warn_once("Memory encryption is active and system is using DMA bounce buffers\n"); + + if (mapping_size > alloc_size) { + dev_warn_once(dev, "Invalid sizes (mapping: %zd bytes, alloc: %zd bytes)", + mapping_size, alloc_size); + return (phys_addr_t)DMA_MAPPING_ERROR; + }
+ index = find_slots(dev, tbl_dma_addr, alloc_size); + if (index == -1) { + if (!(attrs & DMA_ATTR_NO_WARN)) + dev_warn_ratelimited(dev, + "swiotlb buffer is full (sz: %zd bytes), total %lu (slots), used %lu (slots)\n", + alloc_size, io_tlb_nslabs, io_tlb_used); + return (phys_addr_t)DMA_MAPPING_ERROR; + } /* * Save away the mapping from the original address to the DMA address. * This is needed when we sync the memory. Then we sync the buffer if * needed. */ - for (i = 0; i < nslots; i++) - io_tlb_orig_addr[index+i] = orig_addr + (i << IO_TLB_SHIFT); + for (i = 0; i < nr_slots(alloc_size); i++) + io_tlb_orig_addr[index + i] = slot_addr(orig_addr, i); + + tlb_addr = slot_addr(io_tlb_start, index); if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) && (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)) swiotlb_bounce(orig_addr, tlb_addr, mapping_size, DMA_TO_DEVICE); - return tlb_addr; }
@@ -682,7 +677,7 @@ bool swiotlb_map(struct device *dev, phys_addr_t *phys, dma_addr_t *dma_addr, }
/* Oh well, have to allocate and map a bounce buffer. */ - *phys = swiotlb_tbl_map_single(dev, __phys_to_dma(dev, io_tlb_start), + *phys = swiotlb_tbl_map_single(dev, __phys_to_dma(dev, io_tlb_start), *phys, size, size, dir, attrs); if (*phys == (phys_addr_t)DMA_MAPPING_ERROR) return false;
From: Christoph Hellwig hch@lst.de
'commit 16fc3cef33a0 ("swiotlb: don't modify orig_addr in swiotlb_tbl_sync_single")'
swiotlb_tbl_map_single currently nevers sets a tlb_addr that is not aligned to the tlb bucket size. But we're going to add such a case soon, for which this adjustment would be bogus.
Signed-off-by: Christoph Hellwig hch@lst.de Signed-off-by: Jianxiong Gao jxgao@google.com --- kernel/dma/swiotlb.c | 1 - 1 file changed, 1 deletion(-)
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index dbd6cda6ace6..faf51614f02e 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -639,7 +639,6 @@ void swiotlb_tbl_sync_single(struct device *hwdev, phys_addr_t tlb_addr,
if (orig_addr == INVALID_PHYS_ADDR) return; - orig_addr += (unsigned long)tlb_addr & ((1 << IO_TLB_SHIFT) - 1);
switch (target) { case SYNC_FOR_CPU:
From: Christoph Hellwig hch@lst.de
'commit 1f221a0d0dbf ("swiotlb: respect min_align_mask")'
Respect the min_align_mask in struct device_dma_parameters in swiotlb.
There are two parts to it: 1) for the lower bits of the alignment inside the io tlb slot, just extent the size of the allocation and leave the start of the slot empty 2) for the high bits ensure we find a slot that matches the high bits of the alignment to avoid wasting too much memory
Based on an earlier patch from Jianxiong Gao jxgao@google.com.
Signed-off-by: Christoph Hellwig hch@lst.de Acked-by: Jianxiong Gao jxgao@google.com Tested-by: Jianxiong Gao jxgao@google.com Signed-off-by: Konrad Rzeszutek Wilk konrad.wilk@oracle.com Signed-off-by: Jianxiong Gao jxgao@google.com --- kernel/dma/swiotlb.c | 43 ++++++++++++++++++++++++++++++++----------- 1 file changed, 32 insertions(+), 11 deletions(-)
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index faf51614f02e..e3719c623cc4 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -450,6 +450,15 @@ static void swiotlb_bounce(phys_addr_t orig_addr, phys_addr_t tlb_addr, }
#define slot_addr(start, idx) ((start) + ((idx) << IO_TLB_SHIFT)) + +/* + * Return the offset into a iotlb slot required to keep the device happy. + */ +static unsigned int swiotlb_align_offset(struct device *dev, u64 addr) +{ + return addr & dma_get_min_align_mask(dev) & (IO_TLB_SIZE - 1); +} + /* * Carefully handle integer overflow which can occur when boundary_mask == ~0UL. */ @@ -471,24 +480,28 @@ static unsigned int wrap_index(unsigned int index) * Find a suitable number of IO TLB entries size that will fit this request and * allocate a buffer from that IO TLB pool. */ -static int find_slots(struct device *dev, - dma_addr_t tbl_dma_addr, size_t alloc_size) +static int find_slots(struct device *dev, dma_addr_t tbl_dma_addr, + phys_addr_t orig_addr, size_t alloc_size) { unsigned long boundary_mask = dma_get_seg_boundary(dev); tbl_dma_addr = tbl_dma_addr & boundary_mask; unsigned long max_slots = get_max_slots(boundary_mask); - unsigned int nslots = nr_slots(alloc_size), stride = 1; + unsigned int iotlb_align_mask = + dma_get_min_align_mask(dev) & ~(IO_TLB_SIZE - 1); + unsigned int nslots = nr_slots(alloc_size), stride; unsigned int index, wrap, count = 0, i; unsigned long flags;
BUG_ON(!nslots);
/* - * For mappings greater than or equal to a page, we limit the stride - * (and hence alignment) to a page size. + * For mappings with an alignment requirement don't bother looping to + * unaligned slots once we found an aligned one. For allocations of + * PAGE_SIZE or larger only look for page aligned allocations. */ + stride = (iotlb_align_mask >> IO_TLB_SHIFT) + 1; if (alloc_size >= PAGE_SIZE) - stride <<= (PAGE_SHIFT - IO_TLB_SHIFT); + stride = max(stride, stride << (PAGE_SHIFT - IO_TLB_SHIFT));
spin_lock_irqsave(&io_tlb_lock, flags); if (unlikely(nslots > io_tlb_nslabs - io_tlb_used)) @@ -496,6 +509,12 @@ static int find_slots(struct device *dev,
index = wrap = wrap_index(ALIGN(io_tlb_index, stride)); do { + if ((slot_addr(tbl_dma_addr, index) & iotlb_align_mask) != + (orig_addr & iotlb_align_mask)) { + index = wrap_index(index + 1); + continue; + } + /* * If we find a slot that indicates we have 'nslots' number of * contiguous buffers, we allocate the buffers from that slot @@ -540,6 +559,7 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, size_t mapping_size, size_t alloc_size, enum dma_data_direction dir, unsigned long attrs) { + unsigned int offset = swiotlb_align_offset(dev, orig_addr); unsigned int index, i; phys_addr_t tlb_addr;
@@ -555,7 +575,7 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, return (phys_addr_t)DMA_MAPPING_ERROR; }
- index = find_slots(dev, tbl_dma_addr, alloc_size); + index = find_slots(dev, tbl_dma_addr, orig_addr, alloc_size + offset); if (index == -1) { if (!(attrs & DMA_ATTR_NO_WARN)) dev_warn_ratelimited(dev, @@ -568,10 +588,10 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, * This is needed when we sync the memory. Then we sync the buffer if * needed. */ - for (i = 0; i < nr_slots(alloc_size); i++) + for (i = 0; i < nr_slots(alloc_size + offset); i++) io_tlb_orig_addr[index + i] = slot_addr(orig_addr, i);
- tlb_addr = slot_addr(io_tlb_start, index); + tlb_addr = slot_addr(io_tlb_start, index) + offset; if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) && (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)) swiotlb_bounce(orig_addr, tlb_addr, mapping_size, DMA_TO_DEVICE); @@ -586,8 +606,9 @@ void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr, enum dma_data_direction dir, unsigned long attrs) { unsigned long flags; - int i, count, nslots = nr_slots(alloc_size); - int index = (tlb_addr - io_tlb_start) >> IO_TLB_SHIFT; + unsigned int offset = swiotlb_align_offset(hwdev, tlb_addr); + int i, count, nslots = nr_slots(alloc_size + offset); + int index = (tlb_addr - offset - io_tlb_start) >> IO_TLB_SHIFT; phys_addr_t orig_addr = io_tlb_orig_addr[index];
/*
From: Christoph Hellwig hch@lst.de
'commit 3d2d861eb03e ("nvme-pci: set min_align_mask")'
The PRP addressing scheme requires all PRP entries except for the first one to have a zero offset into the NVMe controller pages (which can be different from the Linux PAGE_SIZE). Use the min_align_mask device parameter to ensure that swiotlb does not change the address of the buffer modulo the device page size to ensure that the PRPs won't be malformed.
Signed-off-by: Jianxiong Gao jxgao@google.com Signed-off-by: Christoph Hellwig hch@lst.de Tested-by: Jianxiong Gao jxgao@google.com Signed-off-by: Konrad Rzeszutek Wilk konrad.wilk@oracle.com --- drivers/nvme/host/pci.c | 1 + 1 file changed, 1 insertion(+)
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 869f462e6b6e..087ae2b2a853 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2576,6 +2576,7 @@ static void nvme_reset_work(struct work_struct *work) * Don't limit the IOMMU merged segment size. */ dma_set_max_seg_size(dev->dev, 0xffffffff); + dma_set_min_align_mask(dev->dev, dev->ctrl.page_size - 1);
mutex_unlock(&dev->shutdown_lock);
On Mon, Apr 05, 2021 at 08:51:01PM +0000, Jianxiong Gao wrote:
Hi all,
This series of backports fixes the SWIOTLB library to maintain the page offset when mapping a DMA address. The bug that motivated this patch series manifested when running a 5.4 kernel as a SEV guest with an NVMe device. However, any device that infers information from the page offset and is accessed through the SWIOTLB will benefit from this bug fix.
Jianxiong Gao (7): driver core: add a min_align_mask field to struct device_dma_parameters swiotlb: add a io_tlb_offset helper swiotlb: factor out a nr_slots helper swiotlb: clean up swiotlb_tbl_unmap_single swiotlb: refactor swiotlb_tbl_map_single swiotlb: don't modify orig_addr in swiotlb_tbl_sync_single nvme-pci: set min_align_mask
Linus Torvalds (1): Linux 5.4
This is clearly wrong :)
This series also doesn't apply cleanly, what did you use as a base?
Also, why are the sign-offs on individual patches different between your patches and upstream?
linux-stable-mirror@lists.linaro.org