Hello Marek,
I know I am writing this too early ;) Sorry for bringing it up now.
In the function below, arm_iommu_free_attrs(), there is no return value even when arm_vmregion_find() fails. A buggy driver (say, a V4L2-based one that does not use the VB2 layer) could pass in wrong buffer information, and then there is no way to know whether the memory was released or not.
The same is the case for coherent allocations: dma_release_from_coherent() returns 1 on success, but we don't pass any of that information back to the caller at all.
Would it not be fair for the free path to have a return value, so the caller is notified whether the memory was actually unmapped from the DMA/IOMMU mapping?
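To illustrate what I mean (a hypothetical sketch only, not part of your patch, and of course struct dma_map_ops currently declares .free as returning void, so this would need a wider change), the lookup failure could be reported like this, reusing the helpers from the patch below:

        /* Hypothetical variant: return an error when the vmregion lookup
         * fails, so a caller passing a bogus cpu_addr can detect that
         * nothing was actually freed. */
        static int arm_iommu_free_attrs_checked(struct device *dev, size_t size,
                        void *cpu_addr, dma_addr_t handle, struct dma_attrs *attrs)
        {
                struct arm_vmregion *c;

                size = PAGE_ALIGN(size);
                c = arm_vmregion_find(&consistent_head, (unsigned long)cpu_addr);
                if (!c)
                        return -ENOENT; /* bogus buffer: nothing was unmapped */

                __dma_free_remap(cpu_addr, size);
                __iommu_remove_mapping(dev, handle, size);
                __iommu_free_buffer(dev, c->priv, size);
                return 0;
        }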
Regards, Subash
On 10/18/2011 10:49 PM, Marek Szyprowski wrote:
This patch adds a complete implementation of the DMA-mapping API for devices that have IOMMU support. All DMA-mapping calls are supported.
This patch contains some of the code kindly provided by Krishna Reddy <vdumpa@nvidia.com>.

Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
Signed-off-by: Kyungmin Park <kyungmin.park@samsung.com>
---
 arch/arm/Kconfig                 |    8 +
 arch/arm/include/asm/device.h    |    4 +
 arch/arm/include/asm/dma-iommu.h |   35 +++
 arch/arm/mm/dma-mapping.c        |  606 +++++++++++++++++++++++++++++++++++++-
 arch/arm/mm/vmregion.h           |    2 +-
 5 files changed, 650 insertions(+), 5 deletions(-)
 create mode 100644 arch/arm/include/asm/dma-iommu.h
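For context, a quick usage sketch of the API declared in dma-iommu.h below; the device, base address, window size, and error handling are illustrative assumptions, not part of the patch:

        #include <asm/dma-iommu.h>

        /* Hypothetical example: carve out a 128 MiB IOVA window starting at
         * 0x80000000 and route this device's DMA-mapping calls through the
         * IOMMU-backed dma_map_ops installed by arm_iommu_attach_device(). */
        static int example_iommu_setup(struct device *dev)
        {
                struct dma_iommu_mapping *mapping;
                int err;

                mapping = arm_iommu_create_mapping(0x80000000, SZ_128M, 0);
                if (IS_ERR(mapping))
                        return PTR_ERR(mapping);

                err = arm_iommu_attach_device(dev, mapping);
                if (err) {
                        arm_iommu_release_mapping(mapping);
                        return err;
                }
                /* dma_alloc_*/dma_map_* on dev now go through arm_iommu_* ops. */
                return 0;
        }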
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index f9d01e0..3caf6ec 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -44,6 +44,14 @@ config ARM
 config ARM_HAS_SG_CHAIN
        bool
 
+config NEED_SG_DMA_LENGTH
+       bool
+
+config ARM_DMA_USE_IOMMU
+       select NEED_SG_DMA_LENGTH
+       select ARM_HAS_SG_CHAIN
+       bool
+
 config HAVE_PWM
        bool
diff --git a/arch/arm/include/asm/device.h b/arch/arm/include/asm/device.h
index d3b35d8..bd34378 100644
--- a/arch/arm/include/asm/device.h
+++ b/arch/arm/include/asm/device.h
@@ -11,6 +11,10 @@ struct dev_archdata {
 #ifdef CONFIG_DMABOUNCE
        struct dmabounce_device_info *dmabounce;
 #endif
+#ifdef CONFIG_ARM_DMA_USE_IOMMU
+       void *iommu_priv;
+       struct dma_iommu_mapping *mapping;
+#endif
 };
 
 struct pdev_archdata {
diff --git a/arch/arm/include/asm/dma-iommu.h b/arch/arm/include/asm/dma-iommu.h
new file mode 100644
index 0000000..a038c83
--- /dev/null
+++ b/arch/arm/include/asm/dma-iommu.h
@@ -0,0 +1,35 @@
+#ifndef ASMARM_DMA_IOMMU_H
+#define ASMARM_DMA_IOMMU_H
+
+#ifdef __KERNEL__
+
+#include <linux/mm_types.h>
+#include <linux/scatterlist.h>
+#include <linux/dma-debug.h>
+#include <linux/kmemcheck.h>
+
+#include <asm/memory.h>
+
+struct dma_iommu_mapping {
+       /* iommu specific data */
+       struct iommu_domain     *domain;
+
+       void                    *bitmap;
+       size_t                  bits;
+       unsigned int            order;
+       dma_addr_t              base;
+
+       spinlock_t              lock;
+       struct kref             kref;
+};
+
+struct dma_iommu_mapping *arm_iommu_create_mapping(dma_addr_t base,
+                                                  size_t size, int order);
+
+void arm_iommu_release_mapping(struct dma_iommu_mapping *mapping);
+
+int arm_iommu_attach_device(struct device *dev,
+                           struct dma_iommu_mapping *mapping);
+
+#endif /* __KERNEL__ */
+#endif
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index eaa7260..f7bde4f 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -31,6 +31,9 @@
 #include <asm/mach/map.h>
 #include <asm/dma-contiguous.h>
 
+#include <linux/iommu.h>
+#include <asm/dma-iommu.h>
+
 #include "mm.h"
 
 /*
@@ -263,8 +266,10 @@ static int __init consistent_init(void)
        unsigned long base = consistent_base;
        unsigned long num_ptes = (CONSISTENT_END - base) >> PGDIR_SHIFT;
 
+#ifndef CONFIG_ARM_DMA_USE_IOMMU
        if (cpu_architecture() >= CPU_ARCH_ARMv6)
                return 0;
+#endif
 
        consistent_pte = kmalloc(num_ptes * sizeof(pte_t), GFP_KERNEL);
        if (!consistent_pte) {
@@ -437,7 +442,7 @@ __dma_alloc_remap(struct page *page, size_t size, gfp_t gfp, pgprot_t prot)
                u32 off = CONSISTENT_OFFSET(c->vm_start) & (PTRS_PER_PTE-1);
 
                pte = consistent_pte[idx] + off;
-               c->vm_pages = page;
+               c->priv = page;
 
                do {
                        BUG_ON(!pte_none(*pte));
@@ -686,6 +691,14 @@ static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
        return addr;
 }
 
+static inline pgprot_t __get_dma_pgprot(struct dma_attrs *attrs, pgprot_t prot)
+{
+       prot = dma_get_attr(DMA_ATTR_WRITE_COMBINE, attrs) ?
+                           pgprot_writecombine(prot) :
+                           pgprot_dmacoherent(prot);
+       return prot;
+}
+
 /*
  * Allocate DMA-coherent memory space and return both the kernel remapped
  * virtual and bus address for that space.
@@ -693,9 +706,7 @@ static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
 void *arm_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
                    gfp_t gfp, struct dma_attrs *attrs)
 {
-       pgprot_t prot = dma_get_attr(DMA_ATTR_WRITE_COMBINE, attrs) ?
-                       pgprot_writecombine(pgprot_kernel) :
-                       pgprot_dmacoherent(pgprot_kernel);
+       pgprot_t prot = __get_dma_pgprot(attrs, pgprot_kernel);
        void *memory;
 
        if (dma_alloc_from_coherent(dev, size, handle, &memory))
@@ -866,6 +877,9 @@ int arm_dma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
        int i, j;
 
        for_each_sg(sg, s, nents, i) {
+#ifdef CONFIG_NEED_SG_DMA_LENGTH
+               s->dma_length = s->length;
+#endif
                s->dma_address = ops->map_page(dev, sg_page(s), s->offset,
                                                s->length, dir, attrs);
                if (dma_mapping_error(dev, s->dma_address))
@@ -971,3 +985,587 @@ static int __init dma_debug_do_init(void)
        return 0;
 }
 fs_initcall(dma_debug_do_init);
+
+#ifdef CONFIG_ARM_DMA_USE_IOMMU
+
+/* IOMMU */
+
+static inline dma_addr_t __alloc_iova(struct dma_iommu_mapping *mapping,
+                                     size_t size)
+{
+       unsigned int order = get_order(size);
+       unsigned int align = 0;
+       unsigned int count, start;
+       unsigned long flags;
+
+       count = ((PAGE_ALIGN(size) >> PAGE_SHIFT) +
+                (1 << mapping->order) - 1) >> mapping->order;
+
+       if (order > mapping->order)
+               align = (1 << (order - mapping->order)) - 1;
+
+       spin_lock_irqsave(&mapping->lock, flags);
+       start = bitmap_find_next_zero_area(mapping->bitmap, mapping->bits, 0,
+                                          count, align);
+       if (start > mapping->bits) {
+               spin_unlock_irqrestore(&mapping->lock, flags);
+               return ~0;
+       }
+
+       bitmap_set(mapping->bitmap, start, count);
+       spin_unlock_irqrestore(&mapping->lock, flags);
+
+       return mapping->base + (start << (mapping->order + PAGE_SHIFT));
+}
+
+static inline void __free_iova(struct dma_iommu_mapping *mapping,
+                              dma_addr_t addr, size_t size)
+{
+       unsigned int start = (addr - mapping->base) >>
+                            (mapping->order + PAGE_SHIFT);
+       unsigned int count = ((size >> PAGE_SHIFT) +
+                             (1 << mapping->order) - 1) >> mapping->order;
+       unsigned long flags;
+
+       spin_lock_irqsave(&mapping->lock, flags);
+       bitmap_clear(mapping->bitmap, start, count);
+       spin_unlock_irqrestore(&mapping->lock, flags);
+}
+
+static struct page **__iommu_alloc_buffer(struct device *dev, size_t size, gfp_t gfp)
+{
+       struct page **pages;
+       int count = size >> PAGE_SHIFT;
+       int i;
+
+       pages = kzalloc(count * sizeof(struct page *), gfp);
+       if (!pages)
+               return NULL;
+
+       for (i = 0; i < count; i++) {
+               pages[i] = alloc_page(gfp);
+               if (!pages[i])
+                       goto error;
+               __dma_clear_buffer(pages[i], PAGE_SIZE);
+       }
+
+       return pages;
+error:
+       while (i--)     /* i--, not --i, so pages[0] is freed too */
+               if (pages[i])
+                       __free_pages(pages[i], 0);
+       kfree(pages);
+       return NULL;
+}
+
+static int __iommu_free_buffer(struct device *dev, struct page **pages, size_t size)
+{
+       int count = size >> PAGE_SHIFT;
+       int i;
+
+       for (i = 0; i < count; i++)
+               if (pages[i])
+                       __free_pages(pages[i], 0);
+       kfree(pages);
+       return 0;
+}
+
+static void *
+__iommu_alloc_remap(struct page **pages, size_t size, gfp_t gfp, pgprot_t prot)
+{
+       struct arm_vmregion *c;
+       size_t align;
+       size_t count = size >> PAGE_SHIFT;
+       int bit;
+
+       if (!consistent_pte[0]) {
+               printk(KERN_ERR "%s: not initialised\n", __func__);
+               dump_stack();
+               return NULL;
+       }
+
+       /*
+        * Align the virtual region allocation - maximum alignment is
+        * a section size, minimum is a page size.  This helps reduce
+        * fragmentation of the DMA space, and also prevents allocations
+        * smaller than a section from crossing a section boundary.
+        */
+       bit = fls(size - 1);
+       if (bit > SECTION_SHIFT)
+               bit = SECTION_SHIFT;
+       align = 1 << bit;
+
+       /*
+        * Allocate a virtual address in the consistent mapping region.
+        */
+       c = arm_vmregion_alloc(&consistent_head, align, size,
+                              gfp & ~(__GFP_DMA | __GFP_HIGHMEM));
+       if (c) {
+               pte_t *pte;
+               int idx = CONSISTENT_PTE_INDEX(c->vm_start);
+               int i = 0;
+               u32 off = CONSISTENT_OFFSET(c->vm_start) & (PTRS_PER_PTE-1);
+
+               pte = consistent_pte[idx] + off;
+               c->priv = pages;
+
+               do {
+                       BUG_ON(!pte_none(*pte));
+
+                       set_pte_ext(pte, mk_pte(pages[i], prot), 0);
+                       pte++;
+                       off++;
+                       i++;
+                       if (off >= PTRS_PER_PTE) {
+                               off = 0;
+                               pte = consistent_pte[++idx];
+                       }
+               } while (i < count);
+
+               dsb();
+
+               return (void *)c->vm_start;
+       }
+       return NULL;
+}
+
+static dma_addr_t __iommu_create_mapping(struct device *dev, struct page **pages, size_t size)
+{
+       struct dma_iommu_mapping *mapping = dev->archdata.mapping;
+       unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
+       dma_addr_t dma_addr, iova;
+       int i, ret = ~0;
+
+       dma_addr = __alloc_iova(mapping, size);
+       if (dma_addr == ~0)     /* __alloc_iova() signals failure with ~0 */
+               goto fail;
+
+       iova = dma_addr;
+       for (i = 0; i < count; i++) {
+               unsigned int phys = page_to_phys(pages[i]);
+               ret = iommu_map(mapping->domain, iova, phys, 0, 0);
+               if (ret < 0)
+                       goto fail;
+               iova += PAGE_SIZE;
+       }
+
+       return dma_addr;
+fail:
+       return ~0;
+}
+
+static int __iommu_remove_mapping(struct device *dev, dma_addr_t iova, size_t size)
+{
+       struct dma_iommu_mapping *mapping = dev->archdata.mapping;
+       unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
+       int i;
+
+       iova &= PAGE_MASK;
+       for (i = 0; i < count; i++)
+               iommu_unmap(mapping->domain, iova + (i << PAGE_SHIFT), 0);
+       __free_iova(mapping, iova, size);
+       return 0;
+}
+
+static void *arm_iommu_alloc_attrs(struct device *dev, size_t size,
+           dma_addr_t *handle, gfp_t gfp, struct dma_attrs *attrs)
+{
+       pgprot_t prot = __get_dma_pgprot(attrs, pgprot_kernel);
+       struct page **pages;
+       void *addr = NULL;
+
+       *handle = ~0;
+       size = PAGE_ALIGN(size);
+
+       pages = __iommu_alloc_buffer(dev, size, gfp);
+       if (!pages)
+               return NULL;
+
+       *handle = __iommu_create_mapping(dev, pages, size);
+       if (*handle == ~0)
+               goto err_buffer;
+
+       addr = __iommu_alloc_remap(pages, size, gfp, prot);
+       if (!addr)
+               goto err_mapping;
+
+       return addr;
+
+err_mapping:
+       __iommu_remove_mapping(dev, *handle, size);
+err_buffer:
+       __iommu_free_buffer(dev, pages, size);
+       return NULL;
+}
+
+static int arm_iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma,
+                   void *cpu_addr, dma_addr_t dma_addr, size_t size,
+                   struct dma_attrs *attrs)
+{
+       struct arm_vmregion *c;
+
+       vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot);
+       c = arm_vmregion_find(&consistent_head, (unsigned long)cpu_addr);
+
+       if (c) {
+               struct page **pages = c->priv;
+
+               unsigned long uaddr = vma->vm_start;
+               unsigned long usize = vma->vm_end - vma->vm_start;
+               int i = 0;
+
+               do {
+                       int ret;
+
+                       ret = vm_insert_page(vma, uaddr, pages[i++]);
+                       if (ret) {
+                               printk(KERN_ERR "Remapping memory, error: %d\n", ret);
+                               return ret;
+                       }
+
+                       uaddr += PAGE_SIZE;
+                       usize -= PAGE_SIZE;
+               } while (usize > 0);
+       }
+       return 0;
+}
+
+/*
+ * free a page as defined by the above mapping.
+ * Must not be called with IRQs disabled.
+ */
+void arm_iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr,
+                         dma_addr_t handle, struct dma_attrs *attrs)
+{
+       struct arm_vmregion *c;
+       size = PAGE_ALIGN(size);
+
+       c = arm_vmregion_find(&consistent_head, (unsigned long)cpu_addr);
+       if (c) {
+               struct page **pages = c->priv;
+               __dma_free_remap(cpu_addr, size);
+               __iommu_remove_mapping(dev, handle, size);
+               __iommu_free_buffer(dev, pages, size);
+       }
+}
+
+static int __map_sg_chunk(struct device *dev, struct scatterlist *sg,
+                         size_t size, dma_addr_t *handle,
+                         enum dma_data_direction dir)
+{
+       struct dma_iommu_mapping *mapping = dev->archdata.mapping;
+       dma_addr_t iova;
+       int ret = 0;
+       unsigned int count, i;
+       struct scatterlist *s;
+
+       size = PAGE_ALIGN(size);
+       *handle = ~0;
+
+       iova = __alloc_iova(mapping, size);
+       if (iova == ~0)         /* __alloc_iova() signals failure with ~0 */
+               return -ENOMEM;
+
+       for (count = 0, s = sg; count < (size >> PAGE_SHIFT); s = sg_next(s)) {
+               phys_addr_t phys = page_to_phys(sg_page(s));
+               unsigned int len = PAGE_ALIGN(s->offset + s->length);
+
+               if (!arch_is_coherent())
+                       __dma_page_cpu_to_dev(sg_page(s), s->offset, s->length, dir);
+
+               for (i = 0; i < (len >> PAGE_SHIFT); i++) {
+                       ret = iommu_map(mapping->domain,
+                                       iova + (count << PAGE_SHIFT),
+                                       phys + (i << PAGE_SHIFT), 0, 0);
+                       if (ret < 0)
+                               goto fail;
+                       count++;
+               }
+       }
+       *handle = iova;
+
+       return 0;
+fail:
+       while (count--)
+               iommu_unmap(mapping->domain, iova + count * PAGE_SIZE, 0);
+       __iommu_remove_mapping(dev, iova, size);
+       return ret;
+}
+
+int arm_iommu_map_sg(struct device *dev, struct scatterlist *sg, int nents,
+                    enum dma_data_direction dir, struct dma_attrs *attrs)
+{
+       struct scatterlist *s = sg, *dma = sg, *start = sg;
+       int i, count = 0;
+       unsigned int offset = s->offset;
+       unsigned int size = s->offset + s->length;
+       unsigned int max = dma_get_max_seg_size(dev);
+
+       s->dma_address = ~0;
+       s->dma_length = 0;
+
+       for (i = 1; i < nents; i++) {
+               s->dma_address = ~0;
+               s->dma_length = 0;
+
+               s = sg_next(s);
+
+               if (s->offset || (size & ~PAGE_MASK) || size + s->length > max) {
+                       if (__map_sg_chunk(dev, start, size, &dma->dma_address,
+                           dir) < 0)
+                               goto bad_mapping;
+
+                       dma->dma_address += offset;
+                       dma->dma_length = size - offset;
+
+                       size = offset = s->offset;
+                       start = s;
+                       dma = sg_next(dma);
+                       count += 1;
+               }
+               size += s->length;
+       }
+       if (__map_sg_chunk(dev, start, size, &dma->dma_address, dir) < 0)
+               goto bad_mapping;
+
+       dma->dma_address += offset;
+       dma->dma_length = size - offset;
+
+       return count+1;
+
+bad_mapping:
+       for_each_sg(sg, s, count, i)
+               __iommu_remove_mapping(dev, sg_dma_address(s), sg_dma_len(s));
+       return 0;
+}
+
+void arm_iommu_unmap_sg(struct device *dev, struct scatterlist *sg, int nents,
+                       enum dma_data_direction dir, struct dma_attrs *attrs)
+{
+       struct scatterlist *s;
+       int i;
+
+       for_each_sg(sg, s, nents, i) {
+               if (sg_dma_len(s))
+                       __iommu_remove_mapping(dev, sg_dma_address(s),
+                                              sg_dma_len(s));
+               if (!arch_is_coherent())
+                       __dma_page_dev_to_cpu(sg_page(s), s->offset,
+                                             s->length, dir);
+       }
+}
+
+/**
+ * dma_sync_sg_for_cpu
+ * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
+ * @sg: list of buffers
+ * @nents: number of buffers to map (returned from dma_map_sg)
+ * @dir: DMA transfer direction (same as was passed to dma_map_sg)
+ */
+void arm_iommu_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
+                       int nents, enum dma_data_direction dir)
+{
+       struct scatterlist *s;
+       int i;
+
+       for_each_sg(sg, s, nents, i)
+               if (!arch_is_coherent())
+                       __dma_page_dev_to_cpu(sg_page(s), s->offset, s->length, dir);
+}
+
+/**
+ * dma_sync_sg_for_device
+ * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
+ * @sg: list of buffers
+ * @nents: number of buffers to map (returned from dma_map_sg)
+ * @dir: DMA transfer direction (same as was passed to dma_map_sg)
+ */
+void arm_iommu_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
+                       int nents, enum dma_data_direction dir)
+{
+       struct scatterlist *s;
+       int i;
+
+       for_each_sg(sg, s, nents, i)
+               if (!arch_is_coherent())
+                       __dma_page_cpu_to_dev(sg_page(s), s->offset, s->length, dir);
+}
+
+static dma_addr_t arm_iommu_map_page(struct device *dev, struct page *page,
+            unsigned long offset, size_t size, enum dma_data_direction dir,
+            struct dma_attrs *attrs)
+{
+       struct dma_iommu_mapping *mapping = dev->archdata.mapping;
+       dma_addr_t dma_addr, iova;
+       unsigned int phys;
+       int ret, len = PAGE_ALIGN(size + offset);
+
+       if (!arch_is_coherent())
+               __dma_page_cpu_to_dev(page, offset, size, dir);
+
+       dma_addr = iova = __alloc_iova(mapping, len);
+       if (iova == ~0)         /* __alloc_iova() signals failure with ~0 */
+               goto fail;
+
+       dma_addr += offset;
+       phys = page_to_phys(page);
+
+       while (len > 0) {
+               ret = iommu_map(mapping->domain, iova, phys, 0, 0);
+               if (ret < 0)
+                       goto fail;
+               iova += PAGE_SIZE;
+               phys += PAGE_SIZE;
+               len -= PAGE_SIZE;
+       }
+
+       return dma_addr;
+fail:
+       return ~0;
+}
+
+static void arm_iommu_unmap_page(struct device *dev, dma_addr_t handle,
+               size_t size, enum dma_data_direction dir,
+               struct dma_attrs *attrs)
+{
+       struct dma_iommu_mapping *mapping = dev->archdata.mapping;
+       dma_addr_t iova = handle & PAGE_MASK;
+       struct page *page = phys_to_page(iommu_iova_to_phys(mapping->domain, iova));
+       int offset = handle & ~PAGE_MASK, len = offset + size;
+
+       if (!iova)
+               return;
+
+       if (!arch_is_coherent())
+               __dma_page_dev_to_cpu(page, offset, size, dir);
+
+       while (len > 0) {
+               iommu_unmap(mapping->domain, iova, 0);
+               iova += PAGE_SIZE;
+               len -= PAGE_SIZE;
+       }
+}
+
+static void arm_iommu_sync_single_for_cpu(struct device *dev,
+               dma_addr_t handle, size_t size, enum dma_data_direction dir)
+{
+       struct dma_iommu_mapping *mapping = dev->archdata.mapping;
+       dma_addr_t iova = handle & PAGE_MASK;
+       struct page *page = phys_to_page(iommu_iova_to_phys(mapping->domain, iova));
+       unsigned int offset = handle & ~PAGE_MASK;
+
+       if (!iova)
+               return;
+
+       if (!arch_is_coherent())
+               __dma_page_dev_to_cpu(page, offset, size, dir);
+}
+
+static void arm_iommu_sync_single_for_device(struct device *dev,
+               dma_addr_t handle, size_t size, enum dma_data_direction dir)
+{
+       struct dma_iommu_mapping *mapping = dev->archdata.mapping;
+       dma_addr_t iova = handle & PAGE_MASK;
+       struct page *page = phys_to_page(iommu_iova_to_phys(mapping->domain, iova));
+       unsigned int offset = handle & ~PAGE_MASK;
+
+       if (!iova)
+               return;
+
+       __dma_page_cpu_to_dev(page, offset, size, dir);
+}
+
+struct dma_map_ops iommu_ops = {
+       .alloc          = arm_iommu_alloc_attrs,
+       .free           = arm_iommu_free_attrs,
+       .mmap           = arm_iommu_mmap_attrs,
+
+       .map_page               = arm_iommu_map_page,
+       .unmap_page             = arm_iommu_unmap_page,
+       .sync_single_for_cpu    = arm_iommu_sync_single_for_cpu,
+       .sync_single_for_device = arm_iommu_sync_single_for_device,
+
+       .map_sg                 = arm_iommu_map_sg,
+       .unmap_sg               = arm_iommu_unmap_sg,
+       .sync_sg_for_cpu        = arm_iommu_sync_sg_for_cpu,
+       .sync_sg_for_device     = arm_iommu_sync_sg_for_device,
+};
+
+struct dma_iommu_mapping *arm_iommu_create_mapping(dma_addr_t base,
+                                                  size_t size, int order)
+{
+       unsigned int count = (size >> PAGE_SHIFT) - order;
+       unsigned int bitmap_size = BITS_TO_LONGS(count) * sizeof(long);
+       struct dma_iommu_mapping *mapping;
+       int err = -ENOMEM;
+
+       mapping = kzalloc(sizeof(struct dma_iommu_mapping), GFP_KERNEL);
+       if (!mapping)
+               goto err;
+
+       mapping->bitmap = kzalloc(bitmap_size, GFP_KERNEL);
+       if (!mapping->bitmap)
+               goto err2;
+
+       mapping->base = base;
+       mapping->bits = bitmap_size;
+       mapping->order = order;
+       spin_lock_init(&mapping->lock);
+
+       mapping->domain = iommu_domain_alloc();
+       if (!mapping->domain)
+               goto err3;
+
+       kref_init(&mapping->kref);
+       return mapping;
+err3:
+       kfree(mapping->bitmap);
+err2:
+       kfree(mapping);
+err:
+       return ERR_PTR(err);
+}
+EXPORT_SYMBOL(arm_iommu_create_mapping);
+
+static void release_iommu_mapping(struct kref *kref)
+{
+       struct dma_iommu_mapping *mapping =
+               container_of(kref, struct dma_iommu_mapping, kref);
+
+       iommu_domain_free(mapping->domain);
+       kfree(mapping->bitmap);
+       kfree(mapping);
+}
+
+void arm_iommu_release_mapping(struct dma_iommu_mapping *mapping)
+{
+       if (mapping)
+               kref_put(&mapping->kref, release_iommu_mapping);
+}
+EXPORT_SYMBOL(arm_iommu_release_mapping);
+
+int arm_iommu_attach_device(struct device *dev,
+                           struct dma_iommu_mapping *mapping)
+{
+       int err;
+
+       err = iommu_attach_device(mapping->domain, dev);
+       if (err)
+               return err;
+
+       kref_get(&mapping->kref);
+       dev->archdata.mapping = mapping;
+       set_dma_ops(dev, &iommu_ops);
+
+       printk(KERN_INFO "Attached IOMMU controller to %s device.\n", dev_name(dev));
+       return 0;
+}
+EXPORT_SYMBOL(arm_iommu_attach_device);
+
+#endif
diff --git a/arch/arm/mm/vmregion.h b/arch/arm/mm/vmregion.h
index 15e9f04..6bbc402 100644
--- a/arch/arm/mm/vmregion.h
+++ b/arch/arm/mm/vmregion.h
@@ -17,7 +17,7 @@ struct arm_vmregion {
        struct list_head        vm_list;
        unsigned long           vm_start;
        unsigned long           vm_end;
-       struct page             *vm_pages;
+       void                    *priv;
        int                     vm_active;
 };
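
For completeness, since the map path above reports failures by returning ~0 (which matches DMA_ERROR_CODE on ARM), here is an illustrative caller-side sketch of how a driver would detect such a failure through the usual dma_mapping_error() convention; the function and its use of the buffer are made up:

        #include <linux/dma-mapping.h>

        /* Illustrative only: map one page through the device's dma_map_ops
         * (arm_iommu_map_page() once the device is attached) and check for
         * a mapping failure before touching the hardware. */
        static int example_map_one(struct device *dev, struct page *page)
        {
                dma_addr_t addr;

                addr = dma_map_page(dev, page, 0, PAGE_SIZE, DMA_TO_DEVICE);
                if (dma_mapping_error(dev, addr))
                        return -ENOMEM; /* map failed; nothing to clean up */

                /* ... hand 'addr' to the hardware ... */

                dma_unmap_page(dev, addr, PAGE_SIZE, DMA_TO_DEVICE);
                return 0;
        }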