GPU drivers need this in their shrinkers, to be able to throw out
mmap'ed buffers. Note that we also need dma_resv_lock in shrinkers,
but that loop is resolved by trylocking in shrinkers.
So full hierarchy is now (ignore some of the other branches we already
have primed):
mmap_read_lock -> dma_resv -> shrinkers -> i_mmap_lock_write
I hope that's not inconsistent with anything mm or fs does, adding
relevant people.
Signed-off-by: Daniel Vetter <daniel.vetter(a)intel.com>
Cc: Sumit Semwal <sumit.semwal(a)linaro.org>
Cc: "Christian König" <christian.koenig(a)amd.com>
Cc: linux-media(a)vger.kernel.org
Cc: linaro-mm-sig(a)lists.linaro.org
Cc: Dave Chinner <david(a)fromorbit.com>
Cc: Qian Cai <cai(a)lca.pw>
Cc: linux-xfs(a)vger.kernel.org
Cc: linux-fsdevel(a)vger.kernel.org
Cc: Thomas Hellström (Intel) <thomas_os(a)shipmail.org>
Cc: Andrew Morton <akpm(a)linux-foundation.org>
Cc: Jason Gunthorpe <jgg(a)mellanox.com>
Cc: linux-mm(a)kvack.org
Cc: linux-rdma(a)vger.kernel.org
Cc: Maarten Lankhorst <maarten.lankhorst(a)linux.intel.com>
---
drivers/dma-buf/dma-resv.c | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c
index 0e6675ec1d11..9678162a4ac5 100644
--- a/drivers/dma-buf/dma-resv.c
+++ b/drivers/dma-buf/dma-resv.c
@@ -104,12 +104,14 @@ static int __init dma_resv_lockdep(void)
struct mm_struct *mm = mm_alloc();
struct ww_acquire_ctx ctx;
struct dma_resv obj;
+ struct address_space mapping;
int ret;
if (!mm)
return -ENOMEM;
dma_resv_init(&obj);
+ address_space_init_once(&mapping);
mmap_read_lock(mm);
ww_acquire_init(&ctx, &reservation_ww_class);
@@ -117,6 +119,9 @@ static int __init dma_resv_lockdep(void)
if (ret == -EDEADLK)
dma_resv_lock_slow(&obj, &ctx);
fs_reclaim_acquire(GFP_KERNEL);
+ /* for unmap_mapping_range on trylocked buffer objects in shrinkers */
+ i_mmap_lock_write(&mapping);
+ i_mmap_unlock_write(&mapping);
#ifdef CONFIG_MMU_NOTIFIER
lock_map_acquire(&__mmu_notifier_invalidate_range_start_map);
__dma_fence_might_wait();
--
2.27.0
Am 09.09.20 um 09:57 schrieb Tian Tao:
> Fix kernel-doc warnings.
> drivers/gpu/drm/scheduler/sched_fence.c:110: warning: Function parameter or
> member 'f' not described in 'drm_sched_fence_release_scheduled'
> drivers/gpu/drm/scheduler/sched_fence.c:110: warning: Excess function
> parameter 'fence' description in 'drm_sched_fence_release_scheduled'
>
> Signed-off-by: Tian Tao <tiantao6(a)hisilicon.com>
Reviewed-by: Christian König <christian.koenig(a)amd.com>
> ---
> drivers/gpu/drm/scheduler/sched_fence.c | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/scheduler/sched_fence.c b/drivers/gpu/drm/scheduler/sched_fence.c
> index 8b45c3a1b84..69de2c7 100644
> --- a/drivers/gpu/drm/scheduler/sched_fence.c
> +++ b/drivers/gpu/drm/scheduler/sched_fence.c
> @@ -101,7 +101,7 @@ static void drm_sched_fence_free(struct rcu_head *rcu)
> /**
> * drm_sched_fence_release_scheduled - callback that fence can be freed
> *
> - * @fence: fence
> + * @f: fence
> *
> * This function is called when the reference count becomes zero.
> * It just RCU schedules freeing up the fence.
Hi Tomasz,
On 07.09.2020 15:07, Tomasz Figa wrote:
> On Fri, Sep 4, 2020 at 3:35 PM Marek Szyprowski
> <m.szyprowski(a)samsung.com> wrote:
>> Use recently introduced common wrappers operating directly on the struct
>> sg_table objects and scatterlist page iterators to make the code a bit
>> more compact, robust, easier to follow and copy/paste safe.
>>
>> No functional change, because the code already properly did all the
>> scatterlist related calls.
>>
>> Signed-off-by: Marek Szyprowski <m.szyprowski(a)samsung.com>
>> Reviewed-by: Robin Murphy <robin.murphy(a)arm.com>
>> ---
>> .../common/videobuf2/videobuf2-dma-contig.c | 34 ++++++++-----------
>> .../media/common/videobuf2/videobuf2-dma-sg.c | 32 +++++++----------
>> .../common/videobuf2/videobuf2-vmalloc.c | 12 +++----
>> 3 files changed, 31 insertions(+), 47 deletions(-)
>>
> Thanks for the patch! Please see my comments inline.
>
>> diff --git a/drivers/media/common/videobuf2/videobuf2-dma-contig.c b/drivers/media/common/videobuf2/videobuf2-dma-contig.c
>> index ec3446cc45b8..1b242d844dde 100644
>> --- a/drivers/media/common/videobuf2/videobuf2-dma-contig.c
>> +++ b/drivers/media/common/videobuf2/videobuf2-dma-contig.c
>> @@ -58,10 +58,10 @@ static unsigned long vb2_dc_get_contiguous_size(struct sg_table *sgt)
>> unsigned int i;
>> unsigned long size = 0;
>>
>> - for_each_sg(sgt->sgl, s, sgt->nents, i) {
>> + for_each_sgtable_dma_sg(sgt, s, i) {
>> if (sg_dma_address(s) != expected)
>> break;
>> - expected = sg_dma_address(s) + sg_dma_len(s);
>> + expected += sg_dma_len(s);
>> size += sg_dma_len(s);
>> }
>> return size;
>> @@ -103,8 +103,7 @@ static void vb2_dc_prepare(void *buf_priv)
>> if (!sgt)
>> return;
>>
>> - dma_sync_sg_for_device(buf->dev, sgt->sgl, sgt->orig_nents,
>> - buf->dma_dir);
>> + dma_sync_sgtable_for_device(buf->dev, sgt, buf->dma_dir);
>> }
>>
>> static void vb2_dc_finish(void *buf_priv)
>> @@ -115,7 +114,7 @@ static void vb2_dc_finish(void *buf_priv)
>> if (!sgt)
>> return;
>>
>> - dma_sync_sg_for_cpu(buf->dev, sgt->sgl, sgt->orig_nents, buf->dma_dir);
>> + dma_sync_sgtable_for_cpu(buf->dev, sgt, buf->dma_dir);
>> }
>>
>> /*********************************************/
>> @@ -275,8 +274,8 @@ static void vb2_dc_dmabuf_ops_detach(struct dma_buf *dbuf,
>> * memory locations do not require any explicit cache
>> * maintenance prior or after being used by the device.
>> */
>> - dma_unmap_sg_attrs(db_attach->dev, sgt->sgl, sgt->orig_nents,
>> - attach->dma_dir, DMA_ATTR_SKIP_CPU_SYNC);
>> + dma_unmap_sgtable(db_attach->dev, sgt, attach->dma_dir,
>> + DMA_ATTR_SKIP_CPU_SYNC);
>> sg_free_table(sgt);
>> kfree(attach);
>> db_attach->priv = NULL;
>> @@ -301,8 +300,8 @@ static struct sg_table *vb2_dc_dmabuf_ops_map(
>>
>> /* release any previous cache */
>> if (attach->dma_dir != DMA_NONE) {
>> - dma_unmap_sg_attrs(db_attach->dev, sgt->sgl, sgt->orig_nents,
>> - attach->dma_dir, DMA_ATTR_SKIP_CPU_SYNC);
>> + dma_unmap_sgtable(db_attach->dev, sgt, attach->dma_dir,
>> + DMA_ATTR_SKIP_CPU_SYNC);
>> attach->dma_dir = DMA_NONE;
>> }
>>
>> @@ -310,9 +309,8 @@ static struct sg_table *vb2_dc_dmabuf_ops_map(
>> * mapping to the client with new direction, no cache sync
>> * required see comment in vb2_dc_dmabuf_ops_detach()
>> */
>> - sgt->nents = dma_map_sg_attrs(db_attach->dev, sgt->sgl, sgt->orig_nents,
>> - dma_dir, DMA_ATTR_SKIP_CPU_SYNC);
>> - if (!sgt->nents) {
>> + if (dma_map_sgtable(db_attach->dev, sgt, dma_dir,
>> + DMA_ATTR_SKIP_CPU_SYNC)) {
>> pr_err("failed to map scatterlist\n");
>> mutex_unlock(lock);
>> return ERR_PTR(-EIO);
> As opposed to dma_map_sg_attrs(), dma_map_sgtable() now returns an
> error code on its own. Is it expected to ignore it and return -EIO?
Those errors are more or less propagated to userspace and -EIO has been
already widely documented in V4L2 documentation as the error code for
the most of the V4L2 ioctls. I don't want to change it. A possible
-EINVAL returned from dma_map_sgtable() was just one of the 'generic'
error codes, not very descriptive in that case. Probably the main
problem here is that dma_map_sg() and friend doesn't return any error
codes...
> ...
Best regards
--
Marek Szyprowski, PhD
Samsung R&D Institute Poland
Dear All,
During the Exynos DRM GEM rework and fixing the issues in the.
drm_prime_sg_to_page_addr_arrays() function [1] I've noticed that most
drivers in DRM framework incorrectly use nents and orig_nents entries of
the struct sg_table.
In case of the most DMA-mapping implementations exchanging those two
entries or using nents for all loops on the scatterlist is harmless,
because they both have the same value. There exists however a DMA-mapping
implementations, for which such incorrect usage breaks things. The nents
returned by dma_map_sg() might be lower than the nents passed as its
parameter and this is perfectly fine. DMA framework or IOMMU is allowed
to join consecutive chunks while mapping if such operation is supported
by the underlying HW (bus, bridge, IOMMU, etc). Example of the case
where dma_map_sg() might return 1 'DMA' chunk for the 4 'physical' pages
is described here [2]
The DMA-mapping framework documentation [3] states that dma_map_sg()
returns the numer of the created entries in the DMA address space.
However the subsequent calls to dma_sync_sg_for_{device,cpu} and
dma_unmap_sg must be called with the original number of entries passed to
dma_map_sg. The common pattern in DRM drivers were to assign the
dma_map_sg() return value to sg_table->nents and use that value for
the subsequent calls to dma_sync_sg_* or dma_unmap_sg functions. Also
the code iterated over nents times to access the pages stored in the
processed scatterlist, while it should use orig_nents as the numer of
the page entries.
I've tried to identify all such incorrect usage of sg_table->nents and
this is a result of my research. It looks that the incorrect pattern has
been copied over the many drivers mainly in the DRM subsystem. Too bad in
most cases it even worked correctly if the system used a simple, linear
DMA-mapping implementation, for which swapping nents and orig_nents
doesn't make any difference. To avoid similar issues in the future, I've
introduced a common wrappers for DMA-mapping calls, which operate directly
on the sg_table objects. I've also added wrappers for iterating over the
scatterlists stored in the sg_table objects and applied them where
possible. This, together with some common DRM prime helpers, allowed me
to almost get rid of all nents/orig_nents usage in the drivers. I hope
that such change makes the code robust, easier to follow and copy/paste
safe.
The biggest TODO is DRM/i915 driver and I don't feel brave enough to fix
it fully. The driver creatively uses sg_table->orig_nents to store the
size of the allocate scatterlist and ignores the number of the entries
returned by dma_map_sg function. In this patchset I only fixed the
sg_table objects exported by dmabuf related functions. I hope that I
didn't break anything there.
Patches are based on top of Linux next-20200903. The required changes to
DMA-mapping framework has been already merged to v5.9-rc3.
If possible I would like ask for merging most of the patches via DRM
tree.
Best regards,
Marek Szyprowski
References:
[1] https://lkml.org/lkml/2020/3/27/555
[2] https://lkml.org/lkml/2020/3/29/65
[3] Documentation/DMA-API-HOWTO.txt
[4] https://lore.kernel.org/linux-iommu/20200512121931.GD20393@lst.de/T/#ma18c9…
Changelog:
v10:
- addressed more issues pointed by Robin Murphy in his review:
* prime: restored WARN_ON() in drm_prime_sg_to_page_addr_arrays()
* armada: simplified cleanup path
* msm: fixed arm64 build
* omapdrm: removed WARN_ON(), which is now in drm_prime_sg_to_page_addr_arrays()
* omapdrm: dropped the incorrect nents/orig_nents patch
* media: pci: also update to use modern DMA_FROM_DEVICE definitions
- dropped merged patches
v9: https://lore.kernel.org/linux-iommu/20200826063316.23486-1-m.szyprowski@sam…
- rebased onto Linux next-20200825, which is based on v5.9-rc2; fixed conflicts
- dropped merged patches
v8:
- rapidio: fixed issues pointed by kbuilt test robot (use of uninitialized
variable
- vb2: rebased after recent changes in the code
v7: https://lore.kernel.org/linux-iommu/20200619103636.11974-1-m.szyprowski@sam…
- changed DMA page interators to standard DMA SG iterators in drm/prime and
videobuf2-dma-contig as suggested by Robin Murphy
- fixed build issues
v6: https://lore.kernel.org/linux-iommu/20200618153956.29558-1-m.szyprowski@sam…
- rebased onto Linux next-20200618, which is based on v5.8-rc1; fixed conflicts
v5: https://lore.kernel.org/linux-iommu/20200513132114.6046-1-m.szyprowski@sams…
- fixed some minor style issues and typos
- fixed lack of the attrs argument in ion, dmabuf, rapidio, fastrpc and
vfio patches
v4: https://lore.kernel.org/linux-iommu/20200512121931.GD20393@lst.de/T/
- added for_each_sgtable_* wrappers and applied where possible
- added drm_prime_get_contiguous_size() and applied where possible
- applied drm_prime_sg_to_page_addr_arrays() where possible to remove page
extraction from sg_table objects
- added documentation for the introduced wrappers
- improved patches description a bit
v3: https://lore.kernel.org/dri-devel/20200505083926.28503-1-m.szyprowski@samsu…
- introduce dma_*_sgtable_* wrappers and use them in all patches
v2: https://lore.kernel.org/linux-iommu/c01c9766-9778-fd1f-f36e-2dc7bd376ba4@ar…
- dropped most of the changes to drm/i915
- added fixes for rcar-du, xen, media and ion
- fixed a few issues pointed by kbuild test robot
- added wide cc: list for each patch
v1: https://lore.kernel.org/linux-iommu/c01c9766-9778-fd1f-f36e-2dc7bd376ba4@ar…
- initial version
Patch summary:
Marek Szyprowski (30):
drm: prime: add common helper to check scatterlist contiguity
drm: prime: use sgtable iterators in
drm_prime_sg_to_page_addr_arrays()
drm: core: fix common struct sg_table related issues
drm: armada: fix common struct sg_table related issues
drm: etnaviv: fix common struct sg_table related issues
drm: exynos: use common helper for a scatterlist contiguity check
drm: exynos: fix common struct sg_table related issues
drm: i915: fix common struct sg_table related issues
drm: lima: fix common struct sg_table related issues
drm: mediatek: use common helper for a scatterlist contiguity check
drm: mediatek: use common helper for extracting pages array
drm: msm: fix common struct sg_table related issues
drm: omapdrm: use common helper for extracting pages array
drm: panfrost: fix common struct sg_table related issues
drm: rockchip: use common helper for a scatterlist contiguity check
drm: rockchip: fix common struct sg_table related issues
drm: tegra: fix common struct sg_table related issues
drm: v3d: fix common struct sg_table related issues
drm: virtio: fix common struct sg_table related issues
drm: vmwgfx: fix common struct sg_table related issues
drm: xen: fix common struct sg_table related issues
xen: gntdev: fix common struct sg_table related issues
drm: host1x: fix common struct sg_table related issues
drm: rcar-du: fix common struct sg_table related issues
dmabuf: fix common struct sg_table related issues
staging: tegra-vde: fix common struct sg_table related issues
rapidio: fix common struct sg_table related issues
samples: vfio-mdev/mbochs: fix common struct sg_table related issues
media: pci: fix common ALSA DMA-mapping related codes
videobuf2: use sgtable-based scatterlist wrappers
drivers/dma-buf/heaps/heap-helpers.c | 13 ++-
drivers/dma-buf/udmabuf.c | 7 +-
drivers/gpu/drm/armada/armada_gem.c | 24 +++--
drivers/gpu/drm/drm_cache.c | 2 +-
drivers/gpu/drm/drm_gem_cma_helper.c | 23 +----
drivers/gpu/drm/drm_gem_shmem_helper.c | 14 ++-
drivers/gpu/drm/drm_prime.c | 91 +++++++++++--------
drivers/gpu/drm/etnaviv/etnaviv_gem.c | 12 +--
drivers/gpu/drm/etnaviv/etnaviv_mmu.c | 15 +--
drivers/gpu/drm/exynos/exynos_drm_g2d.c | 10 +-
drivers/gpu/drm/exynos/exynos_drm_gem.c | 23 +----
drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c | 11 +--
.../gpu/drm/i915/gem/selftests/mock_dmabuf.c | 7 +-
drivers/gpu/drm/lima/lima_gem.c | 11 ++-
drivers/gpu/drm/lima/lima_vm.c | 5 +-
drivers/gpu/drm/mediatek/mtk_drm_gem.c | 37 ++------
drivers/gpu/drm/msm/msm_gem.c | 13 +--
drivers/gpu/drm/msm/msm_gpummu.c | 15 ++-
drivers/gpu/drm/msm/msm_iommu.c | 2 +-
drivers/gpu/drm/omapdrm/omap_gem.c | 14 +--
drivers/gpu/drm/panfrost/panfrost_gem.c | 4 +-
drivers/gpu/drm/panfrost/panfrost_mmu.c | 7 +-
drivers/gpu/drm/rcar-du/rcar_du_vsp.c | 3 +-
drivers/gpu/drm/rockchip/rockchip_drm_gem.c | 42 +++------
drivers/gpu/drm/tegra/gem.c | 27 ++----
drivers/gpu/drm/tegra/plane.c | 15 +--
drivers/gpu/drm/v3d/v3d_mmu.c | 13 ++-
drivers/gpu/drm/virtio/virtgpu_object.c | 36 +++++---
drivers/gpu/drm/virtio/virtgpu_vq.c | 12 +--
drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c | 17 +---
drivers/gpu/drm/xen/xen_drm_front_gem.c | 2 +-
drivers/gpu/host1x/job.c | 22 ++---
.../common/videobuf2/videobuf2-dma-contig.c | 34 +++----
.../media/common/videobuf2/videobuf2-dma-sg.c | 32 +++----
.../common/videobuf2/videobuf2-vmalloc.c | 12 +--
drivers/media/pci/cx23885/cx23885-alsa.c | 4 +-
drivers/media/pci/cx25821/cx25821-alsa.c | 4 +-
drivers/media/pci/cx88/cx88-alsa.c | 6 +-
drivers/media/pci/saa7134/saa7134-alsa.c | 4 +-
drivers/media/platform/vsp1/vsp1_drm.c | 8 +-
drivers/rapidio/devices/rio_mport_cdev.c | 11 +--
drivers/staging/media/tegra-vde/iommu.c | 4 +-
drivers/xen/gntdev-dmabuf.c | 13 ++-
include/drm/drm_prime.h | 2 +
samples/vfio-mdev/mbochs.c | 3 +-
45 files changed, 280 insertions(+), 406 deletions(-)
--
2.17.1
Hi Tomi,
On 02.09.2020 10:00, Tomi Valkeinen wrote:
> On 01/09/2020 22:33, Robin Murphy wrote:
>> On 2020-08-26 07:32, Marek Szyprowski wrote:
>>> The Documentation/DMA-API-HOWTO.txt states that the dma_map_sg() function
>>> returns the number of the created entries in the DMA address space.
>>> However the subsequent calls to the dma_sync_sg_for_{device,cpu}() and
>>> dma_unmap_sg must be called with the original number of the entries
>>> passed to the dma_map_sg().
>>>
>>> struct sg_table is a common structure used for describing a non-contiguous
>>> memory buffer, used commonly in the DRM and graphics subsystems. It
>>> consists of a scatterlist with memory pages and DMA addresses (sgl entry),
>>> as well as the number of scatterlist entries: CPU pages (orig_nents entry)
>>> and DMA mapped pages (nents entry).
>>>
>>> It turned out that it was a common mistake to misuse nents and orig_nents
>>> entries, calling DMA-mapping functions with a wrong number of entries or
>>> ignoring the number of mapped entries returned by the dma_map_sg()
>>> function.
>>>
>>> Fix the code to refer to proper nents or orig_nents entries. This driver
>>> checks for a buffer contiguity in DMA address space, so it should test
>>> sg_table->nents entry.
>>>
>>> Signed-off-by: Marek Szyprowski <m.szyprowski(a)samsung.com>
>>> ---
>>> drivers/gpu/drm/omapdrm/omap_gem.c | 6 +++---
>>> 1 file changed, 3 insertions(+), 3 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/omapdrm/omap_gem.c b/drivers/gpu/drm/omapdrm/omap_gem.c
>>> index ff0c4b0c3fd0..a7a9a0afe2b6 100644
>>> --- a/drivers/gpu/drm/omapdrm/omap_gem.c
>>> +++ b/drivers/gpu/drm/omapdrm/omap_gem.c
>>> @@ -48,7 +48,7 @@ struct omap_gem_object {
>>> * OMAP_BO_MEM_DMA_API flag set)
>>> *
>>> * - buffers imported from dmabuf (with the OMAP_BO_MEM_DMABUF flag set)
>>> - * if they are physically contiguous (when sgt->orig_nents == 1)
>>> + * if they are physically contiguous (when sgt->nents == 1)
>> Hmm, if this really does mean *physically* contiguous - i.e. if buffers might be shared between
>> DMA-translatable and non-DMA-translatable devices - then these changes might not be appropriate. If
>> not and it only actually means DMA-contiguous, then it would be good to clarify the comments to that
>> effect.
>>
>> Can anyone familiar with omapdrm clarify what exactly the case is here? I know that IOMMUs might be
>> involved to some degree, and I've skimmed the interconnect chapters of enough OMAP TRMs to be scared
>> by the reference to the tiler aperture in the context below :)
> DSS (like many other IPs in OMAP) does not have any MMU/PAT, and can only use contiguous buffers
> (contiguous in the RAM).
>
> There's a special case with TILER (which is not part of DSS but of the memory subsystem, but it's
> still handled internally by the omapdrm driver), which has a PAT. PAT can create a contiguous view
> of scattered pages, and DSS can then use this contiguous view ("tiler aperture", which to DSS looks
> just like normal contiguous memory).
>
> Note that omapdrm does not use dma_map_sg() & co. mentioned in the patch description.
>
> If there's no MMU/PAT, is orig_nents always the same as nents? Or can we have multiple physically
> contiguous pages listed separately in the sgt (so orig_nents > 1) but as the pages form one big
> contiguous area, nents == 1?
Well, when DMA-mapping API is properly used, the difference between
nents and orig_nents is only when the scatterlist have been mapped for DMA.
For the mentioned case, even if the creator of the buffer used only the
pages that are consecutive in the physical memory, he is free to chose
either to set nents/orig_nents to 1 and length to n*PAGE_SIZE or set
nents/orig_nents to n and length to PAGE_SIZE for each. Then the buffer
chunks might be merged, but this is done by the DMA-mapping code. For
your case, without any call to DMA-mapping, you can only assume that the
buffer is contiguous in physical memory if orig_nents is 1.
I've changed the use of nents to orig_nents to make things consistent -
this code operates only on the unmapped buffers. I want to ensure that
anyone who will potentially copy this code, won't make the
nents/orig_nents mistake in the future.
If you don't like it, we can drop this patch, because it won't change
the way the driver works.
Best regards
--
Marek Szyprowski, PhD
Samsung R&D Institute Poland
Dear All,
During the Exynos DRM GEM rework and fixing the issues in the.
drm_prime_sg_to_page_addr_arrays() function [1] I've noticed that most
drivers in DRM framework incorrectly use nents and orig_nents entries of
the struct sg_table.
In case of the most DMA-mapping implementations exchanging those two
entries or using nents for all loops on the scatterlist is harmless,
because they both have the same value. There exists however a DMA-mapping
implementations, for which such incorrect usage breaks things. The nents
returned by dma_map_sg() might be lower than the nents passed as its
parameter and this is perfectly fine. DMA framework or IOMMU is allowed
to join consecutive chunks while mapping if such operation is supported
by the underlying HW (bus, bridge, IOMMU, etc). Example of the case
where dma_map_sg() might return 1 'DMA' chunk for the 4 'physical' pages
is described here [2]
The DMA-mapping framework documentation [3] states that dma_map_sg()
returns the numer of the created entries in the DMA address space.
However the subsequent calls to dma_sync_sg_for_{device,cpu} and
dma_unmap_sg must be called with the original number of entries passed to
dma_map_sg. The common pattern in DRM drivers were to assign the
dma_map_sg() return value to sg_table->nents and use that value for
the subsequent calls to dma_sync_sg_* or dma_unmap_sg functions. Also
the code iterated over nents times to access the pages stored in the
processed scatterlist, while it should use orig_nents as the numer of
the page entries.
I've tried to identify all such incorrect usage of sg_table->nents and
this is a result of my research. It looks that the incorrect pattern has
been copied over the many drivers mainly in the DRM subsystem. Too bad in
most cases it even worked correctly if the system used a simple, linear
DMA-mapping implementation, for which swapping nents and orig_nents
doesn't make any difference. To avoid similar issues in the future, I've
introduced a common wrappers for DMA-mapping calls, which operate directly
on the sg_table objects. I've also added wrappers for iterating over the
scatterlists stored in the sg_table objects and applied them where
possible. This, together with some common DRM prime helpers, allowed me
to almost get rid of all nents/orig_nents usage in the drivers. I hope
that such change makes the code robust, easier to follow and copy/paste
safe.
The biggest TODO is DRM/i915 driver and I don't feel brave enough to fix
it fully. The driver creatively uses sg_table->orig_nents to store the
size of the allocate scatterlist and ignores the number of the entries
returned by dma_map_sg function. In this patchset I only fixed the
sg_table objects exported by dmabuf related functions. I hope that I
didn't break anything there.
Patches are based on top of Linux next-20200825. The required changes to
DMA-mapping framework has been already merged to v5.8-rc1.
I would like ask for merging of the 1-27 patches via DRM misc tree.
Best regards,
Marek Szyprowski
References:
[1] https://lkml.org/lkml/2020/3/27/555
[2] https://lkml.org/lkml/2020/3/29/65
[3] Documentation/DMA-API-HOWTO.txt
[4] https://lore.kernel.org/linux-iommu/20200512121931.GD20393@lst.de/T/#ma18c9…
Changelog:
v9:
- rebased onto Linux next-20200825, which is based on v5.9-rc2; fixed conflicts
- dropped merged patches
v8:
- rapidio: fixed issues pointed by kbuilt test robot (use of uninitialized
variable
- vb2: rebased after recent changes in the code
v7: https://lore.kernel.org/linux-iommu/20200619103636.11974-1-m.szyprowski@sam…
- changed DMA page interators to standard DMA SG iterators in drm/prime and
videobuf2-dma-contig as suggested by Robin Murphy
- fixed build issues
v6: https://lore.kernel.org/linux-iommu/20200618153956.29558-1-m.szyprowski@sam…
- rebased onto Linux next-20200618, which is based on v5.8-rc1; fixed conflicts
v5: https://lore.kernel.org/linux-iommu/20200513132114.6046-1-m.szyprowski@sams…
- fixed some minor style issues and typos
- fixed lack of the attrs argument in ion, dmabuf, rapidio, fastrpc and
vfio patches
v4: https://lore.kernel.org/linux-iommu/20200512121931.GD20393@lst.de/T/
- added for_each_sgtable_* wrappers and applied where possible
- added drm_prime_get_contiguous_size() and applied where possible
- applied drm_prime_sg_to_page_addr_arrays() where possible to remove page
extraction from sg_table objects
- added documentation for the introduced wrappers
- improved patches description a bit
v3: https://lore.kernel.org/dri-devel/20200505083926.28503-1-m.szyprowski@samsu…
- introduce dma_*_sgtable_* wrappers and use them in all patches
v2: https://lore.kernel.org/linux-iommu/c01c9766-9778-fd1f-f36e-2dc7bd376ba4@ar…
- dropped most of the changes to drm/i915
- added fixes for rcar-du, xen, media and ion
- fixed a few issues pointed by kbuild test robot
- added wide cc: list for each patch
v1: https://lore.kernel.org/linux-iommu/c01c9766-9778-fd1f-f36e-2dc7bd376ba4@ar…
- initial version
Patch summary:
Marek Szyprowski (32):
drm: prime: add common helper to check scatterlist contiguity
drm: prime: use sgtable iterators in
drm_prime_sg_to_page_addr_arrays()
drm: core: fix common struct sg_table related issues
drm: armada: fix common struct sg_table related issues
drm: etnaviv: fix common struct sg_table related issues
drm: exynos: use common helper for a scatterlist contiguity check
drm: exynos: fix common struct sg_table related issues
drm: i915: fix common struct sg_table related issues
drm: lima: fix common struct sg_table related issues
drm: mediatek: use common helper for a scatterlist contiguity check
drm: mediatek: use common helper for extracting pages array
drm: msm: fix common struct sg_table related issues
drm: omapdrm: use common helper for extracting pages array
drm: omapdrm: fix common struct sg_table related issues
drm: panfrost: fix common struct sg_table related issues
drm: rockchip: use common helper for a scatterlist contiguity check
drm: rockchip: fix common struct sg_table related issues
drm: tegra: fix common struct sg_table related issues
drm: v3d: fix common struct sg_table related issues
drm: virtio: fix common struct sg_table related issues
drm: vmwgfx: fix common struct sg_table related issues
drm: xen: fix common struct sg_table related issues
xen: gntdev: fix common struct sg_table related issues
drm: host1x: fix common struct sg_table related issues
drm: rcar-du: fix common struct sg_table related issues
dmabuf: fix common struct sg_table related issues
staging: tegra-vde: fix common struct sg_table related issues
misc: fastrpc: fix common struct sg_table related issues
rapidio: fix common struct sg_table related issues
samples: vfio-mdev/mbochs: fix common struct sg_table related issues
media: pci: fix common ALSA DMA-mapping related codes
videobuf2: use sgtable-based scatterlist wrappers
drivers/dma-buf/heaps/heap-helpers.c | 13 ++-
drivers/dma-buf/udmabuf.c | 7 +-
drivers/gpu/drm/armada/armada_gem.c | 12 +--
drivers/gpu/drm/drm_cache.c | 2 +-
drivers/gpu/drm/drm_gem_cma_helper.c | 23 +----
drivers/gpu/drm/drm_gem_shmem_helper.c | 14 ++-
drivers/gpu/drm/drm_prime.c | 91 +++++++++++--------
drivers/gpu/drm/etnaviv/etnaviv_gem.c | 12 +--
drivers/gpu/drm/etnaviv/etnaviv_mmu.c | 13 +--
drivers/gpu/drm/exynos/exynos_drm_g2d.c | 10 +-
drivers/gpu/drm/exynos/exynos_drm_gem.c | 23 +----
drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c | 11 +--
.../gpu/drm/i915/gem/selftests/mock_dmabuf.c | 7 +-
drivers/gpu/drm/lima/lima_gem.c | 11 ++-
drivers/gpu/drm/lima/lima_vm.c | 5 +-
drivers/gpu/drm/mediatek/mtk_drm_gem.c | 37 ++------
drivers/gpu/drm/msm/msm_gem.c | 13 +--
drivers/gpu/drm/msm/msm_gpummu.c | 14 ++-
drivers/gpu/drm/msm/msm_iommu.c | 2 +-
drivers/gpu/drm/omapdrm/omap_gem.c | 20 ++--
drivers/gpu/drm/panfrost/panfrost_gem.c | 4 +-
drivers/gpu/drm/panfrost/panfrost_mmu.c | 7 +-
drivers/gpu/drm/rcar-du/rcar_du_vsp.c | 3 +-
drivers/gpu/drm/rockchip/rockchip_drm_gem.c | 42 +++------
drivers/gpu/drm/tegra/gem.c | 27 ++----
drivers/gpu/drm/tegra/plane.c | 15 +--
drivers/gpu/drm/v3d/v3d_mmu.c | 13 ++-
drivers/gpu/drm/virtio/virtgpu_object.c | 36 +++++---
drivers/gpu/drm/virtio/virtgpu_vq.c | 12 +--
drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c | 17 +---
drivers/gpu/drm/xen/xen_drm_front_gem.c | 2 +-
drivers/gpu/host1x/job.c | 22 ++---
.../common/videobuf2/videobuf2-dma-contig.c | 34 +++----
.../media/common/videobuf2/videobuf2-dma-sg.c | 32 +++----
.../common/videobuf2/videobuf2-vmalloc.c | 12 +--
drivers/media/pci/cx23885/cx23885-alsa.c | 2 +-
drivers/media/pci/cx25821/cx25821-alsa.c | 2 +-
drivers/media/pci/cx88/cx88-alsa.c | 2 +-
drivers/media/pci/saa7134/saa7134-alsa.c | 2 +-
drivers/media/platform/vsp1/vsp1_drm.c | 8 +-
drivers/misc/fastrpc.c | 4 +-
drivers/rapidio/devices/rio_mport_cdev.c | 11 +--
drivers/staging/media/tegra-vde/iommu.c | 4 +-
drivers/xen/gntdev-dmabuf.c | 13 ++-
include/drm/drm_prime.h | 2 +
samples/vfio-mdev/mbochs.c | 3 +-
46 files changed, 273 insertions(+), 398 deletions(-)
--
2.17.1
On Thu, Aug 27, 2020 at 10:31:41PM +0530, Amit Pundir wrote:
> On Thu, 27 Aug 2020 at 21:34, Greg Kroah-Hartman
> <gregkh(a)linuxfoundation.org> wrote:
> >
> > On Thu, Aug 27, 2020 at 09:31:27AM -0400, Laura Abbott wrote:
> > > On 8/27/20 8:36 AM, Greg Kroah-Hartman wrote:
> > > > The ION android code has long been marked to be removed, now that we
> > > > dma-buf support merged into the real part of the kernel.
> > > >
> > > > It was thought that we could wait to remove the ion kernel at a later
> > > > time, but as the out-of-tree Android fork of the ion code has diverged
> > > > quite a bit, and any Android device using the ion interface uses that
> > > > forked version and not this in-tree version, the in-tree copy of the
> > > > code is abandonded and not used by anyone.
> > > >
> > > > Combine this abandoned codebase with the need to make changes to it in
> > > > order to keep the kernel building properly, which then causes merge
> > > > issues when merging those changes into the out-of-tree Android code, and
> > > > you end up with two different groups of people (the in-kernel-tree
> > > > developers, and the Android kernel developers) who are both annoyed at
> > > > the current situation. Because of this problem, just drop the in-kernel
> > > > copy of the ion code now, as it's not used, and is only causing problems
> > > > for everyone involved.
> > > >
> > > > Cc: "Arve Hjønnevåg" <arve(a)android.com>
> > > > Cc: "Christian König" <christian.koenig(a)amd.com>
> > > > Cc: Christian Brauner <christian(a)brauner.io>
> > > > Cc: Christoph Hellwig <hch(a)infradead.org>
> > > > Cc: Hridya Valsaraju <hridya(a)google.com>
> > > > Cc: Joel Fernandes <joel(a)joelfernandes.org>
> > > > Cc: John Stultz <john.stultz(a)linaro.org>
> > > > Cc: Laura Abbott <laura(a)labbott.name>
> > > > Cc: Martijn Coenen <maco(a)android.com>
> > > > Cc: Shuah Khan <shuah(a)kernel.org>
> > > > Cc: Sumit Semwal <sumit.semwal(a)linaro.org>
> > > > Cc: Suren Baghdasaryan <surenb(a)google.com>
> > > > Cc: Todd Kjos <tkjos(a)android.com>
> > > > Cc: devel(a)driverdev.osuosl.org
> > > > Cc: dri-devel(a)lists.freedesktop.org
> > > > Cc: linaro-mm-sig(a)lists.linaro.org
> > > > Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
> > >
> > > We discussed this at the Android MC on Monday and the plan was to
> > > remove it after the next LTS release.
> >
> > I know it was discussed, my point is that it is actually causing
> > problems now (with developers who want to change the internal kernel api
> > hitting issues, and newbies trying to clean up code in ways that isn't
> > exactly optimal wasting maintainer cycles), and that anyone who uses
> > this code, is not actually using this version of the code. Everyone who
> > relies on ion right now, is using the version that is in the Android
> > common kernel tree, which has diverged from this in-kernel way quite a
> > bit now for the reason that we didn't want to take any of those new
> > features in the in-kernel version.
> >
> > So this is a problem that we have caused by just wanting to wait, no one
> > is using this code, combined with it causing problems for the upstream
> > developers.
> >
> > There is nothing "magic" about the last kernel of the year that requires
> > this code to sit here until then. At that point in time, all users
> > will, again, be using the forked Android kernel version, and if we
> > delete this now here, that fork can remain just fine, with the added
> > benifit of it reducing developer workloads here in-kernel.
> >
> > So why wait?
>
> Hi,
>
> I don't know what is the right thing to do here. I just want to
> highlight that AOSP's audio (codec2) HAL depends on the ION system
> heap and it will break AOSP for people who boot mainline on their
> devices, even for just testing purpose like we do in Linaro. Right now
> we need only 1 (Android specific out-of-tree) patch to boot AOSP with
> mainline and Sumit is already trying to upstream that vma naming
> patch. Removal of in-kernel ION, will just add more to that delta.
As AOSP will continue to rely on ION after December of this year, all
you are doing is postponing the inevitable a few more months.
Push back on the Android team to fix up the code to not use ION, they
know this needs to happen.
thanks,
greg k-h
On Thu, Aug 27, 2020 at 09:31:27AM -0400, Laura Abbott wrote:
> On 8/27/20 8:36 AM, Greg Kroah-Hartman wrote:
> > The ION android code has long been marked to be removed, now that we
> > dma-buf support merged into the real part of the kernel.
> >
> > It was thought that we could wait to remove the ion kernel at a later
> > time, but as the out-of-tree Android fork of the ion code has diverged
> > quite a bit, and any Android device using the ion interface uses that
> > forked version and not this in-tree version, the in-tree copy of the
> > code is abandonded and not used by anyone.
> >
> > Combine this abandoned codebase with the need to make changes to it in
> > order to keep the kernel building properly, which then causes merge
> > issues when merging those changes into the out-of-tree Android code, and
> > you end up with two different groups of people (the in-kernel-tree
> > developers, and the Android kernel developers) who are both annoyed at
> > the current situation. Because of this problem, just drop the in-kernel
> > copy of the ion code now, as it's not used, and is only causing problems
> > for everyone involved.
> >
> > Cc: "Arve Hjønnevåg" <arve(a)android.com>
> > Cc: "Christian König" <christian.koenig(a)amd.com>
> > Cc: Christian Brauner <christian(a)brauner.io>
> > Cc: Christoph Hellwig <hch(a)infradead.org>
> > Cc: Hridya Valsaraju <hridya(a)google.com>
> > Cc: Joel Fernandes <joel(a)joelfernandes.org>
> > Cc: John Stultz <john.stultz(a)linaro.org>
> > Cc: Laura Abbott <laura(a)labbott.name>
> > Cc: Martijn Coenen <maco(a)android.com>
> > Cc: Shuah Khan <shuah(a)kernel.org>
> > Cc: Sumit Semwal <sumit.semwal(a)linaro.org>
> > Cc: Suren Baghdasaryan <surenb(a)google.com>
> > Cc: Todd Kjos <tkjos(a)android.com>
> > Cc: devel(a)driverdev.osuosl.org
> > Cc: dri-devel(a)lists.freedesktop.org
> > Cc: linaro-mm-sig(a)lists.linaro.org
> > Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
>
> We discussed this at the Android MC on Monday and the plan was to
> remove it after the next LTS release.
I know it was discussed, my point is that it is actually causing
problems now (with developers who want to change the internal kernel api
hitting issues, and newbies trying to clean up code in ways that isn't
exactly optimal wasting maintainer cycles), and that anyone who uses
this code, is not actually using this version of the code. Everyone who
relies on ion right now, is using the version that is in the Android
common kernel tree, which has diverged from this in-kernel way quite a
bit now for the reason that we didn't want to take any of those new
features in the in-kernel version.
So this is a problem that we have caused by just wanting to wait, no one
is using this code, combined with it causing problems for the upstream
developers.
There is nothing "magic" about the last kernel of the year that requires
this code to sit here until then. At that point in time, all users
will, again, be using the forked Android kernel version, and if we
delete this now here, that fork can remain just fine, with the added
benifit of it reducing developer workloads here in-kernel.
So why wait?
thanks,
greg k-h
On Fri, Aug 21, 2020 at 2:14 AM Kunihiko Hayashi
<hayashi.kunihiko(a)socionext.com> wrote:
>
> On 2020/08/01 4:38, John Stultz wrote:
> > On Fri, Jul 31, 2020 at 2:32 AM Kunihiko Hayashi
> > <hayashi.kunihiko(a)socionext.com> wrote:
> >> On 2020/07/29 4:17, John Stultz wrote:
> >>> Do you have a upstream driver that you plan to make use this new call?
> >>
> >> Unfortunately I don't have an upstream driver using this call.
> >>
> >> This call is called from dma-buf heaps "importer" or "customer",
> >> and I only made an example (do nothing) importer driver
> >> to test the call.
> >>
> >>> We want to have in-tree users of code added.
> >>
> >> I think this is a generic way to use non-default CMA heaps, however,
> >> we need in-tree "importer" drivers to want to use non-default CMA heaps.
> >> I don't find it from now.
> >>
> >
> > Yea, I and again, I do agree this is functionality that will be
> > needed. But we'll need to wait for a user (camera driver, etc which
> > would utilize the reserved cma region) before we can merge it
> > upstream. :( Do let me know if you have an out of tree driver that
> > would make use of it, and we can see what can be done to help upstream
> > things.
>
> Sorry for late.
> Before I prepare or find a user driver as "importer",
> I think something is different in this patch.
>
> This patch makes it possible to treat non-default CMA connected to
> "importer" device with memory-region as dma-buf heaps.
>
> However, the allocated memory from this dma-buf heaps can be used
> for "any" devices, and the "importer" can treat memories from other
> dma-buf heaps.
>
> So, the "importer" and the non-default CMA aren't directly related,
> and I think an "exporter" for the non-default CMA should be enabled.
>
> In paticular, the kernel initializer (as an "exporter") calls
> dma_heap_add_cma() for all CMAs defined in Devicetree, and
> the device files associated with each CMA appear under "/dev/dma_heap/".
> For example:
>
> /dev/dma_heap/linux,cma@10000000
> /dev/dma_heap/linux,cma@11000000
> /dev/dma_heap/linux,cma@12000000
> ...
>
> All of these device files can be fairly allocated to any "importer" device.
>
> Actually I think that the kernel should executes dma_heap_add_cma()
> for ALL defined reserved-memory nodes.
>
> If this idea hasn't been discussed yet and this is reasonable,
> I'll prepare RFC patches.
So yes! An earlier version of the CMA heap I submitted did add all CMA
regions as accessible heaps as you propose here.
However, the concern was that in some cases, those regions are device
specific reserved memory that the driver is probably expecting to have
exclusive access. To allow (sufficiently privileged, or misconfigured)
userland to be able to allocate out of that seemed like it might cause
trouble, and instead we should have CMA regions explicitly exported.
There was some proposal to add a dt property to the reserved memory
section (similar to linux,cma-default) and use that to do the
exporting, but other discussions seemed to prefer having drivers
export it explicitly in a fashion very similar to what your earlier
patch does. The only trouble is we just need an upstream driver to add
such a call in the series before we can merge it.
thanks
-john
These patch series to introduce a new dma heap, chunk heap.
That heap is needed for special HW that requires bulk allocation of
fixed high order pages. For example, 64MB dma-buf pages are made up
to fixed order-4 pages * 1024.
The chunk heap uses alloc_pages_bulk to allocate high order page.
https://lore.kernel.org/linux-mm/20200814173131.2803002-1-minchan@kernel.org
The chunk heap is registered by device tree with alignment and memory node
of contiguous memory allocator(CMA). Alignment defines chunk page size.
For example, alignment 0x1_0000 means chunk page size is 64KB.
The phandle to memory node indicates contiguous memory allocator(CMA).
If device node doesn't have cma, the registration of chunk heap fails.
The patchset includes the following:
- export dma-heap API to register kernel module dma heap.
- add chunk heap implementation.
- document of device tree to register chunk heap
Hyesoo Yu (3):
dma-buf: add missing EXPORT_SYMBOL_GPL() for dma heaps
dma-buf: heaps: add chunk heap to dmabuf heaps
dma-heap: Devicetree binding for chunk heap
.../devicetree/bindings/dma-buf/chunk_heap.yaml | 46 +++++
drivers/dma-buf/dma-heap.c | 2 +
drivers/dma-buf/heaps/Kconfig | 9 +
drivers/dma-buf/heaps/Makefile | 1 +
drivers/dma-buf/heaps/chunk_heap.c | 222 +++++++++++++++++++++
drivers/dma-buf/heaps/heap-helpers.c | 2 +
6 files changed, 282 insertions(+)
create mode 100644 Documentation/devicetree/bindings/dma-buf/chunk_heap.yaml
create mode 100644 drivers/dma-buf/heaps/chunk_heap.c
--
2.7.4
Fix W=1 compile warnings (invalid kerneldoc):
drivers/dma-buf/dma-buf.c:328: warning: Function parameter or member 'dmabuf' not described in 'dma_buf_set_name'
Signed-off-by: Krzysztof Kozlowski <krzk(a)kernel.org>
---
drivers/dma-buf/dma-buf.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
index 1699a8e309ef..58564d82a3a2 100644
--- a/drivers/dma-buf/dma-buf.c
+++ b/drivers/dma-buf/dma-buf.c
@@ -316,9 +316,9 @@ static __poll_t dma_buf_poll(struct file *file, poll_table *poll)
* name of the dma-buf if the same piece of memory is used for multiple
* purpose between different devices.
*
- * @dmabuf [in] dmabuf buffer that will be renamed.
- * @buf: [in] A piece of userspace memory that contains the name of
- * the dma-buf.
+ * @dmabuf: [in] dmabuf buffer that will be renamed.
+ * @buf: [in] A piece of userspace memory that contains the name of
+ * the dma-buf.
*
* Returns 0 on success. If the dma-buf buffer is already attached to
* devices, return -EBUSY.
--
2.17.1
This patchset implements the current proposal for virtio cross-device
resource sharing [1]. It will be used to import virtio resources into
the virtio-video driver currently under discussion [2]. The patch
under consideration to add support in the virtio-video driver is [3].
It uses the APIs from v3 of this series, but the changes to update it
are relatively minor.
This patchset adds a new flavor of dma-bufs that supports querying the
underlying virtio object UUID, as well as adding support for exporting
resources from virtgpu.
[1] https://markmail.org/thread/2ypjt5cfeu3m6lxu
[2] https://markmail.org/thread/p5d3k566srtdtute
[3] https://markmail.org/thread/j4xlqaaim266qpks
v6 -> v7 changes:
- Fix most strict checkpatch comments
David Stevens (3):
virtio: add dma-buf support for exported objects
virtio-gpu: add VIRTIO_GPU_F_RESOURCE_UUID feature
drm/virtio: Support virtgpu exported resources
drivers/gpu/drm/virtio/virtgpu_drv.c | 3 +
drivers/gpu/drm/virtio/virtgpu_drv.h | 21 ++++++
drivers/gpu/drm/virtio/virtgpu_kms.c | 4 ++
drivers/gpu/drm/virtio/virtgpu_prime.c | 96 +++++++++++++++++++++++++-
drivers/gpu/drm/virtio/virtgpu_vq.c | 55 +++++++++++++++
drivers/virtio/Makefile | 2 +-
drivers/virtio/virtio.c | 6 ++
drivers/virtio/virtio_dma_buf.c | 85 +++++++++++++++++++++++
include/linux/virtio.h | 1 +
include/linux/virtio_dma_buf.h | 37 ++++++++++
include/uapi/linux/virtio_gpu.h | 19 +++++
11 files changed, 325 insertions(+), 4 deletions(-)
create mode 100644 drivers/virtio/virtio_dma_buf.c
create mode 100644 include/linux/virtio_dma_buf.h
--
2.28.0.220.ged08abb693-goog
This patchset implements the current proposal for virtio cross-device
resource sharing [1]. It will be used to import virtio resources into
the virtio-video driver currently under discussion [2]. The patch
under consideration to add support in the virtio-video driver is [3].
It uses the APIs from v3 of this series, but the changes to update it
are relatively minor.
This patchset adds a new flavor of dma-bufs that supports querying the
underlying virtio object UUID, as well as adding support for exporting
resources from virtgpu.
[1] https://markmail.org/thread/2ypjt5cfeu3m6lxu
[2] https://markmail.org/thread/p5d3k566srtdtute
[3] https://markmail.org/thread/j4xlqaaim266qpks
v5 -> v6 changes:
- Fix >80 character comment
David Stevens (3):
virtio: add dma-buf support for exported objects
virtio-gpu: add VIRTIO_GPU_F_RESOURCE_UUID feature
drm/virtio: Support virtgpu exported resources
drivers/gpu/drm/virtio/virtgpu_drv.c | 3 +
drivers/gpu/drm/virtio/virtgpu_drv.h | 20 ++++++
drivers/gpu/drm/virtio/virtgpu_kms.c | 4 ++
drivers/gpu/drm/virtio/virtgpu_prime.c | 96 +++++++++++++++++++++++++-
drivers/gpu/drm/virtio/virtgpu_vq.c | 55 +++++++++++++++
drivers/virtio/Makefile | 2 +-
drivers/virtio/virtio.c | 6 ++
drivers/virtio/virtio_dma_buf.c | 82 ++++++++++++++++++++++
include/linux/virtio.h | 1 +
include/linux/virtio_dma_buf.h | 37 ++++++++++
include/uapi/linux/virtio_gpu.h | 19 +++++
11 files changed, 321 insertions(+), 4 deletions(-)
create mode 100644 drivers/virtio/virtio_dma_buf.c
create mode 100644 include/linux/virtio_dma_buf.h
--
2.28.0.220.ged08abb693-goog
Unless there are any remaining objections to these patches, what are
the next steps towards getting these merged? Sorry, I'm not familiar
with the workflow for contributing patches to Linux.
Thanks,
David
On Tue, Jun 9, 2020 at 6:53 PM Michael S. Tsirkin <mst(a)redhat.com> wrote:
>
> On Tue, Jun 09, 2020 at 10:25:15AM +0900, David Stevens wrote:
> > This patchset implements the current proposal for virtio cross-device
> > resource sharing [1]. It will be used to import virtio resources into
> > the virtio-video driver currently under discussion [2]. The patch
> > under consideration to add support in the virtio-video driver is [3].
> > It uses the APIs from v3 of this series, but the changes to update it
> > are relatively minor.
> >
> > This patchset adds a new flavor of dma-bufs that supports querying the
> > underlying virtio object UUID, as well as adding support for exporting
> > resources from virtgpu.
>
> Gerd, David, if possible, please test this in configuration with
> virtual VTD enabled but with iommu_platform=off
> to make sure we didn't break this config.
>
>
> Besides that, for virtio parts:
>
> Acked-by: Michael S. Tsirkin <mst(a)redhat.com>
>
>
> > [1] https://markmail.org/thread/2ypjt5cfeu3m6lxu
> > [2] https://markmail.org/thread/p5d3k566srtdtute
> > [3] https://markmail.org/thread/j4xlqaaim266qpks
> >
> > v4 -> v5 changes:
> > - Remove virtio_dma_buf_export_info.
> >
> > David Stevens (3):
> > virtio: add dma-buf support for exported objects
> > virtio-gpu: add VIRTIO_GPU_F_RESOURCE_UUID feature
> > drm/virtio: Support virtgpu exported resources
> >
> > drivers/gpu/drm/virtio/virtgpu_drv.c | 3 +
> > drivers/gpu/drm/virtio/virtgpu_drv.h | 20 ++++++
> > drivers/gpu/drm/virtio/virtgpu_kms.c | 4 ++
> > drivers/gpu/drm/virtio/virtgpu_prime.c | 96 +++++++++++++++++++++++++-
> > drivers/gpu/drm/virtio/virtgpu_vq.c | 55 +++++++++++++++
> > drivers/virtio/Makefile | 2 +-
> > drivers/virtio/virtio.c | 6 ++
> > drivers/virtio/virtio_dma_buf.c | 82 ++++++++++++++++++++++
> > include/linux/virtio.h | 1 +
> > include/linux/virtio_dma_buf.h | 37 ++++++++++
> > include/uapi/linux/virtio_gpu.h | 19 +++++
> > 11 files changed, 321 insertions(+), 4 deletions(-)
> > create mode 100644 drivers/virtio/virtio_dma_buf.c
> > create mode 100644 include/linux/virtio_dma_buf.h
> >
> > --
> > 2.27.0.278.ge193c7cf3a9-goog
>
>
> ---------------------------------------------------------------------
> To unsubscribe, e-mail: virtio-dev-unsubscribe(a)lists.oasis-open.org
> For additional commands, e-mail: virtio-dev-help(a)lists.oasis-open.org
>
This is an RFC because I'm still trying to grok the correct behavior.
Consider a dma_fence_array created two two fence and signal_on_any is true.
A reference to dma_fence_array is taken for each waiting fence.
When the client calls dma_fence_wait() only one of the fences is signaled.
The client returns successfully from the wait and puts it's reference to
the array fence but the array fence still remains because of the remaining
un-signaled fence.
Now consider that the unsignaled fence is signaled while the timeline is being
destroyed much later. The timeline destroy calls dma_fence_signal_locked(). The
following sequence occurs:
1) dma_fence_array_cb_func is called
2) array->num_pending is 0 (because it was set to 1 due to signal_on_any) so the
callback function calls dma_fence_put() instead of triggering the irq work
3) The array fence is released which in turn puts the lingering fence which is
then released
4) deadlock with the timeline
I think that we can fix this with the attached patch. Once the fence is
signaled signaling it again in the irq worker shouldn't hurt anything. The only
gotcha might be how the error is propagated - I wasn't quite sure the intent of
clearing it only after getting to the irq worker.
Signed-off-by: Jordan Crouse <jcrouse(a)codeaurora.org>
---
drivers/dma-buf/dma-fence-array.c | 10 ++++------
1 file changed, 4 insertions(+), 6 deletions(-)
diff --git a/drivers/dma-buf/dma-fence-array.c b/drivers/dma-buf/dma-fence-array.c
index d3fbd950be94..b8829b024255 100644
--- a/drivers/dma-buf/dma-fence-array.c
+++ b/drivers/dma-buf/dma-fence-array.c
@@ -46,8 +46,6 @@ static void irq_dma_fence_array_work(struct irq_work *wrk)
{
struct dma_fence_array *array = container_of(wrk, typeof(*array), work);
- dma_fence_array_clear_pending_error(array);
-
dma_fence_signal(&array->base);
dma_fence_put(&array->base);
}
@@ -61,10 +59,10 @@ static void dma_fence_array_cb_func(struct dma_fence *f,
dma_fence_array_set_pending_error(array, f->error);
- if (atomic_dec_and_test(&array->num_pending))
- irq_work_queue(&array->work);
- else
- dma_fence_put(&array->base);
+ if (!atomic_dec_and_test(&array->num_pending))
+ dma_fence_array_set_pending_error(array, f->error);
+
+ irq_work_queue(&array->work);
}
static bool dma_fence_array_enable_signaling(struct dma_fence *fence)
--
2.25.1
With commit 72b6ede73623 ("dma-buf.rst: Document why indefinite fences are
a bad idea"), document generation warns:
Documentation/driver-api/dma-buf.rst:182: \
WARNING: Title underline too short.
Repair length of title underline to remove warning.
Fixes: 72b6ede73623 ("dma-buf.rst: Document why indefinite fences are a bad idea")
Signed-off-by: Lukas Bulwahn <lukas.bulwahn(a)gmail.com>
---
Daniel, please pick this minor non-urgent fix to your new documentation.
Documentation/driver-api/dma-buf.rst | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/Documentation/driver-api/dma-buf.rst b/Documentation/driver-api/dma-buf.rst
index 100bfd227265..13ea0cc0a3fa 100644
--- a/Documentation/driver-api/dma-buf.rst
+++ b/Documentation/driver-api/dma-buf.rst
@@ -179,7 +179,7 @@ DMA Fence uABI/Sync File
:internal:
Indefinite DMA Fences
-~~~~~~~~~~~~~~~~~~~~
+~~~~~~~~~~~~~~~~~~~~~
At various times &dma_fence with an indefinite time until dma_fence_wait()
finishes have been proposed. Examples include:
--
2.17.1
On Tue, Jul 21, 2020 at 06:16:47PM +0800, Xin He wrote:
> From: Qi Liu <liuqi.16(a)bytedance.com>
>
> We should put the reference count of the fence after calling
> virtio_gpu_cmd_submit(). So add the missing dma_fence_put().
> virtio_gpu_cmd_submit(vgdev, buf, exbuf->size,
> vfpriv->ctx_id, buflist, out_fence);
> + dma_fence_put(&out_fence->f);
> virtio_gpu_notify(vgdev);
Pushed to drm-misc-fixes.
thanks,
Gerd
On Mon, Aug 03, 2020 at 08:30:59PM -0400, Joel Fernandes wrote:
> On Mon, Aug 3, 2020 at 10:47 AM 'Kalesh Singh' via kernel-team
> <kernel-team(a)android.com> wrote:
> >
> > Provides a per process hook for the acquisition of file descriptors,
> > despite the method used to obtain the descriptor.
> >
>
> Hi,
> So apart from all of the comments received, I think it is hard to
> understand what the problem is, what the front-end looks like etc.
> Your commit message is 1 line only.
>
> I do remember some of the challenges discussed before, but it would
> describe the problem in the commit message in detail and then discuss
> why this solution is fit. Please read submitting-patches.rst
> especially "2) Describe your changes".
>
> thanks,
>
> - Joel
Thanks for the advice Joel :)
>
>
> > Signed-off-by: Kalesh Singh <kaleshsingh(a)google.com>
> > ---
> > Documentation/filesystems/vfs.rst | 5 +++++
> > fs/file.c | 3 +++
> > include/linux/fs.h | 1 +
> > 3 files changed, 9 insertions(+)
> >
> > diff --git a/Documentation/filesystems/vfs.rst b/Documentation/filesystems/vfs.rst
> > index ed17771c212b..95b30142c8d9 100644
> > --- a/Documentation/filesystems/vfs.rst
> > +++ b/Documentation/filesystems/vfs.rst
> > @@ -1123,6 +1123,11 @@ otherwise noted.
> > ``fadvise``
> > possibly called by the fadvise64() system call.
> >
> > +``fd_install``
> > + called by the VFS when a file descriptor is installed in the
> > + process's file descriptor table, regardless how the file descriptor
> > + was acquired -- be it via the open syscall, received over IPC, etc.
> > +
> > Note that the file operations are implemented by the specific
> > filesystem in which the inode resides. When opening a device node
> > (character or block special) most filesystems will call special
> > diff --git a/fs/file.c b/fs/file.c
> > index abb8b7081d7a..f5db8622b851 100644
> > --- a/fs/file.c
> > +++ b/fs/file.c
> > @@ -616,6 +616,9 @@ void __fd_install(struct files_struct *files, unsigned int fd,
> > void fd_install(unsigned int fd, struct file *file)
> > {
> > __fd_install(current->files, fd, file);
> > +
> > + if (file->f_op->fd_install)
> > + file->f_op->fd_install(fd, file);
> > }
> >
> > EXPORT_SYMBOL(fd_install);
> > diff --git a/include/linux/fs.h b/include/linux/fs.h
> > index f5abba86107d..b976fbe8c902 100644
> > --- a/include/linux/fs.h
> > +++ b/include/linux/fs.h
> > @@ -1864,6 +1864,7 @@ struct file_operations {
> > struct file *file_out, loff_t pos_out,
> > loff_t len, unsigned int remap_flags);
> > int (*fadvise)(struct file *, loff_t, loff_t, int);
> > + void (*fd_install)(int, struct file *);
> > } __randomize_layout;
> >
> > struct inode_operations {
> > --
> > 2.28.0.163.g6104cc2f0b6-goog
> >
> > --
> > To unsubscribe from this group and stop receiving emails from it, send an email to kernel-team+unsubscribe(a)android.com.
> >
On Thu, Jul 16, 2020 at 6:10 PM Kunihiko Hayashi
<hayashi.kunihiko(a)socionext.com> wrote:
>
> Current dma-buf heaps can handle only default CMA. This introduces
> dma_heap_add_cma() function to attach CMA heaps that belongs to a device.
>
> At first, the driver calls of_reserved_mem_device_init() to set
> memory-region property associated with reserved-memory defined as CMA
> to the device. And when the driver calls this dma_heap_add_cma(),
> the CMA will be added to dma-buf heaps.
>
> For example, prepare CMA node named "linux,cma@10000000" and
> specify the node for memory-region property. After the above calls
> in the driver, a device file "/dev/dma_heap/linux,cma@10000000"
> associated with the CMA become available as dma-buf heaps.
>
> Signed-off-by: Kunihiko Hayashi <hayashi.kunihiko(a)socionext.com>
> ---
> drivers/dma-buf/heaps/cma_heap.c | 12 ++++++++++++
> include/linux/dma-heap.h | 9 +++++++++
> 2 files changed, 21 insertions(+)
Hey! Sorry for the slow response on this! I just realized I never replied!
> diff --git a/drivers/dma-buf/heaps/cma_heap.c b/drivers/dma-buf/heaps/cma_heap.c
> index 626cf7f..5d2442e 100644
> --- a/drivers/dma-buf/heaps/cma_heap.c
> +++ b/drivers/dma-buf/heaps/cma_heap.c
> @@ -162,6 +162,18 @@ static int __add_cma_heap(struct cma *cma, void *data)
> return 0;
> }
>
> +/* add device CMA heap to dmabuf heaps */
> +int dma_heap_add_cma(struct device *dev)
> +{
> + struct cma *cma = dev_get_cma_area(dev);
> +
> + if (!cma)
> + return -ENOMEM;
> +
> + return __add_cma_heap(cma, NULL);
> +}
> +EXPORT_SYMBOL_GPL(dma_heap_add_cma);
> +
> static int add_default_cma_heap(void)
> {
> struct cma *default_cma = dev_get_cma_area(NULL);
> diff --git a/include/linux/dma-heap.h b/include/linux/dma-heap.h
> index 454e354..16bec7d 100644
> --- a/include/linux/dma-heap.h
> +++ b/include/linux/dma-heap.h
> @@ -56,4 +56,13 @@ void *dma_heap_get_drvdata(struct dma_heap *heap);
> */
> struct dma_heap *dma_heap_add(const struct dma_heap_export_info *exp_info);
>
> +#ifdef CONFIG_DMABUF_HEAPS_CMA
> +/**
> + * dma_heap_add_cma - adds a device CMA heap to dmabuf heaps
> + * @dev: device with a CMA heap to register
> + */
> +int dma_heap_add_cma(struct device *dev);
> +
> +#endif /* CONFIG_DMABUF_HEAPS_CMA */
> +
> #endif /* _DMA_HEAPS_H */
> --
> 2.7.4
Looks sane to me. Being able to expose different multiple CMA heaps
is needed, and I agree this way (as opposed to my earlier dts
appraoch) is probably the best approach. The only bit I'm so-so on is
adding the CMA heap specific call in the dma-heap.h, but at the same
time I can't justify adding a whole new header for a single function.
Do you have a upstream driver that you plan to make use this new call?
We want to have in-tree users of code added.
But if so, feel free to add my:
Acked-by: John Stultz <john.stultz(a)linaro.org>
To this patch when you submit the driver changes.
thanks
-john
Trying to grab dma_resv_lock while in commit_tail before we've done
all the code that leads to the eventual signalling of the vblank event
(which can be a dma_fence) is deadlock-y. Don't do that.
Here the solution is easy because just grabbing locks to read
something races anyway. We don't need to bother, READ_ONCE is
equivalent. And avoids the locking issue.
v2: Also take into account tmz_surface boolean, plus just delete the
old code.
Cc: linux-media(a)vger.kernel.org
Cc: linaro-mm-sig(a)lists.linaro.org
Cc: linux-rdma(a)vger.kernel.org
Cc: amd-gfx(a)lists.freedesktop.org
Cc: intel-gfx(a)lists.freedesktop.org
Cc: Chris Wilson <chris(a)chris-wilson.co.uk>
Cc: Maarten Lankhorst <maarten.lankhorst(a)linux.intel.com>
Cc: Christian König <christian.koenig(a)amd.com>
Signed-off-by: Daniel Vetter <daniel.vetter(a)intel.com>
---
DC-folks, I think this split out patch from my series here
https://lore.kernel.org/dri-devel/20200707201229.472834-1-daniel.vetter@ffw…
should be ready for review/merging. I fixed it up a bit so that it's not
just a gross hack :-)
Cheers, Daniel
---
.../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 19 ++++++-------------
1 file changed, 6 insertions(+), 13 deletions(-)
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 21ec64fe5527..a20b62b1f2ef 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -6959,20 +6959,13 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
DRM_ERROR("Waiting for fences timed out!");
/*
- * TODO This might fail and hence better not used, wait
- * explicitly on fences instead
- * and in general should be called for
- * blocking commit to as per framework helpers
+ * We cannot reserve buffers here, which means the normal flag
+ * access functions don't work. Paper over this with READ_ONCE,
+ * but maybe the flags are invariant enough that not even that
+ * would be needed.
*/
- r = amdgpu_bo_reserve(abo, true);
- if (unlikely(r != 0))
- DRM_ERROR("failed to reserve buffer before flip\n");
-
- amdgpu_bo_get_tiling_flags(abo, &tiling_flags);
-
- tmz_surface = amdgpu_bo_encrypted(abo);
-
- amdgpu_bo_unreserve(abo);
+ tiling_flags = READ_ONCE(abo->tiling_flags);
+ tmz_surface = READ_ONCE(abo->flags) & AMDGPU_GEM_CREATE_ENCRYPTED;
fill_dc_plane_info_and_addr(
dm->adev, new_plane_state, tiling_flags,
--
2.27.0
Fix W=1 compile warnings (invalid kerneldoc):
drivers/dma-buf/dma-buf.c:328: warning: Function parameter or member 'dmabuf' not described in 'dma_buf_set_name'
Signed-off-by: Krzysztof Kozlowski <krzk(a)kernel.org>
---
drivers/dma-buf/dma-buf.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
index 1699a8e309ef..58564d82a3a2 100644
--- a/drivers/dma-buf/dma-buf.c
+++ b/drivers/dma-buf/dma-buf.c
@@ -316,9 +316,9 @@ static __poll_t dma_buf_poll(struct file *file, poll_table *poll)
* name of the dma-buf if the same piece of memory is used for multiple
* purpose between different devices.
*
- * @dmabuf [in] dmabuf buffer that will be renamed.
- * @buf: [in] A piece of userspace memory that contains the name of
- * the dma-buf.
+ * @dmabuf: [in] dmabuf buffer that will be renamed.
+ * @buf: [in] A piece of userspace memory that contains the name of
+ * the dma-buf.
*
* Returns 0 on success. If the dma-buf buffer is already attached to
* devices, return -EBUSY.
--
2.17.1