The kms paths keep a persistent map active to read and compare the cursor buffer. These maps can race with each other in simple scenario where: a) buffer "a" mapped for update b) buffer "a" mapped for compare c) do the compare d) unmap "a" for compare e) update the cursor f) unmap "a" for update At step "e" the buffer has been unmapped and the read contents is bogus.
Prevent unmapping of active read buffers by simply keeping a count of how many paths have currently active maps and unmap only when the count reaches 0.
v2: Update doc strings
Fixes: 485d98d472d5 ("drm/vmwgfx: Add support for CursorMob and CursorBypass 4") Cc: Broadcom internal kernel review list bcm-kernel-feedback-list@broadcom.com Cc: dri-devel@lists.freedesktop.org Cc: stable@vger.kernel.org # v5.19+ Signed-off-by: Zack Rusin zack.rusin@broadcom.com --- drivers/gpu/drm/vmwgfx/vmwgfx_bo.c | 13 +++++++++++-- drivers/gpu/drm/vmwgfx/vmwgfx_bo.h | 3 +++ 2 files changed, 14 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c index f42ebc4a7c22..a0e433fbcba6 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c @@ -360,6 +360,8 @@ void *vmw_bo_map_and_cache_size(struct vmw_bo *vbo, size_t size) void *virtual; int ret;
+ atomic_inc(&vbo->map_count); + virtual = ttm_kmap_obj_virtual(&vbo->map, ¬_used); if (virtual) return virtual; @@ -383,11 +385,17 @@ void *vmw_bo_map_and_cache_size(struct vmw_bo *vbo, size_t size) */ void vmw_bo_unmap(struct vmw_bo *vbo) { + int map_count; + if (vbo->map.bo == NULL) return;
- ttm_bo_kunmap(&vbo->map); - vbo->map.bo = NULL; + map_count = atomic_dec_return(&vbo->map_count); + + if (!map_count) { + ttm_bo_kunmap(&vbo->map); + vbo->map.bo = NULL; + } }
@@ -421,6 +429,7 @@ static int vmw_bo_init(struct vmw_private *dev_priv, vmw_bo->tbo.priority = 3; vmw_bo->res_tree = RB_ROOT; xa_init(&vmw_bo->detached_resources); + atomic_set(&vmw_bo->map_count, 0);
params->size = ALIGN(params->size, PAGE_SIZE); drm_gem_private_object_init(vdev, &vmw_bo->tbo.base, params->size); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.h b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.h index 62b4342d5f7c..43b5439ec9f7 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.h @@ -71,6 +71,8 @@ struct vmw_bo_params { * @map: Kmap object for semi-persistent mappings * @res_tree: RB tree of resources using this buffer object as a backing MOB * @res_prios: Eviction priority counts for attached resources + * @map_count: The number of currently active maps. Will differ from the + * cpu_writers because it includes kernel maps. * @cpu_writers: Number of synccpu write grabs. Protected by reservation when * increased. May be decreased without reservation. * @dx_query_ctx: DX context if this buffer object is used as a DX query MOB @@ -90,6 +92,7 @@ struct vmw_bo { u32 res_prios[TTM_MAX_BO_PRIORITY]; struct xarray detached_resources;
+ atomic_t map_count; atomic_t cpu_writers; /* Not ref-counted. Protected by binding_mutex */ struct vmw_resource *dx_query_ctx;
Make sure that for external buffers mapping goes through the dma_buf interface instead of trying to access pages directly.
External buffers might not provide direct access to readable/writable pages so to make sure the bo's created from external dma_bufs can be read dma_buf interface has to be used.
Fixes crashes in IGT's kms_prime with vgem. Regular desktop usage won't trigger this due to the fact that virtual machines will not have multiple GPUs but it enables better test coverage in IGT.
v2: Fix the diff rectangle computation
Signed-off-by: Zack Rusin zack.rusin@broadcom.com Fixes: b32233acceff ("drm/vmwgfx: Fix prime import/export") Cc: stable@vger.kernel.org # v6.6+ Cc: Broadcom internal kernel review list bcm-kernel-feedback-list@broadcom.com Cc: dri-devel@lists.freedesktop.org Cc: stable@vger.kernel.org # v6.9+ --- drivers/gpu/drm/vmwgfx/vmwgfx_blit.c | 112 ++++++++++++++++++++++++++- drivers/gpu/drm/vmwgfx/vmwgfx_drv.h | 4 +- drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c | 12 +-- 3 files changed, 116 insertions(+), 12 deletions(-)
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_blit.c b/drivers/gpu/drm/vmwgfx/vmwgfx_blit.c index 717d624e9a05..4049447d211c 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_blit.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_blit.c @@ -27,6 +27,8 @@ **************************************************************************/
#include "vmwgfx_drv.h" + +#include "vmwgfx_bo.h" #include <linux/highmem.h>
/* @@ -420,13 +422,103 @@ static int vmw_bo_cpu_blit_line(struct vmw_bo_blit_line_data *d, return 0; }
+static void *map_external(struct vmw_bo *bo, struct iosys_map *map) +{ + struct vmw_private *vmw = + container_of(bo->tbo.bdev, struct vmw_private, bdev); + void *ptr = NULL; + int ret; + + if (bo->tbo.base.import_attach) { + ret = dma_buf_vmap(bo->tbo.base.dma_buf, map); + if (ret) { + drm_dbg_driver(&vmw->drm, + "Wasn't able to map external bo!\n"); + goto out; + } + ptr = map->vaddr; + } else { + ptr = vmw_bo_map_and_cache(bo); + } + +out: + return ptr; +} + +static void unmap_external(struct vmw_bo *bo, struct iosys_map *map) +{ + if (bo->tbo.base.import_attach) + dma_buf_vunmap(bo->tbo.base.dma_buf, map); + else + vmw_bo_unmap(bo); +} + +static int vmw_external_bo_copy(struct vmw_bo *dst, u32 dst_offset, + u32 dst_stride, struct vmw_bo *src, + u32 src_offset, u32 src_stride, + u32 width_in_bytes, u32 height, + struct vmw_diff_cpy *diff) +{ + struct vmw_private *vmw = + container_of(dst->tbo.bdev, struct vmw_private, bdev); + size_t dst_size = dst->tbo.resource->size; + size_t src_size = src->tbo.resource->size; + struct iosys_map dst_map = {0}; + struct iosys_map src_map = {0}; + int ret, i; + u8 *vsrc; + u8 *vdst; + + vsrc = map_external(src, &src_map); + if (!vsrc) { + drm_dbg_driver(&vmw->drm, "Wasn't able to map src\n"); + ret = -ENOMEM; + goto out; + } + + vdst = map_external(dst, &dst_map); + if (!vdst) { + drm_dbg_driver(&vmw->drm, "Wasn't able to map dst\n"); + ret = -ENOMEM; + goto out; + } + + vsrc += src_offset; + vdst += dst_offset; + if (src_stride == dst_stride) { + dst_size -= dst_offset; + src_size -= src_offset; + memcpy(vdst, vsrc, + min(dst_stride * height, min(dst_size, src_size))); + } else { + WARN_ON(dst_stride < width_in_bytes); + for (i = 0; i < height; ++i) { + memcpy(vdst, vsrc, width_in_bytes); + vsrc += src_stride; + vdst += dst_stride; + } + } + + diff->rect.x1 = (dst_offset % dst_stride) / diff->cpp; + diff->rect.y1 = floor(dst_offset / dst_stride); + diff->rect.x2 = diff->rect.x1 + width_in_bytes / diff->cpp; + diff->rect.y2 = diff->rect.y1 + height; + + ret = 0; +out: + unmap_external(src, &src_map); + unmap_external(dst, &dst_map); + + return ret; +} + /** * vmw_bo_cpu_blit - in-kernel cpu blit. * - * @dst: Destination buffer object. + * @vmw_dst: Destination buffer object. * @dst_offset: Destination offset of blit start in bytes. * @dst_stride: Destination stride in bytes. - * @src: Source buffer object. + * @vmw_src: Source buffer object. * @src_offset: Source offset of blit start in bytes. * @src_stride: Source stride in bytes. * @w: Width of blit. @@ -444,13 +536,15 @@ static int vmw_bo_cpu_blit_line(struct vmw_bo_blit_line_data *d, * Neither of the buffer objects may be placed in PCI memory * (Fixed memory in TTM terminology) when using this function. */ -int vmw_bo_cpu_blit(struct ttm_buffer_object *dst, +int vmw_bo_cpu_blit(struct vmw_bo *vmw_dst, u32 dst_offset, u32 dst_stride, - struct ttm_buffer_object *src, + struct vmw_bo *vmw_src, u32 src_offset, u32 src_stride, u32 w, u32 h, struct vmw_diff_cpy *diff) { + struct ttm_buffer_object *src = &vmw_src->tbo; + struct ttm_buffer_object *dst = &vmw_dst->tbo; struct ttm_operation_ctx ctx = { .interruptible = false, .no_wait_gpu = false @@ -460,6 +554,11 @@ int vmw_bo_cpu_blit(struct ttm_buffer_object *dst, int ret = 0; struct page **dst_pages = NULL; struct page **src_pages = NULL; + bool src_external = (src->ttm->page_flags & TTM_TT_FLAG_EXTERNAL) != 0; + bool dst_external = (dst->ttm->page_flags & TTM_TT_FLAG_EXTERNAL) != 0; + + if (WARN_ON(dst == src)) + return -EINVAL;
/* Buffer objects need to be either pinned or reserved: */ if (!(dst->pin_count)) @@ -479,6 +578,11 @@ int vmw_bo_cpu_blit(struct ttm_buffer_object *dst, return ret; }
+ if (src_external || dst_external) + return vmw_external_bo_copy(vmw_dst, dst_offset, dst_stride, + vmw_src, src_offset, src_stride, + w, h, diff); + if (!src->ttm->pages && src->ttm->sg) { src_pages = kvmalloc_array(src->ttm->num_pages, sizeof(struct page *), GFP_KERNEL); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h index 32f50e595809..3f4719b3c268 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h @@ -1353,9 +1353,9 @@ void vmw_diff_memcpy(struct vmw_diff_cpy *diff, u8 *dest, const u8 *src,
void vmw_memcpy(struct vmw_diff_cpy *diff, u8 *dest, const u8 *src, size_t n);
-int vmw_bo_cpu_blit(struct ttm_buffer_object *dst, +int vmw_bo_cpu_blit(struct vmw_bo *dst, u32 dst_offset, u32 dst_stride, - struct ttm_buffer_object *src, + struct vmw_bo *src, u32 src_offset, u32 src_stride, u32 w, u32 h, struct vmw_diff_cpy *diff); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c b/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c index 5106413c14b7..3cc664384b66 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c @@ -502,7 +502,7 @@ static void vmw_stdu_bo_cpu_commit(struct vmw_kms_dirty *dirty) container_of(dirty->unit, typeof(*stdu), base); s32 width, height; s32 src_pitch, dst_pitch; - struct ttm_buffer_object *src_bo, *dst_bo; + struct vmw_bo *src_bo, *dst_bo; u32 src_offset, dst_offset; struct vmw_diff_cpy diff = VMW_CPU_BLIT_DIFF_INITIALIZER(stdu->cpp);
@@ -517,11 +517,11 @@ static void vmw_stdu_bo_cpu_commit(struct vmw_kms_dirty *dirty)
/* Assume we are blitting from Guest (bo) to Host (display_srf) */ src_pitch = stdu->display_srf->metadata.base_size.width * stdu->cpp; - src_bo = &stdu->display_srf->res.guest_memory_bo->tbo; + src_bo = stdu->display_srf->res.guest_memory_bo; src_offset = ddirty->top * src_pitch + ddirty->left * stdu->cpp;
dst_pitch = ddirty->pitch; - dst_bo = &ddirty->buf->tbo; + dst_bo = ddirty->buf; dst_offset = ddirty->fb_top * dst_pitch + ddirty->fb_left * stdu->cpp;
(void) vmw_bo_cpu_blit(dst_bo, dst_offset, dst_pitch, @@ -1143,7 +1143,7 @@ vmw_stdu_bo_populate_update_cpu(struct vmw_du_update_plane *update, void *cmd, struct vmw_diff_cpy diff = VMW_CPU_BLIT_DIFF_INITIALIZER(0); struct vmw_stdu_update_gb_image *cmd_img = cmd; struct vmw_stdu_update *cmd_update; - struct ttm_buffer_object *src_bo, *dst_bo; + struct vmw_bo *src_bo, *dst_bo; u32 src_offset, dst_offset; s32 src_pitch, dst_pitch; s32 width, height; @@ -1157,11 +1157,11 @@ vmw_stdu_bo_populate_update_cpu(struct vmw_du_update_plane *update, void *cmd,
diff.cpp = stdu->cpp;
- dst_bo = &stdu->display_srf->res.guest_memory_bo->tbo; + dst_bo = stdu->display_srf->res.guest_memory_bo; dst_pitch = stdu->display_srf->metadata.base_size.width * stdu->cpp; dst_offset = bb->y1 * dst_pitch + bb->x1 * stdu->cpp;
- src_bo = &vfbbo->buffer->tbo; + src_bo = vfbbo->buffer; src_pitch = update->vfb->base.pitches[0]; src_offset = bo_update->fb_top * src_pitch + bo_update->fb_left * stdu->cpp;
LGTM.
Reviewed-by: Maaz Mombasawala maaz.mombasawala@broadcom.com
Hi Zack,
kernel test robot noticed the following build errors:
[auto build test ERROR on drm/drm-next] [also build test ERROR on drm-intel/for-linux-next-fixes drm-misc/drm-misc-next drm-tip/drm-tip linus/master v6.11-rc3 next-20240816] [cannot apply to drm-exynos/exynos-drm-next drm-intel/for-linux-next] [If your patch is applied to the wrong git tree, kindly drop us a note. And when submitting patch, we suggest to use '--base' as documented in https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/Zack-Rusin/drm-vmwgfx-Fix-pri... base: git://anongit.freedesktop.org/drm/drm drm-next patch link: https://lore.kernel.org/r/20240815153404.630214-2-zack.rusin%40broadcom.com patch subject: [PATCH v3 2/2] drm/vmwgfx: Fix prime with external buffers config: i386-allmodconfig (https://download.01.org/0day-ci/archive/20240816/202408162343.umOPAMF9-lkp@i...) compiler: gcc-12 (Debian 12.2.0-14) 12.2.0 reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240816/202408162343.umOPAMF9-lkp@i...)
If you fix the issue in a separate patch/commit (i.e. not just a new version of the same patch/commit), kindly add following tags | Reported-by: kernel test robot lkp@intel.com | Closes: https://lore.kernel.org/oe-kbuild-all/202408162343.umOPAMF9-lkp@intel.com/
All errors (new ones prefixed by >>):
drivers/gpu/drm/vmwgfx/vmwgfx_blit.c: In function 'vmw_external_bo_copy':
drivers/gpu/drm/vmwgfx/vmwgfx_blit.c:503:25: error: implicit declaration of function 'floor' [-Werror=implicit-function-declaration]
503 | diff->rect.y1 = floor(dst_offset / dst_stride); | ^~~~~ cc1: some warnings being treated as errors
vim +/floor +503 drivers/gpu/drm/vmwgfx/vmwgfx_blit.c
455 456 static int vmw_external_bo_copy(struct vmw_bo *dst, u32 dst_offset, 457 u32 dst_stride, struct vmw_bo *src, 458 u32 src_offset, u32 src_stride, 459 u32 width_in_bytes, u32 height, 460 struct vmw_diff_cpy *diff) 461 { 462 struct vmw_private *vmw = 463 container_of(dst->tbo.bdev, struct vmw_private, bdev); 464 size_t dst_size = dst->tbo.resource->size; 465 size_t src_size = src->tbo.resource->size; 466 struct iosys_map dst_map = {0}; 467 struct iosys_map src_map = {0}; 468 int ret, i; 469 u8 *vsrc; 470 u8 *vdst; 471 472 vsrc = map_external(src, &src_map); 473 if (!vsrc) { 474 drm_dbg_driver(&vmw->drm, "Wasn't able to map src\n"); 475 ret = -ENOMEM; 476 goto out; 477 } 478 479 vdst = map_external(dst, &dst_map); 480 if (!vdst) { 481 drm_dbg_driver(&vmw->drm, "Wasn't able to map dst\n"); 482 ret = -ENOMEM; 483 goto out; 484 } 485 486 vsrc += src_offset; 487 vdst += dst_offset; 488 if (src_stride == dst_stride) { 489 dst_size -= dst_offset; 490 src_size -= src_offset; 491 memcpy(vdst, vsrc, 492 min(dst_stride * height, min(dst_size, src_size))); 493 } else { 494 WARN_ON(dst_stride < width_in_bytes); 495 for (i = 0; i < height; ++i) { 496 memcpy(vdst, vsrc, width_in_bytes); 497 vsrc += src_stride; 498 vdst += dst_stride; 499 } 500 } 501 502 diff->rect.x1 = (dst_offset % dst_stride) / diff->cpp;
503 diff->rect.y1 = floor(dst_offset / dst_stride);
504 diff->rect.x2 = diff->rect.x1 + width_in_bytes / diff->cpp; 505 diff->rect.y2 = diff->rect.y1 + height; 506 507 ret = 0; 508 out: 509 unmap_external(src, &src_map); 510 unmap_external(dst, &dst_map); 511 512 return ret; 513 } 514
Hi Zack,
kernel test robot noticed the following build errors:
[auto build test ERROR on drm/drm-next] [also build test ERROR on drm-intel/for-linux-next-fixes drm-misc/drm-misc-next drm-tip/drm-tip linus/master v6.11-rc3 next-20240816] [cannot apply to drm-exynos/exynos-drm-next drm-intel/for-linux-next] [If your patch is applied to the wrong git tree, kindly drop us a note. And when submitting patch, we suggest to use '--base' as documented in https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/Zack-Rusin/drm-vmwgfx-Fix-pri... base: git://anongit.freedesktop.org/drm/drm drm-next patch link: https://lore.kernel.org/r/20240815153404.630214-2-zack.rusin%40broadcom.com patch subject: [PATCH v3 2/2] drm/vmwgfx: Fix prime with external buffers config: i386-randconfig-013-20240816 (https://download.01.org/0day-ci/archive/20240817/202408170033.rkCwgtSp-lkp@i...) compiler: clang version 18.1.5 (https://github.com/llvm/llvm-project 617a15a9eac96088ae5e9134248d8236e34b91b1) reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240817/202408170033.rkCwgtSp-lkp@i...)
If you fix the issue in a separate patch/commit (i.e. not just a new version of the same patch/commit), kindly add following tags | Reported-by: kernel test robot lkp@intel.com | Closes: https://lore.kernel.org/oe-kbuild-all/202408170033.rkCwgtSp-lkp@intel.com/
All errors (new ones prefixed by >>):
drivers/gpu/drm/vmwgfx/vmwgfx_blit.c:503:18: error: call to undeclared function 'floor'; ISO C99 and later do not support implicit function declarations [-Wimplicit-function-declaration]
503 | diff->rect.y1 = floor(dst_offset / dst_stride); | ^ 1 error generated.
vim +/floor +503 drivers/gpu/drm/vmwgfx/vmwgfx_blit.c
455 456 static int vmw_external_bo_copy(struct vmw_bo *dst, u32 dst_offset, 457 u32 dst_stride, struct vmw_bo *src, 458 u32 src_offset, u32 src_stride, 459 u32 width_in_bytes, u32 height, 460 struct vmw_diff_cpy *diff) 461 { 462 struct vmw_private *vmw = 463 container_of(dst->tbo.bdev, struct vmw_private, bdev); 464 size_t dst_size = dst->tbo.resource->size; 465 size_t src_size = src->tbo.resource->size; 466 struct iosys_map dst_map = {0}; 467 struct iosys_map src_map = {0}; 468 int ret, i; 469 u8 *vsrc; 470 u8 *vdst; 471 472 vsrc = map_external(src, &src_map); 473 if (!vsrc) { 474 drm_dbg_driver(&vmw->drm, "Wasn't able to map src\n"); 475 ret = -ENOMEM; 476 goto out; 477 } 478 479 vdst = map_external(dst, &dst_map); 480 if (!vdst) { 481 drm_dbg_driver(&vmw->drm, "Wasn't able to map dst\n"); 482 ret = -ENOMEM; 483 goto out; 484 } 485 486 vsrc += src_offset; 487 vdst += dst_offset; 488 if (src_stride == dst_stride) { 489 dst_size -= dst_offset; 490 src_size -= src_offset; 491 memcpy(vdst, vsrc, 492 min(dst_stride * height, min(dst_size, src_size))); 493 } else { 494 WARN_ON(dst_stride < width_in_bytes); 495 for (i = 0; i < height; ++i) { 496 memcpy(vdst, vsrc, width_in_bytes); 497 vsrc += src_stride; 498 vdst += dst_stride; 499 } 500 } 501 502 diff->rect.x1 = (dst_offset % dst_stride) / diff->cpp;
503 diff->rect.y1 = floor(dst_offset / dst_stride);
504 diff->rect.x2 = diff->rect.x1 + width_in_bytes / diff->cpp; 505 diff->rect.y2 = diff->rect.y1 + height; 506 507 ret = 0; 508 out: 509 unmap_external(src, &src_map); 510 unmap_external(dst, &dst_map); 511 512 return ret; 513 } 514
On Thu, Aug 15, 2024 at 6:34 PM Zack Rusin zack.rusin@broadcom.com wrote:
Make sure that for external buffers mapping goes through the dma_buf interface instead of trying to access pages directly.
External buffers might not provide direct access to readable/writable pages so to make sure the bo's created from external dma_bufs can be read dma_buf interface has to be used.
Fixes crashes in IGT's kms_prime with vgem. Regular desktop usage won't trigger this due to the fact that virtual machines will not have multiple GPUs but it enables better test coverage in IGT.
v2: Fix the diff rectangle computation
Signed-off-by: Zack Rusin zack.rusin@broadcom.com Fixes: b32233acceff ("drm/vmwgfx: Fix prime import/export") Cc: stable@vger.kernel.org # v6.6+ Cc: Broadcom internal kernel review list bcm-kernel-feedback-list@broadcom.com Cc: dri-devel@lists.freedesktop.org Cc: stable@vger.kernel.org # v6.9+
drivers/gpu/drm/vmwgfx/vmwgfx_blit.c | 112 ++++++++++++++++++++++++++- drivers/gpu/drm/vmwgfx/vmwgfx_drv.h | 4 +- drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c | 12 +-- 3 files changed, 116 insertions(+), 12 deletions(-)
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_blit.c b/drivers/gpu/drm/vmwgfx/vmwgfx_blit.c index 717d624e9a05..4049447d211c 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_blit.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_blit.c @@ -27,6 +27,8 @@ **************************************************************************/
#include "vmwgfx_drv.h"
+#include "vmwgfx_bo.h" #include <linux/highmem.h>
/* @@ -420,13 +422,103 @@ static int vmw_bo_cpu_blit_line(struct vmw_bo_blit_line_data *d, return 0; }
+static void *map_external(struct vmw_bo *bo, struct iosys_map *map) +{
struct vmw_private *vmw =
container_of(bo->tbo.bdev, struct vmw_private, bdev);
void *ptr = NULL;
int ret;
if (bo->tbo.base.import_attach) {
ret = dma_buf_vmap(bo->tbo.base.dma_buf, map);
if (ret) {
drm_dbg_driver(&vmw->drm,
"Wasn't able to map external bo!\n");
goto out;
}
ptr = map->vaddr;
} else {
ptr = vmw_bo_map_and_cache(bo);
}
+out:
return ptr;
+}
+static void unmap_external(struct vmw_bo *bo, struct iosys_map *map) +{
if (bo->tbo.base.import_attach)
dma_buf_vunmap(bo->tbo.base.dma_buf, map);
else
vmw_bo_unmap(bo);
+}
+static int vmw_external_bo_copy(struct vmw_bo *dst, u32 dst_offset,
u32 dst_stride, struct vmw_bo *src,
u32 src_offset, u32 src_stride,
u32 width_in_bytes, u32 height,
struct vmw_diff_cpy *diff)
+{
struct vmw_private *vmw =
container_of(dst->tbo.bdev, struct vmw_private, bdev);
size_t dst_size = dst->tbo.resource->size;
size_t src_size = src->tbo.resource->size;
struct iosys_map dst_map = {0};
struct iosys_map src_map = {0};
int ret, i;
u8 *vsrc;
u8 *vdst;
vsrc = map_external(src, &src_map);
if (!vsrc) {
drm_dbg_driver(&vmw->drm, "Wasn't able to map src\n");
ret = -ENOMEM;
goto out;
}
vdst = map_external(dst, &dst_map);
if (!vdst) {
drm_dbg_driver(&vmw->drm, "Wasn't able to map dst\n");
ret = -ENOMEM;
goto out;
}
vsrc += src_offset;
vdst += dst_offset;
if (src_stride == dst_stride) {
dst_size -= dst_offset;
src_size -= src_offset;
memcpy(vdst, vsrc,
min(dst_stride * height, min(dst_size, src_size)));
} else {
WARN_ON(dst_stride < width_in_bytes);
Wouldn't that be a hard BUG_ON / error condition? I mean, there'd likely be a buffer overrun ensuing.
for (i = 0; i < height; ++i) {
memcpy(vdst, vsrc, width_in_bytes);
vsrc += src_stride;
vdst += dst_stride;
}
}
diff->rect.x1 = (dst_offset % dst_stride) / diff->cpp;
diff->rect.y1 = floor(dst_offset / dst_stride);
That floor looks like a leftover from an earlier (signed integer) version?
diff->rect.x2 = diff->rect.x1 + width_in_bytes / diff->cpp;
diff->rect.y2 = diff->rect.y1 + height;
ret = 0;
+out:
unmap_external(src, &src_map);
unmap_external(dst, &dst_map);
return ret;
+}
/**
- vmw_bo_cpu_blit - in-kernel cpu blit.
- @dst: Destination buffer object.
- @vmw_dst: Destination buffer object.
- @dst_offset: Destination offset of blit start in bytes.
- @dst_stride: Destination stride in bytes.
- @src: Source buffer object.
- @vmw_src: Source buffer object.
- @src_offset: Source offset of blit start in bytes.
- @src_stride: Source stride in bytes.
- @w: Width of blit.
@@ -444,13 +536,15 @@ static int vmw_bo_cpu_blit_line(struct vmw_bo_blit_line_data *d,
- Neither of the buffer objects may be placed in PCI memory
- (Fixed memory in TTM terminology) when using this function.
*/ -int vmw_bo_cpu_blit(struct ttm_buffer_object *dst, +int vmw_bo_cpu_blit(struct vmw_bo *vmw_dst, u32 dst_offset, u32 dst_stride,
struct ttm_buffer_object *src,
struct vmw_bo *vmw_src, u32 src_offset, u32 src_stride, u32 w, u32 h, struct vmw_diff_cpy *diff)
{
struct ttm_buffer_object *src = &vmw_src->tbo;
struct ttm_buffer_object *dst = &vmw_dst->tbo; struct ttm_operation_ctx ctx = { .interruptible = false, .no_wait_gpu = false
@@ -460,6 +554,11 @@ int vmw_bo_cpu_blit(struct ttm_buffer_object *dst, int ret = 0; struct page **dst_pages = NULL; struct page **src_pages = NULL;
bool src_external = (src->ttm->page_flags & TTM_TT_FLAG_EXTERNAL) != 0;
bool dst_external = (dst->ttm->page_flags & TTM_TT_FLAG_EXTERNAL) != 0;
if (WARN_ON(dst == src))
return -EINVAL; /* Buffer objects need to be either pinned or reserved: */ if (!(dst->pin_count))
@@ -479,6 +578,11 @@ int vmw_bo_cpu_blit(struct ttm_buffer_object *dst, return ret; }
if (src_external || dst_external)
return vmw_external_bo_copy(vmw_dst, dst_offset, dst_stride,
vmw_src, src_offset, src_stride,
w, h, diff);
if (!src->ttm->pages && src->ttm->sg) { src_pages = kvmalloc_array(src->ttm->num_pages, sizeof(struct page *), GFP_KERNEL);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h index 32f50e595809..3f4719b3c268 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h @@ -1353,9 +1353,9 @@ void vmw_diff_memcpy(struct vmw_diff_cpy *diff, u8 *dest, const u8 *src,
void vmw_memcpy(struct vmw_diff_cpy *diff, u8 *dest, const u8 *src, size_t n);
-int vmw_bo_cpu_blit(struct ttm_buffer_object *dst, +int vmw_bo_cpu_blit(struct vmw_bo *dst, u32 dst_offset, u32 dst_stride,
struct ttm_buffer_object *src,
struct vmw_bo *src, u32 src_offset, u32 src_stride, u32 w, u32 h, struct vmw_diff_cpy *diff);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c b/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c index 5106413c14b7..3cc664384b66 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c @@ -502,7 +502,7 @@ static void vmw_stdu_bo_cpu_commit(struct vmw_kms_dirty *dirty) container_of(dirty->unit, typeof(*stdu), base); s32 width, height; s32 src_pitch, dst_pitch;
struct ttm_buffer_object *src_bo, *dst_bo;
struct vmw_bo *src_bo, *dst_bo; u32 src_offset, dst_offset; struct vmw_diff_cpy diff = VMW_CPU_BLIT_DIFF_INITIALIZER(stdu->cpp);
@@ -517,11 +517,11 @@ static void vmw_stdu_bo_cpu_commit(struct vmw_kms_dirty *dirty)
/* Assume we are blitting from Guest (bo) to Host (display_srf) */ src_pitch = stdu->display_srf->metadata.base_size.width * stdu->cpp;
src_bo = &stdu->display_srf->res.guest_memory_bo->tbo;
src_bo = stdu->display_srf->res.guest_memory_bo; src_offset = ddirty->top * src_pitch + ddirty->left * stdu->cpp; dst_pitch = ddirty->pitch;
dst_bo = &ddirty->buf->tbo;
dst_bo = ddirty->buf; dst_offset = ddirty->fb_top * dst_pitch + ddirty->fb_left * stdu->cpp; (void) vmw_bo_cpu_blit(dst_bo, dst_offset, dst_pitch,
@@ -1143,7 +1143,7 @@ vmw_stdu_bo_populate_update_cpu(struct vmw_du_update_plane *update, void *cmd, struct vmw_diff_cpy diff = VMW_CPU_BLIT_DIFF_INITIALIZER(0); struct vmw_stdu_update_gb_image *cmd_img = cmd; struct vmw_stdu_update *cmd_update;
struct ttm_buffer_object *src_bo, *dst_bo;
struct vmw_bo *src_bo, *dst_bo; u32 src_offset, dst_offset; s32 src_pitch, dst_pitch; s32 width, height;
@@ -1157,11 +1157,11 @@ vmw_stdu_bo_populate_update_cpu(struct vmw_du_update_plane *update, void *cmd,
diff.cpp = stdu->cpp;
dst_bo = &stdu->display_srf->res.guest_memory_bo->tbo;
dst_bo = stdu->display_srf->res.guest_memory_bo; dst_pitch = stdu->display_srf->metadata.base_size.width * stdu->cpp; dst_offset = bb->y1 * dst_pitch + bb->x1 * stdu->cpp;
src_bo = &vfbbo->buffer->tbo;
src_bo = vfbbo->buffer; src_pitch = update->vfb->base.pitches[0]; src_offset = bo_update->fb_top * src_pitch + bo_update->fb_left * stdu->cpp;
-- 2.43.0
LGTM, just with those two remarks.
Reviewed-by: Martin Krastev martin.krastev@broadcom.com
Regards, Martin
On Thu, Aug 15, 2024 at 6:34 PM Zack Rusin zack.rusin@broadcom.com wrote:
The kms paths keep a persistent map active to read and compare the cursor buffer. These maps can race with each other in simple scenario where: a) buffer "a" mapped for update b) buffer "a" mapped for compare c) do the compare d) unmap "a" for compare e) update the cursor f) unmap "a" for update At step "e" the buffer has been unmapped and the read contents is bogus.
Prevent unmapping of active read buffers by simply keeping a count of how many paths have currently active maps and unmap only when the count reaches 0.
v2: Update doc strings
Fixes: 485d98d472d5 ("drm/vmwgfx: Add support for CursorMob and CursorBypass 4") Cc: Broadcom internal kernel review list bcm-kernel-feedback-list@broadcom.com Cc: dri-devel@lists.freedesktop.org Cc: stable@vger.kernel.org # v5.19+ Signed-off-by: Zack Rusin zack.rusin@broadcom.com
drivers/gpu/drm/vmwgfx/vmwgfx_bo.c | 13 +++++++++++-- drivers/gpu/drm/vmwgfx/vmwgfx_bo.h | 3 +++ 2 files changed, 14 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c index f42ebc4a7c22..a0e433fbcba6 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c @@ -360,6 +360,8 @@ void *vmw_bo_map_and_cache_size(struct vmw_bo *vbo, size_t size) void *virtual; int ret;
atomic_inc(&vbo->map_count);
virtual = ttm_kmap_obj_virtual(&vbo->map, ¬_used); if (virtual) return virtual;
@@ -383,11 +385,17 @@ void *vmw_bo_map_and_cache_size(struct vmw_bo *vbo, size_t size) */ void vmw_bo_unmap(struct vmw_bo *vbo) {
int map_count;
if (vbo->map.bo == NULL) return;
ttm_bo_kunmap(&vbo->map);
vbo->map.bo = NULL;
map_count = atomic_dec_return(&vbo->map_count);
if (!map_count) {
ttm_bo_kunmap(&vbo->map);
vbo->map.bo = NULL;
}
}
@@ -421,6 +429,7 @@ static int vmw_bo_init(struct vmw_private *dev_priv, vmw_bo->tbo.priority = 3; vmw_bo->res_tree = RB_ROOT; xa_init(&vmw_bo->detached_resources);
atomic_set(&vmw_bo->map_count, 0); params->size = ALIGN(params->size, PAGE_SIZE); drm_gem_private_object_init(vdev, &vmw_bo->tbo.base, params->size);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.h b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.h index 62b4342d5f7c..43b5439ec9f7 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.h @@ -71,6 +71,8 @@ struct vmw_bo_params {
- @map: Kmap object for semi-persistent mappings
- @res_tree: RB tree of resources using this buffer object as a backing MOB
- @res_prios: Eviction priority counts for attached resources
- @map_count: The number of currently active maps. Will differ from the
- cpu_writers because it includes kernel maps.
- @cpu_writers: Number of synccpu write grabs. Protected by reservation when
- increased. May be decreased without reservation.
- @dx_query_ctx: DX context if this buffer object is used as a DX query MOB
@@ -90,6 +92,7 @@ struct vmw_bo { u32 res_prios[TTM_MAX_BO_PRIORITY]; struct xarray detached_resources;
atomic_t map_count; atomic_t cpu_writers; /* Not ref-counted. Protected by binding_mutex */ struct vmw_resource *dx_query_ctx;
-- 2.43.0
LGTM.
Reviewed-by: Martin Krastev martin.krastev@broadcom.com
Regards, Martin
linux-stable-mirror@lists.linaro.org