September 2021 - Linux-stable-mirror

FAILED: patch "[PATCH] drm/i915: Revert "drm/i915/gem: Asynchronous cmdparser"" failed to apply to 5.14-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 5.14-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. thanks, greg k-h ------------------ original commit in Linus's tree ------------------ >From 93b713304188844b8514074dc13ffd56d12235d3 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand <jason(a)jlekstrand.net> Date: Wed, 14 Jul 2021 14:34:15 -0500 Subject: [PATCH] drm/i915: Revert "drm/i915/gem: Asynchronous cmdparser" This reverts 686c7c35abc2 ("drm/i915/gem: Asynchronous cmdparser"). The justification for this commit in the git history was a vague comment about getting it out from under the struct_mutex. While this may improve perf for some workloads on Gen7 platforms where we rely on the command parser for features such as indirect rendering, no numbers were provided to prove such an improvement. It claims to closed two gitlab/bugzilla issues but with no explanation whatsoever as to why or what bug it's fixing. Meanwhile, by moving command parsing off to an async callback, it leaves us with a problem of what to do on error. When things were synchronous, EXECBUFFER2 would fail with an error code if parsing failed. When moving it to async, we needed another way to handle that error and the solution employed was to set an error on the dma_fence and then trust that said error gets propagated to the client eventually. Moving back to synchronous will help us untangle the fence error propagation mess. This also reverts most of 0edbb9ba1bfe ("drm/i915: Move cmd parser pinning to execbuffer") which is a refactor of some of our allocation paths for asynchronous parsing. Now that everything is synchronous, we don't need it. v2 (Daniel Vetter): - Add stabel Cc and Fixes tag Signed-off-by: Jason Ekstrand <jason(a)jlekstrand.net> Cc: <stable(a)vger.kernel.org> # v5.6+ Fixes: 9e31c1fe45d5 ("drm/i915: Propagate errors on awaiting already signaled fences") Cc: Maarten Lankhorst <maarten.lankhorst(a)linux.intel.com> Reviewed-by: Jon Bloomfield <jon.bloomfield(a)intel.com> Acked-by: Daniel Vetter <daniel.vetter(a)ffwll.ch> Signed-off-by: Daniel Vetter <daniel.vetter(a)ffwll.ch> Link: https://patchwork.freedesktop.org/patch/msgid/20210714193419.1459723-2-jaso… diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index cb493f1fcbe7..6ad166fa73e7 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -25,10 +25,8 @@ #include "i915_gem_clflush.h" #include "i915_gem_context.h" #include "i915_gem_ioctls.h" -#include "i915_sw_fence_work.h" #include "i915_trace.h" #include "i915_user_extensions.h" -#include "i915_memcpy.h" struct eb_vma { struct i915_vma *vma; @@ -1460,6 +1458,10 @@ static u32 *reloc_gpu(struct i915_execbuffer *eb, int err; struct intel_engine_cs *engine = eb->engine; + /* If we need to copy for the cmdparser, we will stall anyway */ + if (eb_use_cmdparser(eb)) + return ERR_PTR(-EWOULDBLOCK); + if (!reloc_can_use_engine(engine)) { engine = engine->gt->engine_class[COPY_ENGINE_CLASS][0]; if (!engine) @@ -2374,217 +2376,6 @@ shadow_batch_pin(struct i915_execbuffer *eb, return vma; } -struct eb_parse_work { - struct dma_fence_work base; - struct intel_engine_cs *engine; - struct i915_vma *batch; - struct i915_vma *shadow; - struct i915_vma *trampoline; - unsigned long batch_offset; - unsigned long batch_length; - unsigned long *jump_whitelist; - const void *batch_map; - void *shadow_map; -}; - -static int __eb_parse(struct dma_fence_work *work) -{ - struct eb_parse_work *pw = container_of(work, typeof(*pw), base); - int ret; - bool cookie; - - cookie = dma_fence_begin_signalling(); - ret = intel_engine_cmd_parser(pw->engine, - pw->batch, - pw->batch_offset, - pw->batch_length, - pw->shadow, - pw->jump_whitelist, - pw->shadow_map, - pw->batch_map); - dma_fence_end_signalling(cookie); - - return ret; -} - -static void __eb_parse_release(struct dma_fence_work *work) -{ - struct eb_parse_work *pw = container_of(work, typeof(*pw), base); - - if (!IS_ERR_OR_NULL(pw->jump_whitelist)) - kfree(pw->jump_whitelist); - - if (pw->batch_map) - i915_gem_object_unpin_map(pw->batch->obj); - else - i915_gem_object_unpin_pages(pw->batch->obj); - - i915_gem_object_unpin_map(pw->shadow->obj); - - if (pw->trampoline) - i915_active_release(&pw->trampoline->active); - i915_active_release(&pw->shadow->active); - i915_active_release(&pw->batch->active); -} - -static const struct dma_fence_work_ops eb_parse_ops = { - .name = "eb_parse", - .work = __eb_parse, - .release = __eb_parse_release, -}; - -static inline int -__parser_mark_active(struct i915_vma *vma, - struct intel_timeline *tl, - struct dma_fence *fence) -{ - struct intel_gt_buffer_pool_node *node = vma->private; - - return i915_active_ref(&node->active, tl->fence_context, fence); -} - -static int -parser_mark_active(struct eb_parse_work *pw, struct intel_timeline *tl) -{ - int err; - - mutex_lock(&tl->mutex); - - err = __parser_mark_active(pw->shadow, tl, &pw->base.dma); - if (err) - goto unlock; - - if (pw->trampoline) { - err = __parser_mark_active(pw->trampoline, tl, &pw->base.dma); - if (err) - goto unlock; - } - -unlock: - mutex_unlock(&tl->mutex); - return err; -} - -static int eb_parse_pipeline(struct i915_execbuffer *eb, - struct i915_vma *shadow, - struct i915_vma *trampoline) -{ - struct eb_parse_work *pw; - struct drm_i915_gem_object *batch = eb->batch->vma->obj; - bool needs_clflush; - int err; - - GEM_BUG_ON(overflows_type(eb->batch_start_offset, pw->batch_offset)); - GEM_BUG_ON(overflows_type(eb->batch_len, pw->batch_length)); - - pw = kzalloc(sizeof(*pw), GFP_KERNEL); - if (!pw) - return -ENOMEM; - - err = i915_active_acquire(&eb->batch->vma->active); - if (err) - goto err_free; - - err = i915_active_acquire(&shadow->active); - if (err) - goto err_batch; - - if (trampoline) { - err = i915_active_acquire(&trampoline->active); - if (err) - goto err_shadow; - } - - pw->shadow_map = i915_gem_object_pin_map(shadow->obj, I915_MAP_WB); - if (IS_ERR(pw->shadow_map)) { - err = PTR_ERR(pw->shadow_map); - goto err_trampoline; - } - - needs_clflush = - !(batch->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ); - - pw->batch_map = ERR_PTR(-ENODEV); - if (needs_clflush && i915_has_memcpy_from_wc()) - pw->batch_map = i915_gem_object_pin_map(batch, I915_MAP_WC); - - if (IS_ERR(pw->batch_map)) { - err = i915_gem_object_pin_pages(batch); - if (err) - goto err_unmap_shadow; - pw->batch_map = NULL; - } - - pw->jump_whitelist = - intel_engine_cmd_parser_alloc_jump_whitelist(eb->batch_len, - trampoline); - if (IS_ERR(pw->jump_whitelist)) { - err = PTR_ERR(pw->jump_whitelist); - goto err_unmap_batch; - } - - dma_fence_work_init(&pw->base, &eb_parse_ops); - - pw->engine = eb->engine; - pw->batch = eb->batch->vma; - pw->batch_offset = eb->batch_start_offset; - pw->batch_length = eb->batch_len; - pw->shadow = shadow; - pw->trampoline = trampoline; - - /* Mark active refs early for this worker, in case we get interrupted */ - err = parser_mark_active(pw, eb->context->timeline); - if (err) - goto err_commit; - - err = dma_resv_reserve_shared(pw->batch->resv, 1); - if (err) - goto err_commit; - - err = dma_resv_reserve_shared(shadow->resv, 1); - if (err) - goto err_commit; - - /* Wait for all writes (and relocs) into the batch to complete */ - err = i915_sw_fence_await_reservation(&pw->base.chain, - pw->batch->resv, NULL, false, - 0, I915_FENCE_GFP); - if (err < 0) - goto err_commit; - - /* Keep the batch alive and unwritten as we parse */ - dma_resv_add_shared_fence(pw->batch->resv, &pw->base.dma); - - /* Force execution to wait for completion of the parser */ - dma_resv_add_excl_fence(shadow->resv, &pw->base.dma); - - dma_fence_work_commit_imm(&pw->base); - return 0; - -err_commit: - i915_sw_fence_set_error_once(&pw->base.chain, err); - dma_fence_work_commit_imm(&pw->base); - return err; - -err_unmap_batch: - if (pw->batch_map) - i915_gem_object_unpin_map(batch); - else - i915_gem_object_unpin_pages(batch); -err_unmap_shadow: - i915_gem_object_unpin_map(shadow->obj); -err_trampoline: - if (trampoline) - i915_active_release(&trampoline->active); -err_shadow: - i915_active_release(&shadow->active); -err_batch: - i915_active_release(&eb->batch->vma->active); -err_free: - kfree(pw); - return err; -} - static struct i915_vma *eb_dispatch_secure(struct i915_execbuffer *eb, struct i915_vma *vma) { /* @@ -2674,7 +2465,15 @@ static int eb_parse(struct i915_execbuffer *eb) goto err_trampoline; } - err = eb_parse_pipeline(eb, shadow, trampoline); + err = dma_resv_reserve_shared(shadow->resv, 1); + if (err) + goto err_trampoline; + + err = intel_engine_cmd_parser(eb->engine, + eb->batch->vma, + eb->batch_start_offset, + eb->batch_len, + shadow, trampoline); if (err) goto err_unpin_batch; diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c index 4df505e4c53a..16162fc2782d 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c @@ -125,6 +125,10 @@ static int igt_gpu_reloc(void *arg) intel_gt_pm_get(&eb.i915->gt); for_each_uabi_engine(eb.engine, eb.i915) { + if (intel_engine_requires_cmd_parser(eb.engine) || + intel_engine_using_cmd_parser(eb.engine)) + continue; + reloc_cache_init(&eb.reloc_cache, eb.i915); memset(map, POISON_INUSE, 4096); diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index 3992c25a191d..00ec618d0159 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -1145,19 +1145,41 @@ find_reg(const struct intel_engine_cs *engine, u32 addr) static u32 *copy_batch(struct drm_i915_gem_object *dst_obj, struct drm_i915_gem_object *src_obj, unsigned long offset, unsigned long length, - void *dst, const void *src) + bool *needs_clflush_after) { - bool needs_clflush = - !(src_obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ); - - if (src) { - GEM_BUG_ON(!needs_clflush); - i915_unaligned_memcpy_from_wc(dst, src + offset, length); - } else { - struct scatterlist *sg; + unsigned int src_needs_clflush; + unsigned int dst_needs_clflush; + void *dst, *src; + int ret; + + ret = i915_gem_object_prepare_write(dst_obj, &dst_needs_clflush); + if (ret) + return ERR_PTR(ret); + + dst = i915_gem_object_pin_map(dst_obj, I915_MAP_WB); + i915_gem_object_finish_access(dst_obj); + if (IS_ERR(dst)) + return dst; + + ret = i915_gem_object_prepare_read(src_obj, &src_needs_clflush); + if (ret) { + i915_gem_object_unpin_map(dst_obj); + return ERR_PTR(ret); + } + + src = ERR_PTR(-ENODEV); + if (src_needs_clflush && i915_has_memcpy_from_wc()) { + src = i915_gem_object_pin_map(src_obj, I915_MAP_WC); + if (!IS_ERR(src)) { + i915_unaligned_memcpy_from_wc(dst, + src + offset, + length); + i915_gem_object_unpin_map(src_obj); + } + } + if (IS_ERR(src)) { + unsigned long x, n, remain; void *ptr; - unsigned int x, sg_ofs; - unsigned long remain; /* * We can avoid clflushing partial cachelines before the write @@ -1168,40 +1190,34 @@ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj, * validate up to the end of the batch. */ remain = length; - if (!(dst_obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ)) + if (dst_needs_clflush & CLFLUSH_BEFORE) remain = round_up(remain, boot_cpu_data.x86_clflush_size); ptr = dst; x = offset_in_page(offset); - sg = i915_gem_object_get_sg(src_obj, offset >> PAGE_SHIFT, &sg_ofs, false); - - while (remain) { - unsigned long sg_max = sg->length >> PAGE_SHIFT; - - for (; remain && sg_ofs < sg_max; sg_ofs++) { - unsigned long len = min(remain, PAGE_SIZE - x); - void *map; - - map = kmap_atomic(nth_page(sg_page(sg), sg_ofs)); - if (needs_clflush) - drm_clflush_virt_range(map + x, len); - memcpy(ptr, map + x, len); - kunmap_atomic(map); - - ptr += len; - remain -= len; - x = 0; - } - - sg_ofs = 0; - sg = sg_next(sg); + for (n = offset >> PAGE_SHIFT; remain; n++) { + int len = min(remain, PAGE_SIZE - x); + + src = kmap_atomic(i915_gem_object_get_page(src_obj, n)); + if (src_needs_clflush) + drm_clflush_virt_range(src + x, len); + memcpy(ptr, src + x, len); + kunmap_atomic(src); + + ptr += len; + remain -= len; + x = 0; } } + i915_gem_object_finish_access(src_obj); + memset32(dst + length, 0, (dst_obj->base.size - length) / sizeof(u32)); /* dst_obj is returned with vmap pinned */ + *needs_clflush_after = dst_needs_clflush & CLFLUSH_AFTER; + return dst; } @@ -1360,6 +1376,9 @@ static int check_bbstart(u32 *cmd, u32 offset, u32 length, if (target_cmd_index == offset) return 0; + if (IS_ERR(jump_whitelist)) + return PTR_ERR(jump_whitelist); + if (!test_bit(target_cmd_index, jump_whitelist)) { DRM_DEBUG("CMD: BB_START to 0x%llx not a previously executed cmd\n", jump_target); @@ -1369,28 +1388,10 @@ static int check_bbstart(u32 *cmd, u32 offset, u32 length, return 0; } -/** - * intel_engine_cmd_parser_alloc_jump_whitelist() - preallocate jump whitelist for intel_engine_cmd_parser() - * @batch_length: length of the commands in batch_obj - * @trampoline: Whether jump trampolines are used. - * - * Preallocates a jump whitelist for parsing the cmd buffer in intel_engine_cmd_parser(). - * This has to be preallocated, because the command parser runs in signaling context, - * and may not allocate any memory. - * - * Return: NULL or pointer to a jump whitelist, or ERR_PTR() on failure. Use - * IS_ERR() to check for errors. Must bre freed() with kfree(). - * - * NULL is a valid value, meaning no allocation was required. - */ -unsigned long *intel_engine_cmd_parser_alloc_jump_whitelist(u32 batch_length, - bool trampoline) +static unsigned long *alloc_whitelist(u32 batch_length) { unsigned long *jmp; - if (trampoline) - return NULL; - /* * We expect batch_length to be less than 256KiB for known users, * i.e. we need at most an 8KiB bitmap allocation which should be @@ -1425,21 +1426,21 @@ unsigned long *intel_engine_cmd_parser_alloc_jump_whitelist(u32 batch_length, * Return: non-zero if the parser finds violations or otherwise fails; -EACCES * if the batch appears legal but should use hardware parsing */ + int intel_engine_cmd_parser(struct intel_engine_cs *engine, struct i915_vma *batch, unsigned long batch_offset, unsigned long batch_length, struct i915_vma *shadow, - unsigned long *jump_whitelist, - void *shadow_map, - const void *batch_map) + bool trampoline) { u32 *cmd, *batch_end, offset = 0; struct drm_i915_cmd_descriptor default_desc = noop_desc; const struct drm_i915_cmd_descriptor *desc = &default_desc; + bool needs_clflush_after = false; + unsigned long *jump_whitelist; u64 batch_addr, shadow_addr; int ret = 0; - bool trampoline = !jump_whitelist; GEM_BUG_ON(!IS_ALIGNED(batch_offset, sizeof(*cmd))); GEM_BUG_ON(!IS_ALIGNED(batch_length, sizeof(*cmd))); @@ -1447,8 +1448,18 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine, batch->size)); GEM_BUG_ON(!batch_length); - cmd = copy_batch(shadow->obj, batch->obj, batch_offset, batch_length, - shadow_map, batch_map); + cmd = copy_batch(shadow->obj, batch->obj, + batch_offset, batch_length, + &needs_clflush_after); + if (IS_ERR(cmd)) { + DRM_DEBUG("CMD: Failed to copy batch\n"); + return PTR_ERR(cmd); + } + + jump_whitelist = NULL; + if (!trampoline) + /* Defer failure until attempted use */ + jump_whitelist = alloc_whitelist(batch_length); shadow_addr = gen8_canonical_addr(shadow->node.start); batch_addr = gen8_canonical_addr(batch->node.start + batch_offset); @@ -1549,6 +1560,9 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine, i915_gem_object_flush_map(shadow->obj); + if (!IS_ERR_OR_NULL(jump_whitelist)) + kfree(jump_whitelist); + i915_gem_object_unpin_map(shadow->obj); return ret; } diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index adc7dcf042a0..d0027b0e8afc 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1902,17 +1902,12 @@ const char *i915_cache_level_str(struct drm_i915_private *i915, int type); int i915_cmd_parser_get_version(struct drm_i915_private *dev_priv); int intel_engine_init_cmd_parser(struct intel_engine_cs *engine); void intel_engine_cleanup_cmd_parser(struct intel_engine_cs *engine); -unsigned long *intel_engine_cmd_parser_alloc_jump_whitelist(u32 batch_length, - bool trampoline); - int intel_engine_cmd_parser(struct intel_engine_cs *engine, struct i915_vma *batch, unsigned long batch_offset, unsigned long batch_length, struct i915_vma *shadow, - unsigned long *jump_whitelist, - void *shadow_map, - const void *batch_map); + bool trampoline); #define I915_CMD_PARSER_TRAMPOLINE_SIZE 8 /* intel_device_info.c */

4 years, 3 months

1
0
0 0

FAILED: patch "[PATCH] Revert "drm/i915: Propagate errors on awaiting already" failed to apply to 5.10-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 5.10-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. thanks, greg k-h ------------------ original commit in Linus's tree ------------------ >From 93a2711cddd5760e2f0f901817d71c93183c3b87 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand <jason(a)jlekstrand.net> Date: Wed, 14 Jul 2021 14:34:16 -0500 Subject: [PATCH] Revert "drm/i915: Propagate errors on awaiting already signaled fences" This reverts commit 9e31c1fe45d555a948ff66f1f0e3fe1f83ca63f7. Ever since that commit, we've been having issues where a hang in one client can propagate to another. In particular, a hang in an app can propagate to the X server which causes the whole desktop to lock up. Error propagation along fences sound like a good idea, but as your bug shows, surprising consequences, since propagating errors across security boundaries is not a good thing. What we do have is track the hangs on the ctx, and report information to userspace using RESET_STATS. That's how arb_robustness works. Also, if my understanding is still correct, the EIO from execbuf is when your context is banned (because not recoverable or too many hangs). And in all these cases it's up to userspace to figure out what is all impacted and should be reported to the application, that's not on the kernel to guess and automatically propagate. What's more, we're also building more features on top of ctx error reporting with RESET_STATS ioctl: Encrypted buffers use the same, and the userspace fence wait also relies on that mechanism. So it is the path going forward for reporting gpu hangs and resets to userspace. So all together that's why I think we should just bury this idea again as not quite the direction we want to go to, hence why I think the revert is the right option here. For backporters: Please note that you _must_ have a backport of https://lore.kernel.org/dri-devel/20210602164149.391653-2-jason@jlekstrand.… for otherwise backporting just this patch opens up a security bug. v2: Augment commit message. Also restore Jason's sob that I accidentally lost. v3: Add a note for backporters Signed-off-by: Jason Ekstrand <jason(a)jlekstrand.net> Reported-by: Marcin Slusarz <marcin.slusarz(a)intel.com> Cc: <stable(a)vger.kernel.org> # v5.6+ Cc: Jason Ekstrand <jason.ekstrand(a)intel.com> Cc: Marcin Slusarz <marcin.slusarz(a)intel.com> Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/3080 Fixes: 9e31c1fe45d5 ("drm/i915: Propagate errors on awaiting already signaled fences") Acked-by: Daniel Vetter <daniel.vetter(a)ffwll.ch> Reviewed-by: Jon Bloomfield <jon.bloomfield(a)intel.com> Signed-off-by: Daniel Vetter <daniel.vetter(a)ffwll.ch> Link: https://patchwork.freedesktop.org/patch/msgid/20210714193419.1459723-3-jaso… diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 86b4c9f2613d..09ebea9a0090 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -1399,10 +1399,8 @@ i915_request_await_execution(struct i915_request *rq, do { fence = *child++; - if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) { - i915_sw_fence_set_error_once(&rq->submit, fence->error); + if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) continue; - } if (fence->context == rq->fence.context) continue; @@ -1499,10 +1497,8 @@ i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence) do { fence = *child++; - if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) { - i915_sw_fence_set_error_once(&rq->submit, fence->error); + if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) continue; - } /* * Requests on the same timeline are explicitly ordered, along

4 years, 3 months

1
0
0 0

FAILED: patch "[PATCH] Revert "drm/i915: Propagate errors on awaiting already" failed to apply to 5.13-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 5.13-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. thanks, greg k-h ------------------ original commit in Linus's tree ------------------ >From 93a2711cddd5760e2f0f901817d71c93183c3b87 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand <jason(a)jlekstrand.net> Date: Wed, 14 Jul 2021 14:34:16 -0500 Subject: [PATCH] Revert "drm/i915: Propagate errors on awaiting already signaled fences" This reverts commit 9e31c1fe45d555a948ff66f1f0e3fe1f83ca63f7. Ever since that commit, we've been having issues where a hang in one client can propagate to another. In particular, a hang in an app can propagate to the X server which causes the whole desktop to lock up. Error propagation along fences sound like a good idea, but as your bug shows, surprising consequences, since propagating errors across security boundaries is not a good thing. What we do have is track the hangs on the ctx, and report information to userspace using RESET_STATS. That's how arb_robustness works. Also, if my understanding is still correct, the EIO from execbuf is when your context is banned (because not recoverable or too many hangs). And in all these cases it's up to userspace to figure out what is all impacted and should be reported to the application, that's not on the kernel to guess and automatically propagate. What's more, we're also building more features on top of ctx error reporting with RESET_STATS ioctl: Encrypted buffers use the same, and the userspace fence wait also relies on that mechanism. So it is the path going forward for reporting gpu hangs and resets to userspace. So all together that's why I think we should just bury this idea again as not quite the direction we want to go to, hence why I think the revert is the right option here. For backporters: Please note that you _must_ have a backport of https://lore.kernel.org/dri-devel/20210602164149.391653-2-jason@jlekstrand.… for otherwise backporting just this patch opens up a security bug. v2: Augment commit message. Also restore Jason's sob that I accidentally lost. v3: Add a note for backporters Signed-off-by: Jason Ekstrand <jason(a)jlekstrand.net> Reported-by: Marcin Slusarz <marcin.slusarz(a)intel.com> Cc: <stable(a)vger.kernel.org> # v5.6+ Cc: Jason Ekstrand <jason.ekstrand(a)intel.com> Cc: Marcin Slusarz <marcin.slusarz(a)intel.com> Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/3080 Fixes: 9e31c1fe45d5 ("drm/i915: Propagate errors on awaiting already signaled fences") Acked-by: Daniel Vetter <daniel.vetter(a)ffwll.ch> Reviewed-by: Jon Bloomfield <jon.bloomfield(a)intel.com> Signed-off-by: Daniel Vetter <daniel.vetter(a)ffwll.ch> Link: https://patchwork.freedesktop.org/patch/msgid/20210714193419.1459723-3-jaso… diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 86b4c9f2613d..09ebea9a0090 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -1399,10 +1399,8 @@ i915_request_await_execution(struct i915_request *rq, do { fence = *child++; - if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) { - i915_sw_fence_set_error_once(&rq->submit, fence->error); + if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) continue; - } if (fence->context == rq->fence.context) continue; @@ -1499,10 +1497,8 @@ i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence) do { fence = *child++; - if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) { - i915_sw_fence_set_error_once(&rq->submit, fence->error); + if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) continue; - } /* * Requests on the same timeline are explicitly ordered, along

4 years, 3 months

1
0
0 0

FAILED: patch "[PATCH] Revert "drm/i915: Propagate errors on awaiting already" failed to apply to 5.14-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 5.14-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. thanks, greg k-h ------------------ original commit in Linus's tree ------------------ >From 93a2711cddd5760e2f0f901817d71c93183c3b87 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand <jason(a)jlekstrand.net> Date: Wed, 14 Jul 2021 14:34:16 -0500 Subject: [PATCH] Revert "drm/i915: Propagate errors on awaiting already signaled fences" This reverts commit 9e31c1fe45d555a948ff66f1f0e3fe1f83ca63f7. Ever since that commit, we've been having issues where a hang in one client can propagate to another. In particular, a hang in an app can propagate to the X server which causes the whole desktop to lock up. Error propagation along fences sound like a good idea, but as your bug shows, surprising consequences, since propagating errors across security boundaries is not a good thing. What we do have is track the hangs on the ctx, and report information to userspace using RESET_STATS. That's how arb_robustness works. Also, if my understanding is still correct, the EIO from execbuf is when your context is banned (because not recoverable or too many hangs). And in all these cases it's up to userspace to figure out what is all impacted and should be reported to the application, that's not on the kernel to guess and automatically propagate. What's more, we're also building more features on top of ctx error reporting with RESET_STATS ioctl: Encrypted buffers use the same, and the userspace fence wait also relies on that mechanism. So it is the path going forward for reporting gpu hangs and resets to userspace. So all together that's why I think we should just bury this idea again as not quite the direction we want to go to, hence why I think the revert is the right option here. For backporters: Please note that you _must_ have a backport of https://lore.kernel.org/dri-devel/20210602164149.391653-2-jason@jlekstrand.… for otherwise backporting just this patch opens up a security bug. v2: Augment commit message. Also restore Jason's sob that I accidentally lost. v3: Add a note for backporters Signed-off-by: Jason Ekstrand <jason(a)jlekstrand.net> Reported-by: Marcin Slusarz <marcin.slusarz(a)intel.com> Cc: <stable(a)vger.kernel.org> # v5.6+ Cc: Jason Ekstrand <jason.ekstrand(a)intel.com> Cc: Marcin Slusarz <marcin.slusarz(a)intel.com> Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/3080 Fixes: 9e31c1fe45d5 ("drm/i915: Propagate errors on awaiting already signaled fences") Acked-by: Daniel Vetter <daniel.vetter(a)ffwll.ch> Reviewed-by: Jon Bloomfield <jon.bloomfield(a)intel.com> Signed-off-by: Daniel Vetter <daniel.vetter(a)ffwll.ch> Link: https://patchwork.freedesktop.org/patch/msgid/20210714193419.1459723-3-jaso… diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 86b4c9f2613d..09ebea9a0090 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -1399,10 +1399,8 @@ i915_request_await_execution(struct i915_request *rq, do { fence = *child++; - if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) { - i915_sw_fence_set_error_once(&rq->submit, fence->error); + if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) continue; - } if (fence->context == rq->fence.context) continue; @@ -1499,10 +1497,8 @@ i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence) do { fence = *child++; - if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) { - i915_sw_fence_set_error_once(&rq->submit, fence->error); + if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) continue; - } /* * Requests on the same timeline are explicitly ordered, along

4 years, 3 months

1
0
0 0

FAILED: patch "[PATCH] drm/amd/display: Remove invalid assert for ODM + MPC case" failed to apply to 5.14-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 5.14-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. thanks, greg k-h ------------------ original commit in Linus's tree ------------------ >From f43a19fd0e976736d8f1b70b6fe1b6b88d6a900b Mon Sep 17 00:00:00 2001 From: Eric Bernstein <eric.bernstein(a)amd.com> Date: Mon, 26 Jul 2021 15:53:18 -0400 Subject: [PATCH] drm/amd/display: Remove invalid assert for ODM + MPC case Reviewed-by: Dmytro Laktyushkin <Dmytro.Laktyushkin(a)amd.com> Acked-by: Anson Jacob <Anson.Jacob(a)amd.com> Signed-off-by: Eric Bernstein <eric.bernstein(a)amd.com> Cc: stable(a)vger.kernel.org Tested-by: Daniel Wheeler <daniel.wheeler(a)amd.com> Signed-off-by: Alex Deucher <alexander.deucher(a)amd.com> diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c index 253654d605c2..28e15ebf2f43 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c @@ -1788,7 +1788,6 @@ static bool dcn30_split_stream_for_mpc_or_odm( } pri_pipe->next_odm_pipe = sec_pipe; sec_pipe->prev_odm_pipe = pri_pipe; - ASSERT(sec_pipe->top_pipe == NULL); if (!sec_pipe->top_pipe) sec_pipe->stream_res.opp = pool->opps[pipe_idx];

4 years, 3 months

1
0
0 0

FAILED: patch "[PATCH] drm/amdgpu: stop scheduler when calling hw_fini (v2)" failed to apply to 5.14-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 5.14-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. thanks, greg k-h ------------------ original commit in Linus's tree ------------------ >From f7d6779df642720e22bffd449e683bb8690bd3bf Mon Sep 17 00:00:00 2001 From: Guchun Chen <guchun.chen(a)amd.com> Date: Fri, 27 Aug 2021 18:31:41 +0800 Subject: [PATCH] drm/amdgpu: stop scheduler when calling hw_fini (v2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This gurantees no more work on the ring can be submitted to hardware in suspend/resume case, otherwise a potential race will occur and the ring will get no chance to stay empty before suspend. v2: Call drm_sched_resubmit_job before drm_sched_start to restart jobs from the pending list. Suggested-by: Andrey Grodzovsky <andrey.grodzovsky(a)amd.com> Suggested-by: Christian König <christian.koenig(a)amd.com> Signed-off-by: Guchun Chen <guchun.chen(a)amd.com> Reviewed-by: Christian König <christian.koenig(a)amd.com> Signed-off-by: Alex Deucher <alexander.deucher(a)amd.com> Cc: stable(a)vger.kernel.org diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index 14499f0de32d..8d682befe0d6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -552,6 +552,9 @@ void amdgpu_fence_driver_hw_fini(struct amdgpu_device *adev) if (!ring || !ring->fence_drv.initialized) continue; + if (!ring->no_scheduler) + drm_sched_stop(&ring->sched, NULL); + /* You can't wait for HW to signal if it's gone */ if (!drm_dev_is_unplugged(&adev->ddev)) r = amdgpu_fence_wait_empty(ring); @@ -611,6 +614,11 @@ void amdgpu_fence_driver_hw_init(struct amdgpu_device *adev) if (!ring || !ring->fence_drv.initialized) continue; + if (!ring->no_scheduler) { + drm_sched_resubmit_jobs(&ring->sched); + drm_sched_start(&ring->sched, true); + } + /* enable the interrupt */ if (ring->fence_drv.irq_src) amdgpu_irq_get(adev, ring->fence_drv.irq_src,

4 years, 3 months

1
0
0 0

FAILED: patch "[PATCH] drm/amdgpu: use the preferred pin domain after the check" failed to apply to 5.14-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 5.14-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. thanks, greg k-h ------------------ original commit in Linus's tree ------------------ >From 9deb0b3dcf13e573d54bec8498f044da9780f4e2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig(a)amd.com> Date: Wed, 18 Aug 2021 14:05:28 +0200 Subject: [PATCH] drm/amdgpu: use the preferred pin domain after the check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For some reason we run into an use case where a BO is already pinned into GTT, but should be pinned into VRAM|GTT again. Handle that case gracefully as well. Reviewed-by: Shashank Sharma <Shashank.sharma(a)amd.com> Reviewed-by: Alex Deucher <alexander.deucher(a)amd.com> Signed-off-by: Christian König <christian.koenig(a)amd.com> Signed-off-by: Alex Deucher <alexander.deucher(a)amd.com> Cc: stable(a)vger.kernel.org diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index d15eee98204d..7734c10ae74e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -920,11 +920,6 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain, return -EINVAL; } - /* This assumes only APU display buffers are pinned with (VRAM|GTT). - * See function amdgpu_display_supported_domains() - */ - domain = amdgpu_bo_get_preferred_pin_domain(adev, domain); - if (bo->tbo.pin_count) { uint32_t mem_type = bo->tbo.resource->mem_type; uint32_t mem_flags = bo->tbo.resource->placement; @@ -949,6 +944,11 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain, return 0; } + /* This assumes only APU display buffers are pinned with (VRAM|GTT). + * See function amdgpu_display_supported_domains() + */ + domain = amdgpu_bo_get_preferred_pin_domain(adev, domain); + if (bo->tbo.base.import_attach) dma_buf_pin(bo->tbo.base.import_attach);

4 years, 3 months

1
0
0 0

FAILED: patch "[PATCH] drm/amdgpu: use the preferred pin domain after the check" failed to apply to 5.14-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 5.14-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. thanks, greg k-h ------------------ original commit in Linus's tree ------------------ >From 9deb0b3dcf13e573d54bec8498f044da9780f4e2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig(a)amd.com> Date: Wed, 18 Aug 2021 14:05:28 +0200 Subject: [PATCH] drm/amdgpu: use the preferred pin domain after the check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For some reason we run into an use case where a BO is already pinned into GTT, but should be pinned into VRAM|GTT again. Handle that case gracefully as well. Reviewed-by: Shashank Sharma <Shashank.sharma(a)amd.com> Reviewed-by: Alex Deucher <alexander.deucher(a)amd.com> Signed-off-by: Christian König <christian.koenig(a)amd.com> Signed-off-by: Alex Deucher <alexander.deucher(a)amd.com> Cc: stable(a)vger.kernel.org diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index d15eee98204d..7734c10ae74e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -920,11 +920,6 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain, return -EINVAL; } - /* This assumes only APU display buffers are pinned with (VRAM|GTT). - * See function amdgpu_display_supported_domains() - */ - domain = amdgpu_bo_get_preferred_pin_domain(adev, domain); - if (bo->tbo.pin_count) { uint32_t mem_type = bo->tbo.resource->mem_type; uint32_t mem_flags = bo->tbo.resource->placement; @@ -949,6 +944,11 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain, return 0; } + /* This assumes only APU display buffers are pinned with (VRAM|GTT). + * See function amdgpu_display_supported_domains() + */ + domain = amdgpu_bo_get_preferred_pin_domain(adev, domain); + if (bo->tbo.base.import_attach) dma_buf_pin(bo->tbo.base.import_attach);

4 years, 3 months

1
0
0 0

FAILED: patch "[PATCH] drm/amd/display: use GFP_ATOMIC in" failed to apply to 5.14-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 5.14-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. thanks, greg k-h ------------------ original commit in Linus's tree ------------------ >From 704bd53543c661428f02f5dc52413cb6369603cf Mon Sep 17 00:00:00 2001 From: Anson Jacob <Anson.Jacob(a)amd.com> Date: Fri, 30 Jul 2021 19:46:20 -0400 Subject: [PATCH] drm/amd/display: use GFP_ATOMIC in amdgpu_dm_irq_schedule_work Replace GFP_KERNEL with GFP_ATOMIC as amdgpu_dm_irq_schedule_work can't sleep. BUG: sleeping function called from invalid context at include/linux/sched/mm.h:196 in_atomic(): 1, irqs_disabled(): 1, non_block: 0, pid: 253, name: kworker/6:1H CPU: 6 PID: 253 Comm: kworker/6:1H Tainted: G W OE 5.11.0-promotion_2021_06_07-18_36_28_prelim_revert_retrain #8 Hardware name: System manufacturer System Product Name/PRIME X570-PRO, BIOS 3405 02/01/2021 Workqueue: events_highpri dm_irq_work_func [amdgpu] Call Trace: <IRQ> dump_stack+0x5e/0x74 ___might_sleep.cold+0x87/0x98 __might_sleep+0x4b/0x80 kmem_cache_alloc_trace+0x390/0x4f0 amdgpu_dm_irq_handler+0x171/0x230 [amdgpu] amdgpu_irq_dispatch+0xc0/0x1e0 [amdgpu] amdgpu_ih_process+0x81/0x100 [amdgpu] amdgpu_irq_handler+0x26/0xa0 [amdgpu] __handle_irq_event_percpu+0x49/0x190 ? __hrtimer_get_next_event+0x4d/0x80 handle_irq_event_percpu+0x33/0x80 handle_irq_event+0x33/0x60 handle_edge_irq+0x82/0x190 asm_call_irq_on_stack+0x12/0x20 </IRQ> common_interrupt+0xbb/0x140 asm_common_interrupt+0x1e/0x40 RIP: 0010:amdgpu_device_rreg.part.0+0x44/0xf0 [amdgpu] Code: 53 48 89 fb 4c 3b af c8 08 00 00 73 6d 83 e2 02 75 0d f6 87 40 62 01 00 10 0f 85 83 00 00 00 4c 03 ab d0 08 00 00 45 8b 6d 00 <8b> 05 3e b6 52 00 85 c0 7e 62 48 8b 43 08 0f b7 70 3e 65 8b 05 e3 RSP: 0018:ffffae7740fff9e8 EFLAGS: 00000286 RAX: ffffffffc05ee610 RBX: ffff8aaf8f620000 RCX: 0000000000000006 RDX: 0000000000000000 RSI: 0000000000005430 RDI: ffff8aaf8f620000 RBP: ffffae7740fffa08 R08: 0000000000000001 R09: 000000000000000a R10: 0000000000000001 R11: 0000000000000001 R12: 0000000000005430 R13: 0000000071000000 R14: 0000000000000001 R15: 0000000000005430 ? amdgpu_cgs_write_register+0x20/0x20 [amdgpu] amdgpu_device_rreg+0x17/0x20 [amdgpu] amdgpu_cgs_read_register+0x14/0x20 [amdgpu] dm_read_reg_func+0x38/0xb0 [amdgpu] generic_reg_wait+0x80/0x160 [amdgpu] dce_aux_transfer_raw+0x324/0x7c0 [amdgpu] dc_link_aux_transfer_raw+0x43/0x50 [amdgpu] dm_dp_aux_transfer+0x87/0x110 [amdgpu] drm_dp_dpcd_access+0x72/0x110 [drm_kms_helper] drm_dp_dpcd_read+0xb7/0xf0 [drm_kms_helper] drm_dp_get_one_sb_msg+0x349/0x480 [drm_kms_helper] drm_dp_mst_hpd_irq+0xc5/0xe40 [drm_kms_helper] ? drm_dp_mst_hpd_irq+0xc5/0xe40 [drm_kms_helper] dm_handle_hpd_rx_irq+0x184/0x1a0 [amdgpu] ? dm_handle_hpd_rx_irq+0x184/0x1a0 [amdgpu] handle_hpd_rx_irq+0x195/0x240 [amdgpu] ? __switch_to_asm+0x42/0x70 ? __switch_to+0x131/0x450 dm_irq_work_func+0x19/0x20 [amdgpu] process_one_work+0x209/0x400 worker_thread+0x4d/0x3e0 ? cancel_delayed_work+0xa0/0xa0 kthread+0x124/0x160 ? kthread_park+0x90/0x90 ret_from_fork+0x22/0x30 Reviewed-by: Aurabindo Jayamohanan Pillai <Aurabindo.Pillai(a)amd.com> Acked-by: Anson Jacob <Anson.Jacob(a)amd.com> Signed-off-by: Anson Jacob <Anson.Jacob(a)amd.com> Cc: stable(a)vger.kernel.org Tested-by: Daniel Wheeler <daniel.wheeler(a)amd.com> Signed-off-by: Alex Deucher <alexander.deucher(a)amd.com> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c index 40f617bbb86f..4aba0e8c84f8 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c @@ -584,7 +584,7 @@ static void amdgpu_dm_irq_schedule_work(struct amdgpu_device *adev, handler_data = container_of(handler_list->next, struct amdgpu_dm_irq_handler_data, list); /*allocate a new amdgpu_dm_irq_handler_data*/ - handler_data_add = kzalloc(sizeof(*handler_data), GFP_KERNEL); + handler_data_add = kzalloc(sizeof(*handler_data), GFP_ATOMIC); if (!handler_data_add) { DRM_ERROR("DM_IRQ: failed to allocate irq handler!\n"); return;

4 years, 3 months

1
0
0 0

FAILED: patch "[PATCH] drm/panfrost: Make sure MMU context lifetime is not bound to" failed to apply to 5.4-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 5.4-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. thanks, greg k-h ------------------ original commit in Linus's tree ------------------ >From 7fdc48cc63a30fa3480d18bdd8c5fff2b9b15212 Mon Sep 17 00:00:00 2001 From: Boris Brezillon <boris.brezillon(a)collabora.com> Date: Mon, 21 Jun 2021 15:38:56 +0200 Subject: [PATCH] drm/panfrost: Make sure MMU context lifetime is not bound to panfrost_priv Jobs can be in-flight when the file descriptor is closed (either because the process did not terminate properly, or because it didn't wait for all GPU jobs to be finished), and apparently panfrost_job_close() does not cancel already running jobs. Let's refcount the MMU context object so it's lifetime is no longer bound to the FD lifetime and running jobs can finish properly without generating spurious page faults. Reported-by: Icecream95 <ixn(a)keemail.me> Fixes: 7282f7645d06 ("drm/panfrost: Implement per FD address spaces") Cc: <stable(a)vger.kernel.org> Signed-off-by: Boris Brezillon <boris.brezillon(a)collabora.com> Reviewed-by: Steven Price <steven.price(a)arm.com> Link: https://patchwork.freedesktop.org/patch/msgid/20210621133907.1683899-2-bori… diff --git a/drivers/gpu/drm/panfrost/panfrost_device.h b/drivers/gpu/drm/panfrost/panfrost_device.h index f614e98771e4..8b2cdb8c701d 100644 --- a/drivers/gpu/drm/panfrost/panfrost_device.h +++ b/drivers/gpu/drm/panfrost/panfrost_device.h @@ -121,8 +121,12 @@ struct panfrost_device { }; struct panfrost_mmu { + struct panfrost_device *pfdev; + struct kref refcount; struct io_pgtable_cfg pgtbl_cfg; struct io_pgtable_ops *pgtbl_ops; + struct drm_mm mm; + spinlock_t mm_lock; int as; atomic_t as_count; struct list_head list; @@ -133,9 +137,7 @@ struct panfrost_file_priv { struct drm_sched_entity sched_entity[NUM_JOB_SLOTS]; - struct panfrost_mmu mmu; - struct drm_mm mm; - spinlock_t mm_lock; + struct panfrost_mmu *mmu; }; static inline struct panfrost_device *to_panfrost_device(struct drm_device *ddev) diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c index 3ee828f1e7a5..1ffaef5ec5ff 100644 --- a/drivers/gpu/drm/panfrost/panfrost_drv.c +++ b/drivers/gpu/drm/panfrost/panfrost_drv.c @@ -412,7 +412,7 @@ static int panfrost_ioctl_madvise(struct drm_device *dev, void *data, * anyway, so let's not bother. */ if (!list_is_singular(&bo->mappings.list) || - WARN_ON_ONCE(first->mmu != &priv->mmu)) { + WARN_ON_ONCE(first->mmu != priv->mmu)) { ret = -EINVAL; goto out_unlock_mappings; } @@ -444,32 +444,6 @@ int panfrost_unstable_ioctl_check(void) return 0; } -#define PFN_4G (SZ_4G >> PAGE_SHIFT) -#define PFN_4G_MASK (PFN_4G - 1) -#define PFN_16M (SZ_16M >> PAGE_SHIFT) - -static void panfrost_drm_mm_color_adjust(const struct drm_mm_node *node, - unsigned long color, - u64 *start, u64 *end) -{ - /* Executable buffers can't start or end on a 4GB boundary */ - if (!(color & PANFROST_BO_NOEXEC)) { - u64 next_seg; - - if ((*start & PFN_4G_MASK) == 0) - (*start)++; - - if ((*end & PFN_4G_MASK) == 0) - (*end)--; - - next_seg = ALIGN(*start, PFN_4G); - if (next_seg - *start <= PFN_16M) - *start = next_seg + 1; - - *end = min(*end, ALIGN(*start, PFN_4G) - 1); - } -} - static int panfrost_open(struct drm_device *dev, struct drm_file *file) { @@ -484,15 +458,11 @@ panfrost_open(struct drm_device *dev, struct drm_file *file) panfrost_priv->pfdev = pfdev; file->driver_priv = panfrost_priv; - spin_lock_init(&panfrost_priv->mm_lock); - - /* 4G enough for now. can be 48-bit */ - drm_mm_init(&panfrost_priv->mm, SZ_32M >> PAGE_SHIFT, (SZ_4G - SZ_32M) >> PAGE_SHIFT); - panfrost_priv->mm.color_adjust = panfrost_drm_mm_color_adjust; - - ret = panfrost_mmu_pgtable_alloc(panfrost_priv); - if (ret) - goto err_pgtable; + panfrost_priv->mmu = panfrost_mmu_ctx_create(pfdev); + if (IS_ERR(panfrost_priv->mmu)) { + ret = PTR_ERR(panfrost_priv->mmu); + goto err_free; + } ret = panfrost_job_open(panfrost_priv); if (ret) @@ -501,9 +471,8 @@ panfrost_open(struct drm_device *dev, struct drm_file *file) return 0; err_job: - panfrost_mmu_pgtable_free(panfrost_priv); -err_pgtable: - drm_mm_takedown(&panfrost_priv->mm); + panfrost_mmu_ctx_put(panfrost_priv->mmu); +err_free: kfree(panfrost_priv); return ret; } @@ -516,8 +485,7 @@ panfrost_postclose(struct drm_device *dev, struct drm_file *file) panfrost_perfcnt_close(file); panfrost_job_close(panfrost_priv); - panfrost_mmu_pgtable_free(panfrost_priv); - drm_mm_takedown(&panfrost_priv->mm); + panfrost_mmu_ctx_put(panfrost_priv->mmu); kfree(panfrost_priv); } diff --git a/drivers/gpu/drm/panfrost/panfrost_gem.c b/drivers/gpu/drm/panfrost/panfrost_gem.c index 3e0723bc36bd..23377481f4e3 100644 --- a/drivers/gpu/drm/panfrost/panfrost_gem.c +++ b/drivers/gpu/drm/panfrost/panfrost_gem.c @@ -60,7 +60,7 @@ panfrost_gem_mapping_get(struct panfrost_gem_object *bo, mutex_lock(&bo->mappings.lock); list_for_each_entry(iter, &bo->mappings.list, node) { - if (iter->mmu == &priv->mmu) { + if (iter->mmu == priv->mmu) { kref_get(&iter->refcount); mapping = iter; break; @@ -74,16 +74,13 @@ panfrost_gem_mapping_get(struct panfrost_gem_object *bo, static void panfrost_gem_teardown_mapping(struct panfrost_gem_mapping *mapping) { - struct panfrost_file_priv *priv; - if (mapping->active) panfrost_mmu_unmap(mapping); - priv = container_of(mapping->mmu, struct panfrost_file_priv, mmu); - spin_lock(&priv->mm_lock); + spin_lock(&mapping->mmu->mm_lock); if (drm_mm_node_allocated(&mapping->mmnode)) drm_mm_remove_node(&mapping->mmnode); - spin_unlock(&priv->mm_lock); + spin_unlock(&mapping->mmu->mm_lock); } static void panfrost_gem_mapping_release(struct kref *kref) @@ -94,6 +91,7 @@ static void panfrost_gem_mapping_release(struct kref *kref) panfrost_gem_teardown_mapping(mapping); drm_gem_object_put(&mapping->obj->base.base); + panfrost_mmu_ctx_put(mapping->mmu); kfree(mapping); } @@ -143,11 +141,11 @@ int panfrost_gem_open(struct drm_gem_object *obj, struct drm_file *file_priv) else align = size >= SZ_2M ? SZ_2M >> PAGE_SHIFT : 0; - mapping->mmu = &priv->mmu; - spin_lock(&priv->mm_lock); - ret = drm_mm_insert_node_generic(&priv->mm, &mapping->mmnode, + mapping->mmu = panfrost_mmu_ctx_get(priv->mmu); + spin_lock(&mapping->mmu->mm_lock); + ret = drm_mm_insert_node_generic(&mapping->mmu->mm, &mapping->mmnode, size >> PAGE_SHIFT, align, color, 0); - spin_unlock(&priv->mm_lock); + spin_unlock(&mapping->mmu->mm_lock); if (ret) goto err; @@ -176,7 +174,7 @@ void panfrost_gem_close(struct drm_gem_object *obj, struct drm_file *file_priv) mutex_lock(&bo->mappings.lock); list_for_each_entry(iter, &bo->mappings.list, node) { - if (iter->mmu == &priv->mmu) { + if (iter->mmu == priv->mmu) { mapping = iter; list_del(&iter->node); break; diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c index ef004d587dc4..beb62c8fc851 100644 --- a/drivers/gpu/drm/panfrost/panfrost_job.c +++ b/drivers/gpu/drm/panfrost/panfrost_job.c @@ -165,7 +165,7 @@ static void panfrost_job_hw_submit(struct panfrost_job *job, int js) return; } - cfg = panfrost_mmu_as_get(pfdev, &job->file_priv->mmu); + cfg = panfrost_mmu_as_get(pfdev, job->file_priv->mmu); job_write(pfdev, JS_HEAD_NEXT_LO(js), jc_head & 0xFFFFFFFF); job_write(pfdev, JS_HEAD_NEXT_HI(js), jc_head >> 32); @@ -516,7 +516,7 @@ static irqreturn_t panfrost_job_irq_handler(int irq, void *data) if (job) { pfdev->jobs[j] = NULL; - panfrost_mmu_as_put(pfdev, &job->file_priv->mmu); + panfrost_mmu_as_put(pfdev, job->file_priv->mmu); panfrost_devfreq_record_idle(&pfdev->pfdevfreq); dma_fence_signal_locked(job->done_fence); diff --git a/drivers/gpu/drm/panfrost/panfrost_mmu.c b/drivers/gpu/drm/panfrost/panfrost_mmu.c index 0581186ebfb3..569509c2ba27 100644 --- a/drivers/gpu/drm/panfrost/panfrost_mmu.c +++ b/drivers/gpu/drm/panfrost/panfrost_mmu.c @@ -1,5 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright 2019 Linaro, Ltd, Rob Herring <robh(a)kernel.org> */ + +#include <drm/panfrost_drm.h> + #include <linux/atomic.h> #include <linux/bitfield.h> #include <linux/delay.h> @@ -337,7 +340,7 @@ static void mmu_tlb_inv_context_s1(void *cookie) static void mmu_tlb_sync_context(void *cookie) { - //struct panfrost_device *pfdev = cookie; + //struct panfrost_mmu *mmu = cookie; // TODO: Wait 1000 GPU cycles for HW_ISSUE_6367/T60X } @@ -352,57 +355,10 @@ static const struct iommu_flush_ops mmu_tlb_ops = { .tlb_flush_walk = mmu_tlb_flush_walk, }; -int panfrost_mmu_pgtable_alloc(struct panfrost_file_priv *priv) -{ - struct panfrost_mmu *mmu = &priv->mmu; - struct panfrost_device *pfdev = priv->pfdev; - - INIT_LIST_HEAD(&mmu->list); - mmu->as = -1; - - mmu->pgtbl_cfg = (struct io_pgtable_cfg) { - .pgsize_bitmap = SZ_4K | SZ_2M, - .ias = FIELD_GET(0xff, pfdev->features.mmu_features), - .oas = FIELD_GET(0xff00, pfdev->features.mmu_features), - .coherent_walk = pfdev->coherent, - .tlb = &mmu_tlb_ops, - .iommu_dev = pfdev->dev, - }; - - mmu->pgtbl_ops = alloc_io_pgtable_ops(ARM_MALI_LPAE, &mmu->pgtbl_cfg, - priv); - if (!mmu->pgtbl_ops) - return -EINVAL; - - return 0; -} - -void panfrost_mmu_pgtable_free(struct panfrost_file_priv *priv) -{ - struct panfrost_device *pfdev = priv->pfdev; - struct panfrost_mmu *mmu = &priv->mmu; - - spin_lock(&pfdev->as_lock); - if (mmu->as >= 0) { - pm_runtime_get_noresume(pfdev->dev); - if (pm_runtime_active(pfdev->dev)) - panfrost_mmu_disable(pfdev, mmu->as); - pm_runtime_put_autosuspend(pfdev->dev); - - clear_bit(mmu->as, &pfdev->as_alloc_mask); - clear_bit(mmu->as, &pfdev->as_in_use_mask); - list_del(&mmu->list); - } - spin_unlock(&pfdev->as_lock); - - free_io_pgtable_ops(mmu->pgtbl_ops); -} - static struct panfrost_gem_mapping * addr_to_mapping(struct panfrost_device *pfdev, int as, u64 addr) { struct panfrost_gem_mapping *mapping = NULL; - struct panfrost_file_priv *priv; struct drm_mm_node *node; u64 offset = addr >> PAGE_SHIFT; struct panfrost_mmu *mmu; @@ -415,11 +371,10 @@ addr_to_mapping(struct panfrost_device *pfdev, int as, u64 addr) goto out; found_mmu: - priv = container_of(mmu, struct panfrost_file_priv, mmu); - spin_lock(&priv->mm_lock); + spin_lock(&mmu->mm_lock); - drm_mm_for_each_node(node, &priv->mm) { + drm_mm_for_each_node(node, &mmu->mm) { if (offset >= node->start && offset < (node->start + node->size)) { mapping = drm_mm_node_to_panfrost_mapping(node); @@ -429,7 +384,7 @@ addr_to_mapping(struct panfrost_device *pfdev, int as, u64 addr) } } - spin_unlock(&priv->mm_lock); + spin_unlock(&mmu->mm_lock); out: spin_unlock(&pfdev->as_lock); return mapping; @@ -542,6 +497,107 @@ static int panfrost_mmu_map_fault_addr(struct panfrost_device *pfdev, int as, return ret; } +static void panfrost_mmu_release_ctx(struct kref *kref) +{ + struct panfrost_mmu *mmu = container_of(kref, struct panfrost_mmu, + refcount); + struct panfrost_device *pfdev = mmu->pfdev; + + spin_lock(&pfdev->as_lock); + if (mmu->as >= 0) { + pm_runtime_get_noresume(pfdev->dev); + if (pm_runtime_active(pfdev->dev)) + panfrost_mmu_disable(pfdev, mmu->as); + pm_runtime_put_autosuspend(pfdev->dev); + + clear_bit(mmu->as, &pfdev->as_alloc_mask); + clear_bit(mmu->as, &pfdev->as_in_use_mask); + list_del(&mmu->list); + } + spin_unlock(&pfdev->as_lock); + + free_io_pgtable_ops(mmu->pgtbl_ops); + drm_mm_takedown(&mmu->mm); + kfree(mmu); +} + +void panfrost_mmu_ctx_put(struct panfrost_mmu *mmu) +{ + kref_put(&mmu->refcount, panfrost_mmu_release_ctx); +} + +struct panfrost_mmu *panfrost_mmu_ctx_get(struct panfrost_mmu *mmu) +{ + kref_get(&mmu->refcount); + + return mmu; +} + +#define PFN_4G (SZ_4G >> PAGE_SHIFT) +#define PFN_4G_MASK (PFN_4G - 1) +#define PFN_16M (SZ_16M >> PAGE_SHIFT) + +static void panfrost_drm_mm_color_adjust(const struct drm_mm_node *node, + unsigned long color, + u64 *start, u64 *end) +{ + /* Executable buffers can't start or end on a 4GB boundary */ + if (!(color & PANFROST_BO_NOEXEC)) { + u64 next_seg; + + if ((*start & PFN_4G_MASK) == 0) + (*start)++; + + if ((*end & PFN_4G_MASK) == 0) + (*end)--; + + next_seg = ALIGN(*start, PFN_4G); + if (next_seg - *start <= PFN_16M) + *start = next_seg + 1; + + *end = min(*end, ALIGN(*start, PFN_4G) - 1); + } +} + +struct panfrost_mmu *panfrost_mmu_ctx_create(struct panfrost_device *pfdev) +{ + struct panfrost_mmu *mmu; + + mmu = kzalloc(sizeof(*mmu), GFP_KERNEL); + if (!mmu) + return ERR_PTR(-ENOMEM); + + mmu->pfdev = pfdev; + spin_lock_init(&mmu->mm_lock); + + /* 4G enough for now. can be 48-bit */ + drm_mm_init(&mmu->mm, SZ_32M >> PAGE_SHIFT, (SZ_4G - SZ_32M) >> PAGE_SHIFT); + mmu->mm.color_adjust = panfrost_drm_mm_color_adjust; + + INIT_LIST_HEAD(&mmu->list); + mmu->as = -1; + + mmu->pgtbl_cfg = (struct io_pgtable_cfg) { + .pgsize_bitmap = SZ_4K | SZ_2M, + .ias = FIELD_GET(0xff, pfdev->features.mmu_features), + .oas = FIELD_GET(0xff00, pfdev->features.mmu_features), + .coherent_walk = pfdev->coherent, + .tlb = &mmu_tlb_ops, + .iommu_dev = pfdev->dev, + }; + + mmu->pgtbl_ops = alloc_io_pgtable_ops(ARM_MALI_LPAE, &mmu->pgtbl_cfg, + mmu); + if (!mmu->pgtbl_ops) { + kfree(mmu); + return ERR_PTR(-EINVAL); + } + + kref_init(&mmu->refcount); + + return mmu; +} + static const char *access_type_name(struct panfrost_device *pfdev, u32 fault_status) { diff --git a/drivers/gpu/drm/panfrost/panfrost_mmu.h b/drivers/gpu/drm/panfrost/panfrost_mmu.h index 44fc2edf63ce..cc2a0d307feb 100644 --- a/drivers/gpu/drm/panfrost/panfrost_mmu.h +++ b/drivers/gpu/drm/panfrost/panfrost_mmu.h @@ -18,7 +18,8 @@ void panfrost_mmu_reset(struct panfrost_device *pfdev); u32 panfrost_mmu_as_get(struct panfrost_device *pfdev, struct panfrost_mmu *mmu); void panfrost_mmu_as_put(struct panfrost_device *pfdev, struct panfrost_mmu *mmu); -int panfrost_mmu_pgtable_alloc(struct panfrost_file_priv *priv); -void panfrost_mmu_pgtable_free(struct panfrost_file_priv *priv); +struct panfrost_mmu *panfrost_mmu_ctx_get(struct panfrost_mmu *mmu); +void panfrost_mmu_ctx_put(struct panfrost_mmu *mmu); +struct panfrost_mmu *panfrost_mmu_ctx_create(struct panfrost_device *pfdev); #endif

4 years, 3 months

1
0
0 0

2025

2024

2023

2022

2021

2020

2019

2018

2017

Linux-stable-mirror September 2021