From: Chris Wilson chris.p.wilson@intel.com
commit db100e28fdf026a1fc10657c5170bb1e65663805 upstream.
Check if the device is powered down prior to any engine activity, as, on such cases, all the TLBs were already invalidated, so an explicit TLB invalidation is not needed, thus reducing the performance regression impact due to it.
This becomes more significant with GuC, as it can only do so when the connection to the GuC is awake.
Cc: stable@vger.kernel.org Fixes: 7938d61591d3 ("drm/i915: Flush TLBs before releasing backing store") Signed-off-by: Chris Wilson chris.p.wilson@intel.com Cc: Fei Yang fei.yang@intel.com Reviewed-by: Andi Shyti andi.shyti@linux.intel.com Acked-by: Thomas Hellström thomas.hellstrom@linux.intel.com Acked-by: Tvrtko Ursulin tvrtko.ursulin@intel.com Signed-off-by: Mauro Carvalho Chehab mchehab@kernel.org Signed-off-by: Andi Shyti andi.shyti@linux.intel.com Link: https://patchwork.freedesktop.org/patch/msgid/278a57a672edac75683f0818b292e9... (cherry picked from commit 4bedceaed1ae1172cfe72d3ff752b3a1d32fe4d9) Signed-off-by: Rodrigo Vivi rodrigo.vivi@intel.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org --- drivers/gpu/drm/i915/gem/i915_gem_pages.c | 10 ++++++---- drivers/gpu/drm/i915/gt/intel_gt.c | 17 ++++++++++------- drivers/gpu/drm/i915/gt/intel_gt_pm.h | 3 +++ 3 files changed, 19 insertions(+), 11 deletions(-)
--- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c @@ -6,14 +6,15 @@
#include <drm/drm_cache.h>
+#include "gt/intel_gt.h" +#include "gt/intel_gt_pm.h" + #include "i915_drv.h" #include "i915_gem_object.h" #include "i915_scatterlist.h" #include "i915_gem_lmem.h" #include "i915_gem_mman.h"
-#include "gt/intel_gt.h" - void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj, struct sg_table *pages, unsigned int sg_page_sizes) @@ -217,10 +218,11 @@ __i915_gem_object_unset_pages(struct drm
if (test_and_clear_bit(I915_BO_WAS_BOUND_BIT, &obj->flags)) { struct drm_i915_private *i915 = to_i915(obj->base.dev); + struct intel_gt *gt = to_gt(i915); intel_wakeref_t wakeref;
- with_intel_runtime_pm_if_active(&i915->runtime_pm, wakeref) - intel_gt_invalidate_tlbs(to_gt(i915)); + with_intel_gt_pm_if_awake(gt, wakeref) + intel_gt_invalidate_tlbs(gt); }
return pages; --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -11,6 +11,7 @@
#include "i915_drv.h" #include "intel_context.h" +#include "intel_engine_pm.h" #include "intel_engine_regs.h" #include "intel_gt.h" #include "intel_gt_buffer_pool.h" @@ -1181,6 +1182,7 @@ void intel_gt_invalidate_tlbs(struct int struct drm_i915_private *i915 = gt->i915; struct intel_uncore *uncore = gt->uncore; struct intel_engine_cs *engine; + intel_engine_mask_t awake, tmp; enum intel_engine_id id; const i915_reg_t *regs; unsigned int num = 0; @@ -1204,26 +1206,31 @@ void intel_gt_invalidate_tlbs(struct int
GEM_TRACE("\n");
- assert_rpm_wakelock_held(&i915->runtime_pm); - mutex_lock(>->tlb_invalidate_lock); intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
spin_lock_irq(&uncore->lock); /* serialise invalidate with GT reset */
+ awake = 0; for_each_engine(engine, gt, id) { struct reg_and_bit rb;
+ if (!intel_engine_pm_is_awake(engine)) + continue; + rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num); if (!i915_mmio_reg_offset(rb.reg)) continue;
intel_uncore_write_fw(uncore, rb.reg, rb.bit); + awake |= engine->mask; }
spin_unlock_irq(&uncore->lock);
- for_each_engine(engine, gt, id) { + for_each_engine_masked(engine, gt, awake, tmp) { + struct reg_and_bit rb; + /* * HW architecture suggest typical invalidation time at 40us, * with pessimistic cases up to 100us and a recommendation to @@ -1231,12 +1238,8 @@ void intel_gt_invalidate_tlbs(struct int */ const unsigned int timeout_us = 100; const unsigned int timeout_ms = 4; - struct reg_and_bit rb;
rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num); - if (!i915_mmio_reg_offset(rb.reg)) - continue; - if (__intel_wait_for_register_fw(uncore, rb.reg, rb.bit, 0, timeout_us, timeout_ms, --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.h @@ -55,6 +55,9 @@ static inline void intel_gt_pm_might_put for (tmp = 1, intel_gt_pm_get(gt); tmp; \ intel_gt_pm_put(gt), tmp = 0)
+#define with_intel_gt_pm_if_awake(gt, wf) \ + for (wf = intel_gt_pm_get_if_awake(gt); wf; intel_gt_pm_put_async(gt), wf = 0) + static inline int intel_gt_pm_wait_for_idle(struct intel_gt *gt) { return intel_wakeref_wait_for_idle(>->wakeref);