Hi,
this series does basically two things:
1. Disables automatic load balancing as adviced by the hardware workaround.
2. Assigns all the CCS slices to one single user engine. The user will then be able to query only one CCS engine
I'm using here the "Requires: " tag, but I'm not sure the commit id will be valid, on the other hand, I don't know what commit id I should use.
Thanks Tvrtko, Matt and John for your reviews!
Andi
Changelog ========= v2 -> v3 - Simplified the algorithm for creating the list of the exported uabi engines. (Patch 1) (Thanks, Tvrtko) - Consider the fused engines when creating the uabi engine list (Patch 2) (Thanks, Matt) - Patch 4 now uses a the refactoring from patch 1, in a cleaner outcome.
v1 -> v2 - In Patch 1 use the correct workaround number (thanks Matt). - In Patch 2 do not add the extra CCS engines to the exposed UABI engine list and adapt the engine counting accordingly (thanks Tvrtko). - Reword the commit of Patch 2 (thanks John).
Andi Shyti (4): drm/i915/gt: Refactor uabi engine class/instance list creation drm/i915/gt: Do not exposed fused off engines. drm/i915/gt: Disable HW load balancing for CCS drm/i915/gt: Enable only one CCS for compute workload
drivers/gpu/drm/i915/gt/intel_engine_user.c | 52 ++++++++++++++++----- drivers/gpu/drm/i915/gt/intel_gt.c | 11 +++++ drivers/gpu/drm/i915/gt/intel_gt_regs.h | 3 ++ drivers/gpu/drm/i915/gt/intel_workarounds.c | 6 +++ 4 files changed, 60 insertions(+), 12 deletions(-)
For the upcoming changes we need a cleaner way to build the list of uabi engines.
Suggested-by: Tvrtko Ursulin tvrtko.ursulin@intel.com Signed-off-by: Andi Shyti andi.shyti@linux.intel.com --- drivers/gpu/drm/i915/gt/intel_engine_user.c | 29 ++++++++++++--------- 1 file changed, 17 insertions(+), 12 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_user.c b/drivers/gpu/drm/i915/gt/intel_engine_user.c index 833987015b8b..cf8f24ad88f6 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_user.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_user.c @@ -203,7 +203,7 @@ static void engine_rename(struct intel_engine_cs *engine, const char *name, u16
void intel_engines_driver_register(struct drm_i915_private *i915) { - u16 name_instance, other_instance = 0; + u16 class_instance[I915_LAST_UABI_ENGINE_CLASS + 1] = { }; struct legacy_ring ring = {}; struct list_head *it, *next; struct rb_node **p, *prev; @@ -214,6 +214,8 @@ void intel_engines_driver_register(struct drm_i915_private *i915) prev = NULL; p = &i915->uabi_engines.rb_node; list_for_each_safe(it, next, &engines) { + u16 uabi_class; + struct intel_engine_cs *engine = container_of(it, typeof(*engine), uabi_list);
@@ -222,15 +224,14 @@ void intel_engines_driver_register(struct drm_i915_private *i915)
GEM_BUG_ON(engine->class >= ARRAY_SIZE(uabi_classes)); engine->uabi_class = uabi_classes[engine->class]; - if (engine->uabi_class == I915_NO_UABI_CLASS) { - name_instance = other_instance++; - } else { - GEM_BUG_ON(engine->uabi_class >= - ARRAY_SIZE(i915->engine_uabi_class_count)); - name_instance = - i915->engine_uabi_class_count[engine->uabi_class]++; - } - engine->uabi_instance = name_instance; + + if (engine->uabi_class == I915_NO_UABI_CLASS) + uabi_class = I915_LAST_UABI_ENGINE_CLASS + 1; + else + uabi_class = engine->uabi_class; + + GEM_BUG_ON(uabi_class >= ARRAY_SIZE(class_instance)); + engine->uabi_instance = class_instance[uabi_class]++;
/* * Replace the internal name with the final user and log facing @@ -238,11 +239,15 @@ void intel_engines_driver_register(struct drm_i915_private *i915) */ engine_rename(engine, intel_engine_class_repr(engine->class), - name_instance); + engine->uabi_instance);
- if (engine->uabi_class == I915_NO_UABI_CLASS) + if (uabi_class > I915_LAST_UABI_ENGINE_CLASS) continue;
+ GEM_BUG_ON(uabi_class >= + ARRAY_SIZE(i915->engine_uabi_class_count)); + i915->engine_uabi_class_count[uabi_class]++; + rb_link_node(&engine->uabi_node, prev, p); rb_insert_color(&engine->uabi_node, &i915->uabi_engines);
Hi,
Thanks for your patch.
FYI: kernel test robot notices the stable kernel rule is not satisfied.
The check is based on https://www.kernel.org/doc/html/latest/process/stable-kernel-rules.html#opti...
Rule: add the tag "Cc: stable@vger.kernel.org" in the sign-off area to have the patch automatically included in the stable tree. Subject: [PATCH v3 1/4] drm/i915/gt: Refactor uabi engine class/instance list creation Link: https://lore.kernel.org/stable/20240229232859.70058-2-andi.shyti%40linux.int...
Quoting Andi Shyti (2024-03-01 01:28:56)
For the upcoming changes we need a cleaner way to build the list of uabi engines.
Suggested-by: Tvrtko Ursulin tvrtko.ursulin@intel.com Signed-off-by: Andi Shyti andi.shyti@linux.intel.com
drivers/gpu/drm/i915/gt/intel_engine_user.c | 29 ++++++++++++--------- 1 file changed, 17 insertions(+), 12 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_user.c b/drivers/gpu/drm/i915/gt/intel_engine_user.c index 833987015b8b..cf8f24ad88f6 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_user.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_user.c @@ -203,7 +203,7 @@ static void engine_rename(struct intel_engine_cs *engine, const char *name, u16 void intel_engines_driver_register(struct drm_i915_private *i915) {
u16 name_instance, other_instance = 0;
u16 class_instance[I915_LAST_UABI_ENGINE_CLASS + 1] = { };
Do you mean this to be size I915_LAST_UABI_ENGINE_CLASS + 2? Because ...
<SNIP>
@@ -222,15 +224,14 @@ void intel_engines_driver_register(struct drm_i915_private *i915) GEM_BUG_ON(engine->class >= ARRAY_SIZE(uabi_classes)); engine->uabi_class = uabi_classes[engine->class];
if (engine->uabi_class == I915_NO_UABI_CLASS) {
name_instance = other_instance++;
} else {
GEM_BUG_ON(engine->uabi_class >=
ARRAY_SIZE(i915->engine_uabi_class_count));
name_instance =
i915->engine_uabi_class_count[engine->uabi_class]++;
}
engine->uabi_instance = name_instance;
if (engine->uabi_class == I915_NO_UABI_CLASS)
uabi_class = I915_LAST_UABI_ENGINE_CLASS + 1;
.. otherwise this ...
else
uabi_class = engine->uabi_class;
GEM_BUG_ON(uabi_class >= ARRAY_SIZE(class_instance));
.. will trigger this assertion?
Regards, Joonas
Hi Joonas,
...
void intel_engines_driver_register(struct drm_i915_private *i915) {
u16 name_instance, other_instance = 0;
u16 class_instance[I915_LAST_UABI_ENGINE_CLASS + 1] = { };
Do you mean this to be size I915_LAST_UABI_ENGINE_CLASS + 2? Because ...
Yes, this is an oversight. I was playing around with indexes to optimize the code a bit more and I forgot to restore this back.
Thanks, Andi
<SNIP>
@@ -222,15 +224,14 @@ void intel_engines_driver_register(struct drm_i915_private *i915) GEM_BUG_ON(engine->class >= ARRAY_SIZE(uabi_classes)); engine->uabi_class = uabi_classes[engine->class];
if (engine->uabi_class == I915_NO_UABI_CLASS) {
name_instance = other_instance++;
} else {
GEM_BUG_ON(engine->uabi_class >=
ARRAY_SIZE(i915->engine_uabi_class_count));
name_instance =
i915->engine_uabi_class_count[engine->uabi_class]++;
}
engine->uabi_instance = name_instance;
if (engine->uabi_class == I915_NO_UABI_CLASS)
uabi_class = I915_LAST_UABI_ENGINE_CLASS + 1;
.. otherwise this ...
else
uabi_class = engine->uabi_class;
GEM_BUG_ON(uabi_class >= ARRAY_SIZE(class_instance));
.. will trigger this assertion?
Regards, Joonas
Some of the CCS engines are disabled. They should not be listed in the uabi_engine list, that is the list of engines that the user can see.
Fixes: d2eae8e98d59 ("drm/i915/dg2: Drop force_probe requirement") Requires: 4e4f77d74878 ("drm/i915/gt: Refactor uabi engine class/instance list creation") Signed-off-by: Andi Shyti andi.shyti@linux.intel.com --- drivers/gpu/drm/i915/gt/intel_engine_user.c | 12 ++++++++++++ 1 file changed, 12 insertions(+)
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_user.c b/drivers/gpu/drm/i915/gt/intel_engine_user.c index cf8f24ad88f6..ec5bcd1c1ec4 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_user.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_user.c @@ -244,6 +244,18 @@ void intel_engines_driver_register(struct drm_i915_private *i915) if (uabi_class > I915_LAST_UABI_ENGINE_CLASS) continue;
+ /* + * If the CCS engine is fused off, the corresponding bit + * in the engine mask is disabled. Do not expose it + * to the user. + * + * By default at least one engine is enabled (check + * the engine_mask_apply_compute_fuses() function. + */ + if (!(engine->gt->info.engine_mask & + BIT(_CCS(engine->uabi_instance)))) + continue; + GEM_BUG_ON(uabi_class >= ARRAY_SIZE(i915->engine_uabi_class_count)); i915->engine_uabi_class_count[uabi_class]++;
On Fri, Mar 01, 2024 at 12:28:57AM +0100, Andi Shyti wrote:
Some of the CCS engines are disabled. They should not be listed in the uabi_engine list, that is the list of engines that the user can see.
Fused off engines already aren't visible to userspace (or to the kernel for that matter). For CCS engines engine_mask_apply_compute_fuses() removes the fused off engines from the runtime engine mask; other engine types are handled in similar functions. Any engine that doesn't appear in the filtered down engine_mask won't even have a 'struct intel_engine_cs' allocated for it.
Matt
Fixes: d2eae8e98d59 ("drm/i915/dg2: Drop force_probe requirement") Requires: 4e4f77d74878 ("drm/i915/gt: Refactor uabi engine class/instance list creation") Signed-off-by: Andi Shyti andi.shyti@linux.intel.com
drivers/gpu/drm/i915/gt/intel_engine_user.c | 12 ++++++++++++ 1 file changed, 12 insertions(+)
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_user.c b/drivers/gpu/drm/i915/gt/intel_engine_user.c index cf8f24ad88f6..ec5bcd1c1ec4 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_user.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_user.c @@ -244,6 +244,18 @@ void intel_engines_driver_register(struct drm_i915_private *i915) if (uabi_class > I915_LAST_UABI_ENGINE_CLASS) continue;
/*
* If the CCS engine is fused off, the corresponding bit
* in the engine mask is disabled. Do not expose it
* to the user.
*
* By default at least one engine is enabled (check
* the engine_mask_apply_compute_fuses() function.
*/
if (!(engine->gt->info.engine_mask &
BIT(_CCS(engine->uabi_instance))))
continue;
- GEM_BUG_ON(uabi_class >= ARRAY_SIZE(i915->engine_uabi_class_count)); i915->engine_uabi_class_count[uabi_class]++;
-- 2.43.0
The hardware should not dynamically balance the load between CCS engines. Wa_14019159160 recommends disabling it across all platforms.
Fixes: d2eae8e98d59 ("drm/i915/dg2: Drop force_probe requirement") Signed-off-by: Andi Shyti andi.shyti@linux.intel.com Cc: Chris Wilson chris.p.wilson@linux.intel.com Cc: Joonas Lahtinen joonas.lahtinen@linux.intel.com Cc: Matt Roper matthew.d.roper@intel.com Cc: stable@vger.kernel.org # v6.2+ --- drivers/gpu/drm/i915/gt/intel_gt_regs.h | 1 + drivers/gpu/drm/i915/gt/intel_workarounds.c | 6 ++++++ 2 files changed, 7 insertions(+)
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index 50962cfd1353..cf709f6c05ae 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -1478,6 +1478,7 @@
#define GEN12_RCU_MODE _MMIO(0x14800) #define GEN12_RCU_MODE_CCS_ENABLE REG_BIT(0) +#define XEHP_RCU_MODE_FIXED_SLICE_CCS_MODE REG_BIT(1)
#define CHV_FUSE_GT _MMIO(VLV_GUNIT_BASE + 0x2168) #define CHV_FGT_DISABLE_SS0 (1 << 10) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index d67d44611c28..57c1f3d2589e 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -2945,6 +2945,12 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li
/* Wa_18028616096 */ wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0_UDW, UGM_FRAGMENT_THRESHOLD_TO_3); + + /* + * Wa_14019159160: disable the CCS load balancing + * indiscriminately for all the platforms + */ + wa_masked_en(wal, GEN12_RCU_MODE, XEHP_RCU_MODE_FIXED_SLICE_CCS_MODE); }
if (IS_DG2_G11(i915)) {
Enable only one CCS engine by default with all the compute sices allocated to it.
While generating the list of UABI engines to be exposed to the user, exclude any additional CCS engines beyond the first instance.
This change can be tested with igt i915_query.
Fixes: d2eae8e98d59 ("drm/i915/dg2: Drop force_probe requirement") Requires: 4e4f77d74878 ("drm/i915/gt: Refactor uabi engine class/instance list creation") Signed-off-by: Andi Shyti andi.shyti@linux.intel.com Cc: Chris Wilson chris.p.wilson@linux.intel.com Cc: Joonas Lahtinen joonas.lahtinen@linux.intel.com Cc: Matt Roper matthew.d.roper@intel.com Cc: stable@vger.kernel.org # v6.2+ --- drivers/gpu/drm/i915/gt/intel_engine_user.c | 11 +++++++++++ drivers/gpu/drm/i915/gt/intel_gt.c | 11 +++++++++++ drivers/gpu/drm/i915/gt/intel_gt_regs.h | 2 ++ 3 files changed, 24 insertions(+)
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_user.c b/drivers/gpu/drm/i915/gt/intel_engine_user.c index ec5bcd1c1ec4..6d6ef11f55e5 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_user.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_user.c @@ -208,6 +208,7 @@ void intel_engines_driver_register(struct drm_i915_private *i915) struct list_head *it, *next; struct rb_node **p, *prev; LIST_HEAD(engines); + u16 uabi_ccs = 0;
sort_engines(i915, &engines);
@@ -256,6 +257,16 @@ void intel_engines_driver_register(struct drm_i915_private *i915) BIT(_CCS(engine->uabi_instance)))) continue;
+ /* + * The load is balanced among all the available compute + * slices. Expose only the first instance of the compute + * engine. + */ + if (IS_DG2(i915) && + uabi_class == I915_ENGINE_CLASS_COMPUTE && + uabi_ccs++) + continue; + GEM_BUG_ON(uabi_class >= ARRAY_SIZE(i915->engine_uabi_class_count)); i915->engine_uabi_class_count[uabi_class]++; diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index a425db5ed3a2..e19df4ef47f6 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -168,6 +168,14 @@ static void init_unused_rings(struct intel_gt *gt) } }
+static void intel_gt_apply_ccs_mode(struct intel_gt *gt) +{ + if (!IS_DG2(gt->i915)) + return; + + intel_uncore_write(gt->uncore, XEHP_CCS_MODE, 0); +} + int intel_gt_init_hw(struct intel_gt *gt) { struct drm_i915_private *i915 = gt->i915; @@ -195,6 +203,9 @@ int intel_gt_init_hw(struct intel_gt *gt)
intel_gt_init_swizzling(gt);
+ /* Configure CCS mode */ + intel_gt_apply_ccs_mode(gt); + /* * At least 830 can leave some of the unused rings * "active" (ie. head != tail) after resume which diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index cf709f6c05ae..c148113770ea 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -1605,6 +1605,8 @@ #define GEN12_VOLTAGE_MASK REG_GENMASK(10, 0) #define GEN12_CAGF_MASK REG_GENMASK(19, 11)
+#define XEHP_CCS_MODE _MMIO(0x14804) + #define GEN11_GT_INTR_DW(x) _MMIO(0x190018 + ((x) * 4)) #define GEN11_CSME (31) #define GEN12_HECI_2 (30)
linux-stable-mirror@lists.linaro.org