6.12-stable review patch. If anyone has any objections, please let me know.
------------------
From: Umesh Nerlige Ramappa umesh.nerlige.ramappa@intel.com
[ Upstream commit 66c8f7b435bddb7d8577ac8a57e175a6cb147227 ]
For determining actual job execution time, save the current value of the CTX_TIMESTAMP register rather than the value saved in LRC since the current register value is the closest to the start time of the job.
v2: Define MI_STORE_REGISTER_MEM to fix compile error v3: Place MI_STORE_REGISTER_MEM sorted by MI_INSTR (Lucas)
Fixes: 65921374c48f ("drm/xe: Emit ctx timestamp copy in ring ops") Signed-off-by: Umesh Nerlige Ramappa umesh.nerlige.ramappa@intel.com Reviewed-by: Matthew Brost matthew.brost@intel.com Reviewed-by: Lucas De Marchi lucas.demarchi@intel.com Link: https://lore.kernel.org/r/20250509161159.2173069-6-umesh.nerlige.ramappa@int... (cherry picked from commit 38b14233e5deff51db8faec287b4acd227152246) Signed-off-by: Lucas De Marchi lucas.demarchi@intel.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/gpu/drm/xe/instructions/xe_mi_commands.h | 4 ++++ drivers/gpu/drm/xe/xe_lrc.c | 2 +- drivers/gpu/drm/xe/xe_ring_ops.c | 7 ++----- 3 files changed, 7 insertions(+), 6 deletions(-)
diff --git a/drivers/gpu/drm/xe/instructions/xe_mi_commands.h b/drivers/gpu/drm/xe/instructions/xe_mi_commands.h index 10ec2920d31b3..d4033278be9fc 100644 --- a/drivers/gpu/drm/xe/instructions/xe_mi_commands.h +++ b/drivers/gpu/drm/xe/instructions/xe_mi_commands.h @@ -47,6 +47,10 @@ #define MI_LRI_FORCE_POSTED REG_BIT(12) #define MI_LRI_LEN(x) (((x) & 0xff) + 1)
+#define MI_STORE_REGISTER_MEM (__MI_INSTR(0x24) | XE_INSTR_NUM_DW(4)) +#define MI_SRM_USE_GGTT REG_BIT(22) +#define MI_SRM_ADD_CS_OFFSET REG_BIT(19) + #define MI_FLUSH_DW __MI_INSTR(0x26) #define MI_FLUSH_DW_STORE_INDEX REG_BIT(21) #define MI_INVALIDATE_TLB REG_BIT(18) diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index aec7db39c061e..2d4e38b3bab19 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -694,7 +694,7 @@ static inline u32 __xe_lrc_start_seqno_offset(struct xe_lrc *lrc)
static u32 __xe_lrc_ctx_job_timestamp_offset(struct xe_lrc *lrc) { - /* The start seqno is stored in the driver-defined portion of PPHWSP */ + /* This is stored in the driver-defined portion of PPHWSP */ return xe_lrc_pphwsp_offset(lrc) + LRC_CTX_JOB_TIMESTAMP_OFFSET; }
diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c index 9f327f27c0726..fb31e09acb519 100644 --- a/drivers/gpu/drm/xe/xe_ring_ops.c +++ b/drivers/gpu/drm/xe/xe_ring_ops.c @@ -229,13 +229,10 @@ static u32 get_ppgtt_flag(struct xe_sched_job *job)
static int emit_copy_timestamp(struct xe_lrc *lrc, u32 *dw, int i) { - dw[i++] = MI_COPY_MEM_MEM | MI_COPY_MEM_MEM_SRC_GGTT | - MI_COPY_MEM_MEM_DST_GGTT; + dw[i++] = MI_STORE_REGISTER_MEM | MI_SRM_USE_GGTT | MI_SRM_ADD_CS_OFFSET; + dw[i++] = RING_CTX_TIMESTAMP(0).addr; dw[i++] = xe_lrc_ctx_job_timestamp_ggtt_addr(lrc); dw[i++] = 0; - dw[i++] = xe_lrc_ctx_timestamp_ggtt_addr(lrc); - dw[i++] = 0; - dw[i++] = MI_NOOP;
return i; }