June 2020 - Linux-stable-mirror

[PATCH 076/185] drm/i915: Mark concurrent submissions with a weak-dependency

by Chris Wilson

We recorded the dependencies for WAIT_FOR_SUBMIT in order that we could correctly perform priority inheritance from the parallel branches to the common trunk. However, for the purpose of timeslicing and reset handling, the dependency is weak -- as we the pair of requests are allowed to run in parallel and not in strict succession. The real significance though is that this allows us to rearrange groups of WAIT_FOR_SUBMIT linked requests along the single engine, and so can resolve user level inter-batch scheduling dependencies from user semaphores. Fixes: c81471f5e95c ("drm/i915: Copy across scheduler behaviour flags across submit fences") Testcase: igt/gem_exec_fence/submit Signed-off-by: Chris Wilson <chris(a)chris-wilson.co.uk> Cc: Tvrtko Ursulin <tvrtko.ursulin(a)intel.com> Cc: <stable(a)vger.kernel.org> # v5.6+ Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin(a)intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20200507155109.8892-1-chris@c… (cherry picked from commit 6b6cd2ebd8d071e55998e32b648bb8081f7f02bb) --- drivers/gpu/drm/i915/gt/intel_lrc.c | 3 +++ drivers/gpu/drm/i915/i915_request.c | 8 ++++++-- drivers/gpu/drm/i915/i915_scheduler.c | 6 +++--- drivers/gpu/drm/i915/i915_scheduler.h | 3 ++- drivers/gpu/drm/i915/i915_scheduler_types.h | 1 + 5 files changed, 15 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 2237d03545440..e57b827b00b1e 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -1902,6 +1902,9 @@ static void defer_request(struct i915_request *rq, struct list_head * const pl) struct i915_request *w = container_of(p->waiter, typeof(*w), sched); + if (p->flags & I915_DEPENDENCY_WEAK) + continue; + /* Leave semaphores spinning on the other engines */ if (w->engine != rq->engine) continue; diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index ee406fa6774ad..ed1af57d1db18 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -1048,7 +1048,9 @@ i915_request_await_request(struct i915_request *to, struct i915_request *from) return 0; if (to->engine->schedule) { - ret = i915_sched_node_add_dependency(&to->sched, &from->sched); + ret = i915_sched_node_add_dependency(&to->sched, + &from->sched, + I915_DEPENDENCY_EXTERNAL); if (ret < 0) return ret; } @@ -1179,7 +1181,9 @@ __i915_request_await_execution(struct i915_request *to, /* Couple the dependency tree for PI on this exposed to->fence */ if (to->engine->schedule) { - err = i915_sched_node_add_dependency(&to->sched, &from->sched); + err = i915_sched_node_add_dependency(&to->sched, + &from->sched, + I915_DEPENDENCY_WEAK); if (err < 0) return err; } diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c index e99423e548c99..09d6d4538ff38 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.c +++ b/drivers/gpu/drm/i915/i915_scheduler.c @@ -463,7 +463,8 @@ bool __i915_sched_node_add_dependency(struct i915_sched_node *node, } int i915_sched_node_add_dependency(struct i915_sched_node *node, - struct i915_sched_node *signal) + struct i915_sched_node *signal, + unsigned long flags) { struct i915_dependency *dep; @@ -472,8 +473,7 @@ int i915_sched_node_add_dependency(struct i915_sched_node *node, return -ENOMEM; if (!__i915_sched_node_add_dependency(node, signal, dep, - I915_DEPENDENCY_EXTERNAL | - I915_DEPENDENCY_ALLOC)) + flags | I915_DEPENDENCY_ALLOC)) i915_dependency_free(dep); return 0; diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h index d1dc4efef77b5..6f0bf00fc5690 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.h +++ b/drivers/gpu/drm/i915/i915_scheduler.h @@ -34,7 +34,8 @@ bool __i915_sched_node_add_dependency(struct i915_sched_node *node, unsigned long flags); int i915_sched_node_add_dependency(struct i915_sched_node *node, - struct i915_sched_node *signal); + struct i915_sched_node *signal, + unsigned long flags); void i915_sched_node_fini(struct i915_sched_node *node); diff --git a/drivers/gpu/drm/i915/i915_scheduler_types.h b/drivers/gpu/drm/i915/i915_scheduler_types.h index d18e705500542..7186875088a0a 100644 --- a/drivers/gpu/drm/i915/i915_scheduler_types.h +++ b/drivers/gpu/drm/i915/i915_scheduler_types.h @@ -78,6 +78,7 @@ struct i915_dependency { unsigned long flags; #define I915_DEPENDENCY_ALLOC BIT(0) #define I915_DEPENDENCY_EXTERNAL BIT(1) +#define I915_DEPENDENCY_WEAK BIT(2) }; #endif /* _I915_SCHEDULER_TYPES_H_ */ --------------------------------------------------------------------- Intel Corporation (UK) Limited Registered No. 1134945 (England) Registered Office: Pipers Way, Swindon SN3 1RJ VAT No: 860 2173 47 This e-mail and any attachments may contain confidential material for the sole use of the intended recipient(s). Any review or distribution by others is strictly prohibited. If you are not the intended recipient, please contact the sender and delete all copies.

5 years, 6 months

1
0
0 0

[PATCH 060/185] drm/i915/execlists: Track inflight CCID

by Chris Wilson

The presumption is that by using a circular counter that is twice as large as the maximum ELSP submission, we would never reuse the same CCID for two inflight contexts. However, if we continually preempt an active context such that it always remains inflight, it can be resubmitted with an arbitrary number of paired contexts. As each of its paired contexts will use a new CCID, eventually it will wrap and submit two ELSP with the same CCID. Rather than use a simple circular counter, switch over to a small bitmap of inflight ids so we can avoid reusing one that is still potentially active. Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/1796 Fixes: 2935ed5339c4 ("drm/i915: Remove logical HW ID") Signed-off-by: Chris Wilson <chris(a)chris-wilson.co.uk> Cc: Mika Kuoppala <mika.kuoppala(a)linux.intel.com> Cc: <stable(a)vger.kernel.org> # v5.5+ Reviewed-by: Mika Kuoppala <mika.kuoppala(a)linux.intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20200428184751.11257-2-chris@… (cherry picked from commit 5c4a53e3b1cbc38d0906e382f1037290658759bb) --- drivers/gpu/drm/i915/gt/intel_engine_types.h | 3 +- drivers/gpu/drm/i915/gt/intel_lrc.c | 29 +++++++++++++++----- drivers/gpu/drm/i915/i915_perf.c | 3 +- drivers/gpu/drm/i915/selftests/i915_vma.c | 2 +- 4 files changed, 25 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index e0de4af9a3481..781f94ce65f65 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -308,8 +308,7 @@ struct intel_engine_cs { u32 context_size; u32 mmio_base; - unsigned int context_tag; -#define NUM_CONTEXT_TAG roundup_pow_of_two(2 * EXECLIST_MAX_PORTS) + unsigned long context_tag; struct rb_node uabi_node; diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index dfcc44171df1c..adfaf52ca8d08 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -1321,13 +1321,17 @@ __execlists_schedule_in(struct i915_request *rq) if (ce->tag) { /* Use a fixed tag for OA and friends */ + GEM_BUG_ON(ce->tag <= BITS_PER_LONG); ce->lrc.ccid = ce->tag; } else { /* We don't need a strict matching tag, just different values */ - ce->lrc.ccid = - (++engine->context_tag % NUM_CONTEXT_TAG) << - (GEN11_SW_CTX_ID_SHIFT - 32); - BUILD_BUG_ON(NUM_CONTEXT_TAG > GEN12_MAX_CONTEXT_HW_ID); + unsigned int tag = ffs(engine->context_tag); + + GEM_BUG_ON(tag == 0 || tag >= BITS_PER_LONG); + clear_bit(tag - 1, &engine->context_tag); + ce->lrc.ccid = tag << (GEN11_SW_CTX_ID_SHIFT - 32); + + BUILD_BUG_ON(BITS_PER_LONG > GEN12_MAX_CONTEXT_HW_ID); } ce->lrc.ccid |= engine->execlists.ccid; @@ -1371,7 +1375,8 @@ static void kick_siblings(struct i915_request *rq, struct intel_context *ce) static inline void __execlists_schedule_out(struct i915_request *rq, - struct intel_engine_cs * const engine) + struct intel_engine_cs * const engine, + unsigned int ccid) { struct intel_context * const ce = rq->context; @@ -1389,6 +1394,14 @@ __execlists_schedule_out(struct i915_request *rq, i915_request_completed(rq)) intel_engine_add_retire(engine, ce->timeline); + ccid >>= GEN11_SW_CTX_ID_SHIFT - 32; + ccid &= GEN12_MAX_CONTEXT_HW_ID; + if (ccid < BITS_PER_LONG) { + GEM_BUG_ON(ccid == 0); + GEM_BUG_ON(test_bit(ccid - 1, &engine->context_tag)); + set_bit(ccid - 1, &engine->context_tag); + } + intel_context_update_runtime(ce); intel_engine_context_out(engine); execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT); @@ -1414,15 +1427,17 @@ execlists_schedule_out(struct i915_request *rq) { struct intel_context * const ce = rq->context; struct intel_engine_cs *cur, *old; + u32 ccid; trace_i915_request_out(rq); + ccid = rq->context->lrc.ccid; old = READ_ONCE(ce->inflight); do cur = ptr_unmask_bits(old, 2) ? ptr_dec(old) : NULL; while (!try_cmpxchg(&ce->inflight, &old, cur)); if (!cur) - __execlists_schedule_out(rq, old); + __execlists_schedule_out(rq, old, ccid); i915_request_put(rq); } @@ -3883,7 +3898,7 @@ static void enable_execlists(struct intel_engine_cs *engine) enable_error_interrupt(engine); - engine->context_tag = 0; + engine->context_tag = GENMASK(BITS_PER_LONG - 2, 0); } static bool unexpected_starting_state(struct intel_engine_cs *engine) diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 6e3d378ebbbd6..820c6ba755cce 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -1326,11 +1326,10 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream) ((1U << GEN11_SW_CTX_ID_WIDTH) - 1) << (GEN11_SW_CTX_ID_SHIFT - 32); /* * Pick an unused context id - * 0 - (NUM_CONTEXT_TAG - 1) are used by other contexts + * 0 - BITS_PER_LONG are used by other contexts * GEN12_MAX_CONTEXT_HW_ID (0x7ff) is used by idle context */ stream->specific_ctx_id = (GEN12_MAX_CONTEXT_HW_ID - 1) << (GEN11_SW_CTX_ID_SHIFT - 32); - BUILD_BUG_ON((GEN12_MAX_CONTEXT_HW_ID - 1) < NUM_CONTEXT_TAG); break; } diff --git a/drivers/gpu/drm/i915/selftests/i915_vma.c b/drivers/gpu/drm/i915/selftests/i915_vma.c index 58b5f40a07dd6..af89c7fc8f593 100644 --- a/drivers/gpu/drm/i915/selftests/i915_vma.c +++ b/drivers/gpu/drm/i915/selftests/i915_vma.c @@ -173,7 +173,7 @@ static int igt_vma_create(void *arg) } nc = 0; - for_each_prime_number(num_ctx, 2 * NUM_CONTEXT_TAG) { + for_each_prime_number(num_ctx, 2 * BITS_PER_LONG) { for (; nc < num_ctx; nc++) { ctx = mock_context(i915, "mock"); if (!ctx) --------------------------------------------------------------------- Intel Corporation (UK) Limited Registered No. 1134945 (England) Registered Office: Pipers Way, Swindon SN3 1RJ VAT No: 860 2173 47 This e-mail and any attachments may contain confidential material for the sole use of the intended recipient(s). Any review or distribution by others is strictly prohibited. If you are not the intended recipient, please contact the sender and delete all copies.

5 years, 6 months

1
0
0 0

[PATCH 059/185] drm/i915/execlists: Avoid reusing the same logical CCID

by Chris Wilson

The bspec is confusing on the nature of the upper 32bits of the LRC descriptor. Once upon a time, it said that it uses the upper 32b to decide if it should perform a lite-restore, and so we must ensure that each unique context submitted to HW is given a unique CCID [for the duration of it being on the HW]. Currently, this is achieved by using a small circular tag, and assigning every context submitted to HW a new id. However, this tag is being cleared on repinning an inflight context such that we end up re-using the 0 tag for multiple contexts. To avoid accidentally clearing the CCID in the upper 32bits of the LRC descriptor, split the descriptor into two dwords so we can update the GGTT address separately from the CCID. Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/1796 Fixes: 2935ed5339c4 ("drm/i915: Remove logical HW ID") Signed-off-by: Chris Wilson <chris(a)chris-wilson.co.uk> Cc: Mika Kuoppala <mika.kuoppala(a)linux.intel.com> Cc: <stable(a)vger.kernel.org> # v5.5+ Reviewed-by: Mika Kuoppala <mika.kuoppala(a)linux.intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20200428184751.11257-1-chris@… (cherry picked from commit 2632f174a2e1a5fd40a70404fa8ccfd0b1f79ebd) --- drivers/gpu/drm/i915/gt/intel_context_types.h | 8 ++- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 6 +- drivers/gpu/drm/i915/gt/intel_engine_types.h | 5 ++ drivers/gpu/drm/i915/gt/intel_lrc.c | 57 ++++++++----------- .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 2 +- drivers/gpu/drm/i915/gvt/scheduler.c | 4 +- drivers/gpu/drm/i915/i915_perf.c | 3 +- 7 files changed, 44 insertions(+), 41 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h index e887a2ff28c50..45d257be44308 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_types.h +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h @@ -69,7 +69,13 @@ struct intel_context { #define CONTEXT_NOPREEMPT 7 u32 *lrc_reg_state; - u64 lrc_desc; + union { + struct { + u32 lrca; + u32 ccid; + }; + u64 desc; + } lrc; u32 tag; /* cookie passed to HW to track this context on submission */ /* Time on GPU as tracked by the hw. */ diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 1a2887fac48d6..a1bd3e3302774 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -1411,9 +1411,9 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine, int len; len = scnprintf(hdr, sizeof(hdr), - "\t\tActive[%d]: ccid:%08x, ", + "\t\tActive[%d]: ccid:%08x, ", (int)(port - execlists->active), - upper_32_bits(rq->context->lrc_desc)); + rq->context->lrc.ccid); len += print_ring(hdr + len, sizeof(hdr) - len, rq); scnprintf(hdr + len, sizeof(hdr) - len, "rq: "); print_request(m, rq, hdr); @@ -1425,7 +1425,7 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine, len = scnprintf(hdr, sizeof(hdr), "\t\tPending[%d]: ccid:%08x, ", (int)(port - execlists->pending), - upper_32_bits(rq->context->lrc_desc)); + rq->context->lrc.ccid); len += print_ring(hdr + len, sizeof(hdr) - len, rq); scnprintf(hdr + len, sizeof(hdr) - len, "rq: "); print_request(m, rq, hdr); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index 26ba9f6ab0c7a..e0de4af9a3481 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -155,6 +155,11 @@ struct intel_engine_execlists { */ struct i915_priolist default_priolist; + /** + * @ccid: identifier for contexts submitted to this engine + */ + u32 ccid; + /** * @yield: CCID at the time of the last semaphore-wait interrupt. * diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 59569bc5b6a48..dfcc44171df1c 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -560,10 +560,10 @@ assert_priority_queue(const struct i915_request *prev, * engine info, SW context ID and SW counter need to form a unique number * (Context ID) per lrc. */ -static u64 +static u32 lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine) { - u64 desc; + u32 desc; desc = INTEL_LEGACY_32B_CONTEXT; if (i915_vm_is_4lvl(ce->vm)) @@ -574,21 +574,7 @@ lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine) if (IS_GEN(engine->i915, 8)) desc |= GEN8_CTX_L3LLC_COHERENT; - desc |= i915_ggtt_offset(ce->state); /* bits 12-31 */ - /* - * The following 32bits are copied into the OA reports (dword 2). - * Consider updating oa_get_render_ctx_id in i915_perf.c when changing - * anything below. - */ - if (INTEL_GEN(engine->i915) >= 11) { - desc |= (u64)engine->instance << GEN11_ENGINE_INSTANCE_SHIFT; - /* bits 48-53 */ - - desc |= (u64)engine->class << GEN11_ENGINE_CLASS_SHIFT; - /* bits 61-63 */ - } - - return desc; + return i915_ggtt_offset(ce->state) | desc; } static inline unsigned int dword_in_page(void *addr) @@ -1285,7 +1271,7 @@ static void reset_active(struct i915_request *rq, __execlists_update_reg_state(ce, engine, head); /* We've switched away, so this should be a no-op, but intent matters */ - ce->lrc_desc |= CTX_DESC_FORCE_RESTORE; + ce->lrc.desc |= CTX_DESC_FORCE_RESTORE; } static void st_update_runtime_underflow(struct intel_context *ce, s32 dt) @@ -1333,18 +1319,19 @@ __execlists_schedule_in(struct i915_request *rq) if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) execlists_check_context(ce, engine); - ce->lrc_desc &= ~GENMASK_ULL(47, 37); if (ce->tag) { /* Use a fixed tag for OA and friends */ - ce->lrc_desc |= (u64)ce->tag << 32; + ce->lrc.ccid = ce->tag; } else { /* We don't need a strict matching tag, just different values */ - ce->lrc_desc |= - (u64)(++engine->context_tag % NUM_CONTEXT_TAG) << - GEN11_SW_CTX_ID_SHIFT; + ce->lrc.ccid = + (++engine->context_tag % NUM_CONTEXT_TAG) << + (GEN11_SW_CTX_ID_SHIFT - 32); BUILD_BUG_ON(NUM_CONTEXT_TAG > GEN12_MAX_CONTEXT_HW_ID); } + ce->lrc.ccid |= engine->execlists.ccid; + __intel_gt_pm_get(engine->gt); execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN); intel_engine_context_in(engine); @@ -1443,7 +1430,7 @@ execlists_schedule_out(struct i915_request *rq) static u64 execlists_update_context(struct i915_request *rq) { struct intel_context *ce = rq->context; - u64 desc = ce->lrc_desc; + u64 desc = ce->lrc.desc; u32 tail, prev; /* @@ -1482,7 +1469,7 @@ static u64 execlists_update_context(struct i915_request *rq) */ wmb(); - ce->lrc_desc &= ~CTX_DESC_FORCE_RESTORE; + ce->lrc.desc &= ~CTX_DESC_FORCE_RESTORE; return desc; } @@ -1503,8 +1490,9 @@ dump_port(char *buf, int buflen, const char *prefix, struct i915_request *rq) if (!rq) return ""; - snprintf(buf, buflen, "%s%llx:%lld%s prio %d", + snprintf(buf, buflen, "%sccid:%x %llx:%lld%s prio %d", prefix, + rq->context->lrc.ccid, rq->fence.context, rq->fence.seqno, i915_request_completed(rq) ? "!" : i915_request_started(rq) ? "*" : @@ -1880,7 +1868,7 @@ timeslice_yield(const struct intel_engine_execlists *el, * safe, yield if it might be stuck -- it will be given a fresh * timeslice in the near future. */ - return upper_32_bits(rq->context->lrc_desc) == READ_ONCE(el->yield); + return rq->context->lrc.ccid == READ_ONCE(el->yield); } static bool @@ -2907,7 +2895,7 @@ active_context(struct intel_engine_cs *engine, u32 ccid) */ for (port = el->active; (rq = *port); port++) { - if (upper_32_bits(rq->context->lrc_desc) == ccid) { + if (rq->context->lrc.ccid == ccid) { ENGINE_TRACE(engine, "ccid found at active:%zd\n", port - el->active); @@ -2916,7 +2904,7 @@ active_context(struct intel_engine_cs *engine, u32 ccid) } for (port = el->pending; (rq = *port); port++) { - if (upper_32_bits(rq->context->lrc_desc) == ccid) { + if (rq->context->lrc.ccid == ccid) { ENGINE_TRACE(engine, "ccid found at pending:%zd\n", port - el->pending); @@ -3337,7 +3325,7 @@ __execlists_context_pin(struct intel_context *ce, if (IS_ERR(vaddr)) return PTR_ERR(vaddr); - ce->lrc_desc = lrc_descriptor(ce, engine) | CTX_DESC_FORCE_RESTORE; + ce->lrc.lrca = lrc_descriptor(ce, engine) | CTX_DESC_FORCE_RESTORE; ce->lrc_reg_state = vaddr + LRC_STATE_OFFSET; __execlists_update_reg_state(ce, engine, ce->ring->tail); @@ -3366,7 +3354,7 @@ static void execlists_context_reset(struct intel_context *ce) ce, ce->engine, ce->ring, true); __execlists_update_reg_state(ce, ce->engine, ce->ring->tail); - ce->lrc_desc |= CTX_DESC_FORCE_RESTORE; + ce->lrc.desc |= CTX_DESC_FORCE_RESTORE; } static const struct intel_context_ops execlists_context_ops = { @@ -4077,7 +4065,7 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled) head, ce->ring->tail); __execlists_reset_reg_state(ce, engine); __execlists_update_reg_state(ce, engine, head); - ce->lrc_desc |= CTX_DESC_FORCE_RESTORE; /* paranoid: GPU was reset! */ + ce->lrc.desc |= CTX_DESC_FORCE_RESTORE; /* paranoid: GPU was reset! */ unwind: /* Push back any incomplete requests for replay after the reset. */ @@ -4880,6 +4868,11 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine) else execlists->csb_size = GEN11_CSB_ENTRIES; + if (INTEL_GEN(engine->i915) >= 11) { + execlists->ccid |= engine->instance << (GEN11_ENGINE_INSTANCE_SHIFT - 32); + execlists->ccid |= engine->class << (GEN11_ENGINE_CLASS_SHIFT - 32); + } + /* Finally, take ownership and responsibility for cleanup! */ engine->sanitize = execlists_sanitize; engine->release = execlists_release; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index fe7778c28d2d7..aa6d56e25a10a 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -217,7 +217,7 @@ static void guc_wq_item_append(struct intel_guc *guc, static void guc_add_request(struct intel_guc *guc, struct i915_request *rq) { struct intel_engine_cs *engine = rq->engine; - u32 ctx_desc = lower_32_bits(rq->context->lrc_desc); + u32 ctx_desc = rq->context->lrc.ccid; u32 ring_tail = intel_ring_set_tail(rq->ring, rq->tail) / sizeof(u64); guc_wq_item_append(guc, engine->guc_id, ctx_desc, diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index 68932b33c0577..9be89b8d952c1 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -286,7 +286,7 @@ static void shadow_context_descriptor_update(struct intel_context *ce, struct intel_vgpu_workload *workload) { - u64 desc = ce->lrc_desc; + u64 desc = ce->lrc.desc; /* * Update bits 0-11 of the context descriptor which includes flags @@ -296,7 +296,7 @@ shadow_context_descriptor_update(struct intel_context *ce, desc |= workload->ctx_desc.addressing_mode << GEN8_CTX_ADDRESSING_MODE_SHIFT; - ce->lrc_desc = desc; + ce->lrc.desc = desc; } static int copy_workload_to_ring_buffer(struct intel_vgpu_workload *workload) diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index a5bddcc5bb334..6e3d378ebbbd6 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -1309,8 +1309,7 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream) * dropped by GuC. They won't be part of the context * ID in the OA reports, so squash those lower bits. */ - stream->specific_ctx_id = - lower_32_bits(ce->lrc_desc) >> 12; + stream->specific_ctx_id = ce->lrc.lrca >> 12; /* * GuC uses the top bit to signal proxy submission, so --------------------------------------------------------------------- Intel Corporation (UK) Limited Registered No. 1134945 (England) Registered Office: Pipers Way, Swindon SN3 1RJ VAT No: 860 2173 47 This e-mail and any attachments may contain confidential material for the sole use of the intended recipient(s). Any review or distribution by others is strictly prohibited. If you are not the intended recipient, please contact the sender and delete all copies.

5 years, 6 months

1
0
0 0

[PATCH 030/185] drm/i915/execlists: Enable timeslice on partial virtual engine dequeue

by Chris Wilson

If we stop filling the ELSP due to an incompatible virtual engine request, check if we should enable the timeslice on behalf of the queue. This fixes the case where we are inspecting the last->next element when we know that the last element is the last request in the execution queue, and so decided we did not need to enable timeslicing despite the intent to do so! Fixes: 8ee36e048c98 ("drm/i915/execlists: Minimalistic timeslicing") Signed-off-by: Chris Wilson <chris(a)chris-wilson.co.uk> Cc: Mika Kuoppala <mika.kuoppala(a)linux.intel.com> Cc: Tvrtko Ursulin <tvrtko.ursulin(a)intel.com> Cc: <stable(a)vger.kernel.org> # v5.4+ Reviewed-by: Mika Kuoppala <mika.kuoppala(a)linux.intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20200306113012.3184606-1-chri… (cherry picked from commit 3df2deed411e0f1b7312baf0139aab8bba4c0410) --- drivers/gpu/drm/i915/gt/intel_lrc.c | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 354c68b74cec0..114c2b070a6d9 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -1839,11 +1839,9 @@ need_timeslice(struct intel_engine_cs *engine, const struct i915_request *rq) if (!intel_engine_has_timeslices(engine)) return false; - if (list_is_last(&rq->sched.link, &engine->active.requests)) - return false; - - hint = max(rq_prio(list_next_entry(rq, sched.link)), - engine->execlists.queue_priority_hint); + hint = engine->execlists.queue_priority_hint; + if (!list_is_last(&rq->sched.link, &engine->active.requests)) + hint = max(hint, rq_prio(list_next_entry(rq, sched.link))); return hint >= effective_prio(rq); } @@ -1885,6 +1883,18 @@ static void set_timeslice(struct intel_engine_cs *engine) set_timer_ms(&engine->execlists.timer, active_timeslice(engine)); } +static void start_timeslice(struct intel_engine_cs *engine) +{ + struct intel_engine_execlists *execlists = &engine->execlists; + + execlists->switch_priority_hint = execlists->queue_priority_hint; + + if (timer_pending(&execlists->timer)) + return; + + set_timer_ms(&execlists->timer, timeslice(engine)); +} + static void record_preemption(struct intel_engine_execlists *execlists) { (void)I915_SELFTEST_ONLY(execlists->preempt_hang.count++); @@ -2048,11 +2058,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) * Even if ELSP[1] is occupied and not worthy * of timeslices, our queue might be. */ - if (!execlists->timer.expires && - need_timeslice(engine, last)) - set_timer_ms(&execlists->timer, - timeslice(engine)); - + start_timeslice(engine); return; } } @@ -2087,7 +2093,8 @@ static void execlists_dequeue(struct intel_engine_cs *engine) if (last && !can_merge_rq(last, rq)) { spin_unlock(&ve->base.active.lock); - return; /* leave this for another */ + start_timeslice(engine); + return; /* leave this for another sibling */ } ENGINE_TRACE(engine, --------------------------------------------------------------------- Intel Corporation (UK) Limited Registered No. 1134945 (England) Registered Office: Pipers Way, Swindon SN3 1RJ VAT No: 860 2173 47 This e-mail and any attachments may contain confidential material for the sole use of the intended recipient(s). Any review or distribution by others is strictly prohibited. If you are not the intended recipient, please contact the sender and delete all copies.

5 years, 6 months

1
0
0 0

[PATCH 025/185] drm/i915: Return early for await_start on same timeline

by Chris Wilson

Requests within a timeline are ordered by that timeline, so awaiting for the start of a request within the timeline is a no-op. This used to work by falling out of the mutex_trylock() as the signaler and waiter had the same timeline and not returning an error. Fixes: 6a79d848403d ("drm/i915: Lock signaler timeline while navigating") Signed-off-by: Chris Wilson <chris(a)chris-wilson.co.uk> Cc: Tvrtko Ursulin <tvrtko.ursulin(a)linux.intel.com> Cc: <stable(a)vger.kernel.org> # v5.5+ Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin(a)intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20200305134822.2750496-1-chri… (cherry picked from commit ab7a69020fb5d5c7ba19fba60f62fd6f9ca9f779) --- drivers/gpu/drm/i915/i915_request.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 6c387fa1a8547..1213009f31fbe 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -830,8 +830,8 @@ i915_request_await_start(struct i915_request *rq, struct i915_request *signal) struct dma_fence *fence; int err; - GEM_BUG_ON(i915_request_timeline(rq) == - rcu_access_pointer(signal->timeline)); + if (i915_request_timeline(rq) == rcu_access_pointer(signal->timeline)) + return 0; if (i915_request_started(signal)) return 0; --------------------------------------------------------------------- Intel Corporation (UK) Limited Registered No. 1134945 (England) Registered Office: Pipers Way, Swindon SN3 1RJ VAT No: 860 2173 47 This e-mail and any attachments may contain confidential material for the sole use of the intended recipient(s). Any review or distribution by others is strictly prohibited. If you are not the intended recipient, please contact the sender and delete all copies.

5 years, 6 months

1
0
0 0

[PATCH 024/185] drm/i915: Actually emit the await_start

by Chris Wilson

Fix the inverted test to emit the wait on the end of the previous request if we /haven't/ already. Fixes: 6a79d848403d ("drm/i915: Lock signaler timeline while navigating") Signed-off-by: Chris Wilson <chris(a)chris-wilson.co.uk> Cc: Tvrtko Ursulin <tvrtko.ursulin(a)linux.intel.com> Cc: <stable(a)vger.kernel.org> # v5.5+ Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin(a)intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20200305104210.2619967-1-chri… (cherry picked from commit 07e9c59d63df6a1c44c1975c01827ba18b69270a) --- drivers/gpu/drm/i915/i915_request.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 191a538afa5a8..6c387fa1a8547 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -875,7 +875,7 @@ i915_request_await_start(struct i915_request *rq, struct i915_request *signal) return 0; err = 0; - if (intel_timeline_sync_is_later(i915_request_timeline(rq), fence)) + if (!intel_timeline_sync_is_later(i915_request_timeline(rq), fence)) err = i915_sw_fence_await_dma_fence(&rq->submit, fence, 0, I915_FENCE_GFP); --------------------------------------------------------------------- Intel Corporation (UK) Limited Registered No. 1134945 (England) Registered Office: Pipers Way, Swindon SN3 1RJ VAT No: 860 2173 47 This e-mail and any attachments may contain confidential material for the sole use of the intended recipient(s). Any review or distribution by others is strictly prohibited. If you are not the intended recipient, please contact the sender and delete all copies.

5 years, 6 months

1
0
0 0

[PATCH 011/185] drm/i915/execlists: Always force a context reload when rewinding RING_TAIL

by Chris Wilson

If we rewind the RING_TAIL on a context, due to a preemption event, we must force the context restore for the RING_TAIL update to be properly handled. Rather than note which preemption events may cause us to rewind the tail, compare the new request's tail with the previously submitted RING_TAIL, as it turns out that timeslicing was causing unexpected rewinds. <idle>-0 0d.s2 1280851190us : __execlists_submission_tasklet: 0000:00:02.0 rcs0: expired last=130:4698, prio=3, hint=3 <idle>-0 0d.s2 1280851192us : __i915_request_unsubmit: 0000:00:02.0 rcs0: fence 66:119966, current 119964 <idle>-0 0d.s2 1280851195us : __i915_request_unsubmit: 0000:00:02.0 rcs0: fence 130:4698, current 4695 <idle>-0 0d.s2 1280851198us : __i915_request_unsubmit: 0000:00:02.0 rcs0: fence 130:4696, current 4695 ^---- Note we unwind 2 requests from the same context <idle>-0 0d.s2 1280851208us : __i915_request_submit: 0000:00:02.0 rcs0: fence 130:4696, current 4695 <idle>-0 0d.s2 1280851213us : __i915_request_submit: 0000:00:02.0 rcs0: fence 134:1508, current 1506 ^---- But to apply the new timeslice, we have to replay the first request before the new client can start -- the unexpected RING_TAIL rewind <idle>-0 0d.s2 1280851219us : trace_ports: 0000:00:02.0 rcs0: submit { 130:4696*, 134:1508 } synmark2-5425 2..s. 1280851239us : process_csb: 0000:00:02.0 rcs0: cs-irq head=5, tail=0 synmark2-5425 2..s. 1280851240us : process_csb: 0000:00:02.0 rcs0: csb[0]: status=0x00008002:0x00000000 ^---- Preemption event for the ELSP update; note the lite-restore synmark2-5425 2..s. 1280851243us : trace_ports: 0000:00:02.0 rcs0: preempted { 130:4698, 66:119966 } synmark2-5425 2..s. 1280851246us : trace_ports: 0000:00:02.0 rcs0: promote { 130:4696*, 134:1508 } synmark2-5425 2.... 1280851462us : __i915_request_commit: 0000:00:02.0 rcs0: fence 130:4700, current 4695 synmark2-5425 2.... 1280852111us : __i915_request_commit: 0000:00:02.0 rcs0: fence 130:4702, current 4695 synmark2-5425 2.Ns1 1280852296us : process_csb: 0000:00:02.0 rcs0: cs-irq head=0, tail=2 synmark2-5425 2.Ns1 1280852297us : process_csb: 0000:00:02.0 rcs0: csb[1]: status=0x00000814:0x00000000 synmark2-5425 2.Ns1 1280852299us : trace_ports: 0000:00:02.0 rcs0: completed { 130:4696!, 134:1508 } synmark2-5425 2.Ns1 1280852301us : process_csb: 0000:00:02.0 rcs0: csb[2]: status=0x00000818:0x00000040 synmark2-5425 2.Ns1 1280852302us : trace_ports: 0000:00:02.0 rcs0: completed { 134:1508, 0:0 } synmark2-5425 2.Ns1 1280852313us : process_csb: process_csb:2336 GEM_BUG_ON(!i915_request_completed(*execlists->active) && !reset_in_progress(execlists)) Fixes: 8ee36e048c98 ("drm/i915/execlists: Minimalistic timeslicing") Referenecs: 82c69bf58650 ("drm/i915/gt: Detect if we miss WaIdleLiteRestore") Signed-off-by: Chris Wilson <chris(a)chris-wilson.co.uk> Cc: Mika Kuoppala <mika.kuoppala(a)linux.intel.com> Reviewed-by: Mika Kuoppala <mika.kuoppala(a)linux.intel.com> Cc: <stable(a)vger.kernel.org> # v5.4+ Link: https://patchwork.freedesktop.org/patch/msgid/20200207211452.2860634-1-chri… (cherry picked from commit 5ba32c7be81e53ea8a27190b0f6be98e6c6779af) --- drivers/gpu/drm/i915/gt/intel_lrc.c | 18 ++++++++---------- drivers/gpu/drm/i915/gt/intel_ring.c | 1 + drivers/gpu/drm/i915/gt/intel_ring.h | 8 ++++++++ drivers/gpu/drm/i915/gt/intel_ring_types.h | 1 + 4 files changed, 18 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index cded952e7ddf1..742c08e79409d 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -1446,7 +1446,7 @@ static u64 execlists_update_context(struct i915_request *rq) { struct intel_context *ce = rq->context; u64 desc = ce->lrc_desc; - u32 tail; + u32 tail, prev; /* * WaIdleLiteRestore:bdw,skl @@ -1459,9 +1459,15 @@ static u64 execlists_update_context(struct i915_request *rq) * subsequent resubmissions (for lite restore). Should that fail us, * and we try and submit the same tail again, force the context * reload. + * + * If we need to return to a preempted context, we need to skip the + * lite-restore and force it to reload the RING_TAIL. Otherwise, the + * HW has a tendency to ignore us rewinding the TAIL to the end of + * an earlier request. */ tail = intel_ring_set_tail(rq->ring, rq->tail); - if (unlikely(ce->lrc_reg_state[CTX_RING_TAIL] == tail)) + prev = ce->lrc_reg_state[CTX_RING_TAIL]; + if (unlikely(intel_ring_direction(rq->ring, tail, prev) <= 0)) desc |= CTX_DESC_FORCE_RESTORE; ce->lrc_reg_state[CTX_RING_TAIL] = tail; rq->tail = rq->wa_tail; @@ -1979,14 +1985,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine) */ __unwind_incomplete_requests(engine); - /* - * If we need to return to the preempted context, we - * need to skip the lite-restore and force it to - * reload the RING_TAIL. Otherwise, the HW has a - * tendency to ignore us rewinding the TAIL to the - * end of an earlier request. - */ - last->context->lrc_desc |= CTX_DESC_FORCE_RESTORE; last = NULL; } else if (need_timeslice(engine, last) && timer_expired(&engine->execlists.timer)) { diff --git a/drivers/gpu/drm/i915/gt/intel_ring.c b/drivers/gpu/drm/i915/gt/intel_ring.c index 374b28f13ca0b..6ff803f397c4d 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring.c +++ b/drivers/gpu/drm/i915/gt/intel_ring.c @@ -145,6 +145,7 @@ intel_engine_create_ring(struct intel_engine_cs *engine, int size) kref_init(&ring->ref); ring->size = size; + ring->wrap = BITS_PER_TYPE(ring->size) - ilog2(size); /* * Workaround an erratum on the i830 which causes a hang if diff --git a/drivers/gpu/drm/i915/gt/intel_ring.h b/drivers/gpu/drm/i915/gt/intel_ring.h index ea2839d9e0445..5bdce24994aa0 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring.h +++ b/drivers/gpu/drm/i915/gt/intel_ring.h @@ -56,6 +56,14 @@ static inline u32 intel_ring_wrap(const struct intel_ring *ring, u32 pos) return pos & (ring->size - 1); } +static inline int intel_ring_direction(const struct intel_ring *ring, + u32 next, u32 prev) +{ + typecheck(typeof(ring->size), next); + typecheck(typeof(ring->size), prev); + return (next - prev) << ring->wrap; +} + static inline bool intel_ring_offset_valid(const struct intel_ring *ring, unsigned int pos) diff --git a/drivers/gpu/drm/i915/gt/intel_ring_types.h b/drivers/gpu/drm/i915/gt/intel_ring_types.h index d9f17f38e0cce..3cd7fec7fd8d5 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring_types.h +++ b/drivers/gpu/drm/i915/gt/intel_ring_types.h @@ -45,6 +45,7 @@ struct intel_ring { u32 space; u32 size; + u32 wrap; u32 effective_size; }; --------------------------------------------------------------------- Intel Corporation (UK) Limited Registered No. 1134945 (England) Registered Office: Pipers Way, Swindon SN3 1RJ VAT No: 860 2173 47 This e-mail and any attachments may contain confidential material for the sole use of the intended recipient(s). Any review or distribution by others is strictly prohibited. If you are not the intended recipient, please contact the sender and delete all copies.

5 years, 6 months

1
0
0 0

Re: [PATCH 1/1] selftests: fpga: dfl: A test for afu interrupt support

by Greg KH

On Tue, Jun 09, 2020 at 06:02:08AM -0700, trix(a)redhat.com wrote: > From: Tom Rix <trix(a)redhat.com> > > Check that the ioctl DFL_FPGA_PORT_ERR_GET_IRQ_NUM returns > an expected result. > > Tested on vf device 0xbcc1 > > Sample run with > $ sudo make -C tools/testing/selftests TARGETS=drivers/fpga run_tests > ... > ok 1 selftests: drivers/fpga: afu_intr > > Signed-off-by: Tom Rix <trix(a)redhat.com> > --- > tools/testing/selftests/Makefile | 1 + > tools/testing/selftests/drivers/fpga/Makefile | 9 +++++ > .../testing/selftests/drivers/fpga/afu_intr.c | 38 +++++++++++++++++++ > tools/testing/selftests/drivers/fpga/config | 1 + > 4 files changed, 49 insertions(+) > create mode 100644 tools/testing/selftests/drivers/fpga/Makefile > create mode 100644 tools/testing/selftests/drivers/fpga/afu_intr.c > create mode 100644 tools/testing/selftests/drivers/fpga/config > > diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile > index 1195bd85af38..4c6eda659125 100644 > --- a/tools/testing/selftests/Makefile > +++ b/tools/testing/selftests/Makefile > @@ -9,6 +9,7 @@ TARGETS += clone3 > TARGETS += cpufreq > TARGETS += cpu-hotplug > TARGETS += drivers/dma-buf > +TARGETS += drivers/fpga > TARGETS += efivarfs > TARGETS += exec > TARGETS += filesystems > diff --git a/tools/testing/selftests/drivers/fpga/Makefile b/tools/testing/selftests/drivers/fpga/Makefile > new file mode 100644 > index 000000000000..0a472e8c67c5 > --- /dev/null > +++ b/tools/testing/selftests/drivers/fpga/Makefile > @@ -0,0 +1,9 @@ > +# SPDX-License-Identifier: GPL-2.0-only > +CFLAGS += -I../../../../../usr/include/ > +CFLAGS += -I../../../../../include/uapi/ > + > +TEST_GEN_PROGS := afu_intr > + > +top_srcdir ?=../../../../.. > + > +include ../../lib.mk > diff --git a/tools/testing/selftests/drivers/fpga/afu_intr.c b/tools/testing/selftests/drivers/fpga/afu_intr.c > new file mode 100644 > index 000000000000..aa1efba94605 > --- /dev/null > +++ b/tools/testing/selftests/drivers/fpga/afu_intr.c > @@ -0,0 +1,38 @@ > +// SPDX-License-Identifier: GPL-2.0 > +#include <stdio.h> > +#include <stdlib.h> > +#include <stdint.h> > +#include <string.h> > +#include <linux/fcntl.h> > +#include <linux/fpga-dfl.h> > + > +#define TEST_PREFIX "drivers/fpga/afu_intr" > + > +int main(int argc, char *argv[]) > +{ > + int devfd, status; > + struct dfl_fpga_port_info port_info; > + uint32_t irq_num; > + > + devfd = open("/dev/dfl-port.0", O_RDONLY); > + if (devfd < 0) { > + printf("%s: [skip,no-ufpgaintr]\n", TEST_PREFIX); > + exit(77); > + } > + > + /* > + * From fpga-dl.h : > + * Currently hardware supports up to 1 irq. > + * Return: 0 on success, -errno on failure. > + */ > + irq_num = -1; > + status = ioctl(devfd, DFL_FPGA_PORT_ERR_GET_IRQ_NUM, &irq_num); > + if (status != 0 || irq_num > 255) { > + printf("%s: [FAIL,err-get-irq-num]\n", TEST_PREFIX); > + close(devfd); > + exit(1); > + } > + > + close(devfd); > + return 0; > +} Why not use the ksft_* functions and frameworks to properly print out the test status and results so that tools can correctly parse it? It's generally bad-form to make up your own format. thanks, gre gk-h

5 years, 6 months

3
2
0 0

[patch 1/3] clocksource: Remove obsolete ifdef

by Thomas Gleixner

CONFIG_GENERIC_VDSO_CLOCK_MODE was a transitional config switch which got removed after all architectures got converted to the new storage model. But the removal forgot to remove the #ifdef which guards the vdso_clock_mode sanity check, which effectively disables the sanity check. Remove it now. Fixes: f86fd32db706 ("lib/vdso: Cleanup clock mode storage leftovers") Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de> Cc: stable(a)vger.kernel.org --- kernel/time/clocksource.c | 2 -- 1 file changed, 2 deletions(-) --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -928,14 +928,12 @@ int __clocksource_register_scale(struct clocksource_arch_init(cs); -#ifdef CONFIG_GENERIC_VDSO_CLOCK_MODE if (cs->vdso_clock_mode < 0 || cs->vdso_clock_mode >= VDSO_CLOCKMODE_MAX) { pr_warn("clocksource %s registered with invalid VDSO mode %d. Disabling VDSO support.\n", cs->name, cs->vdso_clock_mode); cs->vdso_clock_mode = VDSO_CLOCKMODE_NONE; } -#endif /* Initialize mult/shift and max_idle_ns */ __clocksource_update_freq_scale(cs, scale, freq);

5 years, 6 months

2
1
0 0

[patch 2/3] lib/vdso: Provide sanity check for cycles (again)

by Thomas Gleixner

The original x86 VDSO implementation checked for the validity of the clock source read by testing whether the returned signed cycles value is less than zero. This check was also used by the vdso read function to signal that the current selected clocksource is not VDSO capable. During the rework of the VDSO code the check was removed and replaced with a check for the clocksource mode being != NONE. This turned out to be a mistake because the check is necessary for paravirt and hyperv clock sources. The reason is that these clock sources have their own internal sequence counter to validate the clocksource at the point of reading it. This is necessary because the hypervisor can invalidate the clocksource asynchronously so a check during the VDSO data update is not sufficient. Having a separate indicator for the validity is slower than just validating the cycles value. The check for it being negative turned out to be the fastest implementation and safe as it would require an uptime of ~73 years with a 4GHz counter frequency to result in a false positive. Add an optional function to validate the cycles with a default implementation which allows the compiler to optimize it out for architectures which do not require it. Reported-by: Miklos Szeredi <miklos(a)szeredi.hu> Fixes: 5d51bee725cc ("clocksource: Add common vdso clock mode storage") Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de> Cc: stable(a)vger.kernel.org --- lib/vdso/gettimeofday.c | 11 +++++++++++ 1 file changed, 11 insertions(+) --- a/lib/vdso/gettimeofday.c +++ b/lib/vdso/gettimeofday.c @@ -38,6 +38,13 @@ static inline bool vdso_clocksource_ok(c } #endif +#ifndef vdso_cycles_ok +static inline bool vdso_cycles_ok(u64 cycles) +{ + return true; +} +#endif + #ifdef CONFIG_TIME_NS static int do_hres_timens(const struct vdso_data *vdns, clockid_t clk, struct __kernel_timespec *ts) @@ -62,6 +69,8 @@ static int do_hres_timens(const struct v return -1; cycles = __arch_get_hw_counter(vd->clock_mode); + if (unlikely(!vdso_cycles_ok(cycles))) + return -1; ns = vdso_ts->nsec; last = vd->cycle_last; ns += vdso_calc_delta(cycles, last, vd->mask, vd->mult); @@ -130,6 +139,8 @@ static __always_inline int do_hres(const return -1; cycles = __arch_get_hw_counter(vd->clock_mode); + if (unlikely(!vdso_cycles_ok(cycles))) + return -1; ns = vdso_ts->nsec; last = vd->cycle_last; ns += vdso_calc_delta(cycles, last, vd->mask, vd->mult);

5 years, 6 months

2
1
0 0

2025

2024

2023

2022

2021

2020

2019

2018

2017

Linux-stable-mirror June 2020