On 8/18/2021 11:16 PM, Matthew Brost wrote:
Rework and simplify the locking with GuC submission. Drop
sched_state_no_lock, move all fields under guc_state.sched_state, and
protect all these fields with guc_state.lock. This requires changing
the locking hierarchy from guc_state.lock -> sched_engine.lock to
sched_engine.lock -> guc_state.lock.
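For illustration only (not part of the diff below), the new nesting
order looks roughly like this sketch; guc_add_request() follows this
pattern, with sched_engine.lock already held by the caller:

	unsigned long flags;

	/* sched_engine.lock is now the outermost lock ... */
	spin_lock_irqsave(&sched_engine->lock, flags);
	/* ... and guc_state.lock nests inside it */
	spin_lock(&ce->guc_state.lock);
	/* all ce->guc_state.sched_state updates happen here */
	spin_unlock(&ce->guc_state.lock);
	spin_unlock_irqrestore(&sched_engine->lock, flags);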
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
 drivers/gpu/drm/i915/gt/intel_context_types.h |   5 +-
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 186 ++++++++----------
 drivers/gpu/drm/i915/i915_trace.h             |   6 +-
 3 files changed, 89 insertions(+), 108 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
index c06171ee8792..d5d643b04d54 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -161,7 +161,7 @@ struct intel_context {
		 * sched_state: scheduling state of this context using GuC
		 * submission
		 */
-		u16 sched_state;
+		u32 sched_state;
		/*
		 * fences: maintains of list of requests that have a submit
		 * fence related to GuC submission
@@ -178,9 +178,6 @@ struct intel_context {
		struct list_head requests;
	} guc_active;
-	/* GuC scheduling state flags that do not require a lock. */
-	atomic_t guc_sched_state_no_lock;
-
	/* GuC LRC descriptor ID */
	u16 guc_id;
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index 053f4485d6e9..509b298e7cf3 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -72,86 +72,23 @@ guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count);
#define GUC_REQUEST_SIZE 64 /* bytes */
-/*
- * Below is a set of functions which control the GuC scheduling state which do
- * not require a lock as all state transitions are mutually exclusive. i.e. It
- * is not possible for the context pinning code and submission, for the same
- * context, to be executing simultaneously. We still need an atomic as it is
- * possible for some of the bits to changing at the same time though.
- */
-#define SCHED_STATE_NO_LOCK_ENABLED			BIT(0)
-#define SCHED_STATE_NO_LOCK_PENDING_ENABLE		BIT(1)
-#define SCHED_STATE_NO_LOCK_REGISTERED			BIT(2)
-static inline bool context_enabled(struct intel_context *ce)
-{
-	return (atomic_read(&ce->guc_sched_state_no_lock) &
-		SCHED_STATE_NO_LOCK_ENABLED);
-}
-
-static inline void set_context_enabled(struct intel_context *ce)
-{
-	atomic_or(SCHED_STATE_NO_LOCK_ENABLED, &ce->guc_sched_state_no_lock);
-}
-
-static inline void clr_context_enabled(struct intel_context *ce)
-{
-	atomic_and((u32)~SCHED_STATE_NO_LOCK_ENABLED,
-		   &ce->guc_sched_state_no_lock);
-}
-
-static inline bool context_pending_enable(struct intel_context *ce)
-{
-	return (atomic_read(&ce->guc_sched_state_no_lock) &
-		SCHED_STATE_NO_LOCK_PENDING_ENABLE);
-}
-
-static inline void set_context_pending_enable(struct intel_context *ce)
-{
-	atomic_or(SCHED_STATE_NO_LOCK_PENDING_ENABLE,
-		  &ce->guc_sched_state_no_lock);
-}
-
-static inline void clr_context_pending_enable(struct intel_context *ce)
-{
-	atomic_and((u32)~SCHED_STATE_NO_LOCK_PENDING_ENABLE,
-		   &ce->guc_sched_state_no_lock);
-}
-
-static inline bool context_registered(struct intel_context *ce)
-{
-	return (atomic_read(&ce->guc_sched_state_no_lock) &
-		SCHED_STATE_NO_LOCK_REGISTERED);
-}
-
-static inline void set_context_registered(struct intel_context *ce)
-{
-	atomic_or(SCHED_STATE_NO_LOCK_REGISTERED,
-		  &ce->guc_sched_state_no_lock);
-}
-
-static inline void clr_context_registered(struct intel_context *ce)
-{
-	atomic_and((u32)~SCHED_STATE_NO_LOCK_REGISTERED,
-		   &ce->guc_sched_state_no_lock);
-}
-
 /*
  * Below is a set of functions which control the GuC scheduling state which
- * require a lock, aside from the special case where the functions are called
- * from guc_lrc_desc_pin(). In that case it isn't possible for any other code
- * path to be executing on the context.
+ * require a lock.
  */
 #define SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER	BIT(0)
 #define SCHED_STATE_DESTROYED				BIT(1)
 #define SCHED_STATE_PENDING_DISABLE			BIT(2)
 #define SCHED_STATE_BANNED				BIT(3)
-#define SCHED_STATE_BLOCKED_SHIFT			4
+#define SCHED_STATE_ENABLED				BIT(4)
+#define SCHED_STATE_PENDING_ENABLE			BIT(5)
+#define SCHED_STATE_REGISTERED				BIT(6)
+#define SCHED_STATE_BLOCKED_SHIFT			7
 #define SCHED_STATE_BLOCKED		BIT(SCHED_STATE_BLOCKED_SHIFT)
 #define SCHED_STATE_BLOCKED_MASK	(0xfff << SCHED_STATE_BLOCKED_SHIFT)
 
 static inline void init_sched_state(struct intel_context *ce)
 {
	lockdep_assert_held(&ce->guc_state.lock);
-	atomic_set(&ce->guc_sched_state_no_lock, 0);
	ce->guc_state.sched_state &= SCHED_STATE_BLOCKED_MASK;
 }
@@ -162,9 +99,8 @@ static bool sched_state_is_init(struct intel_context *ce)
	/*
	 * XXX: Kernel contexts can have SCHED_STATE_NO_LOCK_REGISTERED after
	 * suspend.
	 */
-	return !(atomic_read(&ce->guc_sched_state_no_lock) &
-		 ~SCHED_STATE_NO_LOCK_REGISTERED) &&
-	       !(ce->guc_state.sched_state &= ~SCHED_STATE_BLOCKED_MASK);
+	return !(ce->guc_state.sched_state &=
+		 ~(SCHED_STATE_BLOCKED_MASK | SCHED_STATE_REGISTERED));
 }
static inline bool
@@ -237,6 +173,57 @@ static inline void clr_context_banned(struct intel_context *ce)
	ce->guc_state.sched_state &= ~SCHED_STATE_BANNED;
 }
 
+static inline bool context_enabled(struct intel_context *ce)
+{
+	return ce->guc_state.sched_state & SCHED_STATE_ENABLED;
+}
+
+static inline void set_context_enabled(struct intel_context *ce)
+{
+	lockdep_assert_held(&ce->guc_state.lock);
+	ce->guc_state.sched_state |= SCHED_STATE_ENABLED;
+}
+
+static inline void clr_context_enabled(struct intel_context *ce)
+{
+	lockdep_assert_held(&ce->guc_state.lock);
+	ce->guc_state.sched_state &= ~SCHED_STATE_ENABLED;
+}
+
+static inline bool context_pending_enable(struct intel_context *ce)
+{
+	return ce->guc_state.sched_state & SCHED_STATE_PENDING_ENABLE;
+}
+
+static inline void set_context_pending_enable(struct intel_context *ce)
+{
+	lockdep_assert_held(&ce->guc_state.lock);
+	ce->guc_state.sched_state |= SCHED_STATE_PENDING_ENABLE;
+}
+
+static inline void clr_context_pending_enable(struct intel_context *ce)
+{
+	lockdep_assert_held(&ce->guc_state.lock);
+	ce->guc_state.sched_state &= ~SCHED_STATE_PENDING_ENABLE;
+}
+
+static inline bool context_registered(struct intel_context *ce)
+{
+	return ce->guc_state.sched_state & SCHED_STATE_REGISTERED;
+}
+
+static inline void set_context_registered(struct intel_context *ce)
+{
+	lockdep_assert_held(&ce->guc_state.lock);
+	ce->guc_state.sched_state |= SCHED_STATE_REGISTERED;
+}
+
+static inline void clr_context_registered(struct intel_context *ce)
+{
+	lockdep_assert_held(&ce->guc_state.lock);
+	ce->guc_state.sched_state &= ~SCHED_STATE_REGISTERED;
+}
+
 static inline u32 context_blocked(struct intel_context *ce)
 {
	return (ce->guc_state.sched_state & SCHED_STATE_BLOCKED_MASK) >>
@@ -245,7 +232,6 @@ static inline u32 context_blocked(struct intel_context *ce)
 
 static inline void incr_context_blocked(struct intel_context *ce)
 {
-	lockdep_assert_held(&ce->engine->sched_engine->lock);
	lockdep_assert_held(&ce->guc_state.lock);
 
	ce->guc_state.sched_state += SCHED_STATE_BLOCKED;
@@ -255,7 +241,6 @@ static inline void incr_context_blocked(struct intel_context *ce)
 
 static inline void decr_context_blocked(struct intel_context *ce)
 {
-	lockdep_assert_held(&ce->engine->sched_engine->lock);
	lockdep_assert_held(&ce->guc_state.lock);
 
	GEM_BUG_ON(!context_blocked(ce));	/* Underflow check */
@@ -450,6 +435,8 @@ static int guc_add_request(struct intel_guc *guc, struct i915_request *rq)
	u32 g2h_len_dw = 0;
	bool enabled;
 
+	lockdep_assert_held(&rq->engine->sched_engine->lock);
+
	/*
	 * Corner case where requests were sitting in the priority list or a
	 * request resubmitted after the context was banned.
@@ -457,7 +444,7 @@ static int guc_add_request(struct intel_guc *guc, struct i915_request *rq)
	if (unlikely(intel_context_is_banned(ce))) {
		i915_request_put(i915_request_mark_eio(rq));
		intel_engine_signal_breadcrumbs(ce->engine);
-		goto out;
+		return 0;
	}
 
	GEM_BUG_ON(!atomic_read(&ce->guc_id_ref));
@@ -470,9 +457,11 @@ static int guc_add_request(struct intel_guc *guc, struct i915_request *rq)
	if (unlikely(!lrc_desc_registered(guc, ce->guc_id))) {
		err = guc_lrc_desc_pin(ce, false);
		if (unlikely(err))
-			goto out;
+			return err;
	}
 
+	spin_lock(&ce->guc_state.lock);
+
	/*
	 * The request / context will be run on the hardware when scheduling
	 * gets enabled in the unblock.
@@ -507,6 +496,7 @@ static int guc_add_request(struct intel_guc *guc, struct i915_request *rq)
	trace_i915_request_guc_submit(rq);
 
 out:
+	spin_unlock(&ce->guc_state.lock);
	return err;
 }
@@ -727,8 +717,6 @@ void intel_guc_submission_reset_prepare(struct intel_guc *guc)
	spin_lock_irq(&guc_to_gt(guc)->irq_lock);
	spin_unlock_irq(&guc_to_gt(guc)->irq_lock);
 
-	guc_flush_submissions(guc);
-
	flush_work(&guc->ct.requests.worker);
 
	scrub_guc_desc_for_outstanding_g2h(guc);
@@ -1133,7 +1121,11 @@ static int steal_guc_id(struct intel_guc *guc)
 
		list_del_init(&ce->guc_id_link);
		guc_id = ce->guc_id;
+
+		spin_lock(&ce->guc_state.lock);
		clr_context_registered(ce);
+		spin_unlock(&ce->guc_state.lock);
+
		set_context_guc_id_invalid(ce);
		return guc_id;
	} else {
@@ -1169,6 +1161,8 @@ static int pin_guc_id(struct intel_guc *guc, struct intel_context *ce)
 try_again:
	spin_lock_irqsave(&guc->contexts_lock, flags);
 
+	might_lock(&ce->guc_state.lock);
+
	if (context_guc_id_invalid(ce)) {
		ret = assign_guc_id(guc, &ce->guc_id);
		if (ret)
@@ -1248,8 +1242,13 @@ static int register_context(struct intel_context *ce, bool loop)
	trace_intel_context_register(ce);
 
	ret = __guc_action_register_context(guc, ce->guc_id, offset, loop);
-	if (likely(!ret))
+	if (likely(!ret)) {
+		unsigned long flags;
+
+		spin_lock_irqsave(&ce->guc_state.lock, flags);
		set_context_registered(ce);
+		spin_unlock_irqrestore(&ce->guc_state.lock, flags);
+	}
 
	return ret;
 }
@@ -1525,7 +1524,6 @@ static u16 prep_context_pending_disable(struct intel_context *ce)
 static struct i915_sw_fence *guc_context_block(struct intel_context *ce)
 {
	struct intel_guc *guc = ce_to_guc(ce);
-	struct i915_sched_engine *sched_engine = ce->engine->sched_engine;
	unsigned long flags;
	struct intel_runtime_pm *runtime_pm = ce->engine->uncore->rpm;
	intel_wakeref_t wakeref;
@@ -1534,13 +1532,7 @@ static struct i915_sw_fence *guc_context_block(struct intel_context *ce)
 
	spin_lock_irqsave(&ce->guc_state.lock, flags);
 
-	/*
-	 * Sync with submission path, increment before below changes to context
-	 * state.
-	 */
-	spin_lock(&sched_engine->lock);
	incr_context_blocked(ce);
-	spin_unlock(&sched_engine->lock);
 
	enabled = context_enabled(ce);
	if (unlikely(!enabled || submission_disabled(guc))) {
@@ -1569,7 +1561,6 @@ static struct i915_sw_fence *guc_context_block(struct intel_context *ce)
 static void guc_context_unblock(struct intel_context *ce)
 {
	struct intel_guc *guc = ce_to_guc(ce);
-	struct i915_sched_engine *sched_engine = ce->engine->sched_engine;
	unsigned long flags;
	struct intel_runtime_pm *runtime_pm = ce->engine->uncore->rpm;
	intel_wakeref_t wakeref;
@@ -1594,13 +1585,7 @@ static void guc_context_unblock(struct intel_context *ce)
		intel_context_get(ce);
	}
 
-	/*
-	 * Sync with submission path, decrement after above changes to context
-	 * state.
-	 */
-	spin_lock(&sched_engine->lock);
	decr_context_blocked(ce);
-	spin_unlock(&sched_engine->lock);
 
	spin_unlock_irqrestore(&ce->guc_state.lock, flags);
@@ -1710,7 +1695,9 @@ static void guc_context_sched_disable(struct intel_context *ce)
 
	if (submission_disabled(guc) || context_guc_id_invalid(ce) ||
	    !lrc_desc_registered(guc, ce->guc_id)) {
+		spin_lock_irqsave(&ce->guc_state.lock, flags);
We do take this lock a few lines below this. Would it be worth just moving that up and doing everything under the lock?
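I.e. something like this untested sketch (illustration only, reusing the names already in guc_context_sched_disable()):

	spin_lock_irqsave(&ce->guc_state.lock, flags);
	if (submission_disabled(guc) || context_guc_id_invalid(ce) ||
	    !lrc_desc_registered(guc, ce->guc_id)) {
		clr_context_enabled(ce);
		spin_unlock_irqrestore(&ce->guc_state.lock, flags);
		goto unpin;
	}
	/* ... the later spin_lock_irqsave() in this function would then
	 * fold into this one ... */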
Anyway, all callers of the moved set/clr functions have been updated to take the lock correctly, so:
Reviewed-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Daniele
		clr_context_enabled(ce);
+		spin_unlock_irqrestore(&ce->guc_state.lock, flags);
		goto unpin;
	}
@@ -1760,7 +1747,6 @@ static inline void guc_lrc_desc_unpin(struct intel_context *ce)
	GEM_BUG_ON(ce != __get_context(guc, ce->guc_id));
	GEM_BUG_ON(context_enabled(ce));
 
-	clr_context_registered(ce);
	deregister_context(ce, ce->guc_id, true);
 }
@@ -1833,8 +1819,10 @@ static void guc_context_destroy(struct kref *kref)
	/* Seal race with Reset */
	spin_lock_irqsave(&ce->guc_state.lock, flags);
	disabled = submission_disabled(guc);
-	if (likely(!disabled))
+	if (likely(!disabled)) {
		set_context_destroyed(ce);
+		clr_context_registered(ce);
+	}
	spin_unlock_irqrestore(&ce->guc_state.lock, flags);
	if (unlikely(disabled)) {
		release_guc_id(guc, ce);
@@ -2697,8 +2685,7 @@ int intel_guc_sched_done_process_msg(struct intel_guc *guc,
		     (!context_pending_enable(ce) &&
		      !context_pending_disable(ce)))) {
		drm_err(&guc_to_gt(guc)->i915->drm,
-			"Bad context sched_state 0x%x, 0x%x, desc_idx %u",
-			atomic_read(&ce->guc_sched_state_no_lock),
+			"Bad context sched_state 0x%x, desc_idx %u",
			ce->guc_state.sched_state, desc_idx);
		return -EPROTO;
	}
@@ -2713,7 +2700,9 @@ int intel_guc_sched_done_process_msg(struct intel_guc *guc,
		}
 #endif
 
+		spin_lock_irqsave(&ce->guc_state.lock, flags);
		clr_context_pending_enable(ce);
+		spin_unlock_irqrestore(&ce->guc_state.lock, flags);
	} else if (context_pending_disable(ce)) {
		bool banned;
@@ -2987,9 +2976,8 @@ void intel_guc_submission_print_context_info(struct intel_guc *guc,
			   atomic_read(&ce->pin_count));
		drm_printf(p, "\t\tGuC ID Ref Count: %u\n",
			   atomic_read(&ce->guc_id_ref));
-		drm_printf(p, "\t\tSchedule State: 0x%x, 0x%x\n\n",
-			   ce->guc_state.sched_state,
-			   atomic_read(&ce->guc_sched_state_no_lock));
+		drm_printf(p, "\t\tSchedule State: 0x%x\n\n",
+			   ce->guc_state.sched_state);
 
		guc_log_context_priority(p, ce);
	}
diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
index 806ad688274b..0a77eb2944b5 100644
--- a/drivers/gpu/drm/i915/i915_trace.h
+++ b/drivers/gpu/drm/i915/i915_trace.h
@@ -903,7 +903,6 @@ DECLARE_EVENT_CLASS(intel_context,
			     __field(u32, guc_id)
			     __field(int, pin_count)
			     __field(u32, sched_state)
-			     __field(u32, guc_sched_state_no_lock)
			     __field(u8, guc_prio)
			     ),
 
@@ -911,15 +910,12 @@ DECLARE_EVENT_CLASS(intel_context,
			   __entry->guc_id = ce->guc_id;
			   __entry->pin_count = atomic_read(&ce->pin_count);
			   __entry->sched_state = ce->guc_state.sched_state;
-			   __entry->guc_sched_state_no_lock =
-				   atomic_read(&ce->guc_sched_state_no_lock);
			   __entry->guc_prio = ce->guc_prio;
			   ),
 
-	    TP_printk("guc_id=%d, pin_count=%d sched_state=0x%x,0x%x, guc_prio=%u",
+	    TP_printk("guc_id=%d, pin_count=%d sched_state=0x%x, guc_prio=%u",
		      __entry->guc_id, __entry->pin_count,
		      __entry->sched_state,
-		      __entry->guc_sched_state_no_lock, __entry->guc_prio)
+		      __entry->guc_prio)
 );