On 7/22/2021 4:54 PM, Matthew Brost wrote:
This adds GuC backend support for i915_request_cancel(), which in turn makes CONFIG_DRM_I915_REQUEST_TIMEOUT work.
This implementation makes use of a fence while there are likely simpler options. A fence was chosen because of another feature coming soon which requires a user to block on a context until scheduling is disabled. In that case we return the fence to the user and the user can wait on that fence.
v2: (Daniele)
- A comment about locking the blocked incr / decr
- A comment about the use of the fence
- Update commit message explaining why fence
- Delete redundant check blocked count in unblock function
- Ring buffer implementation
- Comment about blocked in submission path
- Shorter rpm path
Signed-off-by: Matthew Brost <matthew.brost@intel.com> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
drivers/gpu/drm/i915/gt/intel_context.c | 13 ++ drivers/gpu/drm/i915/gt/intel_context.h | 7 + drivers/gpu/drm/i915/gt/intel_context_types.h | 7 + .../drm/i915/gt/intel_execlists_submission.c | 18 ++ .../gpu/drm/i915/gt/intel_ring_submission.c | 16 ++ .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 175 ++++++++++++++++++ drivers/gpu/drm/i915/i915_request.c | 14 +- 7 files changed, 237 insertions(+), 13 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c index 237b70e98744..477c42d7d693 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.c +++ b/drivers/gpu/drm/i915/gt/intel_context.c @@ -366,6 +366,12 @@ static int __intel_context_active(struct i915_active *active) return 0; }
+static int sw_fence_dummy_notify(struct i915_sw_fence *sf,
enum i915_sw_fence_notify state)
+{
- return NOTIFY_DONE;
+}
- void intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine) {
@@ -399,6 +405,13 @@ intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine) ce->guc_id = GUC_INVALID_LRC_ID; INIT_LIST_HEAD(&ce->guc_id_link);
- /*
* Initialize fence to be complete as this is expected to be complete
* unless there is a pending schedule disable outstanding.
*/
- i915_sw_fence_init(&ce->guc_blocked, sw_fence_dummy_notify);
- i915_sw_fence_commit(&ce->guc_blocked);
- i915_active_init(&ce->active, __intel_context_active, __intel_context_retire, 0); }
diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h index 814d9277096a..876bdb08303c 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.h +++ b/drivers/gpu/drm/i915/gt/intel_context.h @@ -70,6 +70,13 @@ intel_context_is_pinned(struct intel_context *ce) return atomic_read(&ce->pin_count); }
+static inline void intel_context_cancel_request(struct intel_context *ce,
struct i915_request *rq)
+{
- GEM_BUG_ON(!ce->ops->cancel_request);
- return ce->ops->cancel_request(ce, rq);
+}
- /**
- intel_context_unlock_pinned - Releases the earlier locking of 'pinned' status
- @ce - the context
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h index 57c19ee3e313..005a64f2afa7 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_types.h +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h @@ -13,6 +13,7 @@ #include <linux/types.h>
#include "i915_active_types.h" +#include "i915_sw_fence.h" #include "i915_utils.h" #include "intel_engine_types.h" #include "intel_sseu.h" @@ -42,6 +43,9 @@ struct intel_context_ops { void (*unpin)(struct intel_context *ce); void (*post_unpin)(struct intel_context *ce);
- void (*cancel_request)(struct intel_context *ce,
struct i915_request *rq);
- void (*enter)(struct intel_context *ce); void (*exit)(struct intel_context *ce);
@@ -184,6 +188,9 @@ struct intel_context { * GuC ID link - in list when unpinned but guc_id still valid in GuC */ struct list_head guc_id_link;
/* GuC context blocked fence */
struct i915_sw_fence guc_blocked; };
#endif /* __INTEL_CONTEXT_TYPES__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index 60427b106bad..e7b42222b71d 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -114,6 +114,7 @@ #include "gen8_engine_cs.h" #include "intel_breadcrumbs.h" #include "intel_context.h" +#include "intel_engine_heartbeat.h" #include "intel_engine_pm.h" #include "intel_engine_stats.h" #include "intel_execlists_submission.h" @@ -2587,11 +2588,26 @@ static int execlists_context_alloc(struct intel_context *ce) return lrc_alloc(ce, ce->engine); }
+static void execlists_context_cancel_request(struct intel_context *ce,
struct i915_request *rq)
+{
- struct intel_engine_cs *engine = NULL;
- i915_request_active_engine(rq, &engine);
- if (engine && intel_engine_pulse(engine))
intel_gt_handle_error(engine->gt, engine->mask, 0,
"request cancellation by %s",
current->comm);
+}
static const struct intel_context_ops execlists_context_ops = { .flags = COPS_HAS_INFLIGHT,
.alloc = execlists_context_alloc,
.cancel_request = execlists_context_cancel_request,
.pre_pin = execlists_context_pre_pin, .pin = execlists_context_pin, .unpin = lrc_unpin,
@@ -3604,6 +3620,8 @@ static const struct intel_context_ops virtual_context_ops = {
.alloc = virtual_context_alloc,
- .cancel_request = execlists_context_cancel_request,
- .pre_pin = virtual_context_pre_pin, .pin = virtual_context_pin, .unpin = lrc_unpin,
diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c index 05bb9f449df1..2958e2fae380 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c @@ -16,6 +16,7 @@ #include "intel_reset.h" #include "intel_ring.h" #include "shmem_utils.h" +#include "intel_engine_heartbeat.h"
/* Rough estimate of the typical request size, performing a flush,
- set-context and then emitting the batch.
@@ -604,9 +605,24 @@ static void ring_context_ban(struct intel_context *ce, } }
+static void ring_context_cancel_request(struct intel_context *ce,
struct i915_request *rq)
+{
- struct intel_engine_cs *engine = NULL;
- i915_request_active_engine(rq, &engine);
- if (engine && intel_engine_pulse(engine))
intel_gt_handle_error(engine->gt, engine->mask, 0,
"request cancellation by %s",
current->comm);
+}
static const struct intel_context_ops ring_context_ops = { .alloc = ring_context_alloc,
.cancel_request = ring_context_cancel_request,
.ban = ring_context_ban,
.pre_pin = ring_context_pre_pin,
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 3b9ba3bdd425..a26754b9b940 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -81,6 +81,11 @@ guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count); */ #define SCHED_STATE_NO_LOCK_ENABLED BIT(0) #define SCHED_STATE_NO_LOCK_PENDING_ENABLE BIT(1) +#define SCHED_STATE_NO_LOCK_BLOCKED_SHIFT 2 +#define SCHED_STATE_NO_LOCK_BLOCKED \
- BIT(SCHED_STATE_NO_LOCK_BLOCKED_SHIFT)
+#define SCHED_STATE_NO_LOCK_BLOCKED_MASK \
- (0xffff << SCHED_STATE_NO_LOCK_BLOCKED_SHIFT) static inline bool context_enabled(struct intel_context *ce) { return (atomic_read(&ce->guc_sched_state_no_lock) &
@@ -116,6 +121,30 @@ static inline void clr_context_pending_enable(struct intel_context *ce) &ce->guc_sched_state_no_lock); }
+static inline u32 context_blocked(struct intel_context *ce) +{
- return (atomic_read(&ce->guc_sched_state_no_lock) &
SCHED_STATE_NO_LOCK_BLOCKED_MASK) >>
SCHED_STATE_NO_LOCK_BLOCKED_SHIFT;
+}
+static inline void incr_context_blocked(struct intel_context *ce) +{
- lockdep_assert_held(&ce->engine->sched_engine->lock);
- atomic_add(SCHED_STATE_NO_LOCK_BLOCKED,
&ce->guc_sched_state_no_lock);
- GEM_BUG_ON(!context_blocked(ce)); /* Overflow check */
+}
+static inline void decr_context_blocked(struct intel_context *ce) +{
- lockdep_assert_held(&ce->engine->sched_engine->lock);
- GEM_BUG_ON(!context_blocked(ce));
- atomic_sub(SCHED_STATE_NO_LOCK_BLOCKED,
&ce->guc_sched_state_no_lock);
+}
- /*
- Below is a set of functions which control the GuC scheduling state which
- require a lock, aside from the special case where the functions are called
@@ -404,6 +433,14 @@ static int guc_add_request(struct intel_guc *guc, struct i915_request *rq) if (unlikely(err)) goto out; }
/*
* The request / context will be run on the hardware when scheduling
* gets enabled in the unblock.
*/
if (unlikely(context_blocked(ce)))
goto out;
enabled = context_enabled(ce);
if (!enabled) {
@@ -532,6 +569,7 @@ static void __guc_context_destroy(struct intel_context *ce); static void release_guc_id(struct intel_guc *guc, struct intel_context *ce); static void guc_signal_context_fence(struct intel_context *ce); static void guc_cancel_context_requests(struct intel_context *ce); +static void guc_blocked_fence_complete(struct intel_context *ce);
static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc) { @@ -579,6 +617,10 @@ static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc) } intel_context_sched_disable_unpin(ce); atomic_dec(&guc->outstanding_submission_g2h);
spin_lock_irqsave(&ce->guc_state.lock, flags);
guc_blocked_fence_complete(ce);
spin_unlock_irqrestore(&ce->guc_state.lock, flags);
} }intel_context_put(ce);
@@ -1353,6 +1395,21 @@ static void guc_context_post_unpin(struct intel_context *ce) lrc_post_unpin(ce); }
+static void __guc_context_sched_enable(struct intel_guc *guc,
struct intel_context *ce)
+{
- u32 action[] = {
INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET,
ce->guc_id,
GUC_CONTEXT_ENABLE
- };
- trace_intel_context_sched_enable(ce);
- guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action),
G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, true);
+}
- static void __guc_context_sched_disable(struct intel_guc *guc, struct intel_context *ce, u16 guc_id)
@@ -1371,17 +1428,130 @@ static void __guc_context_sched_disable(struct intel_guc *guc, G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, true); }
+static void guc_blocked_fence_complete(struct intel_context *ce) +{
- lockdep_assert_held(&ce->guc_state.lock);
- if (!i915_sw_fence_done(&ce->guc_blocked))
i915_sw_fence_complete(&ce->guc_blocked);
+}
+static void guc_blocked_fence_reinit(struct intel_context *ce) +{
- lockdep_assert_held(&ce->guc_state.lock);
- GEM_BUG_ON(!i915_sw_fence_done(&ce->guc_blocked));
- /*
* This fence is always complete unless a pending schedule disable is
* outstanding. We arm the fence here and complete it when we receive
* the pending schedule disable complete message.
*/
- i915_sw_fence_fini(&ce->guc_blocked);
- i915_sw_fence_reinit(&ce->guc_blocked);
- i915_sw_fence_await(&ce->guc_blocked);
- i915_sw_fence_commit(&ce->guc_blocked);
+}
static u16 prep_context_pending_disable(struct intel_context *ce) { lockdep_assert_held(&ce->guc_state.lock);
set_context_pending_disable(ce); clr_context_enabled(ce);
guc_blocked_fence_reinit(ce); intel_context_get(ce);
return ce->guc_id; }
+static struct i915_sw_fence *guc_context_block(struct intel_context *ce) +{
- struct intel_guc *guc = ce_to_guc(ce);
- struct i915_sched_engine *sched_engine = ce->engine->sched_engine;
- unsigned long flags;
- struct intel_runtime_pm *runtime_pm = ce->engine->uncore->rpm;
- intel_wakeref_t wakeref;
- u16 guc_id;
- bool enabled;
- /* Sync with submission path and unblock */
- spin_lock_irqsave(&sched_engine->lock, flags);
- incr_context_blocked(ce);
- spin_unlock_irqrestore(&sched_engine->lock, flags);
- spin_lock_irqsave(&ce->guc_state.lock, flags);
- enabled = context_enabled(ce);
- if (unlikely(!enabled || submission_disabled(guc))) {
if (enabled)
clr_context_enabled(ce);
spin_unlock_irqrestore(&ce->guc_state.lock, flags);
return &ce->guc_blocked;
- }
- /*
* We add +2 here as the schedule disable complete CTB handler calls
* intel_context_sched_disable_unpin (-2 to pin_count).
*/
- atomic_add(2, &ce->pin_count);
- guc_id = prep_context_pending_disable(ce);
- spin_unlock_irqrestore(&ce->guc_state.lock, flags);
- with_intel_runtime_pm(runtime_pm, wakeref)
__guc_context_sched_disable(guc, ce, guc_id);
- return &ce->guc_blocked;
+}
+static void guc_context_unblock(struct intel_context *ce) +{
- struct intel_guc *guc = ce_to_guc(ce);
- struct i915_sched_engine *sched_engine = ce->engine->sched_engine;
- unsigned long flags;
- struct intel_runtime_pm *runtime_pm = ce->engine->uncore->rpm;
- intel_wakeref_t wakeref;
- GEM_BUG_ON(context_enabled(ce));
- spin_lock_irqsave(&ce->guc_state.lock, flags);
- if (unlikely(submission_disabled(guc) ||
!intel_context_is_pinned(ce) ||
context_pending_disable(ce) ||
context_blocked(ce) > 1)) {
spin_unlock_irqrestore(&ce->guc_state.lock, flags);
goto out;
- }
- set_context_pending_enable(ce);
- set_context_enabled(ce);
- intel_context_get(ce);
- spin_unlock_irqrestore(&ce->guc_state.lock, flags);
- with_intel_runtime_pm(runtime_pm, wakeref)
__guc_context_sched_enable(guc, ce);
+out:
- spin_lock_irqsave(&sched_engine->lock, flags);
- decr_context_blocked(ce);
- spin_unlock_irqrestore(&sched_engine->lock, flags);.
As we discussed offline, it might be better to cover the whole block/unblock logic with ce->guc_state.lock. That way we can make sure we're not racing the incr/decr calls with the swapping locks. Apart from this, the patch LGTM.
Daniele
+}
+static void guc_context_cancel_request(struct intel_context *ce,
struct i915_request *rq)
+{
- if (i915_sw_fence_signaled(&rq->submit)) {
struct i915_sw_fence *fence = guc_context_block(ce);
i915_sw_fence_wait(fence);
if (!i915_request_completed(rq)) {
__i915_request_skip(rq);
guc_reset_state(ce, intel_ring_wrap(ce->ring, rq->head),
true);
}
guc_context_unblock(ce);
- }
+}
- static void __guc_context_set_preemption_timeout(struct intel_guc *guc, u16 guc_id, u32 preemption_timeout)
@@ -1641,6 +1811,8 @@ static const struct intel_context_ops guc_context_ops = {
.ban = guc_context_ban,
- .cancel_request = guc_context_cancel_request,
- .enter = intel_context_enter_engine, .exit = intel_context_exit_engine,
@@ -1836,6 +2008,8 @@ static const struct intel_context_ops virtual_guc_context_ops = {
.ban = guc_context_ban,
- .cancel_request = guc_context_cancel_request,
- .enter = guc_virtual_context_enter, .exit = guc_virtual_context_exit,
@@ -2294,6 +2468,7 @@ int intel_guc_sched_done_process_msg(struct intel_guc *guc, clr_context_banned(ce); clr_context_pending_disable(ce); __guc_signal_context_fence(ce);
guc_blocked_fence_complete(ce);
spin_unlock_irqrestore(&ce->guc_state.lock, flags);
if (banned) {
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 28f38b02a5d2..541a20371502 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -710,18 +710,6 @@ void i915_request_unsubmit(struct i915_request *request) spin_unlock_irqrestore(&engine->sched_engine->lock, flags); }
-static void __cancel_request(struct i915_request *rq) -{
- struct intel_engine_cs *engine = NULL;
- i915_request_active_engine(rq, &engine);
- if (engine && intel_engine_pulse(engine))
intel_gt_handle_error(engine->gt, engine->mask, 0,
"request cancellation by %s",
current->comm);
-}
- void i915_request_cancel(struct i915_request *rq, int error) { if (!i915_request_set_error_once(rq, error))
@@ -729,7 +717,7 @@ void i915_request_cancel(struct i915_request *rq, int error)
set_bit(I915_FENCE_FLAG_SENTINEL, &rq->fence.flags);
- __cancel_request(rq);
intel_context_cancel_request(rq->context, rq); }
static int __i915_sw_fence_call