On 7/16/2021 1:17 PM, Matthew Brost wrote:
This adds GuC backend support for i915_request_cancel(), which in turn makes CONFIG_DRM_I915_REQUEST_TIMEOUT work.
This needs a bit of explanation in the commit message of why we're using fences for this instead of other, simpler options.
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_context.c       |   9 +
 drivers/gpu/drm/i915/gt/intel_context.h       |   7 +
 drivers/gpu/drm/i915/gt/intel_context_types.h |   7 +
 .../drm/i915/gt/intel_execlists_submission.c  |  18 ++
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 169 ++++++++++++++++++
 drivers/gpu/drm/i915/i915_request.c           |  14 +-
 6 files changed, 211 insertions(+), 13 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
index dd078a80c3a3..b1e3d00fb1f2 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -366,6 +366,12 @@ static int __intel_context_active(struct i915_active *active)
 	return 0;
 }
 
+static int sw_fence_dummy_notify(struct i915_sw_fence *sf,
+				 enum i915_sw_fence_notify state)
+{
+	return NOTIFY_DONE;
+}
+
 void
 intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine)
 {
@@ -399,6 +405,9 @@ intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine)
 	ce->guc_id = GUC_INVALID_LRC_ID;
 	INIT_LIST_HEAD(&ce->guc_id_link);
 
+	i915_sw_fence_init(&ce->guc_blocked, sw_fence_dummy_notify);
+	i915_sw_fence_commit(&ce->guc_blocked);
+
We need a comment somewhere to explain how we use this blocked fence, i.e. that the fence starts out signaled to indicate unblocked, and that we re-init it to an unsignaled state when we need to mark something as blocked.
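Something along these lines next to the init would do (wording up to you; this is just a sketch based on my reading of the patch):

	/*
	 * The blocked fence starts out signaled (i.e. context unblocked).
	 * guc_blocked_fence_reinit() re-arms it to an unsignaled state when
	 * the context is blocked, and it is completed again once the GuC
	 * confirms the schedule disable.
	 */
	i915_sw_fence_init(&ce->guc_blocked, sw_fence_dummy_notify);
	i915_sw_fence_commit(&ce->guc_blocked);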
 	i915_active_init(&ce->active,
 			 __intel_context_active, __intel_context_retire, 0);
 }
diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h
index 814d9277096a..876bdb08303c 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.h
+++ b/drivers/gpu/drm/i915/gt/intel_context.h
@@ -70,6 +70,13 @@ intel_context_is_pinned(struct intel_context *ce)
 	return atomic_read(&ce->pin_count);
 }
 
+static inline void intel_context_cancel_request(struct intel_context *ce,
+						struct i915_request *rq)
+{
+	GEM_BUG_ON(!ce->ops->cancel_request);
+	return ce->ops->cancel_request(ce, rq);
+}
+
 /**
  * intel_context_unlock_pinned - Releases the earlier locking of 'pinned' status
  * @ce - the context
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
index 57c19ee3e313..005a64f2afa7 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -13,6 +13,7 @@
 #include <linux/types.h>
 
 #include "i915_active_types.h"
+#include "i915_sw_fence.h"
 #include "i915_utils.h"
 #include "intel_engine_types.h"
 #include "intel_sseu.h"
@@ -42,6 +43,9 @@ struct intel_context_ops {
 	void (*unpin)(struct intel_context *ce);
 	void (*post_unpin)(struct intel_context *ce);
 
+	void (*cancel_request)(struct intel_context *ce,
+			       struct i915_request *rq);
+
I don't see an implementation for this for the ringbuffer backend.
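I'd expect the ring backend to be able to just reuse the same pulse-based approach as the execlists implementation below; a rough, untested sketch (the function name is just my suggestion):

static void ring_context_cancel_request(struct intel_context *ce,
					struct i915_request *rq)
{
	struct intel_engine_cs *engine = NULL;

	i915_request_active_engine(rq, &engine);

	if (engine && intel_engine_pulse(engine))
		intel_gt_handle_error(engine->gt, engine->mask, 0,
				      "request cancellation by %s",
				      current->comm);
}

plus the matching .cancel_request = ring_context_cancel_request hooked into ring_context_ops.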
 	void (*enter)(struct intel_context *ce);
 	void (*exit)(struct intel_context *ce);
 
@@ -184,6 +188,9 @@ struct intel_context {
 	 * GuC ID link - in list when unpinned but guc_id still valid in GuC
 	 */
 	struct list_head guc_id_link;
 
+	/* GuC context blocked fence */
+	struct i915_sw_fence guc_blocked;
 };
 
 #endif /* __INTEL_CONTEXT_TYPES__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
index f9b5f54a5abe..8f6dc0fb49a6 100644
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
@@ -114,6 +114,7 @@
 #include "gen8_engine_cs.h"
 #include "intel_breadcrumbs.h"
 #include "intel_context.h"
+#include "intel_engine_heartbeat.h"
 #include "intel_engine_pm.h"
 #include "intel_engine_stats.h"
 #include "intel_execlists_submission.h"
@@ -2536,11 +2537,26 @@ static int execlists_context_alloc(struct intel_context *ce)
 	return lrc_alloc(ce, ce->engine);
 }
 
+static void execlists_context_cancel_request(struct intel_context *ce,
+					     struct i915_request *rq)
+{
+	struct intel_engine_cs *engine = NULL;
+
+	i915_request_active_engine(rq, &engine);
+
+	if (engine && intel_engine_pulse(engine))
+		intel_gt_handle_error(engine->gt, engine->mask, 0,
+				      "request cancellation by %s",
+				      current->comm);
+}
+
 static const struct intel_context_ops execlists_context_ops = {
 	.flags = COPS_HAS_INFLIGHT,
 
 	.alloc = execlists_context_alloc,
 
+	.cancel_request = execlists_context_cancel_request,
+
 	.pre_pin = execlists_context_pre_pin,
 	.pin = execlists_context_pin,
 	.unpin = lrc_unpin,
@@ -3558,6 +3574,8 @@ static const struct intel_context_ops virtual_context_ops = {
 
 	.alloc = virtual_context_alloc,
 
+	.cancel_request = execlists_context_cancel_request,
+
 	.pre_pin = virtual_context_pre_pin,
 	.pin = virtual_context_pin,
 	.unpin = lrc_unpin,
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index 149990196e3a..1c30d04733ff 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -81,6 +81,11 @@ guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count);
  */
 #define SCHED_STATE_NO_LOCK_ENABLED			BIT(0)
 #define SCHED_STATE_NO_LOCK_PENDING_ENABLE		BIT(1)
+#define SCHED_STATE_NO_LOCK_BLOCKED_SHIFT		2
+#define SCHED_STATE_NO_LOCK_BLOCKED \
+	BIT(SCHED_STATE_NO_LOCK_BLOCKED_SHIFT)
+#define SCHED_STATE_NO_LOCK_BLOCKED_MASK \
+	(0xffff << SCHED_STATE_NO_LOCK_BLOCKED_SHIFT)
 
 static inline bool context_enabled(struct intel_context *ce)
 {
 	return (atomic_read(&ce->guc_sched_state_no_lock) &
@@ -116,6 +121,27 @@ static inline void clr_context_pending_enable(struct intel_context *ce)
 		   &ce->guc_sched_state_no_lock);
 }
 
+static inline u32 context_blocked(struct intel_context *ce)
+{
+	return (atomic_read(&ce->guc_sched_state_no_lock) &
+		SCHED_STATE_NO_LOCK_BLOCKED_MASK) >>
+		SCHED_STATE_NO_LOCK_BLOCKED_SHIFT;
+}
+
+static inline void incr_context_blocked(struct intel_context *ce)
+{
+	lockdep_assert_held(&ce->engine->sched_engine->lock);
+
It's a bit weird requiring a lock for a variable that is purposely called no_lock, although I do get that this is not the GuC lock. Can you explain which race you're trying to guard against here?
+	atomic_add(SCHED_STATE_NO_LOCK_BLOCKED,
+		   &ce->guc_sched_state_no_lock);
Do we need an overflow check here, or are we guaranteed that the count will stay within a certain range? See the sketch after this hunk for what I have in mind.
+}
+
+static inline void decr_context_blocked(struct intel_context *ce)
+{
+	lockdep_assert_held(&ce->engine->sched_engine->lock);
+
GEM_BUG_ON(!context_blocked(ce)) here, to catch an unbalanced decrement? Also covered in the sketch below.
+	atomic_sub(SCHED_STATE_NO_LOCK_BLOCKED,
+		   &ce->guc_sched_state_no_lock);
+}
+
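Something like this (completely untested) is what I have in mind for the two comments above:

static inline void incr_context_blocked(struct intel_context *ce)
{
	lockdep_assert_held(&ce->engine->sched_engine->lock);

	/* catch the counter saturating the mask before it wraps */
	GEM_BUG_ON(context_blocked(ce) ==
		   (SCHED_STATE_NO_LOCK_BLOCKED_MASK >>
		    SCHED_STATE_NO_LOCK_BLOCKED_SHIFT));

	atomic_add(SCHED_STATE_NO_LOCK_BLOCKED,
		   &ce->guc_sched_state_no_lock);
}

static inline void decr_context_blocked(struct intel_context *ce)
{
	lockdep_assert_held(&ce->engine->sched_engine->lock);

	/* catch an unbalanced decrement */
	GEM_BUG_ON(!context_blocked(ce));

	atomic_sub(SCHED_STATE_NO_LOCK_BLOCKED,
		   &ce->guc_sched_state_no_lock);
}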
 /*
  * Below is a set of functions which control the GuC scheduling state which
  * require a lock, aside from the special case where the functions are called
@@ -403,6 +429,10 @@ static int guc_add_request(struct intel_guc *guc, struct i915_request *rq)
 		if (unlikely(err))
 			goto out;
 	}
 
+	if (unlikely(context_blocked(ce)))
+		goto out;
+
You're not setting any error state here for this aborted request. Will the request be automatically re-submitted on unblock? This could use a comment if that's the case.
 	enabled = context_enabled(ce);
 
 	if (!enabled) {
@@ -531,6 +561,7 @@ static void __guc_context_destroy(struct intel_context *ce);
 static void release_guc_id(struct intel_guc *guc, struct intel_context *ce);
 static void guc_signal_context_fence(struct intel_context *ce);
 static void guc_cancel_context_requests(struct intel_context *ce);
+static void guc_blocked_fence_complete(struct intel_context *ce);
 
 static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc)
 {
@@ -578,6 +609,10 @@ static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc)
 			}
 			intel_context_sched_disable_unpin(ce);
 			atomic_dec(&guc->outstanding_submission_g2h);
+			spin_lock_irqsave(&ce->guc_state.lock, flags);
+			guc_blocked_fence_complete(ce);
+			spin_unlock_irqrestore(&ce->guc_state.lock, flags);
+
 			intel_context_put(ce);
 		}
 	}
@@ -1339,6 +1374,21 @@ static void guc_context_post_unpin(struct intel_context *ce)
 	lrc_post_unpin(ce);
 }
 
+static void __guc_context_sched_enable(struct intel_guc *guc,
Why void? This can fail. See the sketch after this hunk.
+				       struct intel_context *ce)
+{
+	u32 action[] = {
+		INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET,
+		ce->guc_id,
+		GUC_CONTEXT_ENABLE
+	};
+
+	trace_intel_context_sched_enable(ce);
+
+	guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action),
+				      G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, true);
+}
+
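To expand on the point above, something like this (untested) would at least surface a CTB send failure to the caller:

static int __guc_context_sched_enable(struct intel_guc *guc,
				      struct intel_context *ce)
{
	u32 action[] = {
		INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET,
		ce->guc_id,
		GUC_CONTEXT_ENABLE
	};

	trace_intel_context_sched_enable(ce);

	/* propagate the send result instead of swallowing it */
	return guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action),
					     G2H_LEN_DW_SCHED_CONTEXT_MODE_SET,
					     true);
}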
 static void __guc_context_sched_disable(struct intel_guc *guc,
 					struct intel_context *ce,
 					u16 guc_id)
@@ -1357,17 +1407,131 @@ static void __guc_context_sched_disable(struct intel_guc *guc,
 				      G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, true);
 }
 
+static void guc_blocked_fence_complete(struct intel_context *ce)
+{
+	lockdep_assert_held(&ce->guc_state.lock);
+
+	if (!i915_sw_fence_done(&ce->guc_blocked))
+		i915_sw_fence_complete(&ce->guc_blocked);
+}
+
+static void guc_blocked_fence_reinit(struct intel_context *ce)
+{
+	lockdep_assert_held(&ce->guc_state.lock);
+	GEM_BUG_ON(!i915_sw_fence_done(&ce->guc_blocked));
+	i915_sw_fence_fini(&ce->guc_blocked);
+	i915_sw_fence_reinit(&ce->guc_blocked);
+	i915_sw_fence_await(&ce->guc_blocked);
+	i915_sw_fence_commit(&ce->guc_blocked);
+}
+
 static u16 prep_context_pending_disable(struct intel_context *ce)
 {
 	lockdep_assert_held(&ce->guc_state.lock);
 
 	set_context_pending_disable(ce);
 	clr_context_enabled(ce);
+	guc_blocked_fence_reinit(ce);
 	intel_context_get(ce);
 
 	return ce->guc_id;
 }
 
+static struct i915_sw_fence *guc_context_block(struct intel_context *ce)
+{
+	struct intel_guc *guc = ce_to_guc(ce);
+	struct i915_sched_engine *sched_engine = ce->engine->sched_engine;
+	unsigned long flags;
+	struct intel_runtime_pm *runtime_pm =
+		&ce->engine->gt->i915->runtime_pm;
This can just be ce->engine->uncore->rpm.
+	intel_wakeref_t wakeref;
+	u16 guc_id;
+	bool enabled;
+
+	spin_lock_irqsave(&sched_engine->lock, flags);
+	incr_context_blocked(ce);
+	spin_unlock_irqrestore(&sched_engine->lock, flags);
+
+	spin_lock_irqsave(&ce->guc_state.lock, flags);
+	enabled = context_enabled(ce);
+	if (unlikely(!enabled || submission_disabled(guc))) {
+		if (enabled)
+			clr_context_enabled(ce);
+		spin_unlock_irqrestore(&ce->guc_state.lock, flags);
+		return &ce->guc_blocked;
+	}
+
+	/*
+	 * We add +2 here as the schedule disable complete CTB handler calls
+	 * intel_context_sched_disable_unpin (-2 to pin_count).
+	 */
+	atomic_add(2, &ce->pin_count);
+
+	guc_id = prep_context_pending_disable(ce);
+	spin_unlock_irqrestore(&ce->guc_state.lock, flags);
+
+	with_intel_runtime_pm(runtime_pm, wakeref)
+		__guc_context_sched_disable(guc, ce, guc_id);
+
+	return &ce->guc_blocked;
+}
+
+static void guc_context_unblock(struct intel_context *ce)
+{
+	struct intel_guc *guc = ce_to_guc(ce);
+	struct i915_sched_engine *sched_engine = ce->engine->sched_engine;
+	unsigned long flags;
+	struct intel_runtime_pm *runtime_pm =
+		&ce->engine->gt->i915->runtime_pm;
Same here: ce->engine->uncore->rpm.
+	intel_wakeref_t wakeref;
+
+	GEM_BUG_ON(context_enabled(ce));
+
+	if (unlikely(context_blocked(ce) > 1)) {
+		spin_lock_irqsave(&sched_engine->lock, flags);
+		if (likely(context_blocked(ce) > 1))
+			goto decrement;
+		spin_unlock_irqrestore(&sched_engine->lock, flags);
+	}
+
+	spin_lock_irqsave(&ce->guc_state.lock, flags);
+	if (unlikely(submission_disabled(guc) ||
+		     !intel_context_is_pinned(ce) ||
+		     context_pending_disable(ce) ||
+		     context_blocked(ce) > 1)) {
You've already checked context_blocked(ce) > 1 twice above. If you can't trust the value to remain stable, maybe keep the spinlock held for more of the flow?
+		spin_unlock_irqrestore(&ce->guc_state.lock, flags);
+		goto out;
+	}
+
+	set_context_pending_enable(ce);
+	set_context_enabled(ce);
Shouldn't we set this to enabled only after the H2G has succeeded? See the sketch below.
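Roughly what I mean (completely untested, the locking likely needs more thought, and it assumes __guc_context_sched_enable() is changed to return an error as suggested earlier):

	set_context_pending_enable(ce);
	intel_context_get(ce);
	spin_unlock_irqrestore(&ce->guc_state.lock, flags);

	with_intel_runtime_pm(runtime_pm, wakeref)
		ret = __guc_context_sched_enable(guc, ce);

	if (!ret) {
		/* only mark the context enabled once the H2G is queued */
		spin_lock_irqsave(&ce->guc_state.lock, flags);
		set_context_enabled(ce);
		spin_unlock_irqrestore(&ce->guc_state.lock, flags);
	}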
Daniele
+	intel_context_get(ce);
+	spin_unlock_irqrestore(&ce->guc_state.lock, flags);
+
+	with_intel_runtime_pm(runtime_pm, wakeref)
+		__guc_context_sched_enable(guc, ce);
+
+out:
+	spin_lock_irqsave(&sched_engine->lock, flags);
+decrement:
+	decr_context_blocked(ce);
+	spin_unlock_irqrestore(&sched_engine->lock, flags);
+}
+
+static void guc_context_cancel_request(struct intel_context *ce,
+				       struct i915_request *rq)
+{
+	if (i915_sw_fence_signaled(&rq->submit)) {
+		struct i915_sw_fence *fence = guc_context_block(ce);
+
+		i915_sw_fence_wait(fence);
+		if (!i915_request_completed(rq)) {
+			__i915_request_skip(rq);
+			guc_reset_state(ce, intel_ring_wrap(ce->ring, rq->head),
+					true);
+		}
+		guc_context_unblock(ce);
+	}
+}
+
 static void __guc_context_set_preemption_timeout(struct intel_guc *guc,
 						 u16 guc_id,
 						 u32 preemption_timeout)
@@ -1626,6 +1790,8 @@ static const struct intel_context_ops guc_context_ops = {
 
 	.ban = guc_context_ban,
 
+	.cancel_request = guc_context_cancel_request,
+
 	.enter = intel_context_enter_engine,
 	.exit = intel_context_exit_engine,
@@ -1821,6 +1987,8 @@ static const struct intel_context_ops virtual_guc_context_ops = {
 
 	.ban = guc_context_ban,
 
+	.cancel_request = guc_context_cancel_request,
+
 	.enter = guc_virtual_context_enter,
 	.exit = guc_virtual_context_exit,
@@ -2290,6 +2458,7 @@ int intel_guc_sched_done_process_msg(struct intel_guc *guc,
 		clr_context_banned(ce);
 		clr_context_pending_disable(ce);
 		__guc_signal_context_fence(ce);
+		guc_blocked_fence_complete(ce);
 		spin_unlock_irqrestore(&ce->guc_state.lock, flags);
 
 		if (banned) {
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index eb109f93ebcb..f3552642b8a1 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -708,18 +708,6 @@ void i915_request_unsubmit(struct i915_request *request)
 	spin_unlock_irqrestore(&engine->sched_engine->lock, flags);
 }
 
-static void __cancel_request(struct i915_request *rq)
-{
-	struct intel_engine_cs *engine = NULL;
-
-	i915_request_active_engine(rq, &engine);
-
-	if (engine && intel_engine_pulse(engine))
-		intel_gt_handle_error(engine->gt, engine->mask, 0,
-				      "request cancellation by %s",
-				      current->comm);
-}
-
 void i915_request_cancel(struct i915_request *rq, int error)
 {
 	if (!i915_request_set_error_once(rq, error))
@@ -727,7 +715,7 @@ void i915_request_cancel(struct i915_request *rq, int error)
 
 	set_bit(I915_FENCE_FLAG_SENTINEL, &rq->fence.flags);
 
-	__cancel_request(rq);
+	intel_context_cancel_request(rq->context, rq);
 }
 
 static int __i915_sw_fence_call