Re: [PATCH] drm/i915: Flip guc_id allocation partition

2 Feb 2022

On 13.01.2022 17:27, Matthew Brost wrote:
...
Move the multi-lrc guc_id from the lower allocation partition (0 to
number of multi-lrc guc_ids) to upper allocation partition (number of
single-lrc to max guc_ids).
This will help when a native driver transitions to a PF after driver
load time. If the perma-pin guc_ids (kernel contexts) are in a low range
it is easy reduce total number of guc_ids as the allocated slrc are in a
valid range the mlrc range moves to an unused range. Assuming no mlrc
are allocated and few slrc are used the native to PF transition is
seamless for the guc_id resource.
v2:
 (Michal / Tvrtko)

Add an explaination to commit message of why this patch is needed

(Michal / Piotr)

Replace marcos with functions

(Michal)

Rework logic flow in new_mlrc_guc_id
Unconditionally call bitmap_free

v3:
 (Michal)

Move allocation of mlrc bitmap back submission init

(CI)

Resend for CI

Signed-off-by: Matthew Brost matthew.brost@intel.com
.../gpu/drm/i915/gt/uc/intel_guc_submission.c | 77 ++++++++++++++-----
 1 file changed, 56 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index 23a40f10d376d..fce58365b3ff8 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -138,17 +138,6 @@ guc_create_parallel(struct intel_engine_cs **engines,
#define GUC_REQUEST_SIZE 64 /* bytes */
-/*


We reserve 1/16 of the guc_ids for multi-lrc as these need to be contiguous



per the GuC submission interface. A different allocation algorithm is used



(bitmap vs. ida) between multi-lrc and single-lrc hence the reason to



partition the guc_id space. We believe the number of multi-lrc contexts in



use should be low and 1/16 should be sufficient. Minimum of 32 guc_ids for



multi-lrc.


*/

-#define NUMBER_MULTI_LRC_GUC_ID(guc)	\

((guc)->submission_state.num_guc_ids / 16)


/*

Below is a set of functions which control the GuC scheduling state which
require a lock.

@@ -1746,6 +1735,7 @@ void intel_guc_submission_reset_finish(struct intel_guc *guc)
 }
static void destroyed_worker_func(struct work_struct *w);
+static int number_mlrc_guc_id(struct intel_guc *guc);
/*

Set up the memory resources to be shared with the GuC (via the GGTT)

@@ -1778,7 +1768,7 @@ int intel_guc_submission_init(struct intel_guc *guc)
   	  destroyed_worker_func);
guc->submission_state.guc_ids_bitmap =

bitmap_zalloc(NUMBER_MULTI_LRC_GUC_ID(guc), GFP_KERNEL);




bitmap_zalloc(number_mlrc_guc_id(guc), GFP_KERNEL);



to fully benefit from the id partition flip we likely will have to
allocate bitmap 'just-in-time' when first mlrc id is needed
so something like you had in early rev but abandon to avoid alloc inside
spinlock - but I'm wondering why we can't alloc bitmap for mlrc case,
while we allow allocation for slrc (as ida_simple_get may alloc, no?
...
if (!guc->submission_state.guc_ids_bitmap)
   	return -ENOMEM;
@@ -1864,6 +1854,57 @@ static void guc_submit_request(struct i915_request *rq)
   spin_unlock_irqrestore(&sched_engine->lock, flags);
 }
+/*


We reserve 1/16 of the guc_ids for multi-lrc as these need to be contiguous



per the GuC submission interface. A different allocation algorithm is used



(bitmap vs. ida) between multi-lrc and single-lrc hence the reason to



partition the guc_id space. We believe the number of multi-lrc contexts in



use should be low and 1/16 should be sufficient.



do we have any other numbers as guideline ?
while it is easy assumption that 1/16 from 64K contexts may be
sufficient, what about 1/16 of 1K contexts ? will that work too ?
also, do we have to make hard split ? what if there will be no users for
mlrc but more slrc contexts would be beneficial ? or the opposite ?
...

*/

+#define MLRC_GUC_ID_RATIO	16



+static int number_mlrc_guc_id(struct intel_guc *guc)
+{

return guc->submission_state.num_guc_ids / MLRC_GUC_ID_RATIO;

+}



+static int number_slrc_guc_id(struct intel_guc *guc)
+{

return guc->submission_state.num_guc_ids - number_mlrc_guc_id(guc);

+}



+static int mlrc_guc_id_base(struct intel_guc *guc)
+{

return number_slrc_guc_id(guc);

+}



+static int new_mlrc_guc_id(struct intel_guc *guc, struct intel_context *ce)
+{

int ret;

GEM_BUG_ON(!intel_context_is_parent(ce));
GEM_BUG_ON(!guc->submission_state.guc_ids_bitmap);

ret =  bitmap_find_free_region(guc->submission_state.guc_ids_bitmap,
		       number_mlrc_guc_id(guc),


		       order_base_2(ce->parallel.number_children


				    + 1));


if (unlikely(ret < 0))
return ret;



return ret + mlrc_guc_id_base(guc);

+}



+static int new_slrc_guc_id(struct intel_guc *guc, struct intel_context *ce)
+{

GEM_BUG_ON(intel_context_is_parent(ce));

return ida_simple_get(&guc->submission_state.guc_ids,
	      0, number_slrc_guc_id(guc),


	      GFP_KERNEL | __GFP_RETRY_MAYFAIL |


	      __GFP_NOWARN);



are you sure that these gfp flags are safe for use under spinlock ?
-Michal
...
+}



static int new_guc_id(struct intel_guc *guc, struct intel_context *ce)
 {
   int ret;
@@ -1871,16 +1912,10 @@ static int new_guc_id(struct intel_guc *guc, struct intel_context *ce)
   GEM_BUG_ON(intel_context_is_child(ce));
if (intel_context_is_parent(ce))

ret = bitmap_find_free_region(guc->submission_state.guc_ids_bitmap,


			      NUMBER_MULTI_LRC_GUC_ID(guc),


			      order_base_2(ce->parallel.number_children


					   + 1));




ret = new_mlrc_guc_id(guc, ce);

else


ret = ida_simple_get(&guc->submission_state.guc_ids,


		     NUMBER_MULTI_LRC_GUC_ID(guc),


		     guc->submission_state.num_guc_ids,


		     GFP_KERNEL | __GFP_RETRY_MAYFAIL |


		     __GFP_NOWARN);




ret = new_slrc_guc_id(guc, ce);


if (unlikely(ret < 0))
  return ret;


    

2025

2024

2023

2022

2021

2020

2019

2018

2017

2016

2015

2014

2013

2012

2011

2010

Re: [PATCH] drm/i915: Flip guc_id allocation partition

Signed-off-by: Matthew Brost matthew.brost@intel.com