SLPC min/max frequency updates require H2G calls. We are seeing timeouts when GuC channel is backed up and it is unable to respond in a timely fashion causing warnings and affecting CI.
This is seen when waitboosting happens during a stress test. this patch updates the waitboost path to use a non-blocking H2G call instead, which returns as soon as the message is successfully transmitted.
v2: Use drm_notice to report any errors that might occur while sending the waitboost H2G request (Tvrtko) v3: Add drm_notice inside force_min_freq (Ashutosh)
Cc: Ashutosh Dixit ashutosh.dixit@intel.com Signed-off-by: Vinay Belgaumkar vinay.belgaumkar@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 42 +++++++++++++++++---- 1 file changed, 35 insertions(+), 7 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c index 2df31af70d63..ec9c4ca0f615 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c @@ -98,6 +98,30 @@ static u32 slpc_get_state(struct intel_guc_slpc *slpc) return data->header.global_state; }
+static int guc_action_slpc_set_param_nb(struct intel_guc *guc, u8 id, u32 value) +{ + u32 request[] = { + GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST, + SLPC_EVENT(SLPC_EVENT_PARAMETER_SET, 2), + id, + value, + }; + int ret; + + ret = intel_guc_send_nb(guc, request, ARRAY_SIZE(request), 0); + + return ret > 0 ? -EPROTO : ret; +} + +static int slpc_set_param_nb(struct intel_guc_slpc *slpc, u8 id, u32 value) +{ + struct intel_guc *guc = slpc_to_guc(slpc); + + GEM_BUG_ON(id >= SLPC_MAX_PARAM); + + return guc_action_slpc_set_param_nb(guc, id, value); +} + static int guc_action_slpc_set_param(struct intel_guc *guc, u8 id, u32 value) { u32 request[] = { @@ -208,12 +232,14 @@ static int slpc_force_min_freq(struct intel_guc_slpc *slpc, u32 freq) */
with_intel_runtime_pm(&i915->runtime_pm, wakeref) { - ret = slpc_set_param(slpc, - SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ, - freq); + /* Non-blocking request will avoid stalls */ + ret = slpc_set_param_nb(slpc, + SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ, + freq); if (ret) - i915_probe_error(i915, "Unable to force min freq to %u: %d", - freq, ret); + drm_notice(&i915->drm, + "Failed to send set_param for min freq(%d): (%d)\n", + freq, ret); }
return ret; @@ -222,6 +248,7 @@ static int slpc_force_min_freq(struct intel_guc_slpc *slpc, u32 freq) static void slpc_boost_work(struct work_struct *work) { struct intel_guc_slpc *slpc = container_of(work, typeof(*slpc), boost_work); + int err;
/* * Raise min freq to boost. It's possible that @@ -231,8 +258,9 @@ static void slpc_boost_work(struct work_struct *work) */ mutex_lock(&slpc->lock); if (atomic_read(&slpc->num_waiters)) { - slpc_force_min_freq(slpc, slpc->boost_freq); - slpc->num_boosts++; + err = slpc_force_min_freq(slpc, slpc->boost_freq); + if (!err) + slpc->num_boosts++; } mutex_unlock(&slpc->lock); }
On Wed, 22 Jun 2022 17:32:25 -0700, Vinay Belgaumkar wrote:
@@ -208,12 +232,14 @@ static int slpc_force_min_freq(struct intel_guc_slpc *slpc, u32 freq) */
with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
ret = slpc_set_param(slpc,
SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ,
freq);
/* Non-blocking request will avoid stalls */
ret = slpc_set_param_nb(slpc,
SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ,
if (ret)freq);
i915_probe_error(i915, "Unable to force min freq to %u: %d",
freq, ret);
drm_notice(&i915->drm,
"Failed to send set_param for min freq(%d): (%d)\n",
freq, ret);
I am still thinking if we should replace drm_notice() by i915_probe_error() since drm_notice() will basically hide any issues of boost/de-boost's getting dropped.
Another idea here might be to maintain a counter, say "slpc->failed_boosts" which we increment each time slpc_set_param_nb() fails and dump that counter via intel_guc_slpc_print_info().
Anyway for now this is:
Reviewed-by: Ashutosh Dixit ashutosh.dixit@intel.com
dri-devel@lists.freedesktop.org