On 26/09/2019 10:54, Steven Price wrote:
drm_sched_cleanup_jobs() attempts to free finished jobs; however, because it is called as the condition of wait_event_interruptible() it must not sleep. Unfortunately some free callbacks (notably for Panfrost) do sleep.
Instead, let's rename drm_sched_cleanup_jobs() to drm_sched_get_cleanup_job() and simply return a job for processing if there is one. The caller can then call the free_job() callback outside the wait_event_interruptible(), where sleeping is possible, before re-checking and returning to sleep if necessary.
Signed-off-by: Steven Price <steven.price@arm.com>
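As an illustration of the pattern the commit message describes (a minimal sketch only, not the scheduler code itself - the wait queue and the get_finished_job()/free_finished_job() helpers are made-up names): the wait condition merely fetches work, and anything that may sleep runs only after wait_event_interruptible() returns:

	struct my_job *job = NULL;

	/*
	 * The condition is evaluated with the task prepared to sleep, so it
	 * must not sleep itself - it only picks up a finished job, if any.
	 */
	wait_event_interruptible(free_wq,
				 (job = get_finished_job()) ||
				 kthread_should_stop());

	/* Safe to sleep here: free the job(s) outside the wait condition. */
	while (job) {
		free_finished_job(job);	/* may take mutexes, allocate, etc. */
		job = get_finished_job();
	}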
Changes from v1:
- Move list_first_entry_or_null() within the lock
Or rather that was my intention - but apparently I fat-fingered it and didn't actually include this change. Sorry - v3 on its way!
Steve
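For reference, the change described in the v1 changelog above (moving list_first_entry_or_null() within the lock) would presumably make the locked section of drm_sched_get_cleanup_job() look roughly like this - a sketch derived from the diff below, not from an actual v3:

	spin_lock_irqsave(&sched->job_list_lock, flags);

	job = list_first_entry_or_null(&sched->ring_mirror_list,
				       struct drm_sched_job, node);
	if (job && dma_fence_is_signaled(&job->s_fence->finished)) {
		/* remove job from ring_mirror_list */
		list_del_init(&job->node);
	} else {
		job = NULL;
		/* queue timeout for next job */
		drm_sched_start_timeout(sched);
	}

	spin_unlock_irqrestore(&sched->job_list_lock, flags);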
 drivers/gpu/drm/scheduler/sched_main.c | 44 ++++++++++++++------------
 1 file changed, 24 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
index 9a0ee74d82dc..0ed4aaa4e6d1 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -622,43 +622,41 @@ static void drm_sched_process_job(struct dma_fence *f, struct dma_fence_cb *cb)
 }
 
 /**
- * drm_sched_cleanup_jobs - destroy finished jobs
+ * drm_sched_get_cleanup_job - fetch the next finished job to be destroyed
  *
  * @sched: scheduler instance
  *
- * Remove all finished jobs from the mirror list and destroy them.
+ * Returns the next finished job from the mirror list (if there is one)
+ * ready for it to be destroyed.
  */
-static void drm_sched_cleanup_jobs(struct drm_gpu_scheduler *sched)
+static struct drm_sched_job *
+drm_sched_get_cleanup_job(struct drm_gpu_scheduler *sched)
 {
+	struct drm_sched_job *job = NULL;
 	unsigned long flags;
 
 	/* Don't destroy jobs while the timeout worker is running */
 	if (sched->timeout != MAX_SCHEDULE_TIMEOUT &&
 	    !cancel_delayed_work(&sched->work_tdr))
-		return;
-
+		return NULL;
 
-	while (!list_empty(&sched->ring_mirror_list)) {
-		struct drm_sched_job *job;
-
-		job = list_first_entry(&sched->ring_mirror_list,
+	job = list_first_entry_or_null(&sched->ring_mirror_list,
 				       struct drm_sched_job, node);
-		if (!dma_fence_is_signaled(&job->s_fence->finished))
-			break;
 
-		spin_lock_irqsave(&sched->job_list_lock, flags);
+	spin_lock_irqsave(&sched->job_list_lock, flags);
+
+	if (job && dma_fence_is_signaled(&job->s_fence->finished)) {
 		/* remove job from ring_mirror_list */
 		list_del_init(&job->node);
-		spin_unlock_irqrestore(&sched->job_list_lock, flags);
-
-		sched->ops->free_job(job);
+	} else {
+		job = NULL;
+		/* queue timeout for next job */
+		drm_sched_start_timeout(sched);
 	}
 
-	/* queue timeout for next job */
-	spin_lock_irqsave(&sched->job_list_lock, flags);
-	drm_sched_start_timeout(sched);
 	spin_unlock_irqrestore(&sched->job_list_lock, flags);
+
+	return job;
 }
 
 /**
@@ -698,12 +696,18 @@ static int drm_sched_main(void *param)
 		struct drm_sched_fence *s_fence;
 		struct drm_sched_job *sched_job;
 		struct dma_fence *fence;
+		struct drm_sched_job *cleanup_job = NULL;
 
 		wait_event_interruptible(sched->wake_up_worker,
-					 (drm_sched_cleanup_jobs(sched),
+					 (cleanup_job = drm_sched_get_cleanup_job(sched)) ||
 					 (!drm_sched_blocked(sched) &&
 					  (entity = drm_sched_select_entity(sched))) ||
-					 kthread_should_stop()));
+					 kthread_should_stop());
+
+		while (cleanup_job) {
+			sched->ops->free_job(cleanup_job);
+			cleanup_job = drm_sched_get_cleanup_job(sched);
+		}
 
 		if (!entity)
 			continue;