Update job earlier with the scheduler it is supposed to be scheduled on.
Otherwise we could incorrectly optimize dependencies when moving an entity from one scheduler to another.
Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/scheduler/gpu_scheduler.c | 4 ++--
 drivers/gpu/drm/scheduler/sched_fence.c   | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler.c b/drivers/gpu/drm/scheduler/gpu_scheduler.c
index 65078dd3c82c..029863726c99 100644
--- a/drivers/gpu/drm/scheduler/gpu_scheduler.c
+++ b/drivers/gpu/drm/scheduler/gpu_scheduler.c
@@ -533,8 +533,6 @@ drm_sched_entity_pop_job(struct drm_sched_entity *entity)
 	if (!sched_job)
 		return NULL;

-	sched_job->sched = sched;
-	sched_job->s_fence->sched = sched;
 	while ((entity->dependency = sched->ops->dependency(sched_job, entity)))
 		if (drm_sched_entity_add_dependency_cb(entity))
 			return NULL;
@@ -581,6 +579,8 @@ void drm_sched_entity_push_job(struct drm_sched_job *sched_job,
 		spin_unlock(&entity->rq_lock);
 	}

+	sched_job->sched = entity->rq->sched;
+	sched_job->s_fence->sched = entity->rq->sched;
 	trace_drm_sched_job(sched_job, entity);
 	atomic_inc(&entity->rq->sched->num_jobs);
 	WRITE_ONCE(entity->last_user, current->group_leader);
diff --git a/drivers/gpu/drm/scheduler/sched_fence.c b/drivers/gpu/drm/scheduler/sched_fence.c
index 4029312fdd81..6dab18d288d7 100644
--- a/drivers/gpu/drm/scheduler/sched_fence.c
+++ b/drivers/gpu/drm/scheduler/sched_fence.c
@@ -172,7 +172,7 @@ struct drm_sched_fence *drm_sched_fence_create(struct drm_sched_entity *entity,
 		return NULL;

 	fence->owner = owner;
-	fence->sched = entity->rq->sched;
+	fence->sched = NULL;
 	spin_lock_init(&fence->lock);

 	seq = atomic_inc_return(&entity->fence_seq);
--
2.14.1
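For context: the dependency optimization the commit message refers to is the same-scheduler short-cut in drm_sched_entity_add_dependency_cb(). When a dependency is a scheduler fence from the same ring, waiting for the job to be *scheduled* is enough, because jobs on one ring execute in order. A simplified sketch of that logic (not the verbatim kernel code; callback wiring and error handling omitted):

static bool sketch_optimize_dependency(struct drm_sched_entity *entity)
{
	struct drm_gpu_scheduler *sched = entity->rq->sched;
	struct drm_sched_fence *s_fence = to_drm_sched_fence(entity->dependency);

	if (s_fence && s_fence->sched == sched) {
		/*
		 * Same ring: hardware executes jobs in submission order,
		 * so waiting for the dependency to be pushed to the ring
		 * ("scheduled") is sufficient.
		 */
		struct dma_fence *fence = dma_fence_get(&s_fence->scheduled);

		dma_fence_put(entity->dependency);
		entity->dependency = fence;
		return true;
	}

	/* Different scheduler: must wait for the dependency to finish. */
	return false;
}

If s_fence->sched still points at the scheduler the job was created on rather than the one it was pushed to, this comparison takes the short-cut for the wrong ring, which is exactly what the patch prevents.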
This fixes accessing the last_scheduled fence from multiple threads and also makes it easier to move entities between schedulers.
Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/scheduler/gpu_scheduler.c | 34 +++++++++++++++++++++++++------
 1 file changed, 28 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler.c b/drivers/gpu/drm/scheduler/gpu_scheduler.c
index 029863726c99..e4b71a543481 100644
--- a/drivers/gpu/drm/scheduler/gpu_scheduler.c
+++ b/drivers/gpu/drm/scheduler/gpu_scheduler.c
@@ -523,6 +523,29 @@ static bool drm_sched_entity_add_dependency_cb(struct drm_sched_entity *entity)
 	return false;
 }

+static bool drm_sched_entity_last_scheduled_dep(struct drm_sched_entity *entity)
+{
+	struct drm_sched_fence *s_fence;
+
+	if (!entity->last_scheduled)
+		return false;
+
+	/*
+	 * Check if the last submission was handled by a different scheduler
+	 */
+	s_fence = to_drm_sched_fence(entity->last_scheduled);
+	if (s_fence && s_fence->sched != entity->rq->sched) {
+		entity->dependency = dma_fence_get(entity->last_scheduled);
+		if (!dma_fence_add_callback(entity->dependency, &entity->cb,
+					    drm_sched_entity_wakeup))
+			return true;
+
+		dma_fence_put(entity->dependency);
+		entity->dependency = NULL;
+	}
+	return false;
+}
+
 static struct drm_sched_job *
 drm_sched_entity_pop_job(struct drm_sched_entity *entity)
 {
@@ -537,6 +560,9 @@ drm_sched_entity_pop_job(struct drm_sched_entity *entity)
 		if (drm_sched_entity_add_dependency_cb(entity))
 			return NULL;

+	if (drm_sched_entity_last_scheduled_dep(entity))
+		return NULL;
+
 	/* skip jobs from entity that marked guilty */
 	if (entity->guilty && atomic_read(entity->guilty))
 		dma_fence_set_error(&sched_job->s_fence->finished, -ECANCELED);
@@ -564,14 +590,10 @@ void drm_sched_entity_push_job(struct drm_sched_job *sched_job,
 			       struct drm_sched_entity *entity)
 {
 	struct drm_sched_rq *rq = entity->rq;
-	bool first, reschedule, idle;
+	bool first;

-	idle = entity->last_scheduled == NULL ||
-	       dma_fence_is_signaled(entity->last_scheduled);
 	first = spsc_queue_count(&entity->job_queue) == 0;
-	reschedule = idle && first && (entity->num_rq_list > 1);
-
-	if (reschedule) {
+	if (first && (entity->num_rq_list > 1)) {
 		rq = drm_sched_entity_get_free_sched(entity);
 		spin_lock(&entity->rq_lock);
 		drm_sched_rq_remove_entity(entity->rq, entity);
--
2.14.1
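The multi-threaded access mentioned in the commit message: entity->last_scheduled is replaced by the scheduler thread when a job is popped, while the old drm_sched_entity_push_job() read it from the submitting thread without any lock. Roughly (both fragments paraphrased from gpu_scheduler.c of this era):

/* Scheduler thread, when popping a job: */
dma_fence_put(entity->last_scheduled);
entity->last_scheduled = dma_fence_get(&sched_job->s_fence->finished);

/* Submitting thread, old push_job (removed by this patch): */
idle = entity->last_scheduled == NULL ||
       dma_fence_is_signaled(entity->last_scheduled);
/* The fence may be dropped concurrently: a use-after-free window. */

Moving the check into drm_sched_entity_pop_job() (via the new drm_sched_entity_last_scheduled_dep() helper) keeps all accesses to last_scheduled on the scheduler thread.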
Hi Christian,
Good patch. But it might lead to bad performance when we reschedule even though the hardware queue of the engine on which the last job was pushed is not full.
On Mon, Aug 6, 2018 at 5:49 PM Christian König <ckoenig.leichtzumerken@gmail.com> wrote:

> This fixes accessing the last_scheduled fence from multiple threads and
> also makes it easier to move entities between schedulers.
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
>  drivers/gpu/drm/scheduler/gpu_scheduler.c | 34 +++++++++++++++++++++++++------
>  1 file changed, 28 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler.c b/drivers/gpu/drm/scheduler/gpu_scheduler.c
> index 029863726c99..e4b71a543481 100644
> --- a/drivers/gpu/drm/scheduler/gpu_scheduler.c
> +++ b/drivers/gpu/drm/scheduler/gpu_scheduler.c
> @@ -523,6 +523,29 @@ static bool drm_sched_entity_add_dependency_cb(struct drm_sched_entity *entity)
>  	return false;
>  }
>
> +static bool drm_sched_entity_last_scheduled_dep(struct drm_sched_entity *entity)
> +{
> +	struct drm_sched_fence *s_fence;
> +
> +	if (!entity->last_scheduled)
> +		return false;
> +
> +	/*
> +	 * Check if the last submission was handled by a different scheduler
> +	 */
> +	s_fence = to_drm_sched_fence(entity->last_scheduled);
> +	if (s_fence && s_fence->sched != entity->rq->sched) {
> +		entity->dependency = dma_fence_get(entity->last_scheduled);
> +		if (!dma_fence_add_callback(entity->dependency, &entity->cb,
> +					    drm_sched_entity_wakeup))
> +			return true;
> +
> +		dma_fence_put(entity->dependency);
> +		entity->dependency = NULL;
> +	}
> +	return false;
> +}
> +
>  static struct drm_sched_job *
>  drm_sched_entity_pop_job(struct drm_sched_entity *entity)
>  {
> @@ -537,6 +560,9 @@ drm_sched_entity_pop_job(struct drm_sched_entity *entity)
>  		if (drm_sched_entity_add_dependency_cb(entity))
>  			return NULL;
>
> +	if (drm_sched_entity_last_scheduled_dep(entity))
> +		return NULL;
> +
>  	/* skip jobs from entity that marked guilty */
>  	if (entity->guilty && atomic_read(entity->guilty))
>  		dma_fence_set_error(&sched_job->s_fence->finished, -ECANCELED);
> @@ -564,14 +590,10 @@ void drm_sched_entity_push_job(struct drm_sched_job *sched_job,
>  			       struct drm_sched_entity *entity)
>  {
>  	struct drm_sched_rq *rq = entity->rq;
> -	bool first, reschedule, idle;
> +	bool first;
>
> -	idle = entity->last_scheduled == NULL ||
> -	       dma_fence_is_signaled(entity->last_scheduled);
>  	first = spsc_queue_count(&entity->job_queue) == 0;
> -	reschedule = idle && first && (entity->num_rq_list > 1);
> -
> -	if (reschedule) {
> +	if (first && (entity->num_rq_list > 1)) {

Something like this might be better:

if (first && (entity->num_rq_list > 1) && (hw_rq_count == hw_submission_limit))

> 		rq = drm_sched_entity_get_free_sched(entity);
> 		spin_lock(&entity->rq_lock);
> 		drm_sched_rq_remove_entity(entity->rq, entity);
> --
> 2.14.1
Regards,
Nayan
On 06.08.2018 15:23, Nayan Deshmukh wrote:
> Hi Christian,
>
> Good patch. But it might lead to bad performance when we reschedule even
> though the hardware queue of the engine on which the last job was pushed
> is not full.
>
> On Mon, Aug 6, 2018 at 5:49 PM Christian König
> <ckoenig.leichtzumerken@gmail.com> wrote:
>
>> This fixes accessing the last_scheduled fence from multiple threads and
>> also makes it easier to move entities between schedulers.
>>
>> [...]
>> -	if (reschedule) {
>> +	if (first && (entity->num_rq_list > 1)) {
>
> Something like this might be better:
>
> if (first && (entity->num_rq_list > 1) && (hw_rq_count == hw_submission_limit))
>
>> 		rq = drm_sched_entity_get_free_sched(entity);
>> 		spin_lock(&entity->rq_lock);
>> 		drm_sched_rq_remove_entity(entity->rq, entity);
>> --
>> 2.14.1
Hey, good idea. Going to take that into account.
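Something along these lines (untested sketch; the helper name is made up, but hw_rq_count and hw_submission_limit are the existing ring-occupancy fields that drm_sched_ready() already checks):

static bool drm_sched_entity_should_reschedule(struct drm_sched_entity *entity,
					       bool first)
{
	struct drm_gpu_scheduler *sched = entity->rq->sched;

	/*
	 * Only look for a less loaded scheduler when the current hardware
	 * ring is full. ">=" should be equivalent to the suggested "==",
	 * since hw_rq_count is not supposed to exceed hw_submission_limit.
	 */
	return first && entity->num_rq_list > 1 &&
	       atomic_read(&sched->hw_rq_count) >= sched->hw_submission_limit;
}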
Christian.
> Regards,
> Nayan
Hi Christian,
On Mon, Aug 6, 2018 at 5:49 PM Christian König <ckoenig.leichtzumerken@gmail.com> wrote:

> Update job earlier with the scheduler it is supposed to be scheduled on.
>
> Otherwise we could incorrectly optimize dependencies when moving an
> entity from one scheduler to another.
I don't think this change is really required; the old code should work fine. Read below for more comments.
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
>  drivers/gpu/drm/scheduler/gpu_scheduler.c | 4 ++--
>  drivers/gpu/drm/scheduler/sched_fence.c   | 2 +-
>  2 files changed, 3 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler.c b/drivers/gpu/drm/scheduler/gpu_scheduler.c
> index 65078dd3c82c..029863726c99 100644
> --- a/drivers/gpu/drm/scheduler/gpu_scheduler.c
> +++ b/drivers/gpu/drm/scheduler/gpu_scheduler.c
> @@ -533,8 +533,6 @@ drm_sched_entity_pop_job(struct drm_sched_entity *entity)
>  	if (!sched_job)
>  		return NULL;
>
> -	sched_job->sched = sched;
> -	sched_job->s_fence->sched = sched;
>  	while ((entity->dependency = sched->ops->dependency(sched_job, entity)))
The job should have the correct scheduler before the sched->ops->dependency() function is called for the first time. Hence I placed the assignment here.

It might also be that I am missing some case, so please let me know if such a case exists where this might lead to a wrong optimization.
Regards, Nayan
>  		if (drm_sched_entity_add_dependency_cb(entity))
>  			return NULL;
> @@ -581,6 +579,8 @@ void drm_sched_entity_push_job(struct drm_sched_job *sched_job,
>  		spin_unlock(&entity->rq_lock);
>  	}
>
> +	sched_job->sched = entity->rq->sched;
> +	sched_job->s_fence->sched = entity->rq->sched;
>  	trace_drm_sched_job(sched_job, entity);
>  	atomic_inc(&entity->rq->sched->num_jobs);
>  	WRITE_ONCE(entity->last_user, current->group_leader);
> diff --git a/drivers/gpu/drm/scheduler/sched_fence.c b/drivers/gpu/drm/scheduler/sched_fence.c
> index 4029312fdd81..6dab18d288d7 100644
> --- a/drivers/gpu/drm/scheduler/sched_fence.c
> +++ b/drivers/gpu/drm/scheduler/sched_fence.c
> @@ -172,7 +172,7 @@ struct drm_sched_fence *drm_sched_fence_create(struct drm_sched_entity *entity,
>  		return NULL;
>
>  	fence->owner = owner;
> -	fence->sched = entity->rq->sched;
> +	fence->sched = NULL;
>  	spin_lock_init(&fence->lock);
>
>  	seq = atomic_inc_return(&entity->fence_seq);
> --
> 2.14.1
On 06.08.2018 15:11, Nayan Deshmukh wrote:
> Hi Christian,
>
> On Mon, Aug 6, 2018 at 5:49 PM Christian König
> <ckoenig.leichtzumerken@gmail.com> wrote:
>
>> Update job earlier with the scheduler it is supposed to be scheduled on.
>>
>> Otherwise we could incorrectly optimize dependencies when moving an
>> entity from one scheduler to another.
>
> I don't think this change is really required; the old code should work
> fine. Read below for more comments.
>
>> [...]
>> -	sched_job->sched = sched;
>> -	sched_job->s_fence->sched = sched;
>>  	while ((entity->dependency = sched->ops->dependency(sched_job, entity)))
>
> The job should have the correct scheduler before the
> sched->ops->dependency() function is called for the first time. Hence I
> placed the assignment here.
>
> It might also be that I am missing some case, so please let me know if
> such a case exists where this might lead to a wrong optimization.
The problem is that the scheduler fence of job A could be the dependency of another job B.

Job B then makes an incorrect optimization because it sees the wrong scheduler in the scheduler fence of job A.
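To make the scenario concrete (illustration only; the function and variable names are hypothetical):

/*
 * Job B's entity tests whether job A's scheduler fence comes from the
 * same scheduler B will run on.
 */
static bool job_b_sees_same_ring(struct drm_sched_entity *entity_b,
				 struct dma_fence *fence_of_job_a)
{
	struct drm_sched_fence *s_fence = to_drm_sched_fence(fence_of_job_a);

	/*
	 * Before this patch, s_fence->sched was set when job A was
	 * *created*. If A was later pushed to a different scheduler,
	 * this can return true even though A executes on another ring,
	 * so B waits only for A to be scheduled instead of finished.
	 */
	return s_fence && s_fence->sched == entity_b->rq->sched;
}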
Regards, Christian.
> Regards,
> Nayan
>
>> [...]
On Mon, Aug 6, 2018 at 6:45 PM Christian König <ckoenig.leichtzumerken@gmail.com> wrote:

> On 06.08.2018 15:11, Nayan Deshmukh wrote:
>> Hi Christian,
>>
>> On Mon, Aug 6, 2018 at 5:49 PM Christian König
>> <ckoenig.leichtzumerken@gmail.com> wrote:
>>
>>> Update job earlier with the scheduler it is supposed to be scheduled on.
>>>
>>> Otherwise we could incorrectly optimize dependencies when moving an
>>> entity from one scheduler to another.
>>
>> I don't think this change is really required; the old code should work
>> fine. Read below for more comments.
>>
>>> [...]
>>> -	sched_job->sched = sched;
>>> -	sched_job->s_fence->sched = sched;
>>>  	while ((entity->dependency = sched->ops->dependency(sched_job, entity)))
>>
>> The job should have the correct scheduler before the
>> sched->ops->dependency() function is called for the first time. Hence I
>> placed the assignment here.
>>
>> It might also be that I am missing some case, so please let me know if
>> such a case exists where this might lead to a wrong optimization.
>
> The problem is that the scheduler fence of job A could be the dependency
> of another job B.
>
> Job B then makes an incorrect optimization because it sees the wrong
> scheduler in the scheduler fence of job A.
Ah, I missed this case. This makes sense now.

Reviewed-by: Nayan Deshmukh <nayan26deshmukh@gmail.com>
> Regards,
> Christian.
>
>> Regards,
>> Nayan
>>
>>> [...]