On Tue, 29 Jun 2021 09:35:03 +0200 Boris Brezillon boris.brezillon@collabora.com wrote:
@@ -379,57 +370,72 @@ void panfrost_job_enable_interrupts(struct panfrost_device *pfdev) job_write(pfdev, JOB_INT_MASK, irq_mask); }
-static bool panfrost_scheduler_stop(struct panfrost_queue_state *queue,
struct drm_sched_job *bad)
+static void panfrost_reset(struct panfrost_device *pfdev,
struct drm_sched_job *bad)
{
- enum panfrost_queue_status old_status;
- bool stopped = false;
- unsigned int i;
- bool cookie;
- mutex_lock(&queue->lock);
- old_status = atomic_xchg(&queue->status,
PANFROST_QUEUE_STATUS_STOPPED);
- if (old_status == PANFROST_QUEUE_STATUS_STOPPED)
goto out;
- if (!atomic_read(&pfdev->reset.pending))
return;
- /* Stop the schedulers.
*
* FIXME: We temporarily get out of the dma_fence_signalling section
* because the cleanup path generates lockdep splats when taking locks
* to release job resources. We should rework the code to follow this
* pattern:
*
* try_lock
* if (locked)
* release
* else
* schedule_work_to_release_later
*/
- for (i = 0; i < NUM_JOB_SLOTS; i++)
drm_sched_stop(&pfdev->js->queue[i].sched, bad);
- cookie = dma_fence_begin_signalling();
WARN_ON(old_status != PANFROST_QUEUE_STATUS_ACTIVE);
drm_sched_stop(&queue->sched, bad); if (bad) drm_sched_increase_karma(bad);
stopped = true;
- spin_lock(&pfdev->js->job_lock);
- for (i = 0; i < NUM_JOB_SLOTS; i++) {
if (pfdev->jobs[i]) {
pm_runtime_put_noidle(pfdev->dev);
panfrost_devfreq_record_idle(&pfdev->pfdevfreq);
pfdev->jobs[i] = NULL;
}
- }
- spin_unlock(&pfdev->js->job_lock);
- /*
* Set the timeout to max so the timer doesn't get started
* when we return from the timeout handler (restored in
* panfrost_scheduler_start()).
- panfrost_device_reset(pfdev);
- /* GPU has been reset, we can cancel timeout/fault work that may have
* been queued in the meantime and clear the reset pending bit.
*/
- queue->sched.timeout = MAX_SCHEDULE_TIMEOUT;
- atomic_set(&pfdev->reset.pending, 0);
- for (i = 0; i < NUM_JOB_SLOTS; i++)
cancel_delayed_work(&pfdev->js->queue[i].sched.work_tdr);
Those cancel_delayed_work() calls are useless: drm_sched_stop() has already canceled those works. I'll get rid of them in v6.