the following patches are from the PREEMPT_RT queue. It is mostly about disabling interrupts/preemption which leads to problems. Patches 1-4 are independent of PREEMPT_RT.
Unfortunately DRM_I915_LOW_LEVEL_TRACEPOINTS had to be disabled because it acquires locks from within trace points. It has been pointed out in the previous post that this makes any kind of debugging impossible. Making the uncore.lock a raw_spinlock_t doubles the latencies in my limited testing [0]. I don't know the difference on other hardware. Also it worries me a little that wait_for_atomic() has 50ms as the upper spin/wait limit if the condition does not become true.
I tested it on a SandyBridge with built-in i915 by using X, OpenGL and playing videos without noticing any warnings. However, some code paths were not entered.
[0] https://lore.kernel.org/all/20211006164628.s2mtsdd2jdbfyf7g@linutronix.de/
Sebastian
This is a revert of commits d67739268cf0e ("drm/i915/gt: Mark up the nested engine-pm timeline lock as irqsafe") 6c69a45445af9 ("drm/i915/gt: Mark context->active_count as protected by timeline->mutex")
The existing code leads to a different behaviour depending on whether lockdep is enabled or not. Any following lock that is acquired without disabling interrupts (but needs to) will not be noticed by lockdep.
This it not just a lockdep annotation but is used but an actual mutex_t that is properly used as a lock but in case of __timeline_mark_lock() lockdep is only told that it is acquired but no lock has been acquired.
It appears that its purpose is just satisfy the lockdep_assert_held() check in intel_context_mark_active(). The other problem with disabling interrupts is that on PREEMPT_RT interrupts are also disabled which leads to problems for instance later during memory allocation.
Add a CONTEXT_IS_PARKED bit to intel_engine_cs and set_bit/clear_bit it instead of mutex_acquire/mutex_release. Use test_bit in the two identified spots which relied on the lockdep annotation.
Acked-by: Peter Zijlstra (Intel) peterz@infradead.org Suggested--by: Peter Zijlstra (Intel) peterz@infradead.org Signed-off-by: Sebastian Andrzej Siewior bigeasy@linutronix.de --- drivers/gpu/drm/i915/gt/intel_context.h | 3 +- drivers/gpu/drm/i915/gt/intel_context_types.h | 1 + drivers/gpu/drm/i915/gt/intel_engine_pm.c | 38 +------------------ drivers/gpu/drm/i915/i915_request.h | 3 +- 4 files changed, 7 insertions(+), 38 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h index 246c37d72cd73..8a575629ef1b6 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.h +++ b/drivers/gpu/drm/i915/gt/intel_context.h @@ -211,7 +211,8 @@ static inline void intel_context_enter(struct intel_context *ce)
static inline void intel_context_mark_active(struct intel_context *ce) { - lockdep_assert_held(&ce->timeline->mutex); + lockdep_assert(lockdep_is_held(&ce->timeline->mutex) || + test_bit(CONTEXT_IS_PARKED, &ce->flags)); ++ce->active_count; }
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h index 9e0177dc5484e..329f470d125f2 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_types.h +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h @@ -118,6 +118,7 @@ struct intel_context { #define CONTEXT_LRCA_DIRTY 9 #define CONTEXT_GUC_INIT 10 #define CONTEXT_PERMA_PIN 11 +#define CONTEXT_IS_PARKED 12
struct { u64 timeout_us; diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c index a1334b48dde7b..456f1f5d0c04e 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c @@ -80,39 +80,6 @@ static int __engine_unpark(struct intel_wakeref *wf) return 0; }
-#if IS_ENABLED(CONFIG_LOCKDEP) - -static unsigned long __timeline_mark_lock(struct intel_context *ce) -{ - unsigned long flags; - - local_irq_save(flags); - mutex_acquire(&ce->timeline->mutex.dep_map, 2, 0, _THIS_IP_); - - return flags; -} - -static void __timeline_mark_unlock(struct intel_context *ce, - unsigned long flags) -{ - mutex_release(&ce->timeline->mutex.dep_map, _THIS_IP_); - local_irq_restore(flags); -} - -#else - -static unsigned long __timeline_mark_lock(struct intel_context *ce) -{ - return 0; -} - -static void __timeline_mark_unlock(struct intel_context *ce, - unsigned long flags) -{ -} - -#endif /* !IS_ENABLED(CONFIG_LOCKDEP) */ - static void duration(struct dma_fence *fence, struct dma_fence_cb *cb) { struct i915_request *rq = to_request(fence); @@ -159,7 +126,6 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine) { struct intel_context *ce = engine->kernel_context; struct i915_request *rq; - unsigned long flags; bool result = true;
/* @@ -214,7 +180,7 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine) * engine->wakeref.count, we may see the request completion and retire * it causing an underflow of the engine->wakeref. */ - flags = __timeline_mark_lock(ce); + set_bit(CONTEXT_IS_PARKED, &ce->flags); GEM_BUG_ON(atomic_read(&ce->timeline->active_count) < 0);
rq = __i915_request_create(ce, GFP_NOWAIT); @@ -246,7 +212,7 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine)
result = false; out_unlock: - __timeline_mark_unlock(ce, flags); + clear_bit(CONTEXT_IS_PARKED, &ce->flags); return result; }
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index dc359242d1aec..c5898882bb27a 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -642,7 +642,8 @@ i915_request_timeline(const struct i915_request *rq) { /* Valid only while the request is being constructed (or retired). */ return rcu_dereference_protected(rq->timeline, - lockdep_is_held(&rcu_access_pointer(rq->timeline)->mutex)); + lockdep_is_held(&rcu_access_pointer(rq->timeline)->mutex) || + test_bit(CONTEXT_IS_PARKED, &rq->context->flags)); }
static inline struct i915_gem_context *
Disabling interrupts and invoking the irq_work function directly breaks on PREEMPT_RT. PREEMPT_RT does not invoke all irq_work from hardirq context because some of the user have spinlock_t locking in the callback function. These locks are then turned into a sleeping locks which can not be acquired with disabled interrupts.
Using irq_work_queue() has the benefit that the irqwork will be invoked in the regular context. In general there is "no" delay between enqueuing the callback and its invocation because the interrupt is raised right away on architectures which support it (which includes x86).
Use irq_work_queue() + irq_work_sync() instead invoking the callback directly.
Reported-by: Clark Williams williams@redhat.com Signed-off-by: Sebastian Andrzej Siewior bigeasy@linutronix.de Reviewed-by: Maarten Lankhorst maarten.lankhorst@linux.intel.com --- drivers/gpu/drm/i915/gt/intel_breadcrumbs.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c index 209cf265bf746..6e1b9068d944c 100644 --- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c @@ -311,10 +311,9 @@ void __intel_breadcrumbs_park(struct intel_breadcrumbs *b) /* Kick the work once more to drain the signalers, and disarm the irq */ irq_work_sync(&b->irq_work); while (READ_ONCE(b->irq_armed) && !atomic_read(&b->active)) { - local_irq_disable(); - signal_irq_work(&b->irq_work); - local_irq_enable(); + irq_work_queue(&b->irq_work); cond_resched(); + irq_work_sync(&b->irq_work); } }
execlists_dequeue() is invoked from a function which uses local_irq_disable() to disable interrupts so the spin_lock() behaves like spin_lock_irq(). This breaks PREEMPT_RT because local_irq_disable() + spin_lock() is not the same as spin_lock_irq().
execlists_dequeue_irq() and execlists_dequeue() has each one caller only. If intel_engine_cs::active::lock is acquired and released with the _irq suffix then it behaves almost as if execlists_dequeue() would be invoked with disabled interrupts. The difference is the last part of the function which is then invoked with enabled interrupts. I can't tell if this makes a difference. From looking at it, it might work to move the last unlock at the end of the function as I didn't find anything that would acquire the lock again.
Reported-by: Clark Williams williams@redhat.com Signed-off-by: Sebastian Andrzej Siewior bigeasy@linutronix.de Reviewed-by: Maarten Lankhorst maarten.lankhorst@linux.intel.com --- .../drm/i915/gt/intel_execlists_submission.c | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index bedb80057046a..1dbcac05f44eb 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -1284,7 +1284,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) * and context switches) submission. */
- spin_lock(&sched_engine->lock); + spin_lock_irq(&sched_engine->lock);
/* * If the queue is higher priority than the last @@ -1384,7 +1384,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) * Even if ELSP[1] is occupied and not worthy * of timeslices, our queue might be. */ - spin_unlock(&sched_engine->lock); + spin_unlock_irq(&sched_engine->lock); return; } } @@ -1410,7 +1410,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
if (last && !can_merge_rq(last, rq)) { spin_unlock(&ve->base.sched_engine->lock); - spin_unlock(&engine->sched_engine->lock); + spin_unlock_irq(&engine->sched_engine->lock); return; /* leave this for another sibling */ }
@@ -1572,7 +1572,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) */ sched_engine->queue_priority_hint = queue_prio(sched_engine); i915_sched_engine_reset_on_empty(sched_engine); - spin_unlock(&sched_engine->lock); + spin_unlock_irq(&sched_engine->lock);
/* * We can skip poking the HW if we ended up with exactly the same set @@ -1598,13 +1598,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine) } }
-static void execlists_dequeue_irq(struct intel_engine_cs *engine) -{ - local_irq_disable(); /* Suspend interrupts across request submission */ - execlists_dequeue(engine); - local_irq_enable(); /* flush irq_work (e.g. breadcrumb enabling) */ -} - static void clear_ports(struct i915_request **ports, int count) { memset_p((void **)ports, NULL, count); @@ -2424,7 +2417,7 @@ static void execlists_submission_tasklet(struct tasklet_struct *t) }
if (!engine->execlists.pending[0]) { - execlists_dequeue_irq(engine); + execlists_dequeue(engine); start_timeslice(engine); }
The !irqs_disabled() check triggers on PREEMPT_RT even with i915_sched_engine::lock acquired. The reason is the lock is transformed into a sleeping lock on PREEMPT_RT and does not disable interrupts.
There is no need to check for disabled interrupts. The lockdep annotation below already check if the lock has been acquired by the caller and will yell if the interrupts are not disabled.
Remove the !irqs_disabled() check.
Reported-by: Maarten Lankhorst maarten.lankhorst@linux.intel.com Signed-off-by: Sebastian Andrzej Siewior bigeasy@linutronix.de --- drivers/gpu/drm/i915/i915_request.c | 2 -- 1 file changed, 2 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 820a1f38b271e..3bbe34ff3b15a 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -559,7 +559,6 @@ bool __i915_request_submit(struct i915_request *request)
RQ_TRACE(request, "\n");
- GEM_BUG_ON(!irqs_disabled()); lockdep_assert_held(&engine->sched_engine->lock);
/* @@ -668,7 +667,6 @@ void __i915_request_unsubmit(struct i915_request *request) */ RQ_TRACE(request, "\n");
- GEM_BUG_ON(!irqs_disabled()); lockdep_assert_held(&engine->sched_engine->lock);
/*
From: Mike Galbraith umgwanakikbuti@gmail.com
Mario Kleiner suggest in commit ad3543ede630f ("drm/intel: Push get_scanout_position() timestamping into kms driver.")
a spots where preemption should be disabled on PREEMPT_RT. The difference is that on PREEMPT_RT the intel_uncore::lock disables neither preemption nor interrupts and so region remains preemptible.
The area covers only register reads and writes. The part that worries me is: - __intel_get_crtc_scanline() the worst case is 100us if no match is found.
- intel_crtc_scanlines_since_frame_timestamp() not sure how long this may take in the worst case.
It was in the RT queue for a while and nobody complained. Disable preemption on PREEPMPT_RT during timestamping.
[bigeasy: patch description.]
Cc: Mario Kleiner mario.kleiner.de@gmail.com Signed-off-by: Mike Galbraith umgwanakikbuti@gmail.com Signed-off-by: Thomas Gleixner tglx@linutronix.de Signed-off-by: Sebastian Andrzej Siewior bigeasy@linutronix.de --- drivers/gpu/drm/i915/i915_irq.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 77680bca46eec..be8faaaa60226 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -916,7 +916,8 @@ static bool i915_get_crtc_scanoutpos(struct drm_crtc *_crtc, */ spin_lock_irqsave(&dev_priv->uncore.lock, irqflags);
- /* preempt_disable_rt() should go right here in PREEMPT_RT patchset. */ + if (IS_ENABLED(CONFIG_PREEMPT_RT)) + preempt_disable();
/* Get optional system timestamp before query. */ if (stime) @@ -980,7 +981,8 @@ static bool i915_get_crtc_scanoutpos(struct drm_crtc *_crtc, if (etime) *etime = ktime_get();
- /* preempt_enable_rt() should go right here in PREEMPT_RT patchset. */ + if (IS_ENABLED(CONFIG_PREEMPT_RT)) + preempt_enable();
spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags);
From: Mike Galbraith umgwanakikbuti@gmail.com
Commit 8d7849db3eab7 ("drm/i915: Make sprite updates atomic")
started disabling interrupts across atomic updates. This breaks on PREEMPT_RT because within this section the code attempt to acquire spinlock_t locks which are sleeping locks on PREEMPT_RT.
According to the comment the interrupts are disabled to avoid random delays and not required for protection or synchronisation. If this needs to happen with disabled interrupts on PREEMPT_RT, and the whole section is restricted to register access then all sleeping locks need to be acquired before interrupts are disabled and some function maybe moved after enabling interrupts again. This includes: - prepare_to_wait() + finish_wait() due its wake queue. - drm_crtc_vblank_put() -> vblank_disable_fn() drm_device::vbl_lock. - skl_pfit_enable(), intel_update_plane(), vlv_atomic_update_fifo() and maybe others due to intel_uncore::lock - drm_crtc_arm_vblank_event() due to drm_device::event_lock and drm_device::vblank_time_lock.
Don't disable interrupts on PREEMPT_RT during atomic updates.
[bigeasy: drop local locks, commit message]
Signed-off-by: Mike Galbraith umgwanakikbuti@gmail.com Signed-off-by: Sebastian Andrzej Siewior bigeasy@linutronix.de --- drivers/gpu/drm/i915/display/intel_crtc.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-)
diff --git a/drivers/gpu/drm/i915/display/intel_crtc.c b/drivers/gpu/drm/i915/display/intel_crtc.c index 254e67141a776..7a39029b083f4 100644 --- a/drivers/gpu/drm/i915/display/intel_crtc.c +++ b/drivers/gpu/drm/i915/display/intel_crtc.c @@ -425,7 +425,8 @@ void intel_pipe_update_start(const struct intel_crtc_state *new_crtc_state) */ intel_psr_wait_for_idle(new_crtc_state);
- local_irq_disable(); + if (!IS_ENABLED(CONFIG_PREEMPT_RT)) + local_irq_disable();
crtc->debug.min_vbl = min; crtc->debug.max_vbl = max; @@ -450,11 +451,13 @@ void intel_pipe_update_start(const struct intel_crtc_state *new_crtc_state) break; }
- local_irq_enable(); + if (!IS_ENABLED(CONFIG_PREEMPT_RT)) + local_irq_enable();
timeout = schedule_timeout(timeout);
- local_irq_disable(); + if (!IS_ENABLED(CONFIG_PREEMPT_RT)) + local_irq_disable(); }
finish_wait(wq, &wait); @@ -487,7 +490,8 @@ void intel_pipe_update_start(const struct intel_crtc_state *new_crtc_state) return;
irq_disable: - local_irq_disable(); + if (!IS_ENABLED(CONFIG_PREEMPT_RT)) + local_irq_disable(); }
#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_VBLANK_EVADE) @@ -566,7 +570,8 @@ void intel_pipe_update_end(struct intel_crtc_state *new_crtc_state) new_crtc_state->uapi.event = NULL; }
- local_irq_enable(); + if (!IS_ENABLED(CONFIG_PREEMPT_RT)) + local_irq_enable();
/* Send VRR Push to terminate Vblank */ intel_vrr_send_push(new_crtc_state);
The !in_atomic() check in _wait_for_atomic() triggers on PREEMPT_RT because the uncore::lock is a spinlock_t and does not disable preemption or interrupts.
Changing the uncore:lock to a raw_spinlock_t doubles the worst case latency on an otherwise idle testbox during testing. Therefore I'm currently unsure about changing this.
Link: https://lore.kernel.org/all/20211006164628.s2mtsdd2jdbfyf7g@linutronix.de/ Signed-off-by: Sebastian Andrzej Siewior bigeasy@linutronix.de --- drivers/gpu/drm/i915/i915_utils.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h index 7a5925072466a..b7b56fb1e2fc7 100644 --- a/drivers/gpu/drm/i915/i915_utils.h +++ b/drivers/gpu/drm/i915/i915_utils.h @@ -344,7 +344,7 @@ wait_remaining_ms_from_jiffies(unsigned long timestamp_jiffies, int to_wait_ms) #define wait_for(COND, MS) _wait_for((COND), (MS) * 1000, 10, 1000)
/* If CONFIG_PREEMPT_COUNT is disabled, in_atomic() always reports false. */ -#if defined(CONFIG_DRM_I915_DEBUG) && defined(CONFIG_PREEMPT_COUNT) +#if defined(CONFIG_DRM_I915_DEBUG) && defined(CONFIG_PREEMPT_COUNT) && !defined(CONFIG_PREEMPT_RT) # define _WAIT_FOR_ATOMIC_CHECK(ATOMIC) WARN_ON_ONCE((ATOMIC) && !in_atomic()) #else # define _WAIT_FOR_ATOMIC_CHECK(ATOMIC) do { } while (0)
Luca Abeni reported this: | BUG: scheduling while atomic: kworker/u8:2/15203/0x00000003 | CPU: 1 PID: 15203 Comm: kworker/u8:2 Not tainted 4.19.1-rt3 #10 | Call Trace: | rt_spin_lock+0x3f/0x50 | gen6_read32+0x45/0x1d0 [i915] | g4x_get_vblank_counter+0x36/0x40 [i915] | trace_event_raw_event_i915_pipe_update_start+0x7d/0xf0 [i915]
The tracing events use trace_i915_pipe_update_start() among other events use functions acquire spinlock_t locks which are transformed into sleeping locks on PREEMPT_RT. A few trace points use intel_get_crtc_scanline(), others use ->get_vblank_counter() wich also might acquire a sleeping locks on PREEMPT_RT. At the time the arguments are evaluated within trace point, preemption is disabled and so the locks must not be acquired on PREEMPT_RT.
Based on this I don't see any other way than disable trace points on PREMPT_RT.
Reported-by: Luca Abeni lucabe72@gmail.com Cc: Steven Rostedt rostedt@goodmis.org Signed-off-by: Sebastian Andrzej Siewior bigeasy@linutronix.de --- drivers/gpu/drm/i915/i915_trace.h | 4 ++++ 1 file changed, 4 insertions(+)
diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index 9795f456cccfc..0f8341ca67385 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -2,6 +2,10 @@ #if !defined(_I915_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ) #define _I915_TRACE_H_
+#ifdef CONFIG_PREEMPT_RT +#define NOTRACE +#endif + #include <linux/stringify.h> #include <linux/types.h> #include <linux/tracepoint.h>
The order of the header files is important. If this header file is included after tracepoint.h was included then the NOTRACE here becomes a nop. Currently this happens for two .c files which use the tracepoitns behind DRM_I915_LOW_LEVEL_TRACEPOINTS.
Cc: Steven Rostedt rostedt@goodmis.org Signed-off-by: Sebastian Andrzej Siewior bigeasy@linutronix.de Signed-off-by: Thomas Gleixner tglx@linutronix.de --- drivers/gpu/drm/i915/i915_trace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index 0f8341ca67385..0aefb14c638ae 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -826,7 +826,7 @@ DEFINE_EVENT(i915_request, i915_request_add, TP_ARGS(rq) );
-#if defined(CONFIG_DRM_I915_LOW_LEVEL_TRACEPOINTS) +#if defined(CONFIG_DRM_I915_LOW_LEVEL_TRACEPOINTS) && !defined(NOTRACE) DEFINE_EVENT(i915_request, i915_request_guc_submit, TP_PROTO(struct i915_request *rq), TP_ARGS(rq)
dri-devel@lists.freedesktop.org