Hi all,
This fixes the dreaded gpu reset vs. modeset deadlocks. And since the defunct legacy page_flip code is the reason for a bunch of special cases I did remove that too, on Maarten's request.
Please review, this is currently blocking extended CI runs on our haswell farm rather badly. The critical patches are up to patch 5.
Compared to last time around I've dropped two patches of dubious benefit, they broke gen3/4 reset a bit and aren't really needed.
Thanks, Daniel
Daniel Vetter (9): drm/i915: Nuke legacy flip queueing code drm/i915: Unbreak gpu reset vs. modeset locking drm/i915: Avoid the gpu reset vs. modeset deadlock drm/i915: Push i915_sw_fence_wait into the nonblocking atomic commit drm/i915: More surgically unbreak the modeset vs reset deadlock drm/i915: Rip out legacy page_flip completion/irq handling drm/i915: adjust has_pending_fb_unpin to atomic drm/i915: Remove intel_flip_work infrastructure drm/i915: Drop unpin stall in atomic_prepare_commit
drivers/gpu/drm/i915/i915_debugfs.c | 70 --- drivers/gpu/drm/i915/i915_drv.c | 1 - drivers/gpu/drm/i915/i915_drv.h | 10 +- drivers/gpu/drm/i915/i915_gem.c | 2 - drivers/gpu/drm/i915/i915_irq.c | 151 +---- drivers/gpu/drm/i915/intel_display.c | 1129 +++------------------------------- drivers/gpu/drm/i915/intel_drv.h | 26 +- drivers/gpu/drm/i915/intel_sprite.c | 8 +- 8 files changed, 94 insertions(+), 1303 deletions(-)
Just a very minimal patch to nuke that code. Lots of the flip interrupt handling stuff is still around.
Cc: Maarten Lankhorst maarten.lankhorst@linux.intel.com Cc: Ville Syrjälä ville.syrjala@linux.intel.com Signed-off-by: Daniel Vetter daniel.vetter@intel.com --- drivers/gpu/drm/i915/i915_drv.h | 5 - drivers/gpu/drm/i915/intel_display.c | 656 ----------------------------------- 2 files changed, 661 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index f66c78d3a0a2..07e98b07c5bc 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -715,11 +715,6 @@ struct drm_i915_display_funcs { void (*fdi_link_train)(struct intel_crtc *crtc, const struct intel_crtc_state *crtc_state); void (*init_clock_gating)(struct drm_i915_private *dev_priv); - int (*queue_flip)(struct drm_device *dev, struct drm_crtc *crtc, - struct drm_framebuffer *fb, - struct drm_i915_gem_object *obj, - struct drm_i915_gem_request *req, - uint32_t flags); void (*hpd_irq_setup)(struct drm_i915_private *dev_priv); /* clock updates for mode set */ /* cursor updates */ diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 74b0ea1badc3..3349ca947173 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -2664,20 +2664,6 @@ intel_alloc_initial_plane_obj(struct intel_crtc *crtc, return false; }
-/* Update plane->state->fb to match plane->fb after driver-internal updates */ -static void -update_state_fb(struct drm_plane *plane) -{ - if (plane->fb == plane->state->fb) - return; - - if (plane->state->fb) - drm_framebuffer_unreference(plane->state->fb); - plane->state->fb = plane->fb; - if (plane->state->fb) - drm_framebuffer_reference(plane->state->fb); -} - static void intel_set_plane_visible(struct intel_crtc_state *crtc_state, struct intel_plane_state *plane_state, @@ -10163,35 +10149,6 @@ static void intel_crtc_destroy(struct drm_crtc *crtc) kfree(intel_crtc); }
-static void intel_unpin_work_fn(struct work_struct *__work) -{ - struct intel_flip_work *work = - container_of(__work, struct intel_flip_work, unpin_work); - struct intel_crtc *crtc = to_intel_crtc(work->crtc); - struct drm_device *dev = crtc->base.dev; - struct drm_plane *primary = crtc->base.primary; - - if (is_mmio_work(work)) - flush_work(&work->mmio_work); - - mutex_lock(&dev->struct_mutex); - intel_unpin_fb_vma(work->old_vma); - i915_gem_object_put(work->pending_flip_obj); - mutex_unlock(&dev->struct_mutex); - - i915_gem_request_put(work->flip_queued_req); - - intel_frontbuffer_flip_complete(to_i915(dev), - to_intel_plane(primary)->frontbuffer_bit); - intel_fbc_post_update(crtc); - drm_framebuffer_unreference(work->old_fb); - - BUG_ON(atomic_read(&crtc->unpin_work_count) == 0); - atomic_dec(&crtc->unpin_work_count); - - kfree(work); -} - /* Is 'a' after or equal to 'b'? */ static bool g4x_flip_count_after_eq(u32 a, u32 b) { @@ -10336,346 +10293,6 @@ static inline void intel_mark_page_flip_active(struct intel_crtc *crtc, atomic_set(&work->pending, 1); }
-static int intel_gen2_queue_flip(struct drm_device *dev, - struct drm_crtc *crtc, - struct drm_framebuffer *fb, - struct drm_i915_gem_object *obj, - struct drm_i915_gem_request *req, - uint32_t flags) -{ - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - u32 flip_mask, *cs; - - cs = intel_ring_begin(req, 6); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - /* Can't queue multiple flips, so wait for the previous - * one to finish before executing the next. - */ - if (intel_crtc->plane) - flip_mask = MI_WAIT_FOR_PLANE_B_FLIP; - else - flip_mask = MI_WAIT_FOR_PLANE_A_FLIP; - *cs++ = MI_WAIT_FOR_EVENT | flip_mask; - *cs++ = MI_NOOP; - *cs++ = MI_DISPLAY_FLIP | MI_DISPLAY_FLIP_PLANE(intel_crtc->plane); - *cs++ = fb->pitches[0]; - *cs++ = intel_crtc->flip_work->gtt_offset; - *cs++ = 0; /* aux display base address, unused */ - - return 0; -} - -static int intel_gen3_queue_flip(struct drm_device *dev, - struct drm_crtc *crtc, - struct drm_framebuffer *fb, - struct drm_i915_gem_object *obj, - struct drm_i915_gem_request *req, - uint32_t flags) -{ - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - u32 flip_mask, *cs; - - cs = intel_ring_begin(req, 6); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - if (intel_crtc->plane) - flip_mask = MI_WAIT_FOR_PLANE_B_FLIP; - else - flip_mask = MI_WAIT_FOR_PLANE_A_FLIP; - *cs++ = MI_WAIT_FOR_EVENT | flip_mask; - *cs++ = MI_NOOP; - *cs++ = MI_DISPLAY_FLIP_I915 | MI_DISPLAY_FLIP_PLANE(intel_crtc->plane); - *cs++ = fb->pitches[0]; - *cs++ = intel_crtc->flip_work->gtt_offset; - *cs++ = MI_NOOP; - - return 0; -} - -static int intel_gen4_queue_flip(struct drm_device *dev, - struct drm_crtc *crtc, - struct drm_framebuffer *fb, - struct drm_i915_gem_object *obj, - struct drm_i915_gem_request *req, - uint32_t flags) -{ - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - u32 pf, pipesrc, *cs; - - cs = intel_ring_begin(req, 4); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - /* i965+ uses the linear or tiled offsets from the - * Display Registers (which do not change across a page-flip) - * so we need only reprogram the base address. - */ - *cs++ = MI_DISPLAY_FLIP | MI_DISPLAY_FLIP_PLANE(intel_crtc->plane); - *cs++ = fb->pitches[0]; - *cs++ = intel_crtc->flip_work->gtt_offset | - intel_fb_modifier_to_tiling(fb->modifier); - - /* XXX Enabling the panel-fitter across page-flip is so far - * untested on non-native modes, so ignore it for now. - * pf = I915_READ(pipe == 0 ? PFA_CTL_1 : PFB_CTL_1) & PF_ENABLE; - */ - pf = 0; - pipesrc = I915_READ(PIPESRC(intel_crtc->pipe)) & 0x0fff0fff; - *cs++ = pf | pipesrc; - - return 0; -} - -static int intel_gen6_queue_flip(struct drm_device *dev, - struct drm_crtc *crtc, - struct drm_framebuffer *fb, - struct drm_i915_gem_object *obj, - struct drm_i915_gem_request *req, - uint32_t flags) -{ - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - u32 pf, pipesrc, *cs; - - cs = intel_ring_begin(req, 4); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - *cs++ = MI_DISPLAY_FLIP | MI_DISPLAY_FLIP_PLANE(intel_crtc->plane); - *cs++ = fb->pitches[0] | intel_fb_modifier_to_tiling(fb->modifier); - *cs++ = intel_crtc->flip_work->gtt_offset; - - /* Contrary to the suggestions in the documentation, - * "Enable Panel Fitter" does not seem to be required when page - * flipping with a non-native mode, and worse causes a normal - * modeset to fail. - * pf = I915_READ(PF_CTL(intel_crtc->pipe)) & PF_ENABLE; - */ - pf = 0; - pipesrc = I915_READ(PIPESRC(intel_crtc->pipe)) & 0x0fff0fff; - *cs++ = pf | pipesrc; - - return 0; -} - -static int intel_gen7_queue_flip(struct drm_device *dev, - struct drm_crtc *crtc, - struct drm_framebuffer *fb, - struct drm_i915_gem_object *obj, - struct drm_i915_gem_request *req, - uint32_t flags) -{ - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - u32 *cs, plane_bit = 0; - int len, ret; - - switch (intel_crtc->plane) { - case PLANE_A: - plane_bit = MI_DISPLAY_FLIP_IVB_PLANE_A; - break; - case PLANE_B: - plane_bit = MI_DISPLAY_FLIP_IVB_PLANE_B; - break; - case PLANE_C: - plane_bit = MI_DISPLAY_FLIP_IVB_PLANE_C; - break; - default: - WARN_ONCE(1, "unknown plane in flip command\n"); - return -ENODEV; - } - - len = 4; - if (req->engine->id == RCS) { - len += 6; - /* - * On Gen 8, SRM is now taking an extra dword to accommodate - * 48bits addresses, and we need a NOOP for the batch size to - * stay even. - */ - if (IS_GEN8(dev_priv)) - len += 2; - } - - /* - * BSpec MI_DISPLAY_FLIP for IVB: - * "The full packet must be contained within the same cache line." - * - * Currently the LRI+SRM+MI_DISPLAY_FLIP all fit within the same - * cacheline, if we ever start emitting more commands before - * the MI_DISPLAY_FLIP we may need to first emit everything else, - * then do the cacheline alignment, and finally emit the - * MI_DISPLAY_FLIP. - */ - ret = intel_ring_cacheline_align(req); - if (ret) - return ret; - - cs = intel_ring_begin(req, len); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - /* Unmask the flip-done completion message. Note that the bspec says that - * we should do this for both the BCS and RCS, and that we must not unmask - * more than one flip event at any time (or ensure that one flip message - * can be sent by waiting for flip-done prior to queueing new flips). - * Experimentation says that BCS works despite DERRMR masking all - * flip-done completion events and that unmasking all planes at once - * for the RCS also doesn't appear to drop events. Setting the DERRMR - * to zero does lead to lockups within MI_DISPLAY_FLIP. - */ - if (req->engine->id == RCS) { - *cs++ = MI_LOAD_REGISTER_IMM(1); - *cs++ = i915_mmio_reg_offset(DERRMR); - *cs++ = ~(DERRMR_PIPEA_PRI_FLIP_DONE | - DERRMR_PIPEB_PRI_FLIP_DONE | - DERRMR_PIPEC_PRI_FLIP_DONE); - if (IS_GEN8(dev_priv)) - *cs++ = MI_STORE_REGISTER_MEM_GEN8 | - MI_SRM_LRM_GLOBAL_GTT; - else - *cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT; - *cs++ = i915_mmio_reg_offset(DERRMR); - *cs++ = i915_ggtt_offset(req->engine->scratch) + 256; - if (IS_GEN8(dev_priv)) { - *cs++ = 0; - *cs++ = MI_NOOP; - } - } - - *cs++ = MI_DISPLAY_FLIP_I915 | plane_bit; - *cs++ = fb->pitches[0] | intel_fb_modifier_to_tiling(fb->modifier); - *cs++ = intel_crtc->flip_work->gtt_offset; - *cs++ = MI_NOOP; - - return 0; -} - -static bool use_mmio_flip(struct intel_engine_cs *engine, - struct drm_i915_gem_object *obj) -{ - /* - * This is not being used for older platforms, because - * non-availability of flip done interrupt forces us to use - * CS flips. Older platforms derive flip done using some clever - * tricks involving the flip_pending status bits and vblank irqs. - * So using MMIO flips there would disrupt this mechanism. - */ - - if (engine == NULL) - return true; - - if (INTEL_GEN(engine->i915) < 5) - return false; - - if (i915.use_mmio_flip < 0) - return false; - else if (i915.use_mmio_flip > 0) - return true; - else if (i915.enable_execlists) - return true; - - return engine != i915_gem_object_last_write_engine(obj); -} - -static void skl_do_mmio_flip(struct intel_crtc *intel_crtc, - unsigned int rotation, - struct intel_flip_work *work) -{ - struct drm_device *dev = intel_crtc->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct drm_framebuffer *fb = intel_crtc->base.primary->fb; - const enum pipe pipe = intel_crtc->pipe; - u32 ctl, stride = skl_plane_stride(fb, 0, rotation); - - ctl = I915_READ(PLANE_CTL(pipe, 0)); - ctl &= ~PLANE_CTL_TILED_MASK; - switch (fb->modifier) { - case DRM_FORMAT_MOD_LINEAR: - break; - case I915_FORMAT_MOD_X_TILED: - ctl |= PLANE_CTL_TILED_X; - break; - case I915_FORMAT_MOD_Y_TILED: - ctl |= PLANE_CTL_TILED_Y; - break; - case I915_FORMAT_MOD_Yf_TILED: - ctl |= PLANE_CTL_TILED_YF; - break; - default: - MISSING_CASE(fb->modifier); - } - - /* - * Both PLANE_CTL and PLANE_STRIDE are not updated on vblank but on - * PLANE_SURF updates, the update is then guaranteed to be atomic. - */ - I915_WRITE(PLANE_CTL(pipe, 0), ctl); - I915_WRITE(PLANE_STRIDE(pipe, 0), stride); - - I915_WRITE(PLANE_SURF(pipe, 0), work->gtt_offset); - POSTING_READ(PLANE_SURF(pipe, 0)); -} - -static void ilk_do_mmio_flip(struct intel_crtc *intel_crtc, - struct intel_flip_work *work) -{ - struct drm_device *dev = intel_crtc->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct drm_framebuffer *fb = intel_crtc->base.primary->fb; - i915_reg_t reg = DSPCNTR(intel_crtc->plane); - u32 dspcntr; - - dspcntr = I915_READ(reg); - - if (fb->modifier == I915_FORMAT_MOD_X_TILED) - dspcntr |= DISPPLANE_TILED; - else - dspcntr &= ~DISPPLANE_TILED; - - I915_WRITE(reg, dspcntr); - - I915_WRITE(DSPSURF(intel_crtc->plane), work->gtt_offset); - POSTING_READ(DSPSURF(intel_crtc->plane)); -} - -static void intel_mmio_flip_work_func(struct work_struct *w) -{ - struct intel_flip_work *work = - container_of(w, struct intel_flip_work, mmio_work); - struct intel_crtc *crtc = to_intel_crtc(work->crtc); - struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - struct intel_framebuffer *intel_fb = - to_intel_framebuffer(crtc->base.primary->fb); - struct drm_i915_gem_object *obj = intel_fb->obj; - - WARN_ON(i915_gem_object_wait(obj, 0, MAX_SCHEDULE_TIMEOUT, NULL) < 0); - - intel_pipe_update_start(crtc); - - if (INTEL_GEN(dev_priv) >= 9) - skl_do_mmio_flip(crtc, work->rotation, work); - else - /* use_mmio_flip() retricts MMIO flips to ilk+ */ - ilk_do_mmio_flip(crtc, work); - - intel_pipe_update_end(crtc, work); -} - -static int intel_default_queue_flip(struct drm_device *dev, - struct drm_crtc *crtc, - struct drm_framebuffer *fb, - struct drm_i915_gem_object *obj, - struct drm_i915_gem_request *req, - uint32_t flags) -{ - return -ENODEV; -} - static bool __pageflip_stall_check_cs(struct drm_i915_private *dev_priv, struct intel_crtc *intel_crtc, struct intel_flip_work *work) @@ -10742,251 +10359,6 @@ void intel_check_page_flip(struct drm_i915_private *dev_priv, int pipe) spin_unlock(&dev->event_lock); }
-__maybe_unused -static int intel_crtc_page_flip(struct drm_crtc *crtc, - struct drm_framebuffer *fb, - struct drm_pending_vblank_event *event, - uint32_t page_flip_flags) -{ - struct drm_device *dev = crtc->dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct drm_framebuffer *old_fb = crtc->primary->fb; - struct drm_i915_gem_object *obj = intel_fb_obj(fb); - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - struct drm_plane *primary = crtc->primary; - enum pipe pipe = intel_crtc->pipe; - struct intel_flip_work *work; - struct intel_engine_cs *engine; - bool mmio_flip; - struct drm_i915_gem_request *request; - struct i915_vma *vma; - int ret; - - /* - * drm_mode_page_flip_ioctl() should already catch this, but double - * check to be safe. In the future we may enable pageflipping from - * a disabled primary plane. - */ - if (WARN_ON(intel_fb_obj(old_fb) == NULL)) - return -EBUSY; - - /* Can't change pixel format via MI display flips. */ - if (fb->format != crtc->primary->fb->format) - return -EINVAL; - - /* - * TILEOFF/LINOFF registers can't be changed via MI display flips. - * Note that pitch changes could also affect these register. - */ - if (INTEL_GEN(dev_priv) > 3 && - (fb->offsets[0] != crtc->primary->fb->offsets[0] || - fb->pitches[0] != crtc->primary->fb->pitches[0])) - return -EINVAL; - - if (i915_terminally_wedged(&dev_priv->gpu_error)) - goto out_hang; - - work = kzalloc(sizeof(*work), GFP_KERNEL); - if (work == NULL) - return -ENOMEM; - - work->event = event; - work->crtc = crtc; - work->old_fb = old_fb; - INIT_WORK(&work->unpin_work, intel_unpin_work_fn); - - ret = drm_crtc_vblank_get(crtc); - if (ret) - goto free_work; - - /* We borrow the event spin lock for protecting flip_work */ - spin_lock_irq(&dev->event_lock); - if (intel_crtc->flip_work) { - /* Before declaring the flip queue wedged, check if - * the hardware completed the operation behind our backs. - */ - if (pageflip_finished(intel_crtc, intel_crtc->flip_work)) { - DRM_DEBUG_DRIVER("flip queue: previous flip completed, continuing\n"); - page_flip_completed(intel_crtc); - } else { - DRM_DEBUG_DRIVER("flip queue: crtc already busy\n"); - spin_unlock_irq(&dev->event_lock); - - drm_crtc_vblank_put(crtc); - kfree(work); - return -EBUSY; - } - } - intel_crtc->flip_work = work; - spin_unlock_irq(&dev->event_lock); - - if (atomic_read(&intel_crtc->unpin_work_count) >= 2) - flush_workqueue(dev_priv->wq); - - /* Reference the objects for the scheduled work. */ - drm_framebuffer_reference(work->old_fb); - - crtc->primary->fb = fb; - update_state_fb(crtc->primary); - - work->pending_flip_obj = i915_gem_object_get(obj); - - ret = i915_mutex_lock_interruptible(dev); - if (ret) - goto cleanup; - - intel_crtc->reset_count = i915_reset_count(&dev_priv->gpu_error); - if (i915_reset_backoff_or_wedged(&dev_priv->gpu_error)) { - ret = -EIO; - goto unlock; - } - - atomic_inc(&intel_crtc->unpin_work_count); - - if (INTEL_GEN(dev_priv) >= 5 || IS_G4X(dev_priv)) - work->flip_count = I915_READ(PIPE_FLIPCOUNT_G4X(pipe)) + 1; - - if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { - engine = dev_priv->engine[BCS]; - if (fb->modifier != old_fb->modifier) - /* vlv: DISPLAY_FLIP fails to change tiling */ - engine = NULL; - } else if (IS_IVYBRIDGE(dev_priv) || IS_HASWELL(dev_priv)) { - engine = dev_priv->engine[BCS]; - } else if (INTEL_GEN(dev_priv) >= 7) { - engine = i915_gem_object_last_write_engine(obj); - if (engine == NULL || engine->id != RCS) - engine = dev_priv->engine[BCS]; - } else { - engine = dev_priv->engine[RCS]; - } - - mmio_flip = use_mmio_flip(engine, obj); - - vma = intel_pin_and_fence_fb_obj(fb, primary->state->rotation); - if (IS_ERR(vma)) { - ret = PTR_ERR(vma); - goto cleanup_pending; - } - - work->old_vma = to_intel_plane_state(primary->state)->vma; - to_intel_plane_state(primary->state)->vma = vma; - - work->gtt_offset = i915_ggtt_offset(vma) + intel_crtc->dspaddr_offset; - work->rotation = crtc->primary->state->rotation; - - /* - * There's the potential that the next frame will not be compatible with - * FBC, so we want to call pre_update() before the actual page flip. - * The problem is that pre_update() caches some information about the fb - * object, so we want to do this only after the object is pinned. Let's - * be on the safe side and do this immediately before scheduling the - * flip. - */ - intel_fbc_pre_update(intel_crtc, intel_crtc->config, - to_intel_plane_state(primary->state)); - - if (mmio_flip) { - INIT_WORK(&work->mmio_work, intel_mmio_flip_work_func); - queue_work(system_unbound_wq, &work->mmio_work); - } else { - request = i915_gem_request_alloc(engine, - dev_priv->kernel_context); - if (IS_ERR(request)) { - ret = PTR_ERR(request); - goto cleanup_unpin; - } - - ret = i915_gem_request_await_object(request, obj, false); - if (ret) - goto cleanup_request; - - ret = dev_priv->display.queue_flip(dev, crtc, fb, obj, request, - page_flip_flags); - if (ret) - goto cleanup_request; - - intel_mark_page_flip_active(intel_crtc, work); - - work->flip_queued_req = i915_gem_request_get(request); - i915_add_request(request); - } - - i915_gem_object_wait_priority(obj, 0, I915_PRIORITY_DISPLAY); - i915_gem_track_fb(intel_fb_obj(old_fb), obj, - to_intel_plane(primary)->frontbuffer_bit); - mutex_unlock(&dev->struct_mutex); - - intel_frontbuffer_flip_prepare(to_i915(dev), - to_intel_plane(primary)->frontbuffer_bit); - - trace_i915_flip_request(intel_crtc->plane, obj); - - return 0; - -cleanup_request: - i915_add_request(request); -cleanup_unpin: - to_intel_plane_state(primary->state)->vma = work->old_vma; - intel_unpin_fb_vma(vma); -cleanup_pending: - atomic_dec(&intel_crtc->unpin_work_count); -unlock: - mutex_unlock(&dev->struct_mutex); -cleanup: - crtc->primary->fb = old_fb; - update_state_fb(crtc->primary); - - i915_gem_object_put(obj); - drm_framebuffer_unreference(work->old_fb); - - spin_lock_irq(&dev->event_lock); - intel_crtc->flip_work = NULL; - spin_unlock_irq(&dev->event_lock); - - drm_crtc_vblank_put(crtc); -free_work: - kfree(work); - - if (ret == -EIO) { - struct drm_atomic_state *state; - struct drm_plane_state *plane_state; - -out_hang: - state = drm_atomic_state_alloc(dev); - if (!state) - return -ENOMEM; - state->acquire_ctx = dev->mode_config.acquire_ctx; - -retry: - plane_state = drm_atomic_get_plane_state(state, primary); - ret = PTR_ERR_OR_ZERO(plane_state); - if (!ret) { - drm_atomic_set_fb_for_plane(plane_state, fb); - - ret = drm_atomic_set_crtc_for_plane(plane_state, crtc); - if (!ret) - ret = drm_atomic_commit(state); - } - - if (ret == -EDEADLK) { - drm_modeset_backoff(state->acquire_ctx); - drm_atomic_state_clear(state); - goto retry; - } - - drm_atomic_state_put(state); - - if (ret == 0 && event) { - spin_lock_irq(&dev->event_lock); - drm_crtc_send_vblank_event(crtc, event); - spin_unlock_irq(&dev->event_lock); - } - } - return ret; -} - - /** * intel_wm_need_update - Check whether watermarks need updating * @plane: drm plane @@ -14736,34 +14108,6 @@ void intel_init_display_hooks(struct drm_i915_private *dev_priv) dev_priv->display.update_crtcs = skl_update_crtcs; else dev_priv->display.update_crtcs = intel_update_crtcs; - - switch (INTEL_INFO(dev_priv)->gen) { - case 2: - dev_priv->display.queue_flip = intel_gen2_queue_flip; - break; - - case 3: - dev_priv->display.queue_flip = intel_gen3_queue_flip; - break; - - case 4: - case 5: - dev_priv->display.queue_flip = intel_gen4_queue_flip; - break; - - case 6: - dev_priv->display.queue_flip = intel_gen6_queue_flip; - break; - case 7: - case 8: /* FIXME(BDW): Check that the gen8 RCS flip works. */ - dev_priv->display.queue_flip = intel_gen7_queue_flip; - break; - case 9: - /* Drop through - unsupported since execlist only. */ - default: - /* Default just returns -ENODEV to indicate unsupported */ - dev_priv->display.queue_flip = intel_default_queue_flip; - } }
/*
Taking the modeset locks unconditionally isn't the greatest idea, because atm that part is still broken and times out (and then atomic keels over). And there's really no reason to do so, the old code didn't do that either.
To make the patch a bit simpler let's also nuke 2 cases that are only around for the old mmioflip paths. Atomic nonblocking workers will not die (minus bugs) when a gpu reset happens.
And of course this doesn't fix any of the gpu reset vs. modeset deadlock fun, but it at least stop modern CI machines from keeling over all over the place for no reason at all.
And we still have the explicit testcases to run the fake gpu reset, so coverage isn't that much worse.
v2: Split out additional changes on top, restrict this to purely reducing the critical section of modeset locks.
v2: Review from Maarten - update comments - don't oops when state is NULL in intel_finish_reset, but try to at least still drop locks properly. The hw is going to be toast anyway.
Fixes: 739748939974 ("drm/i915: Fix modeset handling during gpu reset, v5.") Cc: Maarten Lankhorst maarten.lankhorst@linux.intel.com Reviewed-by: Maarten Lankhorst maarten.lankhorst@linux.intel.com Cc: Ville Syrjälä ville.syrjala@linux.intel.com Signed-off-by: Daniel Vetter daniel.vetter@intel.com --- drivers/gpu/drm/i915/intel_display.c | 60 +++++++++++------------------------- 1 file changed, 18 insertions(+), 42 deletions(-)
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 3349ca947173..97777ffa1566 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -3413,26 +3413,6 @@ static void intel_complete_page_flips(struct drm_i915_private *dev_priv) intel_finish_page_flip_cs(dev_priv, crtc->pipe); }
-static void intel_update_primary_planes(struct drm_device *dev) -{ - struct drm_crtc *crtc; - - for_each_crtc(dev, crtc) { - struct intel_plane *plane = to_intel_plane(crtc->primary); - struct intel_plane_state *plane_state = - to_intel_plane_state(plane->base.state); - - if (plane_state->base.visible) { - trace_intel_update_plane(&plane->base, - to_intel_crtc(crtc)); - - plane->update_plane(plane, - to_intel_crtc_state(crtc->state), - plane_state); - } - } -} - static int __intel_display_resume(struct drm_device *dev, struct drm_atomic_state *state, @@ -3485,6 +3465,12 @@ void intel_prepare_reset(struct drm_i915_private *dev_priv) struct drm_atomic_state *state; int ret;
+ + /* reset doesn't touch the display */ + if (!i915.force_reset_modeset_test && + !gpu_reset_clobbers_display(dev_priv)) + return; + /* * Need mode_config.mutex so that we don't * trample ongoing ->detect() and whatnot. @@ -3498,12 +3484,6 @@ void intel_prepare_reset(struct drm_i915_private *dev_priv)
drm_modeset_backoff(ctx); } - - /* reset doesn't touch the display, but flips might get nuked anyway, */ - if (!i915.force_reset_modeset_test && - !gpu_reset_clobbers_display(dev_priv)) - return; - /* * Disabling the crtcs gracefully seems nicer. Also the * g33 docs say we should at least disable all the planes. @@ -3533,6 +3513,14 @@ void intel_finish_reset(struct drm_i915_private *dev_priv) struct drm_atomic_state *state = dev_priv->modeset_restore_state; int ret;
+ /* reset doesn't touch the display */ + if (!i915.force_reset_modeset_test && + !gpu_reset_clobbers_display(dev_priv)) + return; + + if (!state) + goto unlock; + /* * Flips in the rings will be nuked by the reset, * so complete all pending flips so that user space @@ -3544,22 +3532,10 @@ void intel_finish_reset(struct drm_i915_private *dev_priv)
/* reset doesn't touch the display */ if (!gpu_reset_clobbers_display(dev_priv)) { - if (!state) { - /* - * Flips in the rings have been nuked by the reset, - * so update the base address of all primary - * planes to the the last fb to make sure we're - * showing the correct fb after a reset. - * - * FIXME: Atomic will make this obsolete since we won't schedule - * CS-based flips (which might get lost in gpu resets) any more. - */ - intel_update_primary_planes(dev); - } else { - ret = __intel_display_resume(dev, state, ctx); + /* for testing only restore the display */ + ret = __intel_display_resume(dev, state, ctx); if (ret) DRM_ERROR("Restoring old state failed with %i\n", ret); - } } else { /* * The display has been reset as well, @@ -3583,8 +3559,8 @@ void intel_finish_reset(struct drm_i915_private *dev_priv) intel_hpd_init(dev_priv); }
- if (state) - drm_atomic_state_put(state); + drm_atomic_state_put(state); +unlock: drm_modeset_drop_locks(ctx); drm_modeset_acquire_fini(ctx); mutex_unlock(&dev->mode_config.mutex);
... using the biggest hammer we have. This is essentially a weaponized version of the timeout-based wedging Chris added in
commit 36703e79a982c8ce5a8e43833291f2719e92d0d1 Author: Chris Wilson chris@chris-wilson.co.uk Date: Thu Jun 22 11:56:25 2017 +0100
drm/i915: Break modeset deadlocks on reset
Because defense-in-depth is good it's good to still have both. Also note that with the locking change we can now restrict this a lot (old gpus and special testing only), so this doesn't kill the TDR benefits on at least anything remotely modern.
And futuremore with a few tricks it should be possible to make a much more educated guess about whether an atomic commit is stuck waiting on the gpu (atomic_t counting the pending i915_sw_fence used by the atomic modeset code should do it), so we can improve this.
But for now just start with something that is guaranteed to recover faster, for much better CI througput.
This defacto reverts TDR on these platforms, but there's not really a single commit to specify as the sole offender.
Fixes: 4680816be336 ("drm/i915: Wait first for submission, before waiting for request completion") Fixes: 221fe7994554 ("drm/i915: Perform a direct reset of the GPU from the waiter") Cc: Chris Wilson chris@chris-wilson.co.uk Cc: Mika Kuoppala mika.kuoppala@intel.com Cc: Joonas Lahtinen joonas.lahtinen@linux.intel.com Signed-off-by: Daniel Vetter daniel.vetter@intel.com --- drivers/gpu/drm/i915/intel_display.c | 5 +++++ 1 file changed, 5 insertions(+)
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 97777ffa1566..010a1f3e000c 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -3471,6 +3471,11 @@ void intel_prepare_reset(struct drm_i915_private *dev_priv) !gpu_reset_clobbers_display(dev_priv)) return;
+ /* We have a modeset vs reset deadlock, defensively unbreak it. + * + * FIXME: We can do a _lot_ better, this is just a first iteration.*/ + i915_gem_set_wedged(dev_priv); + /* * Need mode_config.mutex so that we don't * trample ongoing ->detect() and whatnot.
Quoting Daniel Vetter (2017-07-19 13:54:56)
... using the biggest hammer we have. This is essentially a weaponized version of the timeout-based wedging Chris added in
commit 36703e79a982c8ce5a8e43833291f2719e92d0d1 Author: Chris Wilson chris@chris-wilson.co.uk Date: Thu Jun 22 11:56:25 2017 +0100
drm/i915: Break modeset deadlocks on reset
Because defense-in-depth is good it's good to still have both. Also note that with the locking change we can now restrict this a lot (old gpus and special testing only), so this doesn't kill the TDR benefits on at least anything remotely modern.
And futuremore with a few tricks it should be possible to make a much more educated guess about whether an atomic commit is stuck waiting on the gpu (atomic_t counting the pending i915_sw_fence used by the atomic modeset code should do it), so we can improve this.
But for now just start with something that is guaranteed to recover faster, for much better CI througput.
This defacto reverts TDR on these platforms, but there's not really a single commit to specify as the sole offender.
Fixes: 4680816be336 ("drm/i915: Wait first for submission, before waiting for request completion") Fixes: 221fe7994554 ("drm/i915: Perform a direct reset of the GPU from the waiter") Cc: Chris Wilson chris@chris-wilson.co.uk Cc: Mika Kuoppala mika.kuoppala@intel.com Cc: Joonas Lahtinen joonas.lahtinen@linux.intel.com Signed-off-by: Daniel Vetter daniel.vetter@intel.com
drivers/gpu/drm/i915/intel_display.c | 5 +++++ 1 file changed, 5 insertions(+)
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 97777ffa1566..010a1f3e000c 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -3471,6 +3471,11 @@ void intel_prepare_reset(struct drm_i915_private *dev_priv) !gpu_reset_clobbers_display(dev_priv)) return;
/* We have a modeset vs reset deadlock, defensively unbreak it.
*
* FIXME: We can do a _lot_ better, this is just a first iteration.*/
You should keep the error message for wedging the device. If all goes well it is removed in a few patches, so shouldn't be an eyesore for long. -Chris
On Wed, Jul 19, 2017 at 3:32 PM, Chris Wilson chris@chris-wilson.co.uk wrote:
Quoting Daniel Vetter (2017-07-19 13:54:56)
... using the biggest hammer we have. This is essentially a weaponized version of the timeout-based wedging Chris added in
commit 36703e79a982c8ce5a8e43833291f2719e92d0d1 Author: Chris Wilson chris@chris-wilson.co.uk Date: Thu Jun 22 11:56:25 2017 +0100
drm/i915: Break modeset deadlocks on reset
Because defense-in-depth is good it's good to still have both. Also note that with the locking change we can now restrict this a lot (old gpus and special testing only), so this doesn't kill the TDR benefits on at least anything remotely modern.
And futuremore with a few tricks it should be possible to make a much more educated guess about whether an atomic commit is stuck waiting on the gpu (atomic_t counting the pending i915_sw_fence used by the atomic modeset code should do it), so we can improve this.
But for now just start with something that is guaranteed to recover faster, for much better CI througput.
This defacto reverts TDR on these platforms, but there's not really a single commit to specify as the sole offender.
Fixes: 4680816be336 ("drm/i915: Wait first for submission, before waiting for request completion") Fixes: 221fe7994554 ("drm/i915: Perform a direct reset of the GPU from the waiter") Cc: Chris Wilson chris@chris-wilson.co.uk Cc: Mika Kuoppala mika.kuoppala@intel.com Cc: Joonas Lahtinen joonas.lahtinen@linux.intel.com Signed-off-by: Daniel Vetter daniel.vetter@intel.com
drivers/gpu/drm/i915/intel_display.c | 5 +++++ 1 file changed, 5 insertions(+)
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 97777ffa1566..010a1f3e000c 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -3471,6 +3471,11 @@ void intel_prepare_reset(struct drm_i915_private *dev_priv) !gpu_reset_clobbers_display(dev_priv)) return;
/* We have a modeset vs reset deadlock, defensively unbreak it.
*
* FIXME: We can do a _lot_ better, this is just a first iteration.*/
You should keep the error message for wedging the device. If all goes well it is removed in a few patches, so shouldn't be an eyesore for long.
Yeah makes sense, just in case the next patches need to be reverted for some reasons. That's why I split it ou. Something like
DRM_ERROR("Wedging gpu to unblock modesets\n");
and then remove that again 2 patches later? -Daniel
On Wed, Jul 19, 2017 at 3:44 PM, Daniel Vetter daniel.vetter@ffwll.ch wrote:
On Wed, Jul 19, 2017 at 3:32 PM, Chris Wilson chris@chris-wilson.co.uk wrote:
Quoting Daniel Vetter (2017-07-19 13:54:56)
... using the biggest hammer we have. This is essentially a weaponized version of the timeout-based wedging Chris added in
commit 36703e79a982c8ce5a8e43833291f2719e92d0d1 Author: Chris Wilson chris@chris-wilson.co.uk Date: Thu Jun 22 11:56:25 2017 +0100
drm/i915: Break modeset deadlocks on reset
Because defense-in-depth is good it's good to still have both. Also note that with the locking change we can now restrict this a lot (old gpus and special testing only), so this doesn't kill the TDR benefits on at least anything remotely modern.
And futuremore with a few tricks it should be possible to make a much more educated guess about whether an atomic commit is stuck waiting on the gpu (atomic_t counting the pending i915_sw_fence used by the atomic modeset code should do it), so we can improve this.
But for now just start with something that is guaranteed to recover faster, for much better CI througput.
This defacto reverts TDR on these platforms, but there's not really a single commit to specify as the sole offender.
Fixes: 4680816be336 ("drm/i915: Wait first for submission, before waiting for request completion") Fixes: 221fe7994554 ("drm/i915: Perform a direct reset of the GPU from the waiter") Cc: Chris Wilson chris@chris-wilson.co.uk Cc: Mika Kuoppala mika.kuoppala@intel.com Cc: Joonas Lahtinen joonas.lahtinen@linux.intel.com Signed-off-by: Daniel Vetter daniel.vetter@intel.com
drivers/gpu/drm/i915/intel_display.c | 5 +++++ 1 file changed, 5 insertions(+)
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 97777ffa1566..010a1f3e000c 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -3471,6 +3471,11 @@ void intel_prepare_reset(struct drm_i915_private *dev_priv) !gpu_reset_clobbers_display(dev_priv)) return;
/* We have a modeset vs reset deadlock, defensively unbreak it.
*
* FIXME: We can do a _lot_ better, this is just a first iteration.*/
You should keep the error message for wedging the device. If all goes well it is removed in a few patches, so shouldn't be an eyesore for long.
Yeah makes sense, just in case the next patches need to be reverted for some reasons. That's why I split it ou. Something like
DRM_ERROR("Wedging gpu to unblock modesets\n");
and then remove that again 2 patches later?
After thinking about this a bit more, s/DRM_ERROR/DRM_DEBUG_KMS/ or so. We know we can deadlock here, complaining about that only spams dmesg and results in noise in CI, hiding worse stuff. The timeout based wedging as fallback should have a DRM_ERROR since it's unexpected, this one here imo sholdn't. And with the follow-up patches it won't be unconditional anymore (if we don't have to revert them again). -Daniel
Blocking in a worker is ok, that's what the unbound_wq is for. And it unifies the paths between the blocking and nonblocking commit, giving me just one path where I have to implement the deadlock avoidance trickery in the next patch.
Cc: Chris Wilson chris@chris-wilson.co.uk Cc: Mika Kuoppala mika.kuoppala@intel.com Cc: Joonas Lahtinen joonas.lahtinen@linux.intel.com Signed-off-by: Daniel Vetter daniel.vetter@intel.com --- drivers/gpu/drm/i915/intel_display.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-)
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 010a1f3e000c..5aa7ca1ab592 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -12397,6 +12397,8 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state) unsigned crtc_vblank_mask = 0; int i;
+ i915_sw_fence_wait(&intel_state->commit_ready); + drm_atomic_helper_wait_for_dependencies(state);
if (intel_state->modeset) @@ -12564,10 +12566,7 @@ intel_atomic_commit_ready(struct i915_sw_fence *fence,
switch (notify) { case FENCE_COMPLETE: - if (state->base.commit_work.func) - queue_work(system_unbound_wq, &state->base.commit_work); break; - case FENCE_FREE: { struct intel_atomic_helper *helper = @@ -12671,14 +12670,14 @@ static int intel_atomic_commit(struct drm_device *dev, }
drm_atomic_state_get(state); - INIT_WORK(&state->commit_work, - nonblock ? intel_atomic_commit_work : NULL); + INIT_WORK(&state->commit_work, intel_atomic_commit_work);
i915_sw_fence_commit(&intel_state->commit_ready); - if (!nonblock) { - i915_sw_fence_wait(&intel_state->commit_ready); + if (nonblock) + queue_work(system_unbound_wq, &state->commit_work); + else intel_atomic_commit_tail(state); - } +
return 0; }
Quoting Daniel Vetter (2017-07-19 13:54:57)
Blocking in a worker is ok,
but needlessly inefficient,
that's what the unbound_wq is for. And it unifies the paths between the blocking and nonblocking commit, giving me just one path where I have to implement the deadlock avoidance trickery in the next patch.
For reference, I did that the other way by moving it all over to fences. -Chris
On Wed, Jul 19, 2017 at 02:04:25PM +0100, Chris Wilson wrote:
Quoting Daniel Vetter (2017-07-19 13:54:57)
Blocking in a worker is ok,
but needlessly inefficient,
that's what the unbound_wq is for. And it unifies the paths between the blocking and nonblocking commit, giving me just one path where I have to implement the deadlock avoidance trickery in the next patch.
For reference, I did that the other way by moving it all over to fences.
Yeah the commit message fails to explain this here:
"I first tried to implement the following patch without this rework, but force-completing i915_sw_fence creates some serious challenges around properly cleaning things up. So wasn't a feasible short-term approach. Another approach would be to simple keep track of all pending atomic commit work items and manually queue them from the reset code. With the caveat that double-queue in case we race with the i915_sw_fence must be avoided. Given all that, taking the cost of a double schedule in atomic for the short-term fix is the best approach, but can be changed in the future of course."
Thanks, Daniel
There's no reason to entirely wedge the gpu, for the minimal deadlock bugfix we only need to unbreak/decouple the atomic commit from the gpu reset. The simplest wait to fix that is by replacing the unconditional fence wait a the top of commit_tail by a wait which completes either when the fences are done (normal case, or when a reset doesn't need to touch the display state). Or when the gpu reset needs to force-unblock all pending modeset states.
Note that in both cases TDR itself keeps working, so from a userspace pov this trickery isn't observable. Users themselvs might spot a short glitch while the rendering is catching up again, but that's still better than pre-TDR where we've thrown away all the rendering, including innocent batches. Also, this fixes the regression TDR introduced of making gpu resets deadlock-prone when we do need to touch the display.
One thing I noticed is that gpu_error.flags seems to use both our own wait-queue in gpu_error.wait_queue, and the generic wait_on_bit facilities. Not entirely sure why this inconsistency exists, I just picked one style.
A possible future avenue could be to insert the gpu reset in-between ongoing modeset changes, which would avoid the momentary glitch. But that's a lot more work to implement in the atomic commit machinery, and given that we only need this for pre-g4x hw, of questionable utility just for the sake of polishing gpu reset even more on those old boxes. It might be useful for other features though.
v2: Rebase onto 4.13 with a s/wait_queue_t/struct wait_queue_entry/.
Cc: Chris Wilson chris@chris-wilson.co.uk Cc: Mika Kuoppala mika.kuoppala@intel.com Cc: Joonas Lahtinen joonas.lahtinen@linux.intel.com Signed-off-by: Daniel Vetter daniel.vetter@intel.com --- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/intel_display.c | 35 ++++++++++++++++++++++++++++++----- 2 files changed, 31 insertions(+), 5 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 07e98b07c5bc..369968539b40 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1564,6 +1564,7 @@ struct i915_gpu_error { unsigned long flags; #define I915_RESET_BACKOFF 0 #define I915_RESET_HANDOFF 1 +#define I915_RESET_MODESET 2 #define I915_WEDGED (BITS_PER_LONG - 1) #define I915_RESET_ENGINE (I915_WEDGED - I915_NUM_ENGINES)
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 5aa7ca1ab592..4762f158032d 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -3471,10 +3471,9 @@ void intel_prepare_reset(struct drm_i915_private *dev_priv) !gpu_reset_clobbers_display(dev_priv)) return;
- /* We have a modeset vs reset deadlock, defensively unbreak it. - * - * FIXME: We can do a _lot_ better, this is just a first iteration.*/ - i915_gem_set_wedged(dev_priv); + /* We have a modeset vs reset deadlock, defensively unbreak it. */ + set_bit(I915_RESET_MODESET, &dev_priv->gpu_error.flags); + wake_up_all(&dev_priv->gpu_error.wait_queue);
/* * Need mode_config.mutex so that we don't @@ -3569,6 +3568,8 @@ void intel_finish_reset(struct drm_i915_private *dev_priv) drm_modeset_drop_locks(ctx); drm_modeset_acquire_fini(ctx); mutex_unlock(&dev->mode_config.mutex); + + clear_bit(I915_RESET_MODESET, &dev_priv->gpu_error.flags); }
static bool abort_flip_on_reset(struct intel_crtc *crtc) @@ -12384,6 +12385,30 @@ static void intel_atomic_helper_free_state_worker(struct work_struct *work) intel_atomic_helper_free_state(dev_priv); }
+static void intel_atomic_commit_fence_wait(struct intel_atomic_state *intel_state) +{ + struct wait_queue_entry wait_fence, wait_reset; + struct drm_i915_private *dev_priv = to_i915(intel_state->base.dev); + + init_wait_entry(&wait_fence, 0); + init_wait_entry(&wait_reset, 0); + for (;;) { + prepare_to_wait(&intel_state->commit_ready.wait, + &wait_fence, TASK_UNINTERRUPTIBLE); + prepare_to_wait(&dev_priv->gpu_error.wait_queue, + &wait_reset, TASK_UNINTERRUPTIBLE); + + + if (i915_sw_fence_done(&intel_state->commit_ready) + || (dev_priv->gpu_error.flags & I915_RESET_MODESET)) + break; + + schedule(); + } + finish_wait(&intel_state->commit_ready.wait, &wait_fence); + finish_wait(&dev_priv->gpu_error.wait_queue, &wait_reset); +} + static void intel_atomic_commit_tail(struct drm_atomic_state *state) { struct drm_device *dev = state->dev; @@ -12397,7 +12422,7 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state) unsigned crtc_vblank_mask = 0; int i;
- i915_sw_fence_wait(&intel_state->commit_ready); + intel_atomic_commit_fence_wait(intel_state);
drm_atomic_helper_wait_for_dependencies(state);
Quoting Daniel Vetter (2017-07-19 13:54:58)
There's no reason to entirely wedge the gpu, for the minimal deadlock bugfix we only need to unbreak/decouple the atomic commit from the gpu reset. The simplest wait to fix that is by replacing the unconditional fence wait a the top of commit_tail by a wait which completes either when the fences are done (normal case, or when a reset doesn't need to touch the display state). Or when the gpu reset needs to force-unblock all pending modeset states.
Note that in both cases TDR itself keeps working, so from a userspace pov this trickery isn't observable. Users themselvs might spot a short glitch while the rendering is catching up again, but that's still better than pre-TDR where we've thrown away all the rendering, including innocent batches. Also, this fixes the regression TDR introduced of making gpu resets deadlock-prone when we do need to touch the display.
One thing I noticed is that gpu_error.flags seems to use both our own wait-queue in gpu_error.wait_queue, and the generic wait_on_bit facilities. Not entirely sure why this inconsistency exists, I just picked one style.
A possible future avenue could be to insert the gpu reset in-between ongoing modeset changes, which would avoid the momentary glitch. But that's a lot more work to implement in the atomic commit machinery, and given that we only need this for pre-g4x hw, of questionable utility just for the sake of polishing gpu reset even more on those old boxes. It might be useful for other features though.
v2: Rebase onto 4.13 with a s/wait_queue_t/struct wait_queue_entry/.
Cc: Chris Wilson chris@chris-wilson.co.uk Cc: Mika Kuoppala mika.kuoppala@intel.com Cc: Joonas Lahtinen joonas.lahtinen@linux.intel.com Signed-off-by: Daniel Vetter daniel.vetter@intel.com
drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/intel_display.c | 35 ++++++++++++++++++++++++++++++----- 2 files changed, 31 insertions(+), 5 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 07e98b07c5bc..369968539b40 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1564,6 +1564,7 @@ struct i915_gpu_error { unsigned long flags; #define I915_RESET_BACKOFF 0 #define I915_RESET_HANDOFF 1 +#define I915_RESET_MODESET 2 #define I915_WEDGED (BITS_PER_LONG - 1) #define I915_RESET_ENGINE (I915_WEDGED - I915_NUM_ENGINES)
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 5aa7ca1ab592..4762f158032d 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -3471,10 +3471,9 @@ void intel_prepare_reset(struct drm_i915_private *dev_priv) !gpu_reset_clobbers_display(dev_priv)) return;
/* We have a modeset vs reset deadlock, defensively unbreak it.
*
* FIXME: We can do a _lot_ better, this is just a first iteration.*/
i915_gem_set_wedged(dev_priv);
/* We have a modeset vs reset deadlock, defensively unbreak it. */
set_bit(I915_RESET_MODESET, &dev_priv->gpu_error.flags);
wake_up_all(&dev_priv->gpu_error.wait_queue);
How are we breaking the
modeset_lock -> struct_mutex -> wait_on_reset ?
We wait the modeset_lock next which stops the reset from proceeding, and so the deadlock persists until the wedge-me timeout? -Chris
On Wed, Jul 19, 2017 at 3:42 PM, Chris Wilson chris@chris-wilson.co.uk wrote:
Quoting Daniel Vetter (2017-07-19 13:54:58)
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 5aa7ca1ab592..4762f158032d 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -3471,10 +3471,9 @@ void intel_prepare_reset(struct drm_i915_private *dev_priv) !gpu_reset_clobbers_display(dev_priv)) return;
/* We have a modeset vs reset deadlock, defensively unbreak it.
*
* FIXME: We can do a _lot_ better, this is just a first iteration.*/
i915_gem_set_wedged(dev_priv);
/* We have a modeset vs reset deadlock, defensively unbreak it. */
set_bit(I915_RESET_MODESET, &dev_priv->gpu_error.flags);
wake_up_all(&dev_priv->gpu_error.wait_queue);
How are we breaking the
modeset_lock -> struct_mutex -> wait_on_reset ?
We wait the modeset_lock next which stops the reset from proceeding, and so the deadlock persists until the wedge-me timeout?
Hm indeed, I didn't check my logs carefully enough and there's still "i915_reset_device timed out" in it. But I also thought the only real wait we have left for the gpu is the one under i915_sw_fence. I think we could simply switch i915_mutex_lock_interruptible calls in atomic modeset over mutex_lock_interruptible? Or is there another can of worms I'm missing? -Daniel
On Wed, Jul 19, 2017 at 4:05 PM, Daniel Vetter daniel.vetter@ffwll.ch wrote:
On Wed, Jul 19, 2017 at 3:42 PM, Chris Wilson chris@chris-wilson.co.uk wrote:
Quoting Daniel Vetter (2017-07-19 13:54:58)
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 5aa7ca1ab592..4762f158032d 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -3471,10 +3471,9 @@ void intel_prepare_reset(struct drm_i915_private *dev_priv) !gpu_reset_clobbers_display(dev_priv)) return;
/* We have a modeset vs reset deadlock, defensively unbreak it.
*
* FIXME: We can do a _lot_ better, this is just a first iteration.*/
i915_gem_set_wedged(dev_priv);
/* We have a modeset vs reset deadlock, defensively unbreak it. */
set_bit(I915_RESET_MODESET, &dev_priv->gpu_error.flags);
wake_up_all(&dev_priv->gpu_error.wait_queue);
How are we breaking the
modeset_lock -> struct_mutex -> wait_on_reset ?
We wait the modeset_lock next which stops the reset from proceeding, and so the deadlock persists until the wedge-me timeout?
Hm indeed, I didn't check my logs carefully enough and there's still "i915_reset_device timed out" in it. But I also thought the only real wait we have left for the gpu is the one under i915_sw_fence. I think we could simply switch i915_mutex_lock_interruptible calls in atomic modeset over mutex_lock_interruptible? Or is there another can of worms I'm missing?
Obviously, because that's what we're doing already. But I don't have the DRM_ERROR from i915_gem_wait_for_error anywhere in my logs either, so not clear what exactly is going on ... -Daniel
All these races and things are now solved through the vblank evasion trick, plus event handling is done using normal vblank even processing and drm_crtc_arm_vblank_event. We can get rid of all this complexity.
Cc: Maarten Lankhorst maarten.lankhorst@linux.intel.com Cc: Ville Syrjälä ville.syrjala@linux.intel.com Signed-off-by: Daniel Vetter daniel.vetter@intel.com --- drivers/gpu/drm/i915/i915_irq.c | 151 ++++-------------------- drivers/gpu/drm/i915/intel_display.c | 215 ----------------------------------- drivers/gpu/drm/i915/intel_drv.h | 3 - 3 files changed, 22 insertions(+), 347 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index f0cb278cee8b..b64c89e0fbf1 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1708,18 +1708,6 @@ static void gen9_guc_irq_handler(struct drm_i915_private *dev_priv, u32 gt_iir) } }
-static bool intel_pipe_handle_vblank(struct drm_i915_private *dev_priv, - enum pipe pipe) -{ - bool ret; - - ret = drm_handle_vblank(&dev_priv->drm, pipe); - if (ret) - intel_finish_page_flip_mmio(dev_priv, pipe); - - return ret; -} - static void valleyview_pipestat_irq_ack(struct drm_i915_private *dev_priv, u32 iir, u32 pipe_stats[I915_MAX_PIPES]) { @@ -1784,12 +1772,8 @@ static void valleyview_pipestat_irq_handler(struct drm_i915_private *dev_priv, enum pipe pipe;
for_each_pipe(dev_priv, pipe) { - if (pipe_stats[pipe] & PIPE_START_VBLANK_INTERRUPT_STATUS && - intel_pipe_handle_vblank(dev_priv, pipe)) - intel_check_page_flip(dev_priv, pipe); - - if (pipe_stats[pipe] & PLANE_FLIP_DONE_INT_STATUS_VLV) - intel_finish_page_flip_cs(dev_priv, pipe); + if (pipe_stats[pipe] & PIPE_START_VBLANK_INTERRUPT_STATUS) + drm_handle_vblank(&dev_priv->drm, pipe);
if (pipe_stats[pipe] & PIPE_CRC_DONE_INTERRUPT_STATUS) i9xx_pipe_crc_irq_handler(dev_priv, pipe); @@ -2241,19 +2225,14 @@ static void ilk_display_irq_handler(struct drm_i915_private *dev_priv, DRM_ERROR("Poison interrupt\n");
for_each_pipe(dev_priv, pipe) { - if (de_iir & DE_PIPE_VBLANK(pipe) && - intel_pipe_handle_vblank(dev_priv, pipe)) - intel_check_page_flip(dev_priv, pipe); + if (de_iir & DE_PIPE_VBLANK(pipe)) + drm_handle_vblank(&dev_priv->drm, pipe);
if (de_iir & DE_PIPE_FIFO_UNDERRUN(pipe)) intel_cpu_fifo_underrun_irq_handler(dev_priv, pipe);
if (de_iir & DE_PIPE_CRC_DONE(pipe)) i9xx_pipe_crc_irq_handler(dev_priv, pipe); - - /* plane/pipes map 1:1 on ilk+ */ - if (de_iir & DE_PLANE_FLIP_DONE(pipe)) - intel_finish_page_flip_cs(dev_priv, pipe); }
/* check event from PCH */ @@ -2315,13 +2294,8 @@ static void ivb_display_irq_handler(struct drm_i915_private *dev_priv, intel_opregion_asle_intr(dev_priv);
for_each_pipe(dev_priv, pipe) { - if (de_iir & (DE_PIPE_VBLANK_IVB(pipe)) && - intel_pipe_handle_vblank(dev_priv, pipe)) - intel_check_page_flip(dev_priv, pipe); - - /* plane/pipes map 1:1 on ilk+ */ - if (de_iir & DE_PLANE_FLIP_DONE_IVB(pipe)) - intel_finish_page_flip_cs(dev_priv, pipe); + if (de_iir & (DE_PIPE_VBLANK_IVB(pipe))) + drm_handle_vblank(&dev_priv->drm, pipe); }
/* check event from PCH */ @@ -2502,7 +2476,7 @@ gen8_de_irq_handler(struct drm_i915_private *dev_priv, u32 master_ctl) }
for_each_pipe(dev_priv, pipe) { - u32 flip_done, fault_errors; + u32 fault_errors;
if (!(master_ctl & GEN8_DE_PIPE_IRQ(pipe))) continue; @@ -2516,18 +2490,8 @@ gen8_de_irq_handler(struct drm_i915_private *dev_priv, u32 master_ctl) ret = IRQ_HANDLED; I915_WRITE(GEN8_DE_PIPE_IIR(pipe), iir);
- if (iir & GEN8_PIPE_VBLANK && - intel_pipe_handle_vblank(dev_priv, pipe)) - intel_check_page_flip(dev_priv, pipe); - - flip_done = iir; - if (INTEL_INFO(dev_priv)->gen >= 9) - flip_done &= GEN9_PIPE_PLANE1_FLIP_DONE; - else - flip_done &= GEN8_PIPE_PRIMARY_FLIP_DONE; - - if (flip_done) - intel_finish_page_flip_cs(dev_priv, pipe); + if (iir & GEN8_PIPE_VBLANK) + drm_handle_vblank(&dev_priv->drm, pipe);
if (iir & GEN8_PIPE_CDCLK_CRC_DONE) hsw_pipe_crc_irq_handler(dev_priv, pipe); @@ -3710,34 +3674,6 @@ static int i8xx_irq_postinstall(struct drm_device *dev) /* * Returns true when a page flip has completed. */ -static bool i8xx_handle_vblank(struct drm_i915_private *dev_priv, - int plane, int pipe, u32 iir) -{ - u16 flip_pending = DISPLAY_PLANE_FLIP_PENDING(plane); - - if (!intel_pipe_handle_vblank(dev_priv, pipe)) - return false; - - if ((iir & flip_pending) == 0) - goto check_page_flip; - - /* We detect FlipDone by looking for the change in PendingFlip from '1' - * to '0' on the following vblank, i.e. IIR has the Pendingflip - * asserted following the MI_DISPLAY_FLIP, but ISR is deasserted, hence - * the flip is completed (no longer pending). Since this doesn't raise - * an interrupt per se, we watch for the change at vblank. - */ - if (I915_READ16(ISR) & flip_pending) - goto check_page_flip; - - intel_finish_page_flip_cs(dev_priv, pipe); - return true; - -check_page_flip: - intel_check_page_flip(dev_priv, pipe); - return false; -} - static irqreturn_t i8xx_irq_handler(int irq, void *arg) { struct drm_device *dev = arg; @@ -3745,9 +3681,6 @@ static irqreturn_t i8xx_irq_handler(int irq, void *arg) u16 iir, new_iir; u32 pipe_stats[2]; int pipe; - u16 flip_mask = - I915_DISPLAY_PLANE_A_FLIP_PENDING_INTERRUPT | - I915_DISPLAY_PLANE_B_FLIP_PENDING_INTERRUPT; irqreturn_t ret;
if (!intel_irqs_enabled(dev_priv)) @@ -3761,7 +3694,7 @@ static irqreturn_t i8xx_irq_handler(int irq, void *arg) if (iir == 0) goto out;
- while (iir & ~flip_mask) { + while (iir) { /* Can't rely on pipestat interrupt bit in iir as it might * have been cleared after the pipestat interrupt was received. * It doesn't set the bit in iir again, but it still produces @@ -3783,7 +3716,7 @@ static irqreturn_t i8xx_irq_handler(int irq, void *arg) } spin_unlock(&dev_priv->irq_lock);
- I915_WRITE16(IIR, iir & ~flip_mask); + I915_WRITE16(IIR, iir); new_iir = I915_READ16(IIR); /* Flush posted writes */
if (iir & I915_USER_INTERRUPT) @@ -3794,9 +3727,8 @@ static irqreturn_t i8xx_irq_handler(int irq, void *arg) if (HAS_FBC(dev_priv)) plane = !plane;
- if (pipe_stats[pipe] & PIPE_VBLANK_INTERRUPT_STATUS && - i8xx_handle_vblank(dev_priv, plane, pipe, iir)) - flip_mask &= ~DISPLAY_PLANE_FLIP_PENDING(plane); + if (pipe_stats[pipe] & PIPE_VBLANK_INTERRUPT_STATUS) + drm_handle_vblank(&dev_priv->drm, pipe);
if (pipe_stats[pipe] & PIPE_CRC_DONE_INTERRUPT_STATUS) i9xx_pipe_crc_irq_handler(dev_priv, pipe); @@ -3896,45 +3828,11 @@ static int i915_irq_postinstall(struct drm_device *dev) return 0; }
-/* - * Returns true when a page flip has completed. - */ -static bool i915_handle_vblank(struct drm_i915_private *dev_priv, - int plane, int pipe, u32 iir) -{ - u32 flip_pending = DISPLAY_PLANE_FLIP_PENDING(plane); - - if (!intel_pipe_handle_vblank(dev_priv, pipe)) - return false; - - if ((iir & flip_pending) == 0) - goto check_page_flip; - - /* We detect FlipDone by looking for the change in PendingFlip from '1' - * to '0' on the following vblank, i.e. IIR has the Pendingflip - * asserted following the MI_DISPLAY_FLIP, but ISR is deasserted, hence - * the flip is completed (no longer pending). Since this doesn't raise - * an interrupt per se, we watch for the change at vblank. - */ - if (I915_READ(ISR) & flip_pending) - goto check_page_flip; - - intel_finish_page_flip_cs(dev_priv, pipe); - return true; - -check_page_flip: - intel_check_page_flip(dev_priv, pipe); - return false; -} - static irqreturn_t i915_irq_handler(int irq, void *arg) { struct drm_device *dev = arg; struct drm_i915_private *dev_priv = to_i915(dev); u32 iir, new_iir, pipe_stats[I915_MAX_PIPES]; - u32 flip_mask = - I915_DISPLAY_PLANE_A_FLIP_PENDING_INTERRUPT | - I915_DISPLAY_PLANE_B_FLIP_PENDING_INTERRUPT; int pipe, ret = IRQ_NONE;
if (!intel_irqs_enabled(dev_priv)) @@ -3945,7 +3843,7 @@ static irqreturn_t i915_irq_handler(int irq, void *arg)
iir = I915_READ(IIR); do { - bool irq_received = (iir & ~flip_mask) != 0; + bool irq_received = (iir) != 0; bool blc_event = false;
/* Can't rely on pipestat interrupt bit in iir as it might @@ -3980,7 +3878,7 @@ static irqreturn_t i915_irq_handler(int irq, void *arg) i9xx_hpd_irq_handler(dev_priv, hotplug_status); }
- I915_WRITE(IIR, iir & ~flip_mask); + I915_WRITE(IIR, iir); new_iir = I915_READ(IIR); /* Flush posted writes */
if (iir & I915_USER_INTERRUPT) @@ -3991,9 +3889,8 @@ static irqreturn_t i915_irq_handler(int irq, void *arg) if (HAS_FBC(dev_priv)) plane = !plane;
- if (pipe_stats[pipe] & PIPE_VBLANK_INTERRUPT_STATUS && - i915_handle_vblank(dev_priv, plane, pipe, iir)) - flip_mask &= ~DISPLAY_PLANE_FLIP_PENDING(plane); + if (pipe_stats[pipe] & PIPE_VBLANK_INTERRUPT_STATUS) + drm_handle_vblank(&dev_priv->drm, pipe);
if (pipe_stats[pipe] & PIPE_LEGACY_BLC_EVENT_STATUS) blc_event = true; @@ -4026,7 +3923,7 @@ static irqreturn_t i915_irq_handler(int irq, void *arg) */ ret = IRQ_HANDLED; iir = new_iir; - } while (iir & ~flip_mask); + } while (iir);
enable_rpm_wakeref_asserts(dev_priv);
@@ -4161,9 +4058,6 @@ static irqreturn_t i965_irq_handler(int irq, void *arg) u32 iir, new_iir; u32 pipe_stats[I915_MAX_PIPES]; int ret = IRQ_NONE, pipe; - u32 flip_mask = - I915_DISPLAY_PLANE_A_FLIP_PENDING_INTERRUPT | - I915_DISPLAY_PLANE_B_FLIP_PENDING_INTERRUPT;
if (!intel_irqs_enabled(dev_priv)) return IRQ_NONE; @@ -4174,7 +4068,7 @@ static irqreturn_t i965_irq_handler(int irq, void *arg) iir = I915_READ(IIR);
for (;;) { - bool irq_received = (iir & ~flip_mask) != 0; + bool irq_received = (iir) != 0; bool blc_event = false;
/* Can't rely on pipestat interrupt bit in iir as it might @@ -4212,7 +4106,7 @@ static irqreturn_t i965_irq_handler(int irq, void *arg) i9xx_hpd_irq_handler(dev_priv, hotplug_status); }
- I915_WRITE(IIR, iir & ~flip_mask); + I915_WRITE(IIR, iir); new_iir = I915_READ(IIR); /* Flush posted writes */
if (iir & I915_USER_INTERRUPT) @@ -4221,9 +4115,8 @@ static irqreturn_t i965_irq_handler(int irq, void *arg) notify_ring(dev_priv->engine[VCS]);
for_each_pipe(dev_priv, pipe) { - if (pipe_stats[pipe] & PIPE_START_VBLANK_INTERRUPT_STATUS && - i915_handle_vblank(dev_priv, pipe, pipe, iir)) - flip_mask &= ~DISPLAY_PLANE_FLIP_PENDING(pipe); + if (pipe_stats[pipe] & PIPE_START_VBLANK_INTERRUPT_STATUS) + drm_handle_vblank(&dev_priv->drm, pipe);
if (pipe_stats[pipe] & PIPE_LEGACY_BLC_EVENT_STATUS) blc_event = true; diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 4762f158032d..02620f31374b 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -3405,14 +3405,6 @@ static void skylake_disable_primary_plane(struct intel_plane *primary, spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); }
-static void intel_complete_page_flips(struct drm_i915_private *dev_priv) -{ - struct intel_crtc *crtc; - - for_each_intel_crtc(&dev_priv->drm, crtc) - intel_finish_page_flip_cs(dev_priv, crtc->pipe); -} - static int __intel_display_resume(struct drm_device *dev, struct drm_atomic_state *state, @@ -3525,13 +3517,6 @@ void intel_finish_reset(struct drm_i915_private *dev_priv) if (!state) goto unlock;
- /* - * Flips in the rings will be nuked by the reset, - * so complete all pending flips so that user space - * will get its events and not get stuck. - */ - intel_complete_page_flips(dev_priv); - dev_priv->modeset_restore_state = NULL;
/* reset doesn't touch the display */ @@ -10131,140 +10116,6 @@ static void intel_crtc_destroy(struct drm_crtc *crtc) kfree(intel_crtc); }
-/* Is 'a' after or equal to 'b'? */ -static bool g4x_flip_count_after_eq(u32 a, u32 b) -{ - return !((a - b) & 0x80000000); -} - -static bool __pageflip_finished_cs(struct intel_crtc *crtc, - struct intel_flip_work *work) -{ - struct drm_device *dev = crtc->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); - - if (abort_flip_on_reset(crtc)) - return true; - - /* - * The relevant registers doen't exist on pre-ctg. - * As the flip done interrupt doesn't trigger for mmio - * flips on gmch platforms, a flip count check isn't - * really needed there. But since ctg has the registers, - * include it in the check anyway. - */ - if (INTEL_GEN(dev_priv) < 5 && !IS_G4X(dev_priv)) - return true; - - /* - * BDW signals flip done immediately if the plane - * is disabled, even if the plane enable is already - * armed to occur at the next vblank :( - */ - - /* - * A DSPSURFLIVE check isn't enough in case the mmio and CS flips - * used the same base address. In that case the mmio flip might - * have completed, but the CS hasn't even executed the flip yet. - * - * A flip count check isn't enough as the CS might have updated - * the base address just after start of vblank, but before we - * managed to process the interrupt. This means we'd complete the - * CS flip too soon. - * - * Combining both checks should get us a good enough result. It may - * still happen that the CS flip has been executed, but has not - * yet actually completed. But in case the base address is the same - * anyway, we don't really care. - */ - return (I915_READ(DSPSURFLIVE(crtc->plane)) & ~0xfff) == - crtc->flip_work->gtt_offset && - g4x_flip_count_after_eq(I915_READ(PIPE_FLIPCOUNT_G4X(crtc->pipe)), - crtc->flip_work->flip_count); -} - -static bool -__pageflip_finished_mmio(struct intel_crtc *crtc, - struct intel_flip_work *work) -{ - /* - * MMIO work completes when vblank is different from - * flip_queued_vblank. - * - * Reset counter value doesn't matter, this is handled by - * i915_wait_request finishing early, so no need to handle - * reset here. - */ - return intel_crtc_get_vblank_counter(crtc) != work->flip_queued_vblank; -} - - -static bool pageflip_finished(struct intel_crtc *crtc, - struct intel_flip_work *work) -{ - if (!atomic_read(&work->pending)) - return false; - - smp_rmb(); - - if (is_mmio_work(work)) - return __pageflip_finished_mmio(crtc, work); - else - return __pageflip_finished_cs(crtc, work); -} - -void intel_finish_page_flip_cs(struct drm_i915_private *dev_priv, int pipe) -{ - struct drm_device *dev = &dev_priv->drm; - struct intel_crtc *crtc = intel_get_crtc_for_pipe(dev_priv, pipe); - struct intel_flip_work *work; - unsigned long flags; - - /* Ignore early vblank irqs */ - if (!crtc) - return; - - /* - * This is called both by irq handlers and the reset code (to complete - * lost pageflips) so needs the full irqsave spinlocks. - */ - spin_lock_irqsave(&dev->event_lock, flags); - work = crtc->flip_work; - - if (work != NULL && - !is_mmio_work(work) && - pageflip_finished(crtc, work)) - page_flip_completed(crtc); - - spin_unlock_irqrestore(&dev->event_lock, flags); -} - -void intel_finish_page_flip_mmio(struct drm_i915_private *dev_priv, int pipe) -{ - struct drm_device *dev = &dev_priv->drm; - struct intel_crtc *crtc = intel_get_crtc_for_pipe(dev_priv, pipe); - struct intel_flip_work *work; - unsigned long flags; - - /* Ignore early vblank irqs */ - if (!crtc) - return; - - /* - * This is called both by irq handlers and the reset code (to complete - * lost pageflips) so needs the full irqsave spinlocks. - */ - spin_lock_irqsave(&dev->event_lock, flags); - work = crtc->flip_work; - - if (work != NULL && - is_mmio_work(work) && - pageflip_finished(crtc, work)) - page_flip_completed(crtc); - - spin_unlock_irqrestore(&dev->event_lock, flags); -} - static inline void intel_mark_page_flip_active(struct intel_crtc *crtc, struct intel_flip_work *work) { @@ -10275,72 +10126,6 @@ static inline void intel_mark_page_flip_active(struct intel_crtc *crtc, atomic_set(&work->pending, 1); }
-static bool __pageflip_stall_check_cs(struct drm_i915_private *dev_priv, - struct intel_crtc *intel_crtc, - struct intel_flip_work *work) -{ - u32 addr, vblank; - - if (!atomic_read(&work->pending)) - return false; - - smp_rmb(); - - vblank = intel_crtc_get_vblank_counter(intel_crtc); - if (work->flip_ready_vblank == 0) { - if (work->flip_queued_req && - !i915_gem_request_completed(work->flip_queued_req)) - return false; - - work->flip_ready_vblank = vblank; - } - - if (vblank - work->flip_ready_vblank < 3) - return false; - - /* Potential stall - if we see that the flip has happened, - * assume a missed interrupt. */ - if (INTEL_GEN(dev_priv) >= 4) - addr = I915_HI_DISPBASE(I915_READ(DSPSURF(intel_crtc->plane))); - else - addr = I915_READ(DSPADDR(intel_crtc->plane)); - - /* There is a potential issue here with a false positive after a flip - * to the same address. We could address this by checking for a - * non-incrementing frame counter. - */ - return addr == work->gtt_offset; -} - -void intel_check_page_flip(struct drm_i915_private *dev_priv, int pipe) -{ - struct drm_device *dev = &dev_priv->drm; - struct intel_crtc *crtc = intel_get_crtc_for_pipe(dev_priv, pipe); - struct intel_flip_work *work; - - WARN_ON(!in_interrupt()); - - if (crtc == NULL) - return; - - spin_lock(&dev->event_lock); - work = crtc->flip_work; - - if (work != NULL && !is_mmio_work(work) && - __pageflip_stall_check_cs(dev_priv, crtc, work)) { - WARN_ONCE(1, - "Kicking stuck page flip: queued at %d, now %d\n", - work->flip_queued_vblank, intel_crtc_get_vblank_counter(crtc)); - page_flip_completed(crtc); - work = NULL; - } - - if (work != NULL && !is_mmio_work(work) && - intel_crtc_get_vblank_counter(crtc) - work->flip_queued_vblank > 1) - intel_queue_rps_boost_for_request(work->flip_queued_req); - spin_unlock(&dev->event_lock); -} - /** * intel_wm_need_update - Check whether watermarks need updating * @plane: drm plane diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index a8834daf5c07..04f80b013db9 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1408,9 +1408,6 @@ void intel_unpin_fb_vma(struct i915_vma *vma); struct drm_framebuffer * intel_framebuffer_create(struct drm_i915_gem_object *obj, struct drm_mode_fb_cmd2 *mode_cmd); -void intel_finish_page_flip_cs(struct drm_i915_private *dev_priv, int pipe); -void intel_finish_page_flip_mmio(struct drm_i915_private *dev_priv, int pipe); -void intel_check_page_flip(struct drm_i915_private *dev_priv, int pipe); int intel_prepare_plane_fb(struct drm_plane *plane, struct drm_plane_state *new_state); void intel_cleanup_plane_fb(struct drm_plane *plane,
A bit an oversight - the current code did nothing, since only legacy flips used the unpin_work_count and assorted logic.
Cc: Maarten Lankhorst maarten.lankhorst@linux.intel.com Cc: Ville Syrjälä ville.syrjala@linux.intel.com Signed-off-by: Daniel Vetter daniel.vetter@ffwll.ch --- drivers/gpu/drm/i915/intel_display.c | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-)
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 02620f31374b..343883214113 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -4140,21 +4140,22 @@ static void ironlake_fdi_disable(struct drm_crtc *crtc)
bool intel_has_pending_fb_unpin(struct drm_i915_private *dev_priv) { - struct intel_crtc *crtc; - - /* Note that we don't need to be called with mode_config.lock here - * as our list of CRTC objects is static for the lifetime of the - * device and so cannot disappear as we iterate. Similarly, we can - * happily treat the predicates as racy, atomic checks as userspace - * cannot claim and pin a new fb without at least acquring the - * struct_mutex and so serialising with us. - */ - for_each_intel_crtc(&dev_priv->drm, crtc) { - if (atomic_read(&crtc->unpin_work_count) == 0) + struct drm_crtc *crtc; + bool cleanup_done; + + drm_for_each_crtc(crtc, &dev_priv->drm) { + struct drm_crtc_commit *commit; + spin_lock(&crtc->commit_lock); + commit = list_first_entry_or_null(&crtc->commit_list, + struct drm_crtc_commit, commit_entry); + cleanup_done = commit ? + try_wait_for_completion(&commit->cleanup_done) : true; + spin_unlock(&crtc->commit_lock); + + if (cleanup_done) continue;
- if (crtc->flip_work) - intel_wait_for_vblank(dev_priv, crtc->pipe); + drm_crtc_wait_one_vblank(crtc);
return true; }
Quoting Daniel Vetter (2017-07-19 13:55:00)
A bit an oversight - the current code did nothing, since only legacy flips used the unpin_work_count and assorted logic.
Cc: Maarten Lankhorst maarten.lankhorst@linux.intel.com Cc: Ville Syrjälä ville.syrjala@linux.intel.com Signed-off-by: Daniel Vetter daniel.vetter@ffwll.ch
There's a fence deadlock testcase for this, kms_flip/fence? -Chris
On Wed, Jul 19, 2017 at 02:06:43PM +0100, Chris Wilson wrote:
Quoting Daniel Vetter (2017-07-19 13:55:00)
A bit an oversight - the current code did nothing, since only legacy flips used the unpin_work_count and assorted logic.
Cc: Maarten Lankhorst maarten.lankhorst@linux.intel.com Cc: Ville Syrjälä ville.syrjala@linux.intel.com Signed-off-by: Daniel Vetter daniel.vetter@ffwll.ch
There's a fence deadlock testcase for this, kms_flip/fence?
Crunching through them, but since I tend to test my stuff all mixed into one pile I've hit a bug in a different patch series of mine. Given that we've run without this for a while, not sure it's that critical really. -Daniel
Quoting Daniel Vetter (2017-07-19 14:15:44)
On Wed, Jul 19, 2017 at 02:06:43PM +0100, Chris Wilson wrote:
Quoting Daniel Vetter (2017-07-19 13:55:00)
A bit an oversight - the current code did nothing, since only legacy flips used the unpin_work_count and assorted logic.
Cc: Maarten Lankhorst maarten.lankhorst@linux.intel.com Cc: Ville Syrjälä ville.syrjala@linux.intel.com Signed-off-by: Daniel Vetter daniel.vetter@ffwll.ch
There's a fence deadlock testcase for this, kms_flip/fence?
Crunching through them, but since I tend to test my stuff all mixed into one pile I've hit a bug in a different patch series of mine. Given that we've run without this for a while, not sure it's that critical really.
It's only going to affect gen2 (realistically using 14 fences in a gen3 batch is unlikely) if we can have 3 fb in the pipeline as userspace assumes 2 are reserved for the flip. Or at least if we may race while fb holds 3 fences due to the wq.
EDEADLK is not something I've seen outside of buggy code, but it is certainly possible and this patch should fix it. -Chris
This gets rid of all the interactions between the legacy flip code and the modeset code. Yay!
Cc: Maarten Lankhorst maarten.lankhorst@linux.intel.com Cc: Ville Syrjälä ville.syrjala@linux.intel.com Signed-off-by: Daniel Vetter daniel.vetter@intel.com --- drivers/gpu/drm/i915/i915_debugfs.c | 70 --------------------- drivers/gpu/drm/i915/i915_drv.c | 1 - drivers/gpu/drm/i915/i915_drv.h | 4 -- drivers/gpu/drm/i915/i915_gem.c | 2 - drivers/gpu/drm/i915/intel_display.c | 117 +---------------------------------- drivers/gpu/drm/i915/intel_drv.h | 21 +------ drivers/gpu/drm/i915/intel_sprite.c | 8 +-- 7 files changed, 3 insertions(+), 220 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 2ef75c1a6119..c25f42c60d61 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -543,75 +543,6 @@ static int i915_gem_gtt_info(struct seq_file *m, void *data) return 0; }
-static int i915_gem_pageflip_info(struct seq_file *m, void *data) -{ - struct drm_i915_private *dev_priv = node_to_i915(m->private); - struct drm_device *dev = &dev_priv->drm; - struct intel_crtc *crtc; - int ret; - - ret = mutex_lock_interruptible(&dev->struct_mutex); - if (ret) - return ret; - - for_each_intel_crtc(dev, crtc) { - const char pipe = pipe_name(crtc->pipe); - const char plane = plane_name(crtc->plane); - struct intel_flip_work *work; - - spin_lock_irq(&dev->event_lock); - work = crtc->flip_work; - if (work == NULL) { - seq_printf(m, "No flip due on pipe %c (plane %c)\n", - pipe, plane); - } else { - u32 pending; - u32 addr; - - pending = atomic_read(&work->pending); - if (pending) { - seq_printf(m, "Flip ioctl preparing on pipe %c (plane %c)\n", - pipe, plane); - } else { - seq_printf(m, "Flip pending (waiting for vsync) on pipe %c (plane %c)\n", - pipe, plane); - } - if (work->flip_queued_req) { - struct intel_engine_cs *engine = work->flip_queued_req->engine; - - seq_printf(m, "Flip queued on %s at seqno %x, last submitted seqno %x [current breadcrumb %x], completed? %d\n", - engine->name, - work->flip_queued_req->global_seqno, - intel_engine_last_submit(engine), - intel_engine_get_seqno(engine), - i915_gem_request_completed(work->flip_queued_req)); - } else - seq_printf(m, "Flip not associated with any ring\n"); - seq_printf(m, "Flip queued on frame %d, (was ready on frame %d), now %d\n", - work->flip_queued_vblank, - work->flip_ready_vblank, - intel_crtc_get_vblank_counter(crtc)); - seq_printf(m, "%d prepares\n", atomic_read(&work->pending)); - - if (INTEL_GEN(dev_priv) >= 4) - addr = I915_HI_DISPBASE(I915_READ(DSPSURF(crtc->plane))); - else - addr = I915_READ(DSPADDR(crtc->plane)); - seq_printf(m, "Current scanout address 0x%08x\n", addr); - - if (work->pending_flip_obj) { - seq_printf(m, "New framebuffer address 0x%08lx\n", (long)work->gtt_offset); - seq_printf(m, "MMIO update completed? %d\n", addr == work->gtt_offset); - } - } - spin_unlock_irq(&dev->event_lock); - } - - mutex_unlock(&dev->struct_mutex); - - return 0; -} - static int i915_gem_batch_pool_info(struct seq_file *m, void *data) { struct drm_i915_private *dev_priv = node_to_i915(m->private); @@ -4854,7 +4785,6 @@ static const struct drm_info_list i915_debugfs_list[] = { {"i915_gem_gtt", i915_gem_gtt_info, 0}, {"i915_gem_pin_display", i915_gem_gtt_info, 0, (void *)1}, {"i915_gem_stolen", i915_gem_stolen_list_info }, - {"i915_gem_pageflip", i915_gem_pageflip_info, 0}, {"i915_gem_request", i915_gem_request_info, 0}, {"i915_gem_seqno", i915_gem_seqno_info, 0}, {"i915_gem_fence_regs", i915_gem_fence_regs_info, 0}, diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index d36ffcdbb89a..6b583dc2eb1f 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -876,7 +876,6 @@ static int i915_driver_init_early(struct drm_i915_private *dev_priv, spin_lock_init(&dev_priv->uncore.lock);
spin_lock_init(&dev_priv->mm.object_stat_lock); - spin_lock_init(&dev_priv->mmio_flip_lock); mutex_init(&dev_priv->sb_lock); mutex_init(&dev_priv->modeset_restore_lock); mutex_init(&dev_priv->av_mutex); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 369968539b40..9bda09a7b693 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2144,9 +2144,6 @@ struct drm_i915_private { /* protects the irq masks */ spinlock_t irq_lock;
- /* protects the mmio flip data */ - spinlock_t mmio_flip_lock; - bool display_irqs_enabled;
/* To control wakeup latency, e.g. for irq-driven dp aux transfers. */ @@ -2251,7 +2248,6 @@ struct drm_i915_private {
struct intel_crtc *plane_to_crtc_mapping[I915_MAX_PIPES]; struct intel_crtc *pipe_to_crtc_mapping[I915_MAX_PIPES]; - wait_queue_head_t pending_flip_queue;
#ifdef CONFIG_DEBUG_FS struct intel_pipe_crc pipe_crc[I915_MAX_PIPES]; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index d6f9b4cb6e9b..1a8842f143fc 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4936,8 +4936,6 @@ i915_gem_load_init(struct drm_i915_private *dev_priv) init_waitqueue_head(&dev_priv->gpu_error.wait_queue); init_waitqueue_head(&dev_priv->gpu_error.reset_queue);
- init_waitqueue_head(&dev_priv->pending_flip_queue); - atomic_set(&dev_priv->mm.bsd_engine_dispatch_index, 0);
spin_lock_init(&dev_priv->fb_tracking.lock); diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 343883214113..e52a2adbaaa5 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -49,11 +49,6 @@ #include <linux/dma_remapping.h> #include <linux/reservation.h>
-static bool is_mmio_work(struct intel_flip_work *work) -{ - return work->mmio_work.func; -} - /* Primary plane formats for gen <= 3 */ static const uint32_t i8xx_primary_formats[] = { DRM_FORMAT_C8, @@ -3557,35 +3552,6 @@ void intel_finish_reset(struct drm_i915_private *dev_priv) clear_bit(I915_RESET_MODESET, &dev_priv->gpu_error.flags); }
-static bool abort_flip_on_reset(struct intel_crtc *crtc) -{ - struct i915_gpu_error *error = &to_i915(crtc->base.dev)->gpu_error; - - if (i915_reset_backoff(error)) - return true; - - if (crtc->reset_count != i915_reset_count(error)) - return true; - - return false; -} - -static bool intel_crtc_has_pending_flip(struct drm_crtc *crtc) -{ - struct drm_device *dev = crtc->dev; - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - bool pending; - - if (abort_flip_on_reset(intel_crtc)) - return false; - - spin_lock_irq(&dev->event_lock); - pending = to_intel_crtc(crtc)->flip_work != NULL; - spin_unlock_irq(&dev->event_lock); - - return pending; -} - static void intel_update_pipe_config(struct intel_crtc *crtc, struct intel_crtc_state *old_crtc_state) { @@ -4163,57 +4129,6 @@ bool intel_has_pending_fb_unpin(struct drm_i915_private *dev_priv) return false; }
-static void page_flip_completed(struct intel_crtc *intel_crtc) -{ - struct drm_i915_private *dev_priv = to_i915(intel_crtc->base.dev); - struct intel_flip_work *work = intel_crtc->flip_work; - - intel_crtc->flip_work = NULL; - - if (work->event) - drm_crtc_send_vblank_event(&intel_crtc->base, work->event); - - drm_crtc_vblank_put(&intel_crtc->base); - - wake_up_all(&dev_priv->pending_flip_queue); - trace_i915_flip_complete(intel_crtc->plane, - work->pending_flip_obj); - - queue_work(dev_priv->wq, &work->unpin_work); -} - -static int intel_crtc_wait_for_pending_flips(struct drm_crtc *crtc) -{ - struct drm_device *dev = crtc->dev; - struct drm_i915_private *dev_priv = to_i915(dev); - long ret; - - WARN_ON(waitqueue_active(&dev_priv->pending_flip_queue)); - - ret = wait_event_interruptible_timeout( - dev_priv->pending_flip_queue, - !intel_crtc_has_pending_flip(crtc), - 60*HZ); - - if (ret < 0) - return ret; - - if (ret == 0) { - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - struct intel_flip_work *work; - - spin_lock_irq(&dev->event_lock); - work = intel_crtc->flip_work; - if (work && !is_mmio_work(work)) { - WARN_ONCE(1, "Removing stuck page flip\n"); - page_flip_completed(intel_crtc); - } - spin_unlock_irq(&dev->event_lock); - } - - return 0; -} - void lpt_disable_iclkip(struct drm_i915_private *dev_priv) { u32 temp; @@ -5824,8 +5739,6 @@ static void intel_crtc_disable_noatomic(struct drm_crtc *crtc, return;
if (crtc->primary->state->visible) { - WARN_ON(intel_crtc->flip_work); - intel_pre_disable_primary_noatomic(crtc);
intel_crtc_disable_planes(crtc, 1 << drm_plane_index(crtc->primary)); @@ -10098,35 +10011,11 @@ struct drm_display_mode *intel_crtc_mode_get(struct drm_device *dev, static void intel_crtc_destroy(struct drm_crtc *crtc) { struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - struct drm_device *dev = crtc->dev; - struct intel_flip_work *work; - - spin_lock_irq(&dev->event_lock); - work = intel_crtc->flip_work; - intel_crtc->flip_work = NULL; - spin_unlock_irq(&dev->event_lock); - - if (work) { - cancel_work_sync(&work->mmio_work); - cancel_work_sync(&work->unpin_work); - kfree(work); - }
drm_crtc_cleanup(crtc); - kfree(intel_crtc); }
-static inline void intel_mark_page_flip_active(struct intel_crtc *crtc, - struct intel_flip_work *work) -{ - work->flip_queued_vblank = intel_crtc_get_vblank_counter(crtc); - - /* Ensure that the work item is consistent when activating it ... */ - smp_mb__before_atomic(); - atomic_set(&work->pending, 1); -} - /** * intel_wm_need_update - Check whether watermarks need updating * @plane: drm plane @@ -11945,10 +11834,6 @@ static int intel_atomic_prepare_commit(struct drm_device *dev, if (state->legacy_cursor_update) continue;
- ret = intel_crtc_wait_for_pending_flips(crtc); - if (ret) - return ret; - if (atomic_read(&to_intel_crtc(crtc)->unpin_work_count) >= 2) flush_workqueue(dev_priv->wq); } @@ -12752,7 +12637,7 @@ static void intel_finish_crtc_commit(struct drm_crtc *crtc, { struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
- intel_pipe_update_end(intel_crtc, NULL); + intel_pipe_update_end(intel_crtc); }
/** diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 04f80b013db9..9cb7e781e863 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -797,7 +797,6 @@ struct intel_crtc { u8 plane_ids_mask; unsigned long long enabled_power_domains; struct intel_overlay *overlay; - struct intel_flip_work *flip_work;
atomic_t unpin_work_count;
@@ -1132,24 +1131,6 @@ intel_get_crtc_for_plane(struct drm_i915_private *dev_priv, enum plane plane) return dev_priv->plane_to_crtc_mapping[plane]; }
-struct intel_flip_work { - struct work_struct unpin_work; - struct work_struct mmio_work; - - struct drm_crtc *crtc; - struct i915_vma *old_vma; - struct drm_framebuffer *old_fb; - struct drm_i915_gem_object *pending_flip_obj; - struct drm_pending_vblank_event *event; - atomic_t pending; - u32 flip_count; - u32 gtt_offset; - struct drm_i915_gem_request *flip_queued_req; - u32 flip_queued_vblank; - u32 flip_ready_vblank; - unsigned int rotation; -}; - struct intel_load_detect_pipe { struct drm_atomic_state *restore_state; }; @@ -1903,7 +1884,7 @@ struct intel_plane *intel_sprite_plane_create(struct drm_i915_private *dev_priv, int intel_sprite_set_colorkey(struct drm_device *dev, void *data, struct drm_file *file_priv); void intel_pipe_update_start(struct intel_crtc *crtc); -void intel_pipe_update_end(struct intel_crtc *crtc, struct intel_flip_work *work); +void intel_pipe_update_end(struct intel_crtc *crtc);
/* intel_tv.c */ void intel_tv_init(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c index 94f9a1332dbf..8e25694a1508 100644 --- a/drivers/gpu/drm/i915/intel_sprite.c +++ b/drivers/gpu/drm/i915/intel_sprite.c @@ -176,7 +176,7 @@ void intel_pipe_update_start(struct intel_crtc *crtc) * re-enables interrupts and verifies the update was actually completed * before a vblank using the value of @start_vbl_count. */ -void intel_pipe_update_end(struct intel_crtc *crtc, struct intel_flip_work *work) +void intel_pipe_update_end(struct intel_crtc *crtc) { enum pipe pipe = crtc->pipe; int scanline_end = intel_get_crtc_scanline(crtc); @@ -184,12 +184,6 @@ void intel_pipe_update_end(struct intel_crtc *crtc, struct intel_flip_work *work ktime_t end_vbl_time = ktime_get(); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
- if (work) { - work->flip_queued_vblank = end_vbl_count; - smp_mb__before_atomic(); - atomic_set(&work->pending, 1); - } - trace_i915_pipe_update_end(crtc, end_vbl_count, scanline_end);
/* We're still in the vblank-evade critical section, this can't race.
Quoting Daniel Vetter (2017-07-19 13:55:01)
This gets rid of all the interactions between the legacy flip code and the modeset code. Yay!
Including our missed vblank boosting. (That's been dead for a while.) -Chris
On Wed, Jul 19, 2017 at 02:07:36PM +0100, Chris Wilson wrote:
Quoting Daniel Vetter (2017-07-19 13:55:01)
This gets rid of all the interactions between the legacy flip code and the modeset code. Yay!
Including our missed vblank boosting. (That's been dead for a while.)
Hm right, but should be easy to add (and bonus, for every display update, not just flips) in the intel_sw_fence_wait function. Can you pls point me at what function I should call to reverse-boost an i915_sw_fence (and only that)?
Then we could queue up a vblank callback that fires on the next vblank for any of the CRTC in the update and boosts the i915_sw_fence. If we just boost the fence (instead of the context or something) it should also be easy to filter out boosting when the request has completed meanwhile. -Daniel
Quoting Daniel Vetter (2017-07-19 14:24:20)
On Wed, Jul 19, 2017 at 02:07:36PM +0100, Chris Wilson wrote:
Quoting Daniel Vetter (2017-07-19 13:55:01)
This gets rid of all the interactions between the legacy flip code and the modeset code. Yay!
Including our missed vblank boosting. (That's been dead for a while.)
Hm right, but should be easy to add (and bonus, for every display update, not just flips) in the intel_sw_fence_wait function. Can you pls point me at what function I should call to reverse-boost an i915_sw_fence (and only that)?
You would have to walk the tree of input sw fences, detect those are dma_fences and check if they are a request. Then for each request mark it as boosted. That information is easier to keep during construction rather than recovering it later, though honestly, I would just boost the exclusive fences for the atomic state, i.e. use the reservation_object under the assumption that the snapshot hasn't drifted too much.
Then we could queue up a vblank callback that fires on the next vblank for any of the CRTC in the update and boosts the i915_sw_fence. If we just boost the fence (instead of the context or something) it should also be easy to filter out boosting when the request has completed meanwhile.
Yup, boosting is now completely tied to requests. -Chris
The core already does this in setup_commit(). With this we can also remove the unpin_work_count since it's the last user.
Cc: Maarten Lankhorst maarten.lankhorst@linux.intel.com Cc: Ville Syrjälä ville.syrjala@linux.intel.com Signed-off-by: Daniel Vetter daniel.vetter@ffwll.ch --- drivers/gpu/drm/i915/intel_display.c | 13 +------------ drivers/gpu/drm/i915/intel_drv.h | 2 -- 2 files changed, 1 insertion(+), 14 deletions(-)
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index e52a2adbaaa5..351208b7b1ad 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -11825,18 +11825,7 @@ static int intel_atomic_check(struct drm_device *dev, static int intel_atomic_prepare_commit(struct drm_device *dev, struct drm_atomic_state *state) { - struct drm_i915_private *dev_priv = to_i915(dev); - struct drm_crtc_state *crtc_state; - struct drm_crtc *crtc; - int i, ret; - - for_each_new_crtc_in_state(state, crtc, crtc_state, i) { - if (state->legacy_cursor_update) - continue; - - if (atomic_read(&to_intel_crtc(crtc)->unpin_work_count) >= 2) - flush_workqueue(dev_priv->wq); - } + int ret;
ret = mutex_lock_interruptible(&dev->struct_mutex); if (ret) diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 9cb7e781e863..96402c06e295 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -798,8 +798,6 @@ struct intel_crtc { unsigned long long enabled_power_domains; struct intel_overlay *overlay;
- atomic_t unpin_work_count; - /* Display surface base address adjustement for pageflips. Note that on * gen4+ this only adjusts up to a tile, offsets within a tile are * handled in the hw itself (with the TILEOFF register). */
Quoting Daniel Vetter (2017-07-19 13:55:02)
The core already does this in setup_commit(). With this we can also remove the unpin_work_count since it's the last user.
Also note that the loop only existed for the legacy pageflip support, with that removed it becomes entirely redundant. -Chris
On Wed, Jul 19, 2017 at 02:09:02PM +0100, Chris Wilson wrote:
Quoting Daniel Vetter (2017-07-19 13:55:02)
The core already does this in setup_commit(). With this we can also remove the unpin_work_count since it's the last user.
Also note that the loop only existed for the legacy pageflip support, with that removed it becomes entirely redundant.
Yeah, I just wanted to make clear that removing this isn't a bit of code that we failed to move over to atomic. It's been dead since a while. -Daniel
Op 19-07-17 om 14:55 schreef Daniel Vetter:
The core already does this in setup_commit(). With this we can also remove the unpin_work_count since it's the last user.
Cc: Maarten Lankhorst maarten.lankhorst@linux.intel.com Cc: Ville Syrjälä ville.syrjala@linux.intel.com Signed-off-by: Daniel Vetter daniel.vetter@ffwll.ch
drivers/gpu/drm/i915/intel_display.c | 13 +------------ drivers/gpu/drm/i915/intel_drv.h | 2 -- 2 files changed, 1 insertion(+), 14 deletions(-)
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index e52a2adbaaa5..351208b7b1ad 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -11825,18 +11825,7 @@ static int intel_atomic_check(struct drm_device *dev, static int intel_atomic_prepare_commit(struct drm_device *dev, struct drm_atomic_state *state) {
- struct drm_i915_private *dev_priv = to_i915(dev);
- struct drm_crtc_state *crtc_state;
- struct drm_crtc *crtc;
- int i, ret;
- for_each_new_crtc_in_state(state, crtc, crtc_state, i) {
if (state->legacy_cursor_update)
continue;
if (atomic_read(&to_intel_crtc(crtc)->unpin_work_count) >= 2)
flush_workqueue(dev_priv->wq);
- }
int ret;
ret = mutex_lock_interruptible(&dev->struct_mutex); if (ret)
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 9cb7e781e863..96402c06e295 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -798,8 +798,6 @@ struct intel_crtc { unsigned long long enabled_power_domains; struct intel_overlay *overlay;
- atomic_t unpin_work_count;
- /* Display surface base address adjustement for pageflips. Note that on
- gen4+ this only adjusts up to a tile, offsets within a tile are
- handled in the hw itself (with the TILEOFF register). */
I like red diffs..
For patch 1, 4 (with updated commit message), 6-9: Reviewed-by: Maarten Lankhorst maarten.lankhorst@linux.intel.com
On Wed, Jul 19, 2017 at 04:01:03PM +0200, Maarten Lankhorst wrote:
Op 19-07-17 om 14:55 schreef Daniel Vetter:
The core already does this in setup_commit(). With this we can also remove the unpin_work_count since it's the last user.
Cc: Maarten Lankhorst maarten.lankhorst@linux.intel.com Cc: Ville Syrjälä ville.syrjala@linux.intel.com Signed-off-by: Daniel Vetter daniel.vetter@ffwll.ch
drivers/gpu/drm/i915/intel_display.c | 13 +------------ drivers/gpu/drm/i915/intel_drv.h | 2 -- 2 files changed, 1 insertion(+), 14 deletions(-)
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index e52a2adbaaa5..351208b7b1ad 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -11825,18 +11825,7 @@ static int intel_atomic_check(struct drm_device *dev, static int intel_atomic_prepare_commit(struct drm_device *dev, struct drm_atomic_state *state) {
- struct drm_i915_private *dev_priv = to_i915(dev);
- struct drm_crtc_state *crtc_state;
- struct drm_crtc *crtc;
- int i, ret;
- for_each_new_crtc_in_state(state, crtc, crtc_state, i) {
if (state->legacy_cursor_update)
continue;
if (atomic_read(&to_intel_crtc(crtc)->unpin_work_count) >= 2)
flush_workqueue(dev_priv->wq);
- }
int ret;
ret = mutex_lock_interruptible(&dev->struct_mutex); if (ret)
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 9cb7e781e863..96402c06e295 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -798,8 +798,6 @@ struct intel_crtc { unsigned long long enabled_power_domains; struct intel_overlay *overlay;
- atomic_t unpin_work_count;
- /* Display surface base address adjustement for pageflips. Note that on
- gen4+ this only adjusts up to a tile, offsets within a tile are
- handled in the hw itself (with the TILEOFF register). */
I like red diffs..
For patch 1, 4 (with updated commit message), 6-9: Reviewed-by: Maarten Lankhorst maarten.lankhorst@linux.intel.com
Ok I merged patches 1&2, that take care of this for a lot of users/systems, and the remaining stuff needs a notch more polish, and a lot more testing anyway. -Daniel
dri-devel@lists.freedesktop.org