Hi,
This is a modified version of the series I sent earlier (http://comments.gmane.org/gmane.comp.video.dri.devel/113812). I haven't had time to work on the locking issues, so I've dropped the patches related to that so that the rest could get merged.
I have also added three new small patches (the three last ones).
Tomi
Tomi Valkeinen (9): drm/omap: fix encoder-crtc mapping drm/omap: page_flip: return -EBUSY if flip pending drm/omap: fix race issue with vsync irq and apply drm/omap: clear omap_obj->paddr in omap_gem_put_paddr() drm/omap: add pin refcounting to omap_framebuffer drm/omap: add a comment why locking is missing drm/omap: fix operation without fbdev drm/omap: fix error handling in omap_framebuffer_create() drm/omap: handle mismatching color format and buffer width
drivers/gpu/drm/omapdrm/omap_crtc.c | 19 ++++++++++++++++--- drivers/gpu/drm/omapdrm/omap_drv.c | 18 ++++++++++-------- drivers/gpu/drm/omapdrm/omap_fb.c | 26 +++++++++++++++++++++++--- drivers/gpu/drm/omapdrm/omap_gem.c | 1 + drivers/gpu/drm/omapdrm/omap_plane.c | 4 ++++ 5 files changed, 54 insertions(+), 14 deletions(-)
OMAP DSS hardware supports changing the output port to which an overlay manager's video stream goes. For example, DPI video stream can come from any of the four overlay managers on OMAP5.
However, as it's difficult to manage the change in the driver, the omapdss driver does not support that at the moment, and has a hardcoded overlay manager per output.
omapdrm, on the other hand, uses the hardware features to find out which overlay manager to use for an output, which causes problems. For example, on OMAP5, omapdrm tries to use DIGIT overlay manager for DPI output, instead of the LCD3 required by the omapdss driver.
This patch changes the omapdrm to use the omapdss driver's hardcoded overlay managers, which fixes the issue.
Signed-off-by: Tomi Valkeinen tomi.valkeinen@ti.com --- drivers/gpu/drm/omapdrm/omap_drv.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/omapdrm/omap_drv.c b/drivers/gpu/drm/omapdrm/omap_drv.c index 002b9721e85a..26fda74c1e48 100644 --- a/drivers/gpu/drm/omapdrm/omap_drv.c +++ b/drivers/gpu/drm/omapdrm/omap_drv.c @@ -286,14 +286,13 @@ static int omap_modeset_init(struct drm_device *dev) for (id = 0; id < priv->num_crtcs; id++) { struct drm_crtc *crtc = priv->crtcs[id]; enum omap_channel crtc_channel; - enum omap_dss_output_id supported_outputs;
crtc_channel = omap_crtc_channel(crtc); - supported_outputs = - dss_feat_get_supported_outputs(crtc_channel);
- if (supported_outputs & output->id) + if (output->dispc_channel == crtc_channel) { encoder->possible_crtcs |= (1 << id); + break; + } }
omap_dss_put_device(output);
The DRM documentation says:
"If a page flip is already pending, the page_flip operation must return -EBUSY."
Currently omapdrm returns -EINVAL instead. Fix omapdrm by returning -EBUSY.
Signed-off-by: Tomi Valkeinen tomi.valkeinen@ti.com --- drivers/gpu/drm/omapdrm/omap_crtc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/omapdrm/omap_crtc.c b/drivers/gpu/drm/omapdrm/omap_crtc.c index 2d28dc337cfb..0812b0f80167 100644 --- a/drivers/gpu/drm/omapdrm/omap_crtc.c +++ b/drivers/gpu/drm/omapdrm/omap_crtc.c @@ -360,7 +360,7 @@ static int omap_crtc_page_flip_locked(struct drm_crtc *crtc, if (omap_crtc->old_fb) { spin_unlock_irqrestore(&dev->event_lock, flags); dev_err(dev->dev, "already a pending flip\n"); - return -EINVAL; + return -EBUSY; }
omap_crtc->event = event;
omap_crtc's apply_worker does:
omap_irq_register(dev, &omap_crtc->apply_irq); dispc_mgr_go(channel);
This is supposed to enable the vsync irq, and set the GO bit. The vsync handler will later check if the HW has cleared the GO bit and queue apply work.
However, what may happen is that the vsync irq is enabled, and it gets ran before the GO bit is set by the apply_worker. In this case the vsync handler will see the GO bit as cleared, and queues the apply work too early.
This causes WARN'ings from dispc driver, and possibly other problems.
This patch fixes the issue by adding a new variable, 'go_bit_set' which is used to track the supposed state of GO bit and helps the apply worker and irq handler handle the job without a race.
Signed-off-by: Tomi Valkeinen tomi.valkeinen@ti.com --- drivers/gpu/drm/omapdrm/omap_crtc.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/omapdrm/omap_crtc.c b/drivers/gpu/drm/omapdrm/omap_crtc.c index 0812b0f80167..193979f97bdb 100644 --- a/drivers/gpu/drm/omapdrm/omap_crtc.c +++ b/drivers/gpu/drm/omapdrm/omap_crtc.c @@ -47,6 +47,9 @@ struct omap_crtc { bool enabled; bool full_update;
+ /* tracks the state of GO bit between irq handler and apply worker */ + bool go_bit_set; + struct omap_drm_apply apply;
struct omap_drm_irq apply_irq; @@ -442,11 +445,16 @@ static void omap_crtc_apply_irq(struct omap_drm_irq *irq, uint32_t irqstatus) if (!omap_crtc->error_irq.registered) __omap_irq_register(crtc->dev, &omap_crtc->error_irq);
- if (!dispc_mgr_go_busy(omap_crtc->channel)) { + /* make sure we see the most recent 'go_bit_set' */ + rmb(); + if (omap_crtc->go_bit_set && !dispc_mgr_go_busy(omap_crtc->channel)) { struct omap_drm_private *priv = crtc->dev->dev_private; DBG("%s: apply done", omap_crtc->name); __omap_irq_unregister(crtc->dev, &omap_crtc->apply_irq); + omap_crtc->go_bit_set = false; + /* make sure apple_worker sees 'go_bit_set = false' */ + wmb(); queue_work(priv->wq, &omap_crtc->apply_work); } } @@ -472,7 +480,9 @@ static void apply_worker(struct work_struct *work) * If we are still pending a previous update, wait.. when the * pending update completes, we get kicked again. */ - if (omap_crtc->apply_irq.registered) + /* make sure we see the most recent 'go_bit_set' */ + rmb(); + if (omap_crtc->go_bit_set) goto out;
/* finish up previous apply's: */ @@ -502,6 +512,9 @@ static void apply_worker(struct work_struct *work) if (dispc_mgr_is_enabled(channel)) { omap_irq_register(dev, &omap_crtc->apply_irq); dispc_mgr_go(channel); + omap_crtc->go_bit_set = true; + /* make sure the irq handler sees 'go_bit_set' */ + wmb(); } else { struct omap_drm_private *priv = dev->dev_private; queue_work(priv->wq, &omap_crtc->apply_work);
Clear omap_obj's paddr when unmapping the memory, so that it's easier to catch bad use of the paddr.
Signed-off-by: Tomi Valkeinen tomi.valkeinen@ti.com --- drivers/gpu/drm/omapdrm/omap_gem.c | 1 + 1 file changed, 1 insertion(+)
diff --git a/drivers/gpu/drm/omapdrm/omap_gem.c b/drivers/gpu/drm/omapdrm/omap_gem.c index e4849413ee80..b342e7b3bf00 100644 --- a/drivers/gpu/drm/omapdrm/omap_gem.c +++ b/drivers/gpu/drm/omapdrm/omap_gem.c @@ -829,6 +829,7 @@ int omap_gem_put_paddr(struct drm_gem_object *obj) dev_err(obj->dev->dev, "could not release unmap: %d\n", ret); } + omap_obj->paddr = 0; omap_obj->block = NULL; } }
omap_framebuffer_pin() and omap_framebuffer_unpin() are currently broken, as they cannot be called multiple times (i.e. pin, pin, unpin, unpin), which is what happens in certain cases. This issue causes the driver to possibly use 0 as an address for a displayed buffer, leading to OCP error from DSS.
This patch fixes the issue by adding a simple pin_count, used to track the number of pins.
Signed-off-by: Tomi Valkeinen tomi.valkeinen@ti.com --- drivers/gpu/drm/omapdrm/omap_fb.c | 13 +++++++++++++ 1 file changed, 13 insertions(+)
diff --git a/drivers/gpu/drm/omapdrm/omap_fb.c b/drivers/gpu/drm/omapdrm/omap_fb.c index 2a5cacdc344b..58f2af32ede8 100644 --- a/drivers/gpu/drm/omapdrm/omap_fb.c +++ b/drivers/gpu/drm/omapdrm/omap_fb.c @@ -86,6 +86,7 @@ struct plane {
struct omap_framebuffer { struct drm_framebuffer base; + int pin_count; const struct format *format; struct plane planes[4]; }; @@ -261,6 +262,11 @@ int omap_framebuffer_pin(struct drm_framebuffer *fb) struct omap_framebuffer *omap_fb = to_omap_framebuffer(fb); int ret, i, n = drm_format_num_planes(fb->pixel_format);
+ if (omap_fb->pin_count > 0) { + omap_fb->pin_count++; + return 0; + } + for (i = 0; i < n; i++) { struct plane *plane = &omap_fb->planes[i]; ret = omap_gem_get_paddr(plane->bo, &plane->paddr, true); @@ -269,6 +275,8 @@ int omap_framebuffer_pin(struct drm_framebuffer *fb) omap_gem_dma_sync(plane->bo, DMA_TO_DEVICE); }
+ omap_fb->pin_count++; + return 0;
fail: @@ -287,6 +295,11 @@ int omap_framebuffer_unpin(struct drm_framebuffer *fb) struct omap_framebuffer *omap_fb = to_omap_framebuffer(fb); int ret, i, n = drm_format_num_planes(fb->pixel_format);
+ omap_fb->pin_count--; + + if (omap_fb->pin_count > 0) + return 0; + for (i = 0; i < n; i++) { struct plane *plane = &omap_fb->planes[i]; ret = omap_gem_put_paddr(plane->bo);
unpin_worker() calls omap_framebuffer_unpin() without any locks, which looks very suspicious. However, both pin and unpin are always called via the driver's private workqueue, so the access is synchronized that way.
Add a comment to make this clear.
Signed-off-by: Tomi Valkeinen tomi.valkeinen@ti.com --- drivers/gpu/drm/omapdrm/omap_plane.c | 4 ++++ 1 file changed, 4 insertions(+)
diff --git a/drivers/gpu/drm/omapdrm/omap_plane.c b/drivers/gpu/drm/omapdrm/omap_plane.c index 891a4dc608af..743d04981d71 100644 --- a/drivers/gpu/drm/omapdrm/omap_plane.c +++ b/drivers/gpu/drm/omapdrm/omap_plane.c @@ -71,6 +71,10 @@ static void unpin_worker(struct drm_flip_work *work, void *val) container_of(work, struct omap_plane, unpin_work); struct drm_device *dev = omap_plane->base.dev;
+ /* + * omap_framebuffer_pin/unpin are always called from priv->wq, + * so there's no need for locking here. + */ omap_framebuffer_unpin(val); mutex_lock(&dev->mode_config.mutex); drm_framebuffer_unreference(val);
omapdrm should work fine even if fbdev is missing. The current driver crashes in that case, though, as it is missing checks for the fbdev.
Add the checks so that we don't free fbdev or restore fbdev mode when there's no fbdev.
Signed-off-by: Tomi Valkeinen tomi.valkeinen@ti.com --- drivers/gpu/drm/omapdrm/omap_drv.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/omapdrm/omap_drv.c b/drivers/gpu/drm/omapdrm/omap_drv.c index 26fda74c1e48..65d665ff965f 100644 --- a/drivers/gpu/drm/omapdrm/omap_drv.c +++ b/drivers/gpu/drm/omapdrm/omap_drv.c @@ -518,7 +518,8 @@ static int dev_unload(struct drm_device *dev)
drm_kms_helper_poll_fini(dev);
- omap_fbdev_free(dev); + if (priv->fbdev) + omap_fbdev_free(dev);
/* flush crtcs so the fbs get released */ for (i = 0; i < priv->num_crtcs; i++) @@ -587,9 +588,11 @@ static void dev_lastclose(struct drm_device *dev) } }
- ret = drm_fb_helper_restore_fbdev_mode_unlocked(priv->fbdev); - if (ret) - DBG("failed to restore crtc mode"); + if (priv->fbdev) { + ret = drm_fb_helper_restore_fbdev_mode_unlocked(priv->fbdev); + if (ret) + DBG("failed to restore crtc mode"); + } }
static void dev_preclose(struct drm_device *dev, struct drm_file *file)
When an error happens in omap_framebuffer_create(), omap_framebuffer_create() calls omap_framebuffer_destroy() if the fb struct has been allocated. However, that crashes, as omap_framebuffer_destroy(), which calls drm_framebuffer_cleanup(), should only be called after drm_framebuffer_init()
Fix this by just calling kfree() for the allocated fb when an error happens.
Signed-off-by: Tomi Valkeinen tomi.valkeinen@ti.com --- drivers/gpu/drm/omapdrm/omap_fb.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/omapdrm/omap_fb.c b/drivers/gpu/drm/omapdrm/omap_fb.c index 58f2af32ede8..2975096abdf5 100644 --- a/drivers/gpu/drm/omapdrm/omap_fb.c +++ b/drivers/gpu/drm/omapdrm/omap_fb.c @@ -420,7 +420,7 @@ struct drm_framebuffer *omap_framebuffer_create(struct drm_device *dev, struct drm_framebuffer *omap_framebuffer_init(struct drm_device *dev, struct drm_mode_fb_cmd2 *mode_cmd, struct drm_gem_object **bos) { - struct omap_framebuffer *omap_fb; + struct omap_framebuffer *omap_fb = NULL; struct drm_framebuffer *fb = NULL; const struct format *format = NULL; int ret, i, n = drm_format_num_planes(mode_cmd->pixel_format); @@ -491,8 +491,7 @@ struct drm_framebuffer *omap_framebuffer_init(struct drm_device *dev, return fb;
fail: - if (fb) - omap_framebuffer_destroy(fb); + kfree(omap_fb);
return ERR_PTR(ret); }
omapdrm doesn't check if the width of the framebuffer and the color format's bits-per-pixel match.
For example, using a display with a width of 1280, and a buffer allocated with using 32 bits per pixel (i.e. 1280*4 = 5120 bytes), with a 24 bits per pixel color format, leads to the following mismatch: 5120/3 = 1706.666... bytes. This causes bad colors and a tilt on the screen.
Add a check into omapdrm to return an error if the user tries to use such a combination.
Signed-off-by: Tomi Valkeinen tomi.valkeinen@ti.com --- drivers/gpu/drm/omapdrm/omap_fb.c | 8 ++++++++ 1 file changed, 8 insertions(+)
diff --git a/drivers/gpu/drm/omapdrm/omap_fb.c b/drivers/gpu/drm/omapdrm/omap_fb.c index 2975096abdf5..bf98580223d0 100644 --- a/drivers/gpu/drm/omapdrm/omap_fb.c +++ b/drivers/gpu/drm/omapdrm/omap_fb.c @@ -463,6 +463,14 @@ struct drm_framebuffer *omap_framebuffer_init(struct drm_device *dev, goto fail; }
+ if (mode_cmd->width % format->planes[i].stride_bpp != 0) { + dev_err(dev->dev, + "buffer width (%d) is not a multiple of pixel width (%d)\n", + mode_cmd->width, format->planes[i].stride_bpp); + ret = -EINVAL; + goto fail; + } + size = pitch * mode_cmd->height / format->planes[i].sub_y;
if (size > (omap_gem_mmap_size(bos[i]) - mode_cmd->offsets[i])) {
On OMAP5 it is not possible to use TILER buffer with CPU when caching or write-combining is used. Doing so leads to errors from the memory manager.
However, on OMAP4, write-combining works fine.
This patch adds platform specific data for the TILER, and a function tiler_get_cpu_cache_flags() which can be used to get the caching mode to be used.
Note that without write-combining the use of the TILER buffer with CPU is unusably slow. It's still good to have it operational for testing purposes.
Signed-off-by: Tomi Valkeinen tomi.valkeinen@ti.com ---
One more patch, which was left out from the series.
Tomi
drivers/gpu/drm/omapdrm/omap_dmm_priv.h | 6 +++++ drivers/gpu/drm/omapdrm/omap_dmm_tiler.c | 39 ++++++++++++++++++++++++++++++-- drivers/gpu/drm/omapdrm/omap_dmm_tiler.h | 1 + drivers/gpu/drm/omapdrm/omap_gem.c | 4 ++-- 4 files changed, 46 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/omapdrm/omap_dmm_priv.h b/drivers/gpu/drm/omapdrm/omap_dmm_priv.h index 58bcd6ae0255..d96660573076 100644 --- a/drivers/gpu/drm/omapdrm/omap_dmm_priv.h +++ b/drivers/gpu/drm/omapdrm/omap_dmm_priv.h @@ -153,6 +153,10 @@ struct refill_engine { struct list_head idle_node; };
+struct dmm_platform_data { + uint32_t cpu_cache_flags; +}; + struct dmm { struct device *dev; void __iomem *base; @@ -183,6 +187,8 @@ struct dmm {
/* allocation list and lock */ struct list_head alloc_head; + + const struct dmm_platform_data *plat_data; };
#endif diff --git a/drivers/gpu/drm/omapdrm/omap_dmm_tiler.c b/drivers/gpu/drm/omapdrm/omap_dmm_tiler.c index 56c60552abba..ef3d14d9cfa1 100644 --- a/drivers/gpu/drm/omapdrm/omap_dmm_tiler.c +++ b/drivers/gpu/drm/omapdrm/omap_dmm_tiler.c @@ -39,6 +39,10 @@ static struct tcm *containers[TILFMT_NFORMATS]; static struct dmm *omap_dmm;
+#if defined(CONFIG_OF) +static const struct of_device_id dmm_of_match[]; +#endif + /* global spinlock for protecting lists */ static DEFINE_SPINLOCK(list_lock);
@@ -529,6 +533,11 @@ size_t tiler_vsize(enum tiler_fmt fmt, uint16_t w, uint16_t h) return round_up(geom[fmt].cpp * w, PAGE_SIZE) * h; }
+uint32_t tiler_get_cpu_cache_flags(void) +{ + return omap_dmm->plat_data->cpu_cache_flags; +} + bool dmm_is_available(void) { return omap_dmm ? true : false; @@ -592,6 +601,18 @@ static int omap_dmm_probe(struct platform_device *dev)
init_waitqueue_head(&omap_dmm->engine_queue);
+ if (dev->dev.of_node) { + const struct of_device_id *match; + + match = of_match_node(dmm_of_match, dev->dev.of_node); + if (!match) { + dev_err(&dev->dev, "failed to find matching device node\n"); + return -ENODEV; + } + + omap_dmm->plat_data = match->data; + } + /* lookup hwmod data - base address and irq */ mem = platform_get_resource(dev, IORESOURCE_MEM, 0); if (!mem) { @@ -972,9 +993,23 @@ static const struct dev_pm_ops omap_dmm_pm_ops = { #endif
#if defined(CONFIG_OF) +static const struct dmm_platform_data dmm_omap4_platform_data = { + .cpu_cache_flags = OMAP_BO_WC, +}; + +static const struct dmm_platform_data dmm_omap5_platform_data = { + .cpu_cache_flags = OMAP_BO_UNCACHED, +}; + static const struct of_device_id dmm_of_match[] = { - { .compatible = "ti,omap4-dmm", }, - { .compatible = "ti,omap5-dmm", }, + { + .compatible = "ti,omap4-dmm", + .data = &dmm_omap4_platform_data, + }, + { + .compatible = "ti,omap5-dmm", + .data = &dmm_omap5_platform_data, + }, {}, }; #endif diff --git a/drivers/gpu/drm/omapdrm/omap_dmm_tiler.h b/drivers/gpu/drm/omapdrm/omap_dmm_tiler.h index 4fdd61e54bd2..e83c78372db8 100644 --- a/drivers/gpu/drm/omapdrm/omap_dmm_tiler.h +++ b/drivers/gpu/drm/omapdrm/omap_dmm_tiler.h @@ -106,6 +106,7 @@ uint32_t tiler_stride(enum tiler_fmt fmt, uint32_t orient); size_t tiler_size(enum tiler_fmt fmt, uint16_t w, uint16_t h); size_t tiler_vsize(enum tiler_fmt fmt, uint16_t w, uint16_t h); void tiler_align(enum tiler_fmt fmt, uint16_t *w, uint16_t *h); +uint32_t tiler_get_cpu_cache_flags(void); bool dmm_is_available(void);
extern struct platform_driver omap_dmm_driver; diff --git a/drivers/gpu/drm/omapdrm/omap_gem.c b/drivers/gpu/drm/omapdrm/omap_gem.c index b342e7b3bf00..3b5cbce70def 100644 --- a/drivers/gpu/drm/omapdrm/omap_gem.c +++ b/drivers/gpu/drm/omapdrm/omap_gem.c @@ -1360,8 +1360,8 @@ struct drm_gem_object *omap_gem_new(struct drm_device *dev, /* currently don't allow cached buffers.. there is some caching * stuff that needs to be handled better */ - flags &= ~(OMAP_BO_CACHED|OMAP_BO_UNCACHED); - flags |= OMAP_BO_WC; + flags &= ~(OMAP_BO_CACHED|OMAP_BO_WC|OMAP_BO_UNCACHED); + flags |= tiler_get_cpu_cache_flags();
/* align dimensions to slot boundaries... */ tiler_align(gem2fmt(flags),
dri-devel@lists.freedesktop.org