Generic fbdev emulation maps and unmaps the console BO for updating its content from the shadow buffer. Drivers using VRAM helpers may see reduced performance as the mapping operations can create significant overhead. A report of this problem is at [1].
This patch set fixes the problem by adding a ref counter to the GEM VRAM buffers' kmap operation, and keeping the fbdev's console buffer mapped while the console is being displayed. These changes avoid the frequent mappings in the fbdev code. The drivers, ast and mgag200, map the console's buffer when it becomes visible and the fbdev code reuses this mapping. The original fbdev code in ast and mgag200 used the same optimization.
[1] https://lists.freedesktop.org/archives/dri-devel/2019-July/228663.html
Thomas Zimmermann (3): drm/vram-helpers: Add kmap ref-counting to GEM VRAM objects drm/ast: Map fbdev framebuffer while it's being displayed drm/mgag200: Map fbdev framebuffer while it's being displayed
drivers/gpu/drm/ast/ast_mode.c | 20 +++++++ drivers/gpu/drm/drm_gem_vram_helper.c | 74 +++++++++++++++++++------- drivers/gpu/drm/mgag200/mgag200_mode.c | 21 ++++++++ include/drm/drm_gem_vram_helper.h | 13 +++++ 4 files changed, 110 insertions(+), 18 deletions(-)
-- 2.22.0
The kmap and kunmap operations of GEM VRAM buffers can now be called in interleaving pairs. The first call to drm_gem_vram_kmap() maps the buffer's memory to kernel address space and the final call to drm_gem_vram_kunmap() unmaps the memory. Intermediate calls to these functions increment or decrement a reference counter.
This change allows for keeping buffer memory mapped for longer and minimizes the number of changes to TLB, page tables, etc. The latter is required to work around a performance regression where the fbdev code frequently mapped and unmapped VRAM buffers.
Signed-off-by: Thomas Zimmermann tzimmermann@suse.de Fixes: cf1ca9aeb930 ("drm/fb-helper: Map DRM client buffer only when required") Cc: Noralf Trønnes noralf@tronnes.org Cc: Maarten Lankhorst maarten.lankhorst@linux.intel.com Cc: Maxime Ripard maxime.ripard@bootlin.com Cc: Sean Paul sean@poorly.run Cc: David Airlie airlied@linux.ie Cc: Daniel Vetter daniel@ffwll.ch Cc: Rong Chen rong.a.chen@intel.com Cc: Feng Tang feng.tang@intel.com Cc: Huang Ying ying.huang@intel.com --- drivers/gpu/drm/drm_gem_vram_helper.c | 74 ++++++++++++++++++++------- include/drm/drm_gem_vram_helper.h | 13 +++++ 2 files changed, 69 insertions(+), 18 deletions(-)
diff --git a/drivers/gpu/drm/drm_gem_vram_helper.c b/drivers/gpu/drm/drm_gem_vram_helper.c index e0fbfb6570cf..db4b8bf16724 100644 --- a/drivers/gpu/drm/drm_gem_vram_helper.c +++ b/drivers/gpu/drm/drm_gem_vram_helper.c @@ -26,7 +26,11 @@ static void drm_gem_vram_cleanup(struct drm_gem_vram_object *gbo) * TTM buffer object in 'bo' has already been cleaned * up; only release the GEM object. */ + + WARN_ON(gbo->kmap_use_count); + drm_gem_object_release(&gbo->gem); + mutex_destroy(&gbo->kmap_lock); }
static void drm_gem_vram_destroy(struct drm_gem_vram_object *gbo) @@ -100,6 +104,8 @@ static int drm_gem_vram_init(struct drm_device *dev, if (ret) goto err_drm_gem_object_release;
+ mutex_init(&gbo->kmap_lock); + return 0;
err_drm_gem_object_release: @@ -283,6 +289,34 @@ int drm_gem_vram_unpin(struct drm_gem_vram_object *gbo) } EXPORT_SYMBOL(drm_gem_vram_unpin);
+static void *drm_gem_vram_kmap_locked(struct drm_gem_vram_object *gbo, + bool map, bool *is_iomem) +{ + int ret; + struct ttm_bo_kmap_obj *kmap = &gbo->kmap; + + if (gbo->kmap_use_count > 0) + goto out; + + if (kmap->virtual || !map) + goto out; + + ret = ttm_bo_kmap(&gbo->bo, 0, gbo->bo.num_pages, kmap); + if (ret) + return ERR_PTR(ret); + +out: + if (!kmap->virtual) { + if (is_iomem) + *is_iomem = false; + return NULL; /* not mapped; don't increment ref */ + } + ++gbo->kmap_use_count; + if (is_iomem) + return ttm_kmap_obj_virtual(kmap, is_iomem); + return kmap->virtual; +} + /** * drm_gem_vram_kmap() - Maps a GEM VRAM object into kernel address space * @gbo: the GEM VRAM object @@ -304,40 +338,44 @@ void *drm_gem_vram_kmap(struct drm_gem_vram_object *gbo, bool map, bool *is_iomem) { int ret; - struct ttm_bo_kmap_obj *kmap = &gbo->kmap; - - if (kmap->virtual || !map) - goto out; + void *virtual;
- ret = ttm_bo_kmap(&gbo->bo, 0, gbo->bo.num_pages, kmap); + ret = mutex_lock_interruptible(&gbo->kmap_lock); if (ret) return ERR_PTR(ret); + virtual = drm_gem_vram_kmap_locked(gbo, map, is_iomem); + mutex_unlock(&gbo->kmap_lock);
-out: - if (!is_iomem) - return kmap->virtual; - if (!kmap->virtual) { - *is_iomem = false; - return NULL; - } - return ttm_kmap_obj_virtual(kmap, is_iomem); + return virtual; } EXPORT_SYMBOL(drm_gem_vram_kmap);
-/** - * drm_gem_vram_kunmap() - Unmaps a GEM VRAM object - * @gbo: the GEM VRAM object - */ -void drm_gem_vram_kunmap(struct drm_gem_vram_object *gbo) +static void drm_gem_vram_kunmap_locked(struct drm_gem_vram_object *gbo) { struct ttm_bo_kmap_obj *kmap = &gbo->kmap;
+ if (WARN_ON_ONCE(!gbo->kmap_use_count)) + return; + if (--gbo->kmap_use_count > 0) + return; + if (!kmap->virtual) return;
ttm_bo_kunmap(kmap); kmap->virtual = NULL; } + +/** + * drm_gem_vram_kunmap() - Unmaps a GEM VRAM object + * @gbo: the GEM VRAM object + */ +void drm_gem_vram_kunmap(struct drm_gem_vram_object *gbo) +{ + mutex_lock(&gbo->kmap_lock); + drm_gem_vram_kunmap_locked(gbo); + mutex_unlock(&gbo->kmap_lock); +} EXPORT_SYMBOL(drm_gem_vram_kunmap);
/** diff --git a/include/drm/drm_gem_vram_helper.h b/include/drm/drm_gem_vram_helper.h index b41d932eb53a..47f7e01d2805 100644 --- a/include/drm/drm_gem_vram_helper.h +++ b/include/drm/drm_gem_vram_helper.h @@ -40,6 +40,19 @@ struct drm_gem_vram_object { struct ttm_buffer_object bo; struct ttm_bo_kmap_obj kmap;
+ /** + * @kmap_lock: Protects the kmap address and use count + */ + struct mutex kmap_lock; + + /** + * @kmap_use_count: + * + * Reference count on the virtual address. + * The address are un-mapped when the count reaches zero. + */ + unsigned int kmap_use_count; + /* Supported placements are %TTM_PL_VRAM and %TTM_PL_SYSTEM */ struct ttm_placement placement; struct ttm_place placements[2];
The generic fbdev emulation will map and unmap the framebuffer's memory if required. As consoles are most often updated while being on screen, we map the fbdev buffer while it's being displayed. This avoids frequent map/unmap operations in the fbdev code. The original fbdev code in ast used the same trick to improve performance.
Signed-off-by: Thomas Zimmermann tzimmermann@suse.de Cc: Thomas Zimmermann tzimmermann@suse.de Cc: Noralf Trønnes noralf@tronnes.org Cc: Dave Airlie airlied@redhat.com Cc: Greg Kroah-Hartman gregkh@linuxfoundation.org Cc: Thomas Gleixner tglx@linutronix.de Cc: Sam Ravnborg sam@ravnborg.org Cc: Gerd Hoffmann kraxel@redhat.com Cc: Oleksandr Andrushchenko oleksandr_andrushchenko@epam.com Cc: CK Hu ck.hu@mediatek.com Cc: Daniel Vetter daniel.vetter@ffwll.ch Cc: Alex Deucher alexander.deucher@amd.com Cc: "Christian König" christian.koenig@amd.com Cc: YueHaibing yuehaibing@huawei.com Cc: Sam Bobroff sbobroff@linux.ibm.com Cc: Huang Rui ray.huang@amd.com Cc: "Y.C. Chen" yc_chen@aspeedtech.com Cc: Rong Chen rong.a.chen@intel.com Cc: Feng Tang feng.tang@intel.com Cc: Huang Ying ying.huang@intel.com --- drivers/gpu/drm/ast/ast_mode.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+)
diff --git a/drivers/gpu/drm/ast/ast_mode.c b/drivers/gpu/drm/ast/ast_mode.c index c792362024a5..35ba25262f54 100644 --- a/drivers/gpu/drm/ast/ast_mode.c +++ b/drivers/gpu/drm/ast/ast_mode.c @@ -529,13 +529,20 @@ static int ast_crtc_do_set_base(struct drm_crtc *crtc, struct drm_framebuffer *fb, int x, int y, int atomic) { + struct drm_fb_helper *fb_helper = crtc->dev->fb_helper; struct drm_gem_vram_object *gbo; int ret; s64 gpu_addr; + void *base;
if (!atomic && fb) { gbo = drm_gem_vram_of_gem(fb->obj[0]); drm_gem_vram_unpin(gbo); + + // Unmap fbdev FB if it's not being displayed + // any longer. + if (fb == fb_helper->buffer->fb) + drm_gem_vram_kunmap(gbo); }
gbo = drm_gem_vram_of_gem(crtc->primary->fb->obj[0]); @@ -552,6 +559,15 @@ static int ast_crtc_do_set_base(struct drm_crtc *crtc, ast_set_offset_reg(crtc); ast_set_start_address_crt1(crtc, (u32)gpu_addr);
+ // Map fbdev FB while it's being displayed. This + // avoids frequent mapping and unmapping withing the + // fbdev code. + if (crtc->primary->fb == fb_helper->buffer->fb) { + base = drm_gem_vram_kmap(gbo, true, NULL); + if (IS_ERR(base)) + DRM_ERROR("failed to kmap fbcon\n"); + } + return 0;
err_drm_gem_vram_unpin: @@ -605,10 +621,14 @@ static void ast_crtc_disable(struct drm_crtc *crtc) DRM_DEBUG_KMS("\n"); ast_crtc_dpms(crtc, DRM_MODE_DPMS_OFF); if (crtc->primary->fb) { + struct drm_fb_helper *fb_helper = crtc->dev->fb_helper; struct drm_framebuffer *fb = crtc->primary->fb; struct drm_gem_vram_object *gbo = drm_gem_vram_of_gem(fb->obj[0]);
+ // Unmap if it's the fbdev FB. + if (fb == fb_helper->buffer->fb) + drm_gem_vram_kunmap(gbo); drm_gem_vram_unpin(gbo); } crtc->primary->fb = NULL;
The generic fbdev emulation will map and unmap the framebuffer's memory if required. As consoles are most often updated while being on screen, we map the fbdev buffer while it's being displayed. This avoids frequent map/unmap operations in the fbdev code. The original fbdev code in mgag200 used the same trick to improve performance.
Signed-off-by: Thomas Zimmermann tzimmermann@suse.de Fixes: 90f479ae51af ("drm/mgag200: Replace struct mga_fbdev with generic framebuffer emulation") Cc: Thomas Zimmermann tzimmermann@suse.de Cc: Noralf Trønnes noralf@tronnes.org Cc: Dave Airlie airlied@redhat.com Cc: Greg Kroah-Hartman gregkh@linuxfoundation.org Cc: Thomas Gleixner tglx@linutronix.de Cc: Gerd Hoffmann kraxel@redhat.com Cc: Alex Deucher alexander.deucher@amd.com Cc: "Christian König" christian.koenig@amd.com Cc: Sam Ravnborg sam@ravnborg.org Cc: Daniel Vetter daniel.vetter@ffwll.ch Cc: Huang Rui ray.huang@amd.com Cc: Bartlomiej Zolnierkiewicz b.zolnierkie@samsung.com Cc: "Michał Mirosław" mirq-linux@rere.qmqm.pl Cc: Armijn Hemel armijn@tjaldur.nl Cc: Rong Chen rong.a.chen@intel.com Cc: Feng Tang feng.tang@intel.com Cc: Huang Ying ying.huang@intel.com --- drivers/gpu/drm/mgag200/mgag200_mode.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+)
diff --git a/drivers/gpu/drm/mgag200/mgag200_mode.c b/drivers/gpu/drm/mgag200/mgag200_mode.c index 822f2a13748f..a6391144feb5 100644 --- a/drivers/gpu/drm/mgag200/mgag200_mode.c +++ b/drivers/gpu/drm/mgag200/mgag200_mode.c @@ -860,13 +860,20 @@ static int mga_crtc_do_set_base(struct drm_crtc *crtc, struct drm_framebuffer *fb, int x, int y, int atomic) { + struct drm_fb_helper *fb_helper = crtc->dev->fb_helper; struct drm_gem_vram_object *gbo; int ret; s64 gpu_addr; + void *base;
if (!atomic && fb) { gbo = drm_gem_vram_of_gem(fb->obj[0]); drm_gem_vram_unpin(gbo); + + // Unmap fbdev FB if it's not being displayed + // any longer. + if (fb == fb_helper->buffer->fb) + drm_gem_vram_kunmap(gbo); }
gbo = drm_gem_vram_of_gem(crtc->primary->fb->obj[0]); @@ -882,6 +889,15 @@ static int mga_crtc_do_set_base(struct drm_crtc *crtc,
mga_set_start_address(crtc, (u32)gpu_addr);
+ // Map fbdev FB while it's being displayed. This + // avoids frequent mapping and unmapping withing the + // fbdev code. + if (crtc->primary->fb == fb_helper->buffer->fb) { + base = drm_gem_vram_kmap(gbo, true, NULL); + if (IS_ERR(base)) + DRM_ERROR("failed to kmap fbcon\n"); + } + return 0;
err_drm_gem_vram_unpin: @@ -1403,9 +1419,14 @@ static void mga_crtc_disable(struct drm_crtc *crtc) DRM_DEBUG_KMS("\n"); mga_crtc_dpms(crtc, DRM_MODE_DPMS_OFF); if (crtc->primary->fb) { + struct drm_fb_helper *fb_helper = crtc->dev->fb_helper; struct drm_framebuffer *fb = crtc->primary->fb; struct drm_gem_vram_object *gbo = drm_gem_vram_of_gem(fb->obj[0]); + + // Unmap if it's the fbdev FB. + if (fb == fb_helper->buffer->fb) + drm_gem_vram_kunmap(gbo); drm_gem_vram_unpin(gbo); } crtc->primary->fb = NULL;
dri-devel@lists.freedesktop.org