We don't need to call the (expensive) radeon_bo_wait, checking the fences via RCU is much faster. The reservation done by radeon_bo_wait does not save us from any race conditions.
Signed-off-by: Grigori Goronzy greg@chown.ath.cx --- drivers/gpu/drm/radeon/radeon_gem.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c index ac3c131..7199e19 100644 --- a/drivers/gpu/drm/radeon/radeon_gem.c +++ b/drivers/gpu/drm/radeon/radeon_gem.c @@ -428,7 +428,6 @@ int radeon_gem_mmap_ioctl(struct drm_device *dev, void *data, int radeon_gem_busy_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) { - struct radeon_device *rdev = dev->dev_private; struct drm_radeon_gem_busy *args = data; struct drm_gem_object *gobj; struct radeon_bo *robj; @@ -440,10 +439,16 @@ int radeon_gem_busy_ioctl(struct drm_device *dev, void *data, return -ENOENT; } robj = gem_to_radeon_bo(gobj); - r = radeon_bo_wait(robj, &cur_placement, true); + + r = reservation_object_test_signaled_rcu(robj->tbo.resv, true); + if (r == 0) + r = -EBUSY; + else + r = 0; + + cur_placement = ACCESS_ONCE(robj->tbo.mem.mem_type); args->domain = radeon_mem_type_to_domain(cur_placement); drm_gem_object_unreference_unlocked(gobj); - r = radeon_gem_handle_lockup(rdev, r); return r; }
This was regressed by commit 39e7f6f8, although I don't know of any actual issues caused by it.
The storage domain is read without TTM locking now, but the lock never helped to prevent any races.
Cc: stable@vger.kernel.org Signed-off-by: Grigori Goronzy greg@chown.ath.cx --- drivers/gpu/drm/radeon/radeon_gem.c | 1 + 1 file changed, 1 insertion(+)
diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c index 7199e19..013ec71 100644 --- a/drivers/gpu/drm/radeon/radeon_gem.c +++ b/drivers/gpu/drm/radeon/radeon_gem.c @@ -476,6 +476,7 @@ int radeon_gem_wait_idle_ioctl(struct drm_device *dev, void *data, r = ret;
/* Flush HDP cache via MMIO if necessary */ + cur_placement = ACCESS_ONCE(robj->tbo.mem.mem_type); if (rdev->asic->mmio_hdp_flush && radeon_mem_type_to_domain(cur_placement) == RADEON_GEM_DOMAIN_VRAM) robj->rdev->asic->mmio_hdp_flush(rdev);
Newer ASICs have more VRAM on average and allocating more GART as well can have advantages. Also see commit edcd26e8.
Ideally, we should scale GART size based on actual VRAM size, but that requires significant restructuring of initialization.
v2: extract small helper, apply to error paths
Signed-off-by: Grigori Goronzy greg@chown.ath.cx --- drivers/gpu/drm/radeon/radeon_device.c | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-)
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index a7fdfa4..14deeae 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -1080,6 +1080,22 @@ static bool radeon_check_pot_argument(int arg) }
/** + * Determine a sensible default GART size according to ASIC family. + * + * @family ASIC family name + */ +static int radeon_gart_size_auto(enum radeon_family family) +{ + /* default to a larger gart size on newer asics */ + if (family >= CHIP_TAHITI) + return 2048; + else if (family >= CHIP_RV770) + return 1024; + else + return 512; +} + +/** * radeon_check_arguments - validate module params * * @rdev: radeon_device pointer @@ -1097,27 +1113,17 @@ static void radeon_check_arguments(struct radeon_device *rdev) }
if (radeon_gart_size == -1) { - /* default to a larger gart size on newer asics */ - if (rdev->family >= CHIP_RV770) - radeon_gart_size = 1024; - else - radeon_gart_size = 512; + radeon_gart_size = radeon_gart_size_auto(rdev->family); } /* gtt size must be power of two and greater or equal to 32M */ if (radeon_gart_size < 32) { dev_warn(rdev->dev, "gart size (%d) too small\n", radeon_gart_size); - if (rdev->family >= CHIP_RV770) - radeon_gart_size = 1024; - else - radeon_gart_size = 512; + radeon_gart_size = radeon_gart_size_auto(rdev->family); } else if (!radeon_check_pot_argument(radeon_gart_size)) { dev_warn(rdev->dev, "gart size (%d) must be a power of 2\n", radeon_gart_size); - if (rdev->family >= CHIP_RV770) - radeon_gart_size = 1024; - else - radeon_gart_size = 512; + radeon_gart_size = radeon_gart_size_auto(rdev->family); } rdev->mc.gtt_size = (uint64_t)radeon_gart_size << 20;
Everything is evicted from VRAM before suspend, so we need to make sure all BOs are unpinned and re-pinned after resume. Fixes broken mouse cursor after resume introduced by commit b9729b17.
Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=100541 Cc: stable@vger.kernel.org Signed-off-by: Grigori Goronzy greg@chown.ath.cx --- drivers/gpu/drm/radeon/radeon_device.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+)
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index 14deeae..dddd5df 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -1578,6 +1578,20 @@ int radeon_suspend_kms(struct drm_device *dev, bool suspend, bool fbcon) drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF); }
+ /* unpin cursors */ + list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { + struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc); + + if (radeon_crtc->cursor_bo) { + struct radeon_bo *robj = gem_to_radeon_bo(radeon_crtc->cursor_bo); + r = radeon_bo_reserve(robj, false); + if (r == 0) { + radeon_bo_unpin(robj); + radeon_bo_unreserve(robj); + } + } + } + /* unpin the front buffers */ list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { struct radeon_framebuffer *rfb = to_radeon_framebuffer(crtc->primary->fb);
[ Dropping stable@vger.kernel.org from Cc; the patch will be picked up for stable after it lands in Linus' tree ]
On 03.07.2015 08:54, Grigori Goronzy wrote:
Everything is evicted from VRAM before suspend, so we need to make sure all BOs are unpinned and re-pinned after resume. Fixes broken mouse cursor after resume introduced by commit b9729b17.
Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=100541 Cc: stable@vger.kernel.org Signed-off-by: Grigori Goronzy greg@chown.ath.cx
drivers/gpu/drm/radeon/radeon_device.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+)
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index 14deeae..dddd5df 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -1578,6 +1578,20 @@ int radeon_suspend_kms(struct drm_device *dev, bool suspend, bool fbcon) drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF); }
- /* unpin cursors */
- list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc);
if (radeon_crtc->cursor_bo) {
struct radeon_bo *robj = gem_to_radeon_bo(radeon_crtc->cursor_bo);
r = radeon_bo_reserve(robj, false);
if (r == 0) {
radeon_bo_unpin(robj);
radeon_bo_unreserve(robj);
}
}
- }
- /* unpin the front buffers */ list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { struct radeon_framebuffer *rfb = to_radeon_framebuffer(crtc->primary->fb);
This could be done in the same loop as the front buffers.
On resume, the cursor BO is currently pinned again by radeon_cursor_reset -> radeon_set_cursor. However, radeon_cursor_reset is also called when changing the video mode, in which case it causes the cursor BO pin count to increase by 1 for each CRTC using it. Presumably, the mouse cursor would end up broken again on suspend/resume after that for you.
We need a solution which pins the BO again on resume but doesn't increase the pin count during a mode change. I'm not sure right now what the best way is to achieve that, I'll think about it more later.
On 2015-07-03 05:30, Michel Dänzer wrote:
This could be done in the same loop as the front buffers.
Sure, I just thought it looks cleaner this way.
On resume, the cursor BO is currently pinned again by radeon_cursor_reset -> radeon_set_cursor. However, radeon_cursor_reset is also called when changing the video mode, in which case it causes the cursor BO pin count to increase by 1 for each CRTC using it. Presumably, the mouse cursor would end up broken again on suspend/resume after that for you.
Indeed. It seems to be problematic overall that radeon_cursor_reset does unconditionally increment the pin count. As soon as a mode is switched with cursor enabled, the cursor BO will stay pinned forever.
We need a solution which pins the BO again on resume but doesn't increase the pin count during a mode change. I'm not sure right now what the best way is to achieve that, I'll think about it more later.
How about this:
Never let radeon_set_cursor mess with the pin count, do that in radeon_crtc_cursor_set2 only, and make sure that the reference^Wpin count is updated accordingly (i.e. exactly one pin per crtc). Then add some explicit cursor resume code that traverses the crtc list and re-pins as needed. Maybe that that nice, but should work.
Grigori
On 03.07.2015 17:16, Grigori Goronzy wrote:
On 2015-07-03 05:30, Michel Dänzer wrote:
On resume, the cursor BO is currently pinned again by radeon_cursor_reset -> radeon_set_cursor. However, radeon_cursor_reset is also called when changing the video mode, in which case it causes the cursor BO pin count to increase by 1 for each CRTC using it. Presumably, the mouse cursor would end up broken again on suspend/resume after that for you.
Indeed. It seems to be problematic overall that radeon_cursor_reset does unconditionally increment the pin count. As soon as a mode is switched with cursor enabled, the cursor BO will stay pinned forever.
Exactly, so we can't ignore that for this fix.
We need a solution which pins the BO again on resume but doesn't increase the pin count during a mode change. I'm not sure right now what the best way is to achieve that, I'll think about it more later.
How about this:
Never let radeon_set_cursor mess with the pin count, do that in radeon_crtc_cursor_set2 only, and make sure that the reference^Wpin count is updated accordingly (i.e. exactly one pin per crtc). Then add some explicit cursor resume code that traverses the crtc list and re-pins as needed. Maybe that that nice, but should work.
Sounds good. I probably won't get around to playing with this before next week, feel free to give it a try in the meantime.
On 03.07.2015 01:54, Grigori Goronzy wrote:
We don't need to call the (expensive) radeon_bo_wait, checking the fences via RCU is much faster. The reservation done by radeon_bo_wait does not save us from any race conditions.
Signed-off-by: Grigori Goronzy greg@chown.ath.cx
Patche #1-#3 are Reviewed-by: Christian König christian.koenig@amd.com
Patch #4 is a nice catch, but as Michel already noted needs more thoughts for a complete fix.
Regards, Christian.
drivers/gpu/drm/radeon/radeon_gem.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c index ac3c131..7199e19 100644 --- a/drivers/gpu/drm/radeon/radeon_gem.c +++ b/drivers/gpu/drm/radeon/radeon_gem.c @@ -428,7 +428,6 @@ int radeon_gem_mmap_ioctl(struct drm_device *dev, void *data, int radeon_gem_busy_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) {
- struct radeon_device *rdev = dev->dev_private; struct drm_radeon_gem_busy *args = data; struct drm_gem_object *gobj; struct radeon_bo *robj;
@@ -440,10 +439,16 @@ int radeon_gem_busy_ioctl(struct drm_device *dev, void *data, return -ENOENT; } robj = gem_to_radeon_bo(gobj);
- r = radeon_bo_wait(robj, &cur_placement, true);
- r = reservation_object_test_signaled_rcu(robj->tbo.resv, true);
- if (r == 0)
r = -EBUSY;
- else
r = 0;
- cur_placement = ACCESS_ONCE(robj->tbo.mem.mem_type); args->domain = radeon_mem_type_to_domain(cur_placement); drm_gem_object_unreference_unlocked(gobj);
- r = radeon_gem_handle_lockup(rdev, r); return r; }
On Fri, Jul 3, 2015 at 5:45 AM, Christian König deathsimple@vodafone.de wrote:
On 03.07.2015 01:54, Grigori Goronzy wrote:
We don't need to call the (expensive) radeon_bo_wait, checking the fences via RCU is much faster. The reservation done by radeon_bo_wait does not save us from any race conditions.
Signed-off-by: Grigori Goronzy greg@chown.ath.cx
Patche #1-#3 are Reviewed-by: Christian König christian.koenig@amd.com
Applied 1-3. thanks!
Alex
Patch #4 is a nice catch, but as Michel already noted needs more thoughts for a complete fix.
Regards, Christian.
drivers/gpu/drm/radeon/radeon_gem.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c index ac3c131..7199e19 100644 --- a/drivers/gpu/drm/radeon/radeon_gem.c +++ b/drivers/gpu/drm/radeon/radeon_gem.c @@ -428,7 +428,6 @@ int radeon_gem_mmap_ioctl(struct drm_device *dev, void *data, int radeon_gem_busy_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) {
struct radeon_device *rdev = dev->dev_private; struct drm_radeon_gem_busy *args = data; struct drm_gem_object *gobj; struct radeon_bo *robj;
@@ -440,10 +439,16 @@ int radeon_gem_busy_ioctl(struct drm_device *dev, void *data, return -ENOENT; } robj = gem_to_radeon_bo(gobj);
r = radeon_bo_wait(robj, &cur_placement, true);
r = reservation_object_test_signaled_rcu(robj->tbo.resv, true);
if (r == 0)
r = -EBUSY;
else
r = 0;
cur_placement = ACCESS_ONCE(robj->tbo.mem.mem_type); args->domain = radeon_mem_type_to_domain(cur_placement); drm_gem_object_unreference_unlocked(gobj);
}r = radeon_gem_handle_lockup(rdev, r); return r;
dri-devel mailing list dri-devel@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/dri-devel
dri-devel@lists.freedesktop.org