Since 3.17-rc1 my radeon card (RV370 / X1050 card) causes screen corruption after a while in X + firefox. This still occurs with yesterday's HEAD of Linus' repo. 3.16 and earlier kernels are fine.
I ran a bisect, which identified:
commit 72a9987edcedb89db988079a03c9b9c65b6ec9ac Author: Michel Dänzer michel.daenzer@amd.com Date: Thu Jul 31 18:43:49 2014 +0900
drm/radeon: Always flush the HDP cache before submitting a CS to the GPU
as the cause of my screen corruption. Reverting this from 3.17-rc2 (which requires manual intervention due to subsequent changes in radeon_ring_commit()) eliminates the screen corruption.
User-space is vanilla Fedora 19 / x86_64 with updates. radeon_drv.so says:
[ 62.574] (II) LoadModule: "radeon" [ 62.574] (II) Loading /usr/lib64/xorg/modules/drivers/radeon_drv.so [ 62.574] (II) Module radeon: vendor="X.Org Foundation" [ 62.574] compiled for 1.14.0, module version = 7.1.99 [ 62.574] Module class: X.Org Video Driver [ 62.574] ABI class: X.Org Video Driver, version 14.1 ... [ 62.585] (--) RADEON(0): Chipset: "ATI Radeon X550 (RV370) 5B63 (PCIE)" (ChipID = 0x5b63)
See also my original report to LKML: http://marc.info/?l=linux-kernel&m=140829066726743&w=2
/Mikael
On 30.08.2014 22:59, Mikael Pettersson wrote:
Since 3.17-rc1 my radeon card (RV370 / X1050 card) causes screen corruption after a while in X + firefox. This still occurs with yesterday's HEAD of Linus' repo. 3.16 and earlier kernels are fine.
I ran a bisect, which identified:
commit 72a9987edcedb89db988079a03c9b9c65b6ec9ac Author: Michel Dänzer michel.daenzer@amd.com Date: Thu Jul 31 18:43:49 2014 +0900
drm/radeon: Always flush the HDP cache before submitting a CS to the GPU
as the cause of my screen corruption. Reverting this from 3.17-rc2 (which requires manual intervention due to subsequent changes in radeon_ring_commit()) eliminates the screen corruption.
Does the patch below help?
diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c index 4c5ec44..3ff9c53 100644 --- a/drivers/gpu/drm/radeon/r100.c +++ b/drivers/gpu/drm/radeon/r100.c @@ -1070,6 +1070,20 @@ void r100_ring_hdp_flush(struct radeon_device *rdev, struct radeon_ring *ring) radeon_ring_write(ring, rdev->config.r100.hdp_cntl); }
+/** + * r100_mmio_hdp_flush - flush Host Data Path via MMIO + * rdev: radeon device structure + */ +void r100_mmio_hdp_flush(struct radeon_device *rdev) +{ + WREG32(RADEON_HOST_PATH_CNTL, + rdev->config.r100.hdp_cntl | RADEON_HDP_READ_BUFFER_INVALIDATE); + (void)RREG32(RADEON_HOST_PATH_CNTL); + WREG32(RADEON_HOST_PATH_CNTL, + rdev->config.r100.hdp_cntl); + (void)RREG32(RADEON_HOST_PATH_CNTL); +} + static void r100_cp_load_microcode(struct radeon_device *rdev) { const __be32 *fw_data; diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c index eeeeabe..c23a123 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.c +++ b/drivers/gpu/drm/radeon/radeon_asic.c @@ -408,7 +408,7 @@ static struct radeon_asic r300_asic_pcie = { .resume = &r300_resume, .vga_set_state = &r100_vga_set_state, .asic_reset = &r300_asic_reset, - .mmio_hdp_flush = NULL, + .mmio_hdp_flush = r100_mmio_hdp_flush, .gui_idle = &r100_gui_idle, .mc_wait_for_idle = &r300_mc_wait_for_idle, .gart = { diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h index 275a5dc..e9b1c35 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.h +++ b/drivers/gpu/drm/radeon/radeon_asic.h @@ -150,6 +150,8 @@ void r100_gfx_set_wptr(struct radeon_device *rdev, struct radeon_ring *ring); void r100_ring_hdp_flush(struct radeon_device *rdev, struct radeon_ring *ring); +void r100_mmio_hdp_flush(struct radeon_device *rdev); + /* * r200,rv250,rs300,rv280 */ diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c index bfd7e1b..3d0f564 100644 --- a/drivers/gpu/drm/radeon/radeon_gem.c +++ b/drivers/gpu/drm/radeon/radeon_gem.c @@ -368,6 +368,7 @@ int radeon_gem_wait_idle_ioctl(struct drm_device *dev, void *data, r = radeon_bo_wait(robj, &cur_placement, false); /* Flush HDP cache via MMIO if necessary */ if (rdev->asic->mmio_hdp_flush && + !rdev->asic->ring[RADEON_RING_TYPE_GFX_INDEX]->hdp_flush && radeon_mem_type_to_domain(cur_placement) == 
RADEON_GEM_DOMAIN_VRAM) robj->rdev->asic->mmio_hdp_flush(rdev); drm_gem_object_unreference_unlocked(gobj); diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c index d656079..b82843b 100644 --- a/drivers/gpu/drm/radeon/radeon_ring.c +++ b/drivers/gpu/drm/radeon/radeon_ring.c @@ -188,7 +188,8 @@ void radeon_ring_commit(struct radeon_device *rdev, struct radeon_ring *ring, /* If we are emitting the HDP flush via the ring buffer, we need to * do it before padding. */ - if (hdp_flush && rdev->asic->ring[ring->idx]->hdp_flush) + if (hdp_flush && rdev->asic->ring[ring->idx]->hdp_flush && + !rdev->asic->mmio_hdp_flush) rdev->asic->ring[ring->idx]->hdp_flush(rdev, ring); /* We pad to match fetch size */ while (ring->wptr & ring->align_mask) {
Michel Dänzer writes:
On 30.08.2014 22:59, Mikael Pettersson wrote:
Since 3.17-rc1 my radeon card (RV370 / X1050 card) causes screen corruption after a while in X + firefox. This still occurs with yesterday's HEAD of Linus' repo. 3.16 and earlier kernels are fine.
I ran a bisect, which identified:
commit 72a9987edcedb89db988079a03c9b9c65b6ec9ac Author: Michel Dänzer michel.daenzer@amd.com Date: Thu Jul 31 18:43:49 2014 +0900
drm/radeon: Always flush the HDP cache before submitting a CS to the GPU
as the cause of my screen corruption. Reverting this from 3.17-rc2 (which requires manual intervention due to subsequent changes in radeon_ring_commit()) eliminates the screen corruption.
Does the patch below help?
Thanks for the patch, I'll test it on Friday evening when I'm back home and have access to the affected machine.
diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c index 4c5ec44..3ff9c53 100644 --- a/drivers/gpu/drm/radeon/r100.c +++ b/drivers/gpu/drm/radeon/r100.c @@ -1070,6 +1070,20 @@ void r100_ring_hdp_flush(struct radeon_device *rdev, struct radeon_ring *ring) radeon_ring_write(ring, rdev->config.r100.hdp_cntl); }
+/**
+ * r100_mmio_hdp_flush - flush Host Data Path via MMIO
+ * rdev: radeon device structure
+ */
+void r100_mmio_hdp_flush(struct radeon_device *rdev)
+{
+	WREG32(RADEON_HOST_PATH_CNTL,
+	       rdev->config.r100.hdp_cntl | RADEON_HDP_READ_BUFFER_INVALIDATE);
+	(void)RREG32(RADEON_HOST_PATH_CNTL);
+	WREG32(RADEON_HOST_PATH_CNTL,
+	       rdev->config.r100.hdp_cntl);
+	(void)RREG32(RADEON_HOST_PATH_CNTL);
+}
+
static void r100_cp_load_microcode(struct radeon_device *rdev) { const __be32 *fw_data; diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c index eeeeabe..c23a123 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.c +++ b/drivers/gpu/drm/radeon/radeon_asic.c @@ -408,7 +408,7 @@ static struct radeon_asic r300_asic_pcie = { .resume = &r300_resume, .vga_set_state = &r100_vga_set_state, .asic_reset = &r300_asic_reset,
- .mmio_hdp_flush = NULL,
+ .mmio_hdp_flush = r100_mmio_hdp_flush, .gui_idle = &r100_gui_idle, .mc_wait_for_idle = &r300_mc_wait_for_idle, .gart = {
diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h index 275a5dc..e9b1c35 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.h +++ b/drivers/gpu/drm/radeon/radeon_asic.h @@ -150,6 +150,8 @@ void r100_gfx_set_wptr(struct radeon_device *rdev, struct radeon_ring *ring); void r100_ring_hdp_flush(struct radeon_device *rdev, struct radeon_ring *ring); +void r100_mmio_hdp_flush(struct radeon_device *rdev);
/*
 * r200,rv250,rs300,rv280
*/ diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c index bfd7e1b..3d0f564 100644 --- a/drivers/gpu/drm/radeon/radeon_gem.c +++ b/drivers/gpu/drm/radeon/radeon_gem.c @@ -368,6 +368,7 @@ int radeon_gem_wait_idle_ioctl(struct drm_device *dev, void *data, r = radeon_bo_wait(robj, &cur_placement, false); /* Flush HDP cache via MMIO if necessary */ if (rdev->asic->mmio_hdp_flush &&
+	    !rdev->asic->ring[RADEON_RING_TYPE_GFX_INDEX]->hdp_flush &&
 	    radeon_mem_type_to_domain(cur_placement) == RADEON_GEM_DOMAIN_VRAM)
 		robj->rdev->asic->mmio_hdp_flush(rdev);
 	drm_gem_object_unreference_unlocked(gobj);
diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c index d656079..b82843b 100644 --- a/drivers/gpu/drm/radeon/radeon_ring.c +++ b/drivers/gpu/drm/radeon/radeon_ring.c @@ -188,7 +188,8 @@ void radeon_ring_commit(struct radeon_device *rdev, struct radeon_ring *ring, /* If we are emitting the HDP flush via the ring buffer, we need to * do it before padding. */
- if (hdp_flush && rdev->asic->ring[ring->idx]->hdp_flush)
+	if (hdp_flush && rdev->asic->ring[ring->idx]->hdp_flush &&
+	    !rdev->asic->mmio_hdp_flush)
 		rdev->asic->ring[ring->idx]->hdp_flush(rdev, ring);
 	/* We pad to match fetch size */
 	while (ring->wptr & ring->align_mask) {
-- Earthling Michel Dänzer | http://www.amd.com Libre software enthusiast | Mesa and X developer
--
Michel Dänzer writes:
On 30.08.2014 22:59, Mikael Pettersson wrote:
Since 3.17-rc1 my radeon card (RV370 / X1050 card) causes screen corruption after a while in X + firefox. This still occurs with yesterday's HEAD of Linus' repo. 3.16 and earlier kernels are fine.
I ran a bisect, which identified:
commit 72a9987edcedb89db988079a03c9b9c65b6ec9ac Author: Michel Dänzer michel.daenzer@amd.com Date: Thu Jul 31 18:43:49 2014 +0900
drm/radeon: Always flush the HDP cache before submitting a CS to the GPU
as the cause of my screen corruption. Reverting this from 3.17-rc2 (which requires manual intervention due to subsequent changes in radeon_ring_commit()) eliminates the screen corruption.
Does the patch below help?
Tested, sorry no joy. I first reconfirmed the screen corruption with 3.17-rc3. I then applied this and rebuilt/rebooted, and after a few minutes X had a hiccup (screen went black, came back after a few seconds, but then no cursor or reaction to mouse events), but I was able to kill it via my Terminate_Server key binding. The kernel log showed:
[ 1641.247760] radeon 0000:01:00.0: ring 0 stalled for more than 10000msec [ 1641.247765] radeon 0000:01:00.0: GPU lockup (waiting for 0x0000000000006241 last fence id 0x0000000000006240 on ring 0) [ 1641.247768] radeon 0000:01:00.0: failed to get a new IB (-35) [ 1641.247770] [drm:radeon_cs_ib_fill] *ERROR* Failed to get ib ! [ 1641.404052] Failed to wait GUI idle while programming pipes. Bad things might happen. [ 1641.405075] radeon 0000:01:00.0: Saved 859 dwords of commands on ring 0. [ 1641.405084] radeon 0000:01:00.0: (r300_asic_reset:394) RBBM_STATUS=0x80010140 [ 1641.910649] radeon 0000:01:00.0: (r300_asic_reset:413) RBBM_STATUS=0x80010140 [ 1642.412182] radeon 0000:01:00.0: (r300_asic_reset:425) RBBM_STATUS=0x00000140 [ 1642.412218] radeon 0000:01:00.0: GPU reset succeed [ 1642.412220] radeon 0000:01:00.0: GPU reset succeeded, trying to resume [ 1642.412224] radeon 0000:01:00.0: ffff88060274f800 unpin not necessary [ 1642.626303] [drm] radeon: 1 quad pipes, 1 Z pipes initialized. [ 1642.626325] [drm] PCIE GART of 512M enabled (table at 0x00000000E0040000). [ 1642.626328] radeon 0000:01:00.0: WB enabled [ 1642.626331] radeon 0000:01:00.0: fence driver on ring 0 use gpu addr 0x00000000c0000000 and cpu addr 0xffff8800d9b9f000 [ 1642.626375] [drm] radeon: ring at 0x00000000C0001000 [ 1642.783220] [drm:r100_ring_test] *ERROR* radeon: ring test failed (scratch(0x15E8)=0xCAFEDEAD) [ 1642.783222] [drm:r100_cp_init] *ERROR* radeon: cp isn't working (-22). [ 1642.783224] radeon 0000:01:00.0: failed initializing CP (-22).
With a revert of the HDP flush patch things are stable.
/Mikael
diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c index 4c5ec44..3ff9c53 100644 --- a/drivers/gpu/drm/radeon/r100.c +++ b/drivers/gpu/drm/radeon/r100.c @@ -1070,6 +1070,20 @@ void r100_ring_hdp_flush(struct radeon_device *rdev, struct radeon_ring *ring) radeon_ring_write(ring, rdev->config.r100.hdp_cntl); }
+/**
+ * r100_mmio_hdp_flush - flush Host Data Path via MMIO
+ * rdev: radeon device structure
+ */
+void r100_mmio_hdp_flush(struct radeon_device *rdev)
+{
+	WREG32(RADEON_HOST_PATH_CNTL,
+	       rdev->config.r100.hdp_cntl | RADEON_HDP_READ_BUFFER_INVALIDATE);
+	(void)RREG32(RADEON_HOST_PATH_CNTL);
+	WREG32(RADEON_HOST_PATH_CNTL,
+	       rdev->config.r100.hdp_cntl);
+	(void)RREG32(RADEON_HOST_PATH_CNTL);
+}
+
static void r100_cp_load_microcode(struct radeon_device *rdev) { const __be32 *fw_data; diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c index eeeeabe..c23a123 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.c +++ b/drivers/gpu/drm/radeon/radeon_asic.c @@ -408,7 +408,7 @@ static struct radeon_asic r300_asic_pcie = { .resume = &r300_resume, .vga_set_state = &r100_vga_set_state, .asic_reset = &r300_asic_reset,
- .mmio_hdp_flush = NULL,
+ .mmio_hdp_flush = r100_mmio_hdp_flush, .gui_idle = &r100_gui_idle, .mc_wait_for_idle = &r300_mc_wait_for_idle, .gart = {
diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h index 275a5dc..e9b1c35 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.h +++ b/drivers/gpu/drm/radeon/radeon_asic.h @@ -150,6 +150,8 @@ void r100_gfx_set_wptr(struct radeon_device *rdev, struct radeon_ring *ring); void r100_ring_hdp_flush(struct radeon_device *rdev, struct radeon_ring *ring); +void r100_mmio_hdp_flush(struct radeon_device *rdev);
/*
 * r200,rv250,rs300,rv280
*/ diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c index bfd7e1b..3d0f564 100644 --- a/drivers/gpu/drm/radeon/radeon_gem.c +++ b/drivers/gpu/drm/radeon/radeon_gem.c @@ -368,6 +368,7 @@ int radeon_gem_wait_idle_ioctl(struct drm_device *dev, void *data, r = radeon_bo_wait(robj, &cur_placement, false); /* Flush HDP cache via MMIO if necessary */ if (rdev->asic->mmio_hdp_flush &&
+	    !rdev->asic->ring[RADEON_RING_TYPE_GFX_INDEX]->hdp_flush &&
 	    radeon_mem_type_to_domain(cur_placement) == RADEON_GEM_DOMAIN_VRAM)
 		robj->rdev->asic->mmio_hdp_flush(rdev);
 	drm_gem_object_unreference_unlocked(gobj);
diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c index d656079..b82843b 100644 --- a/drivers/gpu/drm/radeon/radeon_ring.c +++ b/drivers/gpu/drm/radeon/radeon_ring.c @@ -188,7 +188,8 @@ void radeon_ring_commit(struct radeon_device *rdev, struct radeon_ring *ring, /* If we are emitting the HDP flush via the ring buffer, we need to * do it before padding. */
- if (hdp_flush && rdev->asic->ring[ring->idx]->hdp_flush)
+	if (hdp_flush && rdev->asic->ring[ring->idx]->hdp_flush &&
+	    !rdev->asic->mmio_hdp_flush)
 		rdev->asic->ring[ring->idx]->hdp_flush(rdev, ring);
 	/* We pad to match fetch size */
 	while (ring->wptr & ring->align_mask) {
-- Earthling Michel Dänzer | http://www.amd.com Libre software enthusiast | Mesa and X developer
--
On 06.09.2014 01:49, Mikael Pettersson wrote:
Michel Dänzer writes:
On 30.08.2014 22:59, Mikael Pettersson wrote:
Since 3.17-rc1 my radeon card (RV370 / X1050 card) causes screen corruption after a while in X + firefox. This still occurs with yesterday's HEAD of Linus' repo. 3.16 and earlier kernels are fine.
I ran a bisect, which identified:
commit 72a9987edcedb89db988079a03c9b9c65b6ec9ac Author: Michel Dänzer michel.daenzer@amd.com Date: Thu Jul 31 18:43:49 2014 +0900
drm/radeon: Always flush the HDP cache before submitting a CS to the GPU
as the cause of my screen corruption. Reverting this from 3.17-rc2 (which requires manual intervention due to subsequent changes in radeon_ring_commit()) eliminates the screen corruption.
Does the patch below help?
Tested, sorry no joy. I first reconfirmed the screen corruption with 3.17-rc3. I then applied this and rebuilt/rebooted, and after a few minutes X had a hiccup (screen went black, came back after a few seconds, but then no cursor or reaction to mouse events), but I was able to kill it via my Terminate_Server key binding.
I was afraid so, thanks for testing it.
I can't see any other option than the patch below then. Can you confirm that this fixes the screen corruption?
diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c index 4c5ec44..b0098e7 100644 --- a/drivers/gpu/drm/radeon/r100.c +++ b/drivers/gpu/drm/radeon/r100.c @@ -821,6 +821,20 @@ u32 r100_get_vblank_counter(struct radeon_device *rdev, int crtc) return RREG32(RADEON_CRTC2_CRNT_FRAME); }
+/** + * r100_ring_hdp_flush - flush Host Data Path via the ring buffer + * rdev: radeon device structure + * ring: ring buffer struct for emitting packets + */ +static void r100_ring_hdp_flush(struct radeon_device *rdev, struct radeon_ring *ring) +{ + radeon_ring_write(ring, PACKET0(RADEON_HOST_PATH_CNTL, 0)); + radeon_ring_write(ring, rdev->config.r100.hdp_cntl | + RADEON_HDP_READ_BUFFER_INVALIDATE); + radeon_ring_write(ring, PACKET0(RADEON_HOST_PATH_CNTL, 0)); + radeon_ring_write(ring, rdev->config.r100.hdp_cntl); +} + /* Who ever call radeon_fence_emit should call ring_lock and ask * for enough space (today caller are ib schedule and buffer move) */ void r100_fence_ring_emit(struct radeon_device *rdev, @@ -1056,20 +1070,6 @@ void r100_gfx_set_wptr(struct radeon_device *rdev, (void)RREG32(RADEON_CP_RB_WPTR); }
-/** - * r100_ring_hdp_flush - flush Host Data Path via the ring buffer - * rdev: radeon device structure - * ring: ring buffer struct for emitting packets - */ -void r100_ring_hdp_flush(struct radeon_device *rdev, struct radeon_ring *ring) -{ - radeon_ring_write(ring, PACKET0(RADEON_HOST_PATH_CNTL, 0)); - radeon_ring_write(ring, rdev->config.r100.hdp_cntl | - RADEON_HDP_READ_BUFFER_INVALIDATE); - radeon_ring_write(ring, PACKET0(RADEON_HOST_PATH_CNTL, 0)); - radeon_ring_write(ring, rdev->config.r100.hdp_cntl); -} - static void r100_cp_load_microcode(struct radeon_device *rdev) { const __be32 *fw_data; diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c index eeeeabe..2dd5847 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.c +++ b/drivers/gpu/drm/radeon/radeon_asic.c @@ -185,7 +185,6 @@ static struct radeon_asic_ring r100_gfx_ring = { .get_rptr = &r100_gfx_get_rptr, .get_wptr = &r100_gfx_get_wptr, .set_wptr = &r100_gfx_set_wptr, - .hdp_flush = &r100_ring_hdp_flush, };
static struct radeon_asic r100_asic = { @@ -332,7 +331,6 @@ static struct radeon_asic_ring r300_gfx_ring = { .get_rptr = &r100_gfx_get_rptr, .get_wptr = &r100_gfx_get_wptr, .set_wptr = &r100_gfx_set_wptr, - .hdp_flush = &r100_ring_hdp_flush, };
static struct radeon_asic r300_asic = { diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h index 275a5dc..7756bc1 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.h +++ b/drivers/gpu/drm/radeon/radeon_asic.h @@ -148,8 +148,7 @@ u32 r100_gfx_get_wptr(struct radeon_device *rdev, struct radeon_ring *ring); void r100_gfx_set_wptr(struct radeon_device *rdev, struct radeon_ring *ring); -void r100_ring_hdp_flush(struct radeon_device *rdev, - struct radeon_ring *ring); + /* * r200,rv250,rs300,rv280 */ diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c index a773830..ef5b60a 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.c +++ b/drivers/gpu/drm/radeon/radeon_drv.c @@ -83,7 +83,7 @@ * CIK: 1D and linear tiling modes contain valid PIPE_CONFIG * 2.39.0 - Add INFO query for number of active CUs * 2.40.0 - Add RADEON_GEM_GTT_WC/UC, flush HDP cache before submitting - * CS to GPU + * CS to GPU on >= r600 */ #define KMS_DRIVER_MAJOR 2 #define KMS_DRIVER_MINOR 40
Michel Dänzer writes:
On 06.09.2014 01:49, Mikael Pettersson wrote:
Michel Dänzer writes:
On 30.08.2014 22:59, Mikael Pettersson wrote:
Since 3.17-rc1 my radeon card (RV370 / X1050 card) causes screen corruption after a while in X + firefox. This still occurs with yesterday's HEAD of Linus' repo. 3.16 and earlier kernels are fine.
I ran a bisect, which identified:
commit 72a9987edcedb89db988079a03c9b9c65b6ec9ac Author: Michel Dänzer michel.daenzer@amd.com Date: Thu Jul 31 18:43:49 2014 +0900
drm/radeon: Always flush the HDP cache before submitting a CS to the GPU
as the cause of my screen corruption. Reverting this from 3.17-rc2 (which requires manual intervention due to subsequent changes in radeon_ring_commit()) eliminates the screen corruption.
Does the patch below help?
Tested, sorry no joy. I first reconfirmed the screen corruption with 3.17-rc3. I then applied this and rebuilt/rebooted, and after a few minutes X had a hiccup (screen went black, came back after a few seconds, but then no cursor or reaction to mouse events), but I was able to kill it via my Terminate_Server key binding.
I was afraid so, thanks for testing it.
I can't see any other option than the patch below then. Can you confirm that this fixes the screen corruption?
I'll test this on Friday evening when I'm back home and have access to the affected machine.
/Mikael
diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c index 4c5ec44..b0098e7 100644 --- a/drivers/gpu/drm/radeon/r100.c +++ b/drivers/gpu/drm/radeon/r100.c @@ -821,6 +821,20 @@ u32 r100_get_vblank_counter(struct radeon_device *rdev, int crtc) return RREG32(RADEON_CRTC2_CRNT_FRAME); }
+/**
+ * r100_ring_hdp_flush - flush Host Data Path via the ring buffer
+ * rdev: radeon device structure
+ * ring: ring buffer struct for emitting packets
+ */
+static void r100_ring_hdp_flush(struct radeon_device *rdev, struct radeon_ring *ring)
+{
+	radeon_ring_write(ring, PACKET0(RADEON_HOST_PATH_CNTL, 0));
+	radeon_ring_write(ring, rdev->config.r100.hdp_cntl |
+				RADEON_HDP_READ_BUFFER_INVALIDATE);
+	radeon_ring_write(ring, PACKET0(RADEON_HOST_PATH_CNTL, 0));
+	radeon_ring_write(ring, rdev->config.r100.hdp_cntl);
+}
+
/* Who ever call radeon_fence_emit should call ring_lock and ask
 * for enough space (today caller are ib schedule and buffer move) */
void r100_fence_ring_emit(struct radeon_device *rdev, @@ -1056,20 +1070,6 @@ void r100_gfx_set_wptr(struct radeon_device *rdev, (void)RREG32(RADEON_CP_RB_WPTR); }
-/**
- * r100_ring_hdp_flush - flush Host Data Path via the ring buffer
- * rdev: radeon device structure
- * ring: ring buffer struct for emitting packets
- */
-void r100_ring_hdp_flush(struct radeon_device *rdev, struct radeon_ring *ring)
-{
-	radeon_ring_write(ring, PACKET0(RADEON_HOST_PATH_CNTL, 0));
-	radeon_ring_write(ring, rdev->config.r100.hdp_cntl |
-				RADEON_HDP_READ_BUFFER_INVALIDATE);
-	radeon_ring_write(ring, PACKET0(RADEON_HOST_PATH_CNTL, 0));
-	radeon_ring_write(ring, rdev->config.r100.hdp_cntl);
-}
-
static void r100_cp_load_microcode(struct radeon_device *rdev) { const __be32 *fw_data; diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c index eeeeabe..2dd5847 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.c +++ b/drivers/gpu/drm/radeon/radeon_asic.c @@ -185,7 +185,6 @@ static struct radeon_asic_ring r100_gfx_ring = { .get_rptr = &r100_gfx_get_rptr, .get_wptr = &r100_gfx_get_wptr, .set_wptr = &r100_gfx_set_wptr,
- .hdp_flush = &r100_ring_hdp_flush,
};
static struct radeon_asic r100_asic = { @@ -332,7 +331,6 @@ static struct radeon_asic_ring r300_gfx_ring = { .get_rptr = &r100_gfx_get_rptr, .get_wptr = &r100_gfx_get_wptr, .set_wptr = &r100_gfx_set_wptr,
- .hdp_flush = &r100_ring_hdp_flush,
};
static struct radeon_asic r300_asic = { diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h index 275a5dc..7756bc1 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.h +++ b/drivers/gpu/drm/radeon/radeon_asic.h @@ -148,8 +148,7 @@ u32 r100_gfx_get_wptr(struct radeon_device *rdev, struct radeon_ring *ring); void r100_gfx_set_wptr(struct radeon_device *rdev, struct radeon_ring *ring); -void r100_ring_hdp_flush(struct radeon_device *rdev,
-			 struct radeon_ring *ring);
+
 /*
  * r200,rv250,rs300,rv280
*/ diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c index a773830..ef5b60a 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.c +++ b/drivers/gpu/drm/radeon/radeon_drv.c @@ -83,7 +83,7 @@
  *            CIK: 1D and linear tiling modes contain valid PIPE_CONFIG
  *   2.39.0 - Add INFO query for number of active CUs
  *   2.40.0 - Add RADEON_GEM_GTT_WC/UC, flush HDP cache before submitting
- *            CS to GPU
+ *            CS to GPU on >= r600
  */
#define KMS_DRIVER_MAJOR 2 #define KMS_DRIVER_MINOR 40
-- Earthling Michel Dänzer | http://www.amd.com Libre software enthusiast | Mesa and X developer
--
Michel Dänzer writes:
On 06.09.2014 01:49, Mikael Pettersson wrote:
Michel Dänzer writes:
On 30.08.2014 22:59, Mikael Pettersson wrote:
Since 3.17-rc1 my radeon card (RV370 / X1050 card) causes screen corruption after a while in X + firefox. This still occurs with yesterday's HEAD of Linus' repo. 3.16 and earlier kernels are fine.
I ran a bisect, which identified:
commit 72a9987edcedb89db988079a03c9b9c65b6ec9ac Author: Michel Dänzer michel.daenzer@amd.com Date: Thu Jul 31 18:43:49 2014 +0900
drm/radeon: Always flush the HDP cache before submitting a CS to the GPU
as the cause of my screen corruption. Reverting this from 3.17-rc2 (which requires manual intervention due to subsequent changes in radeon_ring_commit()) eliminates the screen corruption.
Does the patch below help?
Tested, sorry no joy. I first reconfirmed the screen corruption with 3.17-rc3. I then applied this and rebuilt/rebooted, and after a few minutes X had a hiccup (screen went black, came back after a few seconds, but then no cursor or reaction to mouse events), but I was able to kill it via my Terminate_Server key binding.
I was afraid so, thanks for testing it.
I can't see any other option than the patch below then. Can you confirm that this fixes the screen corruption?
It does, thanks.
Tested-by: Mikael Pettersson mikpelinux@gmail.com
diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c index 4c5ec44..b0098e7 100644 --- a/drivers/gpu/drm/radeon/r100.c +++ b/drivers/gpu/drm/radeon/r100.c @@ -821,6 +821,20 @@ u32 r100_get_vblank_counter(struct radeon_device *rdev, int crtc) return RREG32(RADEON_CRTC2_CRNT_FRAME); }
+/**
+ * r100_ring_hdp_flush - flush Host Data Path via the ring buffer
+ * rdev: radeon device structure
+ * ring: ring buffer struct for emitting packets
+ */
+static void r100_ring_hdp_flush(struct radeon_device *rdev, struct radeon_ring *ring)
+{
+	radeon_ring_write(ring, PACKET0(RADEON_HOST_PATH_CNTL, 0));
+	radeon_ring_write(ring, rdev->config.r100.hdp_cntl |
+				RADEON_HDP_READ_BUFFER_INVALIDATE);
+	radeon_ring_write(ring, PACKET0(RADEON_HOST_PATH_CNTL, 0));
+	radeon_ring_write(ring, rdev->config.r100.hdp_cntl);
+}
+
/* Who ever call radeon_fence_emit should call ring_lock and ask
 * for enough space (today caller are ib schedule and buffer move) */
void r100_fence_ring_emit(struct radeon_device *rdev, @@ -1056,20 +1070,6 @@ void r100_gfx_set_wptr(struct radeon_device *rdev, (void)RREG32(RADEON_CP_RB_WPTR); }
-/**
- * r100_ring_hdp_flush - flush Host Data Path via the ring buffer
- * rdev: radeon device structure
- * ring: ring buffer struct for emitting packets
- */
-void r100_ring_hdp_flush(struct radeon_device *rdev, struct radeon_ring *ring)
-{
-	radeon_ring_write(ring, PACKET0(RADEON_HOST_PATH_CNTL, 0));
-	radeon_ring_write(ring, rdev->config.r100.hdp_cntl |
-				RADEON_HDP_READ_BUFFER_INVALIDATE);
-	radeon_ring_write(ring, PACKET0(RADEON_HOST_PATH_CNTL, 0));
-	radeon_ring_write(ring, rdev->config.r100.hdp_cntl);
-}
-
static void r100_cp_load_microcode(struct radeon_device *rdev) { const __be32 *fw_data; diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c index eeeeabe..2dd5847 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.c +++ b/drivers/gpu/drm/radeon/radeon_asic.c @@ -185,7 +185,6 @@ static struct radeon_asic_ring r100_gfx_ring = { .get_rptr = &r100_gfx_get_rptr, .get_wptr = &r100_gfx_get_wptr, .set_wptr = &r100_gfx_set_wptr,
- .hdp_flush = &r100_ring_hdp_flush,
};
static struct radeon_asic r100_asic = { @@ -332,7 +331,6 @@ static struct radeon_asic_ring r300_gfx_ring = { .get_rptr = &r100_gfx_get_rptr, .get_wptr = &r100_gfx_get_wptr, .set_wptr = &r100_gfx_set_wptr,
- .hdp_flush = &r100_ring_hdp_flush,
};
static struct radeon_asic r300_asic = { diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h index 275a5dc..7756bc1 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.h +++ b/drivers/gpu/drm/radeon/radeon_asic.h @@ -148,8 +148,7 @@ u32 r100_gfx_get_wptr(struct radeon_device *rdev, struct radeon_ring *ring); void r100_gfx_set_wptr(struct radeon_device *rdev, struct radeon_ring *ring); -void r100_ring_hdp_flush(struct radeon_device *rdev,
-			 struct radeon_ring *ring);
+
 /*
  * r200,rv250,rs300,rv280
*/ diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c index a773830..ef5b60a 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.c +++ b/drivers/gpu/drm/radeon/radeon_drv.c @@ -83,7 +83,7 @@
  *            CIK: 1D and linear tiling modes contain valid PIPE_CONFIG
  *   2.39.0 - Add INFO query for number of active CUs
  *   2.40.0 - Add RADEON_GEM_GTT_WC/UC, flush HDP cache before submitting
- *            CS to GPU
+ *            CS to GPU on >= r600
  */
#define KMS_DRIVER_MAJOR 2 #define KMS_DRIVER_MINOR 40
-- Earthling Michel Dänzer | http://www.amd.com Libre software enthusiast | Mesa and X developer
--
dri-devel@lists.freedesktop.org