We need to set the engine bit to select the ME and also set the full cache bit. Should help stability on TN and cayman.
Signed-off-by: Alex Deucher alexander.deucher@amd.com Cc: stable@vger.kernel.org --- drivers/gpu/drm/radeon/ni.c | 7 +++---- drivers/gpu/drm/radeon/nid.h | 1 + 2 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c index 9f11a55..04e9516 100644 --- a/drivers/gpu/drm/radeon/ni.c +++ b/drivers/gpu/drm/radeon/ni.c @@ -1319,13 +1319,12 @@ void cayman_fence_ring_emit(struct radeon_device *rdev, { struct radeon_ring *ring = &rdev->ring[fence->ring]; u64 addr = rdev->fence_drv[fence->ring].gpu_addr; + u32 cp_coher_cntl = PACKET3_FULL_CACHE_ENA | PACKET3_TC_ACTION_ENA | + PACKET3_SH_ACTION_ENA;
/* flush read cache over gart for this vmid */ - radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); - radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2); - radeon_ring_write(ring, 0); radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3)); - radeon_ring_write(ring, PACKET3_TC_ACTION_ENA | PACKET3_SH_ACTION_ENA); + radeon_ring_write(ring, PACKET3_ENGINE_ME | cp_coher_cntl); radeon_ring_write(ring, 0xFFFFFFFF); radeon_ring_write(ring, 0); radeon_ring_write(ring, 10); /* poll interval */ diff --git a/drivers/gpu/drm/radeon/nid.h b/drivers/gpu/drm/radeon/nid.h index 22421bc..d996033 100644 --- a/drivers/gpu/drm/radeon/nid.h +++ b/drivers/gpu/drm/radeon/nid.h @@ -1154,6 +1154,7 @@ # define PACKET3_DB_ACTION_ENA (1 << 26) # define PACKET3_SH_ACTION_ENA (1 << 27) # define PACKET3_SX_ACTION_ENA (1 << 28) +# define PACKET3_ENGINE_ME (1 << 31) #define PACKET3_ME_INITIALIZE 0x44 #define PACKET3_ME_INITIALIZE_DEVICE_ID(x) ((x) << 16) #define PACKET3_COND_WRITE 0x45
Needed to properly flush the read caches for fences.
Signed-off-by: Alex Deucher alexander.deucher@amd.com Cc: stable@vger.kernel.org --- drivers/gpu/drm/radeon/r600.c | 13 +++++++------ drivers/gpu/drm/radeon/r600d.h | 1 + 2 files changed, 8 insertions(+), 6 deletions(-)
diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index ad99bae..3dce370 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -2706,14 +2706,17 @@ void r600_fence_ring_emit(struct radeon_device *rdev, struct radeon_fence *fence) { struct radeon_ring *ring = &rdev->ring[fence->ring]; + u32 cp_coher_cntl = PACKET3_TC_ACTION_ENA | PACKET3_VC_ACTION_ENA | + PACKET3_SH_ACTION_ENA; + + if (rdev->family >= CHIP_RV770) + cp_coher_cntl |= PACKET3_FULL_CACHE_ENA;
if (rdev->wb.use_event) { u64 addr = rdev->fence_drv[fence->ring].gpu_addr; /* flush read cache over gart */ radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3)); - radeon_ring_write(ring, PACKET3_TC_ACTION_ENA | - PACKET3_VC_ACTION_ENA | - PACKET3_SH_ACTION_ENA); + radeon_ring_write(ring, cp_coher_cntl); radeon_ring_write(ring, 0xFFFFFFFF); radeon_ring_write(ring, 0); radeon_ring_write(ring, 10); /* poll interval */ @@ -2727,9 +2730,7 @@ void r600_fence_ring_emit(struct radeon_device *rdev, } else { /* flush read cache over gart */ radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3)); - radeon_ring_write(ring, PACKET3_TC_ACTION_ENA | - PACKET3_VC_ACTION_ENA | - PACKET3_SH_ACTION_ENA); + radeon_ring_write(ring, cp_coher_cntl); radeon_ring_write(ring, 0xFFFFFFFF); radeon_ring_write(ring, 0); radeon_ring_write(ring, 10); /* poll interval */ diff --git a/drivers/gpu/drm/radeon/r600d.h b/drivers/gpu/drm/radeon/r600d.h index 3fca4b9..37455f6 100644 --- a/drivers/gpu/drm/radeon/r600d.h +++ b/drivers/gpu/drm/radeon/r600d.h @@ -1582,6 +1582,7 @@ # define PACKET3_CP_DMA_CMD_DAIC (1 << 29) #define PACKET3_SURFACE_SYNC 0x43 # define PACKET3_CB0_DEST_BASE_ENA (1 << 6) +# define PACKET3_FULL_CACHE_ENA (1 << 20) /* r7xx+ only */ # define PACKET3_TC_ACTION_ENA (1 << 23) # define PACKET3_VC_ACTION_ENA (1 << 24) # define PACKET3_CB_ACTION_ENA (1 << 25)
On Thu, Jan 16, 2014 at 06:14:58PM -0500, Alex Deucher wrote:
We need to set the engine bit to select the ME and also set the full cache bit. Should help stability on TN and cayman.
Signed-off-by: Alex Deucher alexander.deucher@amd.com Cc: stable@vger.kernel.org
drivers/gpu/drm/radeon/ni.c | 7 +++---- drivers/gpu/drm/radeon/nid.h | 1 + 2 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c index 9f11a55..04e9516 100644 --- a/drivers/gpu/drm/radeon/ni.c +++ b/drivers/gpu/drm/radeon/ni.c @@ -1319,13 +1319,12 @@ void cayman_fence_ring_emit(struct radeon_device *rdev, { struct radeon_ring *ring = &rdev->ring[fence->ring]; u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
u32 cp_coher_cntl = PACKET3_FULL_CACHE_ENA | PACKET3_TC_ACTION_ENA |
PACKET3_SH_ACTION_ENA;
/* flush read cache over gart for this vmid */
- radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
- radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
- radeon_ring_write(ring, 0); radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
- radeon_ring_write(ring, PACKET3_TC_ACTION_ENA | PACKET3_SH_ACTION_ENA);
- radeon_ring_write(ring, PACKET3_ENGINE_ME | cp_coher_cntl); radeon_ring_write(ring, 0xFFFFFFFF); radeon_ring_write(ring, 0); radeon_ring_write(ring, 10); /* poll interval */
Doesn't this also need to be changed in cayman_ring_ib_execute() ?
diff --git a/drivers/gpu/drm/radeon/nid.h b/drivers/gpu/drm/radeon/nid.h index 22421bc..d996033 100644 --- a/drivers/gpu/drm/radeon/nid.h +++ b/drivers/gpu/drm/radeon/nid.h @@ -1154,6 +1154,7 @@ # define PACKET3_DB_ACTION_ENA (1 << 26) # define PACKET3_SH_ACTION_ENA (1 << 27) # define PACKET3_SX_ACTION_ENA (1 << 28) +# define PACKET3_ENGINE_ME (1 << 31)
Technically (1 << 31) is undefined. It should be (1U << 31).
-Tom
#define PACKET3_ME_INITIALIZE 0x44 #define PACKET3_ME_INITIALIZE_DEVICE_ID(x) ((x) << 16)
#define PACKET3_COND_WRITE 0x45
1.8.3.1
dri-devel mailing list dri-devel@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/dri-devel
-----Original Message----- From: Tom Stellard [mailto:tom@stellard.net] Sent: Thursday, January 16, 2014 6:25 PM To: Alex Deucher Cc: dri-devel@lists.freedesktop.org; Deucher, Alexander; stable@vger.kernel.org Subject: Re: [PATCH 1/2] drm/radeon: fix surface sync in fence on cayman
On Thu, Jan 16, 2014 at 06:14:58PM -0500, Alex Deucher wrote:
We need to set the engine bit to select the ME and also set the full cache bit. Should help stability on TN and cayman.
Signed-off-by: Alex Deucher alexander.deucher@amd.com Cc: stable@vger.kernel.org
drivers/gpu/drm/radeon/ni.c | 7 +++---- drivers/gpu/drm/radeon/nid.h | 1 + 2 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c index 9f11a55..04e9516 100644 --- a/drivers/gpu/drm/radeon/ni.c +++ b/drivers/gpu/drm/radeon/ni.c @@ -1319,13 +1319,12 @@ void cayman_fence_ring_emit(struct
radeon_device *rdev,
{ struct radeon_ring *ring = &rdev->ring[fence->ring]; u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
- u32 cp_coher_cntl = PACKET3_FULL_CACHE_ENA |
PACKET3_TC_ACTION_ENA |
PACKET3_SH_ACTION_ENA;
/* flush read cache over gart for this vmid */
- radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
- radeon_ring_write(ring, (CP_COHER_CNTL2 -
PACKET3_SET_CONFIG_REG_START) >> 2);
- radeon_ring_write(ring, 0); radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
- radeon_ring_write(ring, PACKET3_TC_ACTION_ENA |
PACKET3_SH_ACTION_ENA);
- radeon_ring_write(ring, PACKET3_ENGINE_ME | cp_coher_cntl); radeon_ring_write(ring, 0xFFFFFFFF); radeon_ring_write(ring, 0); radeon_ring_write(ring, 10); /* poll interval */
Doesn't this also need to be changed in cayman_ring_ib_execute() ?
Oh, yeah. That also emits a surface sync packet. I'm not sure why we need one there, but I'll fix that up too.
diff --git a/drivers/gpu/drm/radeon/nid.h
b/drivers/gpu/drm/radeon/nid.h
index 22421bc..d996033 100644 --- a/drivers/gpu/drm/radeon/nid.h +++ b/drivers/gpu/drm/radeon/nid.h @@ -1154,6 +1154,7 @@ # define PACKET3_DB_ACTION_ENA (1 << 26) # define PACKET3_SH_ACTION_ENA (1 << 27) # define PACKET3_SX_ACTION_ENA (1 << 28) +# define PACKET3_ENGINE_ME (1 << 31)
Technically (1 << 31) is undefined. It should be (1U << 31).
Hmmm... a lot of drivers do this pretty regularly. I guess gcc does the right thing?
Alex
-Tom
#define PACKET3_ME_INITIALIZE 0x44 #define PACKET3_ME_INITIALIZE_DEVICE_ID(x) ((x) << 16)
#define PACKET3_COND_WRITE 0x45
1.8.3.1
dri-devel mailing list dri-devel@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/dri-devel
On Thu, Jan 16, 2014 at 11:30:14PM +0000, Deucher, Alexander wrote:
-----Original Message----- From: Tom Stellard [mailto:tom@stellard.net] Sent: Thursday, January 16, 2014 6:25 PM To: Alex Deucher Cc: dri-devel@lists.freedesktop.org; Deucher, Alexander; stable@vger.kernel.org Subject: Re: [PATCH 1/2] drm/radeon: fix surface sync in fence on cayman
On Thu, Jan 16, 2014 at 06:14:58PM -0500, Alex Deucher wrote:
We need to set the engine bit to select the ME and also set the full cache bit. Should help stability on TN and cayman.
Signed-off-by: Alex Deucher alexander.deucher@amd.com Cc: stable@vger.kernel.org
drivers/gpu/drm/radeon/ni.c | 7 +++---- drivers/gpu/drm/radeon/nid.h | 1 + 2 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c index 9f11a55..04e9516 100644 --- a/drivers/gpu/drm/radeon/ni.c +++ b/drivers/gpu/drm/radeon/ni.c @@ -1319,13 +1319,12 @@ void cayman_fence_ring_emit(struct
radeon_device *rdev,
{ struct radeon_ring *ring = &rdev->ring[fence->ring]; u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
- u32 cp_coher_cntl = PACKET3_FULL_CACHE_ENA |
PACKET3_TC_ACTION_ENA |
PACKET3_SH_ACTION_ENA;
/* flush read cache over gart for this vmid */
- radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
- radeon_ring_write(ring, (CP_COHER_CNTL2 -
PACKET3_SET_CONFIG_REG_START) >> 2);
- radeon_ring_write(ring, 0); radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
- radeon_ring_write(ring, PACKET3_TC_ACTION_ENA |
PACKET3_SH_ACTION_ENA);
- radeon_ring_write(ring, PACKET3_ENGINE_ME | cp_coher_cntl); radeon_ring_write(ring, 0xFFFFFFFF); radeon_ring_write(ring, 0); radeon_ring_write(ring, 10); /* poll interval */
Doesn't this also need to be changed in cayman_ring_ib_execute() ?
Oh, yeah. That also emits a surface sync packet. I'm not sure why we need one there, but I'll fix that up too.
diff --git a/drivers/gpu/drm/radeon/nid.h
b/drivers/gpu/drm/radeon/nid.h
index 22421bc..d996033 100644 --- a/drivers/gpu/drm/radeon/nid.h +++ b/drivers/gpu/drm/radeon/nid.h @@ -1154,6 +1154,7 @@ # define PACKET3_DB_ACTION_ENA (1 << 26) # define PACKET3_SH_ACTION_ENA (1 << 27) # define PACKET3_SX_ACTION_ENA (1 << 28) +# define PACKET3_ENGINE_ME (1 << 31)
Technically (1 << 31) is undefined. It should be (1U << 31).
Hmmm... a lot of drivers do this pretty regularly. I guess gcc does the right thing?
Yes, I've seen this all over the place, and I'm sure most compilers do what everyone expects here.
-Tom
Alex
-Tom
#define PACKET3_ME_INITIALIZE 0x44 #define PACKET3_ME_INITIALIZE_DEVICE_ID(x) ((x) << 16)
#define PACKET3_COND_WRITE 0x45
1.8.3.1
dri-devel mailing list dri-devel@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/dri-devel
dri-devel@lists.freedesktop.org