We need to set the engine bit to select the ME and also set the full cache bit. Should help stability on TN and cayman.
V2: fix up surface sync in ib execute as well
Signed-off-by: Alex Deucher alexander.deucher@amd.com Cc: stable@vger.kernel.org --- drivers/gpu/drm/radeon/ni.c | 16 +++++++--------- drivers/gpu/drm/radeon/nid.h | 1 + 2 files changed, 8 insertions(+), 9 deletions(-)
diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c index 9f11a55..9d71a21 100644 --- a/drivers/gpu/drm/radeon/ni.c +++ b/drivers/gpu/drm/radeon/ni.c @@ -1319,13 +1319,12 @@ void cayman_fence_ring_emit(struct radeon_device *rdev, { struct radeon_ring *ring = &rdev->ring[fence->ring]; u64 addr = rdev->fence_drv[fence->ring].gpu_addr; + u32 cp_coher_cntl = PACKET3_FULL_CACHE_ENA | PACKET3_TC_ACTION_ENA | + PACKET3_SH_ACTION_ENA;
/* flush read cache over gart for this vmid */ - radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); - radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2); - radeon_ring_write(ring, 0); radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3)); - radeon_ring_write(ring, PACKET3_TC_ACTION_ENA | PACKET3_SH_ACTION_ENA); + radeon_ring_write(ring, PACKET3_ENGINE_ME | cp_coher_cntl); radeon_ring_write(ring, 0xFFFFFFFF); radeon_ring_write(ring, 0); radeon_ring_write(ring, 10); /* poll interval */ @@ -1341,6 +1340,8 @@ void cayman_fence_ring_emit(struct radeon_device *rdev, void cayman_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) { struct radeon_ring *ring = &rdev->ring[ib->ring]; + u32 cp_coher_cntl = PACKET3_FULL_CACHE_ENA | PACKET3_TC_ACTION_ENA | + PACKET3_SH_ACTION_ENA;
/* set to DX10/11 mode */ radeon_ring_write(ring, PACKET3(PACKET3_MODE_CONTROL, 0)); @@ -1365,14 +1366,11 @@ void cayman_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) (ib->vm ? (ib->vm->id << 24) : 0));
/* flush read cache over gart for this vmid */ - radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); - radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2); - radeon_ring_write(ring, ib->vm ? ib->vm->id : 0); radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3)); - radeon_ring_write(ring, PACKET3_TC_ACTION_ENA | PACKET3_SH_ACTION_ENA); + radeon_ring_write(ring, PACKET3_ENGINE_ME | cp_coher_cntl); radeon_ring_write(ring, 0xFFFFFFFF); radeon_ring_write(ring, 0); - radeon_ring_write(ring, 10); /* poll interval */ + radeon_ring_write(ring, ((ib->vm ? ib->vm->id : 0) << 24) | 10); /* poll interval */ }
static void cayman_cp_enable(struct radeon_device *rdev, bool enable) diff --git a/drivers/gpu/drm/radeon/nid.h b/drivers/gpu/drm/radeon/nid.h index 22421bc..d996033 100644 --- a/drivers/gpu/drm/radeon/nid.h +++ b/drivers/gpu/drm/radeon/nid.h @@ -1154,6 +1154,7 @@ # define PACKET3_DB_ACTION_ENA (1 << 26) # define PACKET3_SH_ACTION_ENA (1 << 27) # define PACKET3_SX_ACTION_ENA (1 << 28) +# define PACKET3_ENGINE_ME (1 << 31) #define PACKET3_ME_INITIALIZE 0x44 #define PACKET3_ME_INITIALIZE_DEVICE_ID(x) ((x) << 16) #define PACKET3_COND_WRITE 0x45
Needed to properly flush the read caches for fences.
Signed-off-by: Alex Deucher alexander.deucher@amd.com Cc: stable@vger.kernel.org --- drivers/gpu/drm/radeon/r600.c | 13 +++++++------ drivers/gpu/drm/radeon/r600d.h | 1 + 2 files changed, 8 insertions(+), 6 deletions(-)
diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index ad99bae..3dce370 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -2706,14 +2706,17 @@ void r600_fence_ring_emit(struct radeon_device *rdev, struct radeon_fence *fence) { struct radeon_ring *ring = &rdev->ring[fence->ring]; + u32 cp_coher_cntl = PACKET3_TC_ACTION_ENA | PACKET3_VC_ACTION_ENA | + PACKET3_SH_ACTION_ENA; + + if (rdev->family >= CHIP_RV770) + cp_coher_cntl |= PACKET3_FULL_CACHE_ENA;
if (rdev->wb.use_event) { u64 addr = rdev->fence_drv[fence->ring].gpu_addr; /* flush read cache over gart */ radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3)); - radeon_ring_write(ring, PACKET3_TC_ACTION_ENA | - PACKET3_VC_ACTION_ENA | - PACKET3_SH_ACTION_ENA); + radeon_ring_write(ring, cp_coher_cntl); radeon_ring_write(ring, 0xFFFFFFFF); radeon_ring_write(ring, 0); radeon_ring_write(ring, 10); /* poll interval */ @@ -2727,9 +2730,7 @@ void r600_fence_ring_emit(struct radeon_device *rdev, } else { /* flush read cache over gart */ radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3)); - radeon_ring_write(ring, PACKET3_TC_ACTION_ENA | - PACKET3_VC_ACTION_ENA | - PACKET3_SH_ACTION_ENA); + radeon_ring_write(ring, cp_coher_cntl); radeon_ring_write(ring, 0xFFFFFFFF); radeon_ring_write(ring, 0); radeon_ring_write(ring, 10); /* poll interval */ diff --git a/drivers/gpu/drm/radeon/r600d.h b/drivers/gpu/drm/radeon/r600d.h index 3fca4b9..37455f6 100644 --- a/drivers/gpu/drm/radeon/r600d.h +++ b/drivers/gpu/drm/radeon/r600d.h @@ -1582,6 +1582,7 @@ # define PACKET3_CP_DMA_CMD_DAIC (1 << 29) #define PACKET3_SURFACE_SYNC 0x43 # define PACKET3_CB0_DEST_BASE_ENA (1 << 6) +# define PACKET3_FULL_CACHE_ENA (1 << 20) /* r7xx+ only */ # define PACKET3_TC_ACTION_ENA (1 << 23) # define PACKET3_VC_ACTION_ENA (1 << 24) # define PACKET3_CB_ACTION_ENA (1 << 25)
dri-devel@lists.freedesktop.org