This patch set gets basic gpu reset working with amdgpu. It's only been tested via debugfs. It still needs to be integrated into the lockup detection in the driver to be used for runtime lockup resets.
Alex Deucher (7): drm/amdgpu: fix tonga smu resume drm/amdgpu: Add some tweaks to gfx 8 soft reset drm/amdgpu: clean up asic level reset for CI drm/amdgpu: clean up asic level reset for VI drm/amdgpu: post card after hard reset drm/amdgpu: add a debugfs property to trigger a GPU reset drm/amdgpu: drop hard_reset module parameter
drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 - drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 3 + drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 4 - drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 20 +- drivers/gpu/drm/amd/amdgpu/cik.c | 308 +---------------------- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 23 +- drivers/gpu/drm/amd/amdgpu/tonga_dpm.c | 17 +- drivers/gpu/drm/amd/amdgpu/vi.c | 385 +---------------------------- 8 files changed, 54 insertions(+), 707 deletions(-)
Need to make sure smu buffers are pinned on resume. This matches what Fiji does.
Cc: stable@vger.kernel.org Reviewed-by: Junwei Zhang Jerry.Zhang@amd.com Reviewed-by: Christian König christian.koenig@amd.com Reviewed-by: Ken Wang Qingqing.Wang@amd.com Signed-off-by: Alex Deucher alexander.deucher@amd.com --- drivers/gpu/drm/amd/amdgpu/tonga_dpm.c | 17 ++--------------- 1 file changed, 2 insertions(+), 15 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/tonga_dpm.c b/drivers/gpu/drm/amd/amdgpu/tonga_dpm.c index f4a13465..0497784 100644 --- a/drivers/gpu/drm/amd/amdgpu/tonga_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/tonga_dpm.c @@ -122,25 +122,12 @@ static int tonga_dpm_hw_fini(void *handle)
static int tonga_dpm_suspend(void *handle) { - return 0; + return tonga_dpm_hw_fini(handle); }
static int tonga_dpm_resume(void *handle) { - int ret; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - mutex_lock(&adev->pm.mutex); - - ret = tonga_smu_start(adev); - if (ret) { - DRM_ERROR("SMU start failed\n"); - goto fail; - } - -fail: - mutex_unlock(&adev->pm.mutex); - return ret; + return tonga_dpm_hw_init(handle); }
static int tonga_dpm_set_clockgating_state(void *handle,
Reviewed-by: Junwei Zhang Jerry.Zhang@amd.com Reviewed-by: Christian König christian.koenig@amd.com Reviewed-by: Ken Wang Qingqing.Wang@amd.com Signed-off-by: Alex Deucher alexander.deucher@amd.com --- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 4a6d761..7f5625e 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -4186,7 +4186,18 @@ static int gfx_v8_0_soft_reset(void *handle) gfx_v8_0_cp_gfx_enable(adev, false);
/* Disable MEC parsing/prefetching */ - /* XXX todo */ + gfx_v8_0_cp_compute_enable(adev, false); + + if (grbm_soft_reset || srbm_soft_reset) { + tmp = RREG32(mmGMCON_DEBUG); + tmp = REG_SET_FIELD(tmp, + GMCON_DEBUG, GFX_STALL, 1); + tmp = REG_SET_FIELD(tmp, + GMCON_DEBUG, GFX_CLEAR, 1); + WREG32(mmGMCON_DEBUG, tmp); + + udelay(50); + }
if (grbm_soft_reset) { tmp = RREG32(mmGRBM_SOFT_RESET); @@ -4215,6 +4226,16 @@ static int gfx_v8_0_soft_reset(void *handle) WREG32(mmSRBM_SOFT_RESET, tmp); tmp = RREG32(mmSRBM_SOFT_RESET); } + + if (grbm_soft_reset || srbm_soft_reset) { + tmp = RREG32(mmGMCON_DEBUG); + tmp = REG_SET_FIELD(tmp, + GMCON_DEBUG, GFX_STALL, 0); + tmp = REG_SET_FIELD(tmp, + GMCON_DEBUG, GFX_CLEAR, 0); + WREG32(mmGMCON_DEBUG, tmp); + } + /* Wait a little for things to settle down */ udelay(50); gfx_v8_0_print_status((void *)adev);
Drop soft reset, always use pci config reset.
Reviewed-by: Junwei Zhang Jerry.Zhang@amd.com Reviewed-by: Christian König christian.koenig@amd.com Reviewed-by: Ken Wang Qingqing.Wang@amd.com Signed-off-by: Alex Deucher alexander.deucher@amd.com --- drivers/gpu/drm/amd/amdgpu/cik.c | 308 +-------------------------------------- 1 file changed, 4 insertions(+), 304 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c index 8265603..2bef867 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik.c +++ b/drivers/gpu/drm/amd/amdgpu/cik.c @@ -1060,257 +1060,6 @@ static int cik_read_register(struct amdgpu_device *adev, u32 se_num, return -EINVAL; }
-static void cik_print_gpu_status_regs(struct amdgpu_device *adev) -{ - dev_info(adev->dev, " GRBM_STATUS=0x%08X\n", - RREG32(mmGRBM_STATUS)); - dev_info(adev->dev, " GRBM_STATUS2=0x%08X\n", - RREG32(mmGRBM_STATUS2)); - dev_info(adev->dev, " GRBM_STATUS_SE0=0x%08X\n", - RREG32(mmGRBM_STATUS_SE0)); - dev_info(adev->dev, " GRBM_STATUS_SE1=0x%08X\n", - RREG32(mmGRBM_STATUS_SE1)); - dev_info(adev->dev, " GRBM_STATUS_SE2=0x%08X\n", - RREG32(mmGRBM_STATUS_SE2)); - dev_info(adev->dev, " GRBM_STATUS_SE3=0x%08X\n", - RREG32(mmGRBM_STATUS_SE3)); - dev_info(adev->dev, " SRBM_STATUS=0x%08X\n", - RREG32(mmSRBM_STATUS)); - dev_info(adev->dev, " SRBM_STATUS2=0x%08X\n", - RREG32(mmSRBM_STATUS2)); - dev_info(adev->dev, " SDMA0_STATUS_REG = 0x%08X\n", - RREG32(mmSDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET)); - dev_info(adev->dev, " SDMA1_STATUS_REG = 0x%08X\n", - RREG32(mmSDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET)); - dev_info(adev->dev, " CP_STAT = 0x%08x\n", RREG32(mmCP_STAT)); - dev_info(adev->dev, " CP_STALLED_STAT1 = 0x%08x\n", - RREG32(mmCP_STALLED_STAT1)); - dev_info(adev->dev, " CP_STALLED_STAT2 = 0x%08x\n", - RREG32(mmCP_STALLED_STAT2)); - dev_info(adev->dev, " CP_STALLED_STAT3 = 0x%08x\n", - RREG32(mmCP_STALLED_STAT3)); - dev_info(adev->dev, " CP_CPF_BUSY_STAT = 0x%08x\n", - RREG32(mmCP_CPF_BUSY_STAT)); - dev_info(adev->dev, " CP_CPF_STALLED_STAT1 = 0x%08x\n", - RREG32(mmCP_CPF_STALLED_STAT1)); - dev_info(adev->dev, " CP_CPF_STATUS = 0x%08x\n", RREG32(mmCP_CPF_STATUS)); - dev_info(adev->dev, " CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(mmCP_CPC_BUSY_STAT)); - dev_info(adev->dev, " CP_CPC_STALLED_STAT1 = 0x%08x\n", - RREG32(mmCP_CPC_STALLED_STAT1)); - dev_info(adev->dev, " CP_CPC_STATUS = 0x%08x\n", RREG32(mmCP_CPC_STATUS)); -} - -/** - * cik_gpu_check_soft_reset - check which blocks are busy - * - * @adev: amdgpu_device pointer - * - * Check which blocks are busy and return the relevant reset - * mask to be used by cik_gpu_soft_reset(). - * Returns a mask of the blocks to be reset. - */ -u32 amdgpu_cik_gpu_check_soft_reset(struct amdgpu_device *adev) -{ - u32 reset_mask = 0; - u32 tmp; - - /* GRBM_STATUS */ - tmp = RREG32(mmGRBM_STATUS); - if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK | - GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK | - GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK | - GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK | - GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK | - GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) - reset_mask |= AMDGPU_RESET_GFX; - - if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) - reset_mask |= AMDGPU_RESET_CP; - - /* GRBM_STATUS2 */ - tmp = RREG32(mmGRBM_STATUS2); - if (tmp & GRBM_STATUS2__RLC_BUSY_MASK) - reset_mask |= AMDGPU_RESET_RLC; - - /* SDMA0_STATUS_REG */ - tmp = RREG32(mmSDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET); - if (!(tmp & SDMA0_STATUS_REG__IDLE_MASK)) - reset_mask |= AMDGPU_RESET_DMA; - - /* SDMA1_STATUS_REG */ - tmp = RREG32(mmSDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET); - if (!(tmp & SDMA0_STATUS_REG__IDLE_MASK)) - reset_mask |= AMDGPU_RESET_DMA1; - - /* SRBM_STATUS2 */ - tmp = RREG32(mmSRBM_STATUS2); - if (tmp & SRBM_STATUS2__SDMA_BUSY_MASK) - reset_mask |= AMDGPU_RESET_DMA; - - if (tmp & SRBM_STATUS2__SDMA1_BUSY_MASK) - reset_mask |= AMDGPU_RESET_DMA1; - - /* SRBM_STATUS */ - tmp = RREG32(mmSRBM_STATUS); - - if (tmp & SRBM_STATUS__IH_BUSY_MASK) - reset_mask |= AMDGPU_RESET_IH; - - if (tmp & SRBM_STATUS__SEM_BUSY_MASK) - reset_mask |= AMDGPU_RESET_SEM; - - if (tmp & SRBM_STATUS__GRBM_RQ_PENDING_MASK) - reset_mask |= AMDGPU_RESET_GRBM; - - if (tmp & SRBM_STATUS__VMC_BUSY_MASK) - reset_mask |= AMDGPU_RESET_VMC; - - if (tmp & (SRBM_STATUS__MCB_BUSY_MASK | SRBM_STATUS__MCB_NON_DISPLAY_BUSY_MASK | - SRBM_STATUS__MCC_BUSY_MASK | SRBM_STATUS__MCD_BUSY_MASK)) - reset_mask |= AMDGPU_RESET_MC; - - if (amdgpu_display_is_display_hung(adev)) - reset_mask |= AMDGPU_RESET_DISPLAY; - - /* Skip MC reset as it's mostly likely not hung, just busy */ - if (reset_mask & AMDGPU_RESET_MC) { - DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask); - reset_mask &= ~AMDGPU_RESET_MC; - } - - return reset_mask; -} - -/** - * cik_gpu_soft_reset - soft reset GPU - * - * @adev: amdgpu_device pointer - * @reset_mask: mask of which blocks to reset - * - * Soft reset the blocks specified in @reset_mask. - */ -static void cik_gpu_soft_reset(struct amdgpu_device *adev, u32 reset_mask) -{ - struct amdgpu_mode_mc_save save; - u32 grbm_soft_reset = 0, srbm_soft_reset = 0; - u32 tmp; - - if (reset_mask == 0) - return; - - dev_info(adev->dev, "GPU softreset: 0x%08X\n", reset_mask); - - cik_print_gpu_status_regs(adev); - dev_info(adev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n", - RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_ADDR)); - dev_info(adev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n", - RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_STATUS)); - - /* disable CG/PG */ - - /* stop the rlc */ - gfx_v7_0_rlc_stop(adev); - - /* Disable GFX parsing/prefetching */ - WREG32(mmCP_ME_CNTL, CP_ME_CNTL__ME_HALT_MASK | CP_ME_CNTL__PFP_HALT_MASK | CP_ME_CNTL__CE_HALT_MASK); - - /* Disable MEC parsing/prefetching */ - WREG32(mmCP_MEC_CNTL, CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK); - - if (reset_mask & AMDGPU_RESET_DMA) { - /* sdma0 */ - tmp = RREG32(mmSDMA0_F32_CNTL + SDMA0_REGISTER_OFFSET); - tmp |= SDMA0_F32_CNTL__HALT_MASK; - WREG32(mmSDMA0_F32_CNTL + SDMA0_REGISTER_OFFSET, tmp); - } - if (reset_mask & AMDGPU_RESET_DMA1) { - /* sdma1 */ - tmp = RREG32(mmSDMA0_F32_CNTL + SDMA1_REGISTER_OFFSET); - tmp |= SDMA0_F32_CNTL__HALT_MASK; - WREG32(mmSDMA0_F32_CNTL + SDMA1_REGISTER_OFFSET, tmp); - } - - gmc_v7_0_mc_stop(adev, &save); - if (amdgpu_asic_wait_for_mc_idle(adev)) { - dev_warn(adev->dev, "Wait for MC idle timedout !\n"); - } - - if (reset_mask & (AMDGPU_RESET_GFX | AMDGPU_RESET_COMPUTE | AMDGPU_RESET_CP)) - grbm_soft_reset = GRBM_SOFT_RESET__SOFT_RESET_CP_MASK | - GRBM_SOFT_RESET__SOFT_RESET_GFX_MASK; - - if (reset_mask & AMDGPU_RESET_CP) { - grbm_soft_reset |= GRBM_SOFT_RESET__SOFT_RESET_CP_MASK; - - srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_GRBM_MASK; - } - - if (reset_mask & AMDGPU_RESET_DMA) - srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_SDMA_MASK; - - if (reset_mask & AMDGPU_RESET_DMA1) - srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_SDMA1_MASK; - - if (reset_mask & AMDGPU_RESET_DISPLAY) - srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_DC_MASK; - - if (reset_mask & AMDGPU_RESET_RLC) - grbm_soft_reset |= GRBM_SOFT_RESET__SOFT_RESET_RLC_MASK; - - if (reset_mask & AMDGPU_RESET_SEM) - srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_SEM_MASK; - - if (reset_mask & AMDGPU_RESET_IH) - srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_IH_MASK; - - if (reset_mask & AMDGPU_RESET_GRBM) - srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_GRBM_MASK; - - if (reset_mask & AMDGPU_RESET_VMC) - srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_VMC_MASK; - - if (!(adev->flags & AMD_IS_APU)) { - if (reset_mask & AMDGPU_RESET_MC) - srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_MC_MASK; - } - - if (grbm_soft_reset) { - tmp = RREG32(mmGRBM_SOFT_RESET); - tmp |= grbm_soft_reset; - dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp); - WREG32(mmGRBM_SOFT_RESET, tmp); - tmp = RREG32(mmGRBM_SOFT_RESET); - - udelay(50); - - tmp &= ~grbm_soft_reset; - WREG32(mmGRBM_SOFT_RESET, tmp); - tmp = RREG32(mmGRBM_SOFT_RESET); - } - - if (srbm_soft_reset) { - tmp = RREG32(mmSRBM_SOFT_RESET); - tmp |= srbm_soft_reset; - dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp); - WREG32(mmSRBM_SOFT_RESET, tmp); - tmp = RREG32(mmSRBM_SOFT_RESET); - - udelay(50); - - tmp &= ~srbm_soft_reset; - WREG32(mmSRBM_SOFT_RESET, tmp); - tmp = RREG32(mmSRBM_SOFT_RESET); - } - - /* Wait a little for things to settle down */ - udelay(50); - - gmc_v7_0_mc_resume(adev, &save); - udelay(50); - - cik_print_gpu_status_regs(adev); -} - struct kv_reset_save_regs { u32 gmcon_reng_execute; u32 gmcon_misc; @@ -1406,45 +1155,11 @@ static void kv_restore_regs_for_reset(struct amdgpu_device *adev,
static void cik_gpu_pci_config_reset(struct amdgpu_device *adev) { - struct amdgpu_mode_mc_save save; struct kv_reset_save_regs kv_save = { 0 }; - u32 tmp, i; + u32 i;
dev_info(adev->dev, "GPU pci config reset\n");
- /* disable dpm? */ - - /* disable cg/pg */ - - /* Disable GFX parsing/prefetching */ - WREG32(mmCP_ME_CNTL, CP_ME_CNTL__ME_HALT_MASK | - CP_ME_CNTL__PFP_HALT_MASK | CP_ME_CNTL__CE_HALT_MASK); - - /* Disable MEC parsing/prefetching */ - WREG32(mmCP_MEC_CNTL, - CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK); - - /* sdma0 */ - tmp = RREG32(mmSDMA0_F32_CNTL + SDMA0_REGISTER_OFFSET); - tmp |= SDMA0_F32_CNTL__HALT_MASK; - WREG32(mmSDMA0_F32_CNTL + SDMA0_REGISTER_OFFSET, tmp); - /* sdma1 */ - tmp = RREG32(mmSDMA0_F32_CNTL + SDMA1_REGISTER_OFFSET); - tmp |= SDMA0_F32_CNTL__HALT_MASK; - WREG32(mmSDMA0_F32_CNTL + SDMA1_REGISTER_OFFSET, tmp); - /* XXX other engines? */ - - /* halt the rlc, disable cp internal ints */ - gfx_v7_0_rlc_stop(adev); - - udelay(50); - - /* disable mem access */ - gmc_v7_0_mc_stop(adev, &save); - if (amdgpu_asic_wait_for_mc_idle(adev)) { - dev_warn(adev->dev, "Wait for MC idle timed out !\n"); - } - if (adev->flags & AMD_IS_APU) kv_save_regs_for_reset(adev, &kv_save);
@@ -1490,26 +1205,11 @@ static void cik_set_bios_scratch_engine_hung(struct amdgpu_device *adev, bool hu */ static int cik_asic_reset(struct amdgpu_device *adev) { - u32 reset_mask; - - reset_mask = amdgpu_cik_gpu_check_soft_reset(adev); - - if (reset_mask) - cik_set_bios_scratch_engine_hung(adev, true); - - /* try soft reset */ - cik_gpu_soft_reset(adev, reset_mask); - - reset_mask = amdgpu_cik_gpu_check_soft_reset(adev); - - /* try pci config reset */ - if (reset_mask && amdgpu_hard_reset) - cik_gpu_pci_config_reset(adev); + cik_set_bios_scratch_engine_hung(adev, true);
- reset_mask = amdgpu_cik_gpu_check_soft_reset(adev); + cik_gpu_pci_config_reset(adev);
- if (!reset_mask) - cik_set_bios_scratch_engine_hung(adev, false); + cik_set_bios_scratch_engine_hung(adev, false);
return 0; }
Drop soft reset, always use pci config reset.
Reviewed-by: Junwei Zhang Jerry.Zhang@amd.com Reviewed-by: Christian König christian.koenig@amd.com Reviewed-by: Ken Wang Qingqing.Wang@amd.com Signed-off-by: Alex Deucher alexander.deucher@amd.com --- drivers/gpu/drm/amd/amdgpu/vi.c | 385 +--------------------------------------- 1 file changed, 4 insertions(+), 381 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index d5e02e1..48b2ca1 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -574,374 +574,12 @@ static int vi_read_register(struct amdgpu_device *adev, u32 se_num, return -EINVAL; }
-static void vi_print_gpu_status_regs(struct amdgpu_device *adev) -{ - dev_info(adev->dev, " GRBM_STATUS=0x%08X\n", - RREG32(mmGRBM_STATUS)); - dev_info(adev->dev, " GRBM_STATUS2=0x%08X\n", - RREG32(mmGRBM_STATUS2)); - dev_info(adev->dev, " GRBM_STATUS_SE0=0x%08X\n", - RREG32(mmGRBM_STATUS_SE0)); - dev_info(adev->dev, " GRBM_STATUS_SE1=0x%08X\n", - RREG32(mmGRBM_STATUS_SE1)); - dev_info(adev->dev, " GRBM_STATUS_SE2=0x%08X\n", - RREG32(mmGRBM_STATUS_SE2)); - dev_info(adev->dev, " GRBM_STATUS_SE3=0x%08X\n", - RREG32(mmGRBM_STATUS_SE3)); - dev_info(adev->dev, " SRBM_STATUS=0x%08X\n", - RREG32(mmSRBM_STATUS)); - dev_info(adev->dev, " SRBM_STATUS2=0x%08X\n", - RREG32(mmSRBM_STATUS2)); - dev_info(adev->dev, " SDMA0_STATUS_REG = 0x%08X\n", - RREG32(mmSDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET)); - if (adev->sdma.num_instances > 1) { - dev_info(adev->dev, " SDMA1_STATUS_REG = 0x%08X\n", - RREG32(mmSDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET)); - } - dev_info(adev->dev, " CP_STAT = 0x%08x\n", RREG32(mmCP_STAT)); - dev_info(adev->dev, " CP_STALLED_STAT1 = 0x%08x\n", - RREG32(mmCP_STALLED_STAT1)); - dev_info(adev->dev, " CP_STALLED_STAT2 = 0x%08x\n", - RREG32(mmCP_STALLED_STAT2)); - dev_info(adev->dev, " CP_STALLED_STAT3 = 0x%08x\n", - RREG32(mmCP_STALLED_STAT3)); - dev_info(adev->dev, " CP_CPF_BUSY_STAT = 0x%08x\n", - RREG32(mmCP_CPF_BUSY_STAT)); - dev_info(adev->dev, " CP_CPF_STALLED_STAT1 = 0x%08x\n", - RREG32(mmCP_CPF_STALLED_STAT1)); - dev_info(adev->dev, " CP_CPF_STATUS = 0x%08x\n", RREG32(mmCP_CPF_STATUS)); - dev_info(adev->dev, " CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(mmCP_CPC_BUSY_STAT)); - dev_info(adev->dev, " CP_CPC_STALLED_STAT1 = 0x%08x\n", - RREG32(mmCP_CPC_STALLED_STAT1)); - dev_info(adev->dev, " CP_CPC_STATUS = 0x%08x\n", RREG32(mmCP_CPC_STATUS)); -} - -/** - * vi_gpu_check_soft_reset - check which blocks are busy - * - * @adev: amdgpu_device pointer - * - * Check which blocks are busy and return the relevant reset - * mask to be used by vi_gpu_soft_reset(). - * Returns a mask of the blocks to be reset. - */ -u32 vi_gpu_check_soft_reset(struct amdgpu_device *adev) -{ - u32 reset_mask = 0; - u32 tmp; - - /* GRBM_STATUS */ - tmp = RREG32(mmGRBM_STATUS); - if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK | - GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK | - GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK | - GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK | - GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK | - GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) - reset_mask |= AMDGPU_RESET_GFX; - - if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) - reset_mask |= AMDGPU_RESET_CP; - - /* GRBM_STATUS2 */ - tmp = RREG32(mmGRBM_STATUS2); - if (tmp & GRBM_STATUS2__RLC_BUSY_MASK) - reset_mask |= AMDGPU_RESET_RLC; - - if (tmp & (GRBM_STATUS2__CPF_BUSY_MASK | - GRBM_STATUS2__CPC_BUSY_MASK | - GRBM_STATUS2__CPG_BUSY_MASK)) - reset_mask |= AMDGPU_RESET_CP; - - /* SRBM_STATUS2 */ - tmp = RREG32(mmSRBM_STATUS2); - if (tmp & SRBM_STATUS2__SDMA_BUSY_MASK) - reset_mask |= AMDGPU_RESET_DMA; - - if (tmp & SRBM_STATUS2__SDMA1_BUSY_MASK) - reset_mask |= AMDGPU_RESET_DMA1; - - /* SRBM_STATUS */ - tmp = RREG32(mmSRBM_STATUS); - - if (tmp & SRBM_STATUS__IH_BUSY_MASK) - reset_mask |= AMDGPU_RESET_IH; - - if (tmp & SRBM_STATUS__SEM_BUSY_MASK) - reset_mask |= AMDGPU_RESET_SEM; - - if (tmp & SRBM_STATUS__GRBM_RQ_PENDING_MASK) - reset_mask |= AMDGPU_RESET_GRBM; - - if (adev->asic_type != CHIP_TOPAZ) { - if (tmp & (SRBM_STATUS__UVD_RQ_PENDING_MASK | - SRBM_STATUS__UVD_BUSY_MASK)) - reset_mask |= AMDGPU_RESET_UVD; - } - - if (tmp & SRBM_STATUS__VMC_BUSY_MASK) - reset_mask |= AMDGPU_RESET_VMC; - - if (tmp & (SRBM_STATUS__MCB_BUSY_MASK | SRBM_STATUS__MCB_NON_DISPLAY_BUSY_MASK | - SRBM_STATUS__MCC_BUSY_MASK | SRBM_STATUS__MCD_BUSY_MASK)) - reset_mask |= AMDGPU_RESET_MC; - - /* SDMA0_STATUS_REG */ - tmp = RREG32(mmSDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET); - if (!(tmp & SDMA0_STATUS_REG__IDLE_MASK)) - reset_mask |= AMDGPU_RESET_DMA; - - /* SDMA1_STATUS_REG */ - if (adev->sdma.num_instances > 1) { - tmp = RREG32(mmSDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET); - if (!(tmp & SDMA0_STATUS_REG__IDLE_MASK)) - reset_mask |= AMDGPU_RESET_DMA1; - } -#if 0 - /* VCE_STATUS */ - if (adev->asic_type != CHIP_TOPAZ) { - tmp = RREG32(mmVCE_STATUS); - if (tmp & VCE_STATUS__VCPU_REPORT_RB0_BUSY_MASK) - reset_mask |= AMDGPU_RESET_VCE; - if (tmp & VCE_STATUS__VCPU_REPORT_RB1_BUSY_MASK) - reset_mask |= AMDGPU_RESET_VCE1; - - } - - if (adev->asic_type != CHIP_TOPAZ) { - if (amdgpu_display_is_display_hung(adev)) - reset_mask |= AMDGPU_RESET_DISPLAY; - } -#endif - - /* Skip MC reset as it's mostly likely not hung, just busy */ - if (reset_mask & AMDGPU_RESET_MC) { - DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask); - reset_mask &= ~AMDGPU_RESET_MC; - } - - return reset_mask; -} - -/** - * vi_gpu_soft_reset - soft reset GPU - * - * @adev: amdgpu_device pointer - * @reset_mask: mask of which blocks to reset - * - * Soft reset the blocks specified in @reset_mask. - */ -static void vi_gpu_soft_reset(struct amdgpu_device *adev, u32 reset_mask) -{ - struct amdgpu_mode_mc_save save; - u32 grbm_soft_reset = 0, srbm_soft_reset = 0; - u32 tmp; - - if (reset_mask == 0) - return; - - dev_info(adev->dev, "GPU softreset: 0x%08X\n", reset_mask); - - vi_print_gpu_status_regs(adev); - dev_info(adev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n", - RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_ADDR)); - dev_info(adev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n", - RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_STATUS)); - - /* disable CG/PG */ - - /* stop the rlc */ - //XXX - //gfx_v8_0_rlc_stop(adev); - - /* Disable GFX parsing/prefetching */ - tmp = RREG32(mmCP_ME_CNTL); - tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1); - tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1); - tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1); - WREG32(mmCP_ME_CNTL, tmp); - - /* Disable MEC parsing/prefetching */ - tmp = RREG32(mmCP_MEC_CNTL); - tmp = REG_SET_FIELD(tmp, CP_MEC_CNTL, MEC_ME1_HALT, 1); - tmp = REG_SET_FIELD(tmp, CP_MEC_CNTL, MEC_ME2_HALT, 1); - WREG32(mmCP_MEC_CNTL, tmp); - - if (reset_mask & AMDGPU_RESET_DMA) { - /* sdma0 */ - tmp = RREG32(mmSDMA0_F32_CNTL + SDMA0_REGISTER_OFFSET); - tmp = REG_SET_FIELD(tmp, SDMA0_F32_CNTL, HALT, 1); - WREG32(mmSDMA0_F32_CNTL + SDMA0_REGISTER_OFFSET, tmp); - } - if (reset_mask & AMDGPU_RESET_DMA1) { - /* sdma1 */ - tmp = RREG32(mmSDMA0_F32_CNTL + SDMA1_REGISTER_OFFSET); - tmp = REG_SET_FIELD(tmp, SDMA0_F32_CNTL, HALT, 1); - WREG32(mmSDMA0_F32_CNTL + SDMA1_REGISTER_OFFSET, tmp); - } - - gmc_v8_0_mc_stop(adev, &save); - if (amdgpu_asic_wait_for_mc_idle(adev)) { - dev_warn(adev->dev, "Wait for MC idle timedout !\n"); - } - - if (reset_mask & (AMDGPU_RESET_GFX | AMDGPU_RESET_COMPUTE | AMDGPU_RESET_CP)) { - grbm_soft_reset = - REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP, 1); - grbm_soft_reset = - REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX, 1); - } - - if (reset_mask & AMDGPU_RESET_CP) { - grbm_soft_reset = - REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP, 1); - srbm_soft_reset = - REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1); - } - - if (reset_mask & AMDGPU_RESET_DMA) - srbm_soft_reset = - REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_SDMA, 1); - - if (reset_mask & AMDGPU_RESET_DMA1) - srbm_soft_reset = - REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_SDMA1, 1); - - if (reset_mask & AMDGPU_RESET_DISPLAY) - srbm_soft_reset = - REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_DC, 1); - - if (reset_mask & AMDGPU_RESET_RLC) - grbm_soft_reset = - REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); - - if (reset_mask & AMDGPU_RESET_SEM) - srbm_soft_reset = - REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_SEM, 1); - - if (reset_mask & AMDGPU_RESET_IH) - srbm_soft_reset = - REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_IH, 1); - - if (reset_mask & AMDGPU_RESET_GRBM) - srbm_soft_reset = - REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1); - - if (reset_mask & AMDGPU_RESET_VMC) - srbm_soft_reset = - REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VMC, 1); - - if (reset_mask & AMDGPU_RESET_UVD) - srbm_soft_reset = - REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_UVD, 1); - - if (reset_mask & AMDGPU_RESET_VCE) - srbm_soft_reset = - REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1); - - if (reset_mask & AMDGPU_RESET_VCE) - srbm_soft_reset = - REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1); - - if (!(adev->flags & AMD_IS_APU)) { - if (reset_mask & AMDGPU_RESET_MC) - srbm_soft_reset = - REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_MC, 1); - } - - if (grbm_soft_reset) { - tmp = RREG32(mmGRBM_SOFT_RESET); - tmp |= grbm_soft_reset; - dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp); - WREG32(mmGRBM_SOFT_RESET, tmp); - tmp = RREG32(mmGRBM_SOFT_RESET); - - udelay(50); - - tmp &= ~grbm_soft_reset; - WREG32(mmGRBM_SOFT_RESET, tmp); - tmp = RREG32(mmGRBM_SOFT_RESET); - } - - if (srbm_soft_reset) { - tmp = RREG32(mmSRBM_SOFT_RESET); - tmp |= srbm_soft_reset; - dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp); - WREG32(mmSRBM_SOFT_RESET, tmp); - tmp = RREG32(mmSRBM_SOFT_RESET); - - udelay(50); - - tmp &= ~srbm_soft_reset; - WREG32(mmSRBM_SOFT_RESET, tmp); - tmp = RREG32(mmSRBM_SOFT_RESET); - } - - /* Wait a little for things to settle down */ - udelay(50); - - gmc_v8_0_mc_resume(adev, &save); - udelay(50); - - vi_print_gpu_status_regs(adev); -} - static void vi_gpu_pci_config_reset(struct amdgpu_device *adev) { - struct amdgpu_mode_mc_save save; - u32 tmp, i; + u32 i;
dev_info(adev->dev, "GPU pci config reset\n");
- /* disable dpm? */ - - /* disable cg/pg */ - - /* Disable GFX parsing/prefetching */ - tmp = RREG32(mmCP_ME_CNTL); - tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1); - tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1); - tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1); - WREG32(mmCP_ME_CNTL, tmp); - - /* Disable MEC parsing/prefetching */ - tmp = RREG32(mmCP_MEC_CNTL); - tmp = REG_SET_FIELD(tmp, CP_MEC_CNTL, MEC_ME1_HALT, 1); - tmp = REG_SET_FIELD(tmp, CP_MEC_CNTL, MEC_ME2_HALT, 1); - WREG32(mmCP_MEC_CNTL, tmp); - - /* Disable GFX parsing/prefetching */ - WREG32(mmCP_ME_CNTL, CP_ME_CNTL__ME_HALT_MASK | - CP_ME_CNTL__PFP_HALT_MASK | CP_ME_CNTL__CE_HALT_MASK); - - /* Disable MEC parsing/prefetching */ - WREG32(mmCP_MEC_CNTL, - CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK); - - /* sdma0 */ - tmp = RREG32(mmSDMA0_F32_CNTL + SDMA0_REGISTER_OFFSET); - tmp = REG_SET_FIELD(tmp, SDMA0_F32_CNTL, HALT, 1); - WREG32(mmSDMA0_F32_CNTL + SDMA0_REGISTER_OFFSET, tmp); - - /* sdma1 */ - tmp = RREG32(mmSDMA0_F32_CNTL + SDMA1_REGISTER_OFFSET); - tmp = REG_SET_FIELD(tmp, SDMA0_F32_CNTL, HALT, 1); - WREG32(mmSDMA0_F32_CNTL + SDMA1_REGISTER_OFFSET, tmp); - - /* XXX other engines? */ - - /* halt the rlc, disable cp internal ints */ - //XXX - //gfx_v8_0_rlc_stop(adev); - - udelay(50); - - /* disable mem access */ - gmc_v8_0_mc_stop(adev, &save); - if (amdgpu_asic_wait_for_mc_idle(adev)) { - dev_warn(adev->dev, "Wait for MC idle timed out !\n"); - } - /* disable BM */ pci_clear_master(adev->pdev); /* reset */ @@ -981,26 +619,11 @@ static void vi_set_bios_scratch_engine_hung(struct amdgpu_device *adev, bool hun */ static int vi_asic_reset(struct amdgpu_device *adev) { - u32 reset_mask; - - reset_mask = vi_gpu_check_soft_reset(adev); - - if (reset_mask) - vi_set_bios_scratch_engine_hung(adev, true); - - /* try soft reset */ - vi_gpu_soft_reset(adev, reset_mask); - - reset_mask = vi_gpu_check_soft_reset(adev); - - /* try pci config reset */ - if (reset_mask && amdgpu_hard_reset) - vi_gpu_pci_config_reset(adev); + vi_set_bios_scratch_engine_hung(adev, true);
- reset_mask = vi_gpu_check_soft_reset(adev); + vi_gpu_pci_config_reset(adev);
- if (!reset_mask) - vi_set_bios_scratch_engine_hung(adev, false); + vi_set_bios_scratch_engine_hung(adev, false);
return 0; }
Posting is required after a pci config reset.
Reviewed-by: Junwei Zhang Jerry.Zhang@amd.com Reviewed-by: Christian König christian.koenig@amd.com Reviewed-by: Ken Wang Qingqing.Wang@amd.com Signed-off-by: Alex Deucher alexander.deucher@amd.com --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 3 +++ 1 file changed, 3 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 9d9d425e..7932ace 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -1890,6 +1890,9 @@ int amdgpu_gpu_reset(struct amdgpu_device *adev)
retry: r = amdgpu_asic_reset(adev); + /* post card */ + amdgpu_atom_asic_init(adev->mode_info.atom_context); + if (!r) { dev_info(adev->dev, "GPU reset succeeded, trying to resume\n"); r = amdgpu_resume(adev);
Ported from similar code in radeon.
Reviewed-by: Junwei Zhang Jerry.Zhang@amd.com Reviewed-by: Christian König christian.koenig@amd.com Reviewed-by: Ken Wang Qingqing.Wang@amd.com Signed-off-by: Alex Deucher alexander.deucher@amd.com --- drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index cac03e7..697fb45 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -806,15 +806,33 @@ static int amdgpu_debugfs_fence_info(struct seq_file *m, void *data) return 0; }
+/** + * amdgpu_debugfs_gpu_reset - manually trigger a gpu reset + * + * Manually trigger a gpu reset at the next fence wait. + */ +static int amdgpu_debugfs_gpu_reset(struct seq_file *m, void *data) +{ + struct drm_info_node *node = (struct drm_info_node *) m->private; + struct drm_device *dev = node->minor->dev; + struct amdgpu_device *adev = dev->dev_private; + + seq_printf(m, "gpu reset\n"); + amdgpu_gpu_reset(adev); + + return 0; +} + static struct drm_info_list amdgpu_debugfs_fence_list[] = { {"amdgpu_fence_info", &amdgpu_debugfs_fence_info, 0, NULL}, + {"amdgpu_gpu_reset", &amdgpu_debugfs_gpu_reset, 0, NULL} }; #endif
int amdgpu_debugfs_fence_init(struct amdgpu_device *adev) { #if defined(CONFIG_DEBUG_FS) - return amdgpu_debugfs_add_files(adev, amdgpu_debugfs_fence_list, 1); + return amdgpu_debugfs_add_files(adev, amdgpu_debugfs_fence_list, 2); #else return 0; #endif
It doesn't currently do anything and there's no need for it going forward since pci config reset will be required as a fallback even when we have fine grained reset implemented.
Signed-off-by: Alex Deucher alexander.deucher@amd.com --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 - drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 4 ---- 2 files changed, 5 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index ce061d3..7cc76e2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -76,7 +76,6 @@ extern int amdgpu_dpm; extern int amdgpu_smc_load_fw; extern int amdgpu_aspm; extern int amdgpu_runtime_pm; -extern int amdgpu_hard_reset; extern unsigned amdgpu_ip_block_mask; extern int amdgpu_bapm; extern int amdgpu_deep_color; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 99031ec..371f30d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -69,7 +69,6 @@ int amdgpu_dpm = -1; int amdgpu_smc_load_fw = 1; int amdgpu_aspm = -1; int amdgpu_runtime_pm = -1; -int amdgpu_hard_reset = 0; unsigned amdgpu_ip_block_mask = 0xffffffff; int amdgpu_bapm = -1; int amdgpu_deep_color = 0; @@ -125,9 +124,6 @@ module_param_named(aspm, amdgpu_aspm, int, 0444); MODULE_PARM_DESC(runpm, "PX runtime pm (1 = force enable, 0 = disable, -1 = PX only default)"); module_param_named(runpm, amdgpu_runtime_pm, int, 0444);
-MODULE_PARM_DESC(hard_reset, "PCI config reset (1 = force enable, 0 = disable (default))"); -module_param_named(hard_reset, amdgpu_hard_reset, int, 0444); - MODULE_PARM_DESC(ip_block_mask, "IP Block Mask (all blocks enabled (default))"); module_param_named(ip_block_mask, amdgpu_ip_block_mask, uint, 0444);
dri-devel@lists.freedesktop.org