Make its name describe the function rather than the feature.
Signed-off-by: Andrey Grodzovsky andrey.grodzovsky@amd.com --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 6 +++--- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 2 +- drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c | 2 +- 5 files changed, 8 insertions(+), 8 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index d830a541ba89..d0e557cb5f1d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1076,7 +1076,7 @@ struct amdgpu_device { struct ratelimit_state throttling_logging_rs; uint32_t ras_features;
- bool in_pci_err_recovery; + bool no_hw_access; struct pci_saved_state *pci_state;
struct amdgpu_reset_control *reset_cntl; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index bf5055642b82..60e945471a54 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -340,7 +340,7 @@ void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos, /* Check if hw access should be skipped because of hotplug or device error */ bool amdgpu_device_skip_hw_access(struct amdgpu_device *adev) { - if (adev->in_pci_err_recovery) + if (adev->no_hw_access) return true;
#ifdef CONFIG_LOCKDEP @@ -5335,9 +5335,9 @@ pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev) set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags); set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags);
- adev->in_pci_err_recovery = true; + adev->no_hw_access = true; r = amdgpu_device_pre_asic_reset(adev, &reset_context); - adev->in_pci_err_recovery = false; + adev->no_hw_access = false; if (r) goto out;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index baa7d9778583..ce1577687ac2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -232,7 +232,7 @@ int psp_wait_for(struct psp_context *psp, uint32_t reg_index, int i; struct amdgpu_device *adev = psp->adev;
- if (psp->adev->in_pci_err_recovery) + if (psp->adev->no_hw_access) return 0;
for (i = 0; i < adev->usec_timeout; i++) { @@ -261,7 +261,7 @@ psp_cmd_submit_buf(struct psp_context *psp, bool ras_intr = false; bool skip_unsupport = false;
- if (psp->adev->in_pci_err_recovery) + if (psp->adev->no_hw_access) return 0;
if (!drm_dev_enter(&psp->adev->ddev, &idx)) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 2408ed4c7d84..540fedf787c8 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -7332,7 +7332,7 @@ static int gfx_v10_0_hw_fini(void *handle) amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
- if (!adev->in_pci_err_recovery) { + if (!adev->no_hw_access) { #ifndef BRING_UP_DEBUG if (amdgpu_async_gfx_ring) { r = gfx_v10_0_kiq_disable_kgq(adev); diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c index dc7d2e71aa6f..9526b46582c8 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c @@ -126,7 +126,7 @@ int smu_cmn_send_smc_msg_with_param(struct smu_context *smu, struct amdgpu_device *adev = smu->adev; int ret = 0, index = 0;
- if (smu->adev->in_pci_err_recovery) + if (smu->adev->no_hw_access) return 0;
index = smu_cmn_to_asic_specific_index(smu,
Problem: When the device goes into runtime suspend due to prolonged inactivity (e.g. BACO sleep) and is then hot unplugged, the PCI core will try to wake up the device as part of the unplug process. Since the device is gone, all HW programming during rpm resume fails, leading to a bad SW state later during pci remove handling.
Fix: Use the flag we already use for PCIe error recovery to avoid accessing registers. This allows the rpm resume sequence to complete successfully and pci remove to finish.
v2: Renamed HW access block flag
Link: https://gitlab.freedesktop.org/drm/amd/-/issues/1081 Signed-off-by: Andrey Grodzovsky andrey.grodzovsky@amd.com --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 5 +++++ 1 file changed, 5 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index d8db5929cdd9..b9d221fcb66d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -1555,6 +1555,11 @@ static int amdgpu_pmops_runtime_resume(struct device *dev) if (!adev->runpm) return -EINVAL;
+ /* Avoids registers access if device is physically gone */ + if (!pci_device_is_present(adev->pdev)) + adev->no_hw_access = true; + + if (amdgpu_device_supports_px(drm_dev)) { drm_dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
On Fri, May 21, 2021 at 4:41 PM Andrey Grodzovsky andrey.grodzovsky@amd.com wrote:
Problem: When device goes into sleep state due to prolonged
s/sleep state/runtime suspend/
innactivity (e.g. BACO sleep) and then hot unplugged,
inactivity
PCI core will try to wake up the device as part of unplug process. Since the device is gone all HW programming during rpm resume fails leading to a bad SW state later during pci remove handling.
Fix: Use a flag we use for PCIe error recovery to avoid accessing registres. This allows to succefully complete
successfully
rpm resume sequence and finish pci remove.
v2: Renamed HW access block flag
Link: https://gitlab.freedesktop.org/drm/amd/-/issues/1081 Signed-off-by: Andrey Grodzovsky andrey.grodzovsky@amd.com
With the above comments fixed, the series is: Reviewed-by: Alex Deucher alexander.deucher@amd.com
drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 5 +++++ 1 file changed, 5 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index d8db5929cdd9..b9d221fcb66d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -1555,6 +1555,11 @@ static int amdgpu_pmops_runtime_resume(struct device *dev) if (!adev->runpm) return -EINVAL;
/* Avoids registers access if device is physically gone */
if (!pci_device_is_present(adev->pdev))
adev->no_hw_access = true;
if (amdgpu_device_supports_px(drm_dev)) { drm_dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
-- 2.25.1
amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
dri-devel@lists.freedesktop.org