When unbinding a process from a device (initiated by amd_iommu_v2), the driver needs to make sure that process still exists in the process table. There is a possibility that amdkfd's own notifier handler - kfd_process_notifier_release() - was called before the unbind function and it already removed the process from the process table.
v2: Because there can be only one process with the specified pasid, and because *p can't be NULL inside the hash_for_each_rcu macro, it is more reasonable to just put the whole code inside the if statement that compares the pasid value. That way, when we exit hash_for_each_rcu, we simply exit the function as well.
Signed-off-by: Oded Gabbay oded.gabbay@gmail.com CC: Stable stable@vger.kernel.org --- drivers/gpu/drm/amd/amdkfd/kfd_process.c | 60 +++++++++++++++++++------------- 1 file changed, 35 insertions(+), 25 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index ac00579..a64bc61 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -404,42 +404,52 @@ void kfd_unbind_process_from_device(struct kfd_dev *dev, unsigned int pasid)
idx = srcu_read_lock(&kfd_processes_srcu);
+ /* + * Look for the process that matches the pasid. If there is no such + * process, we either released it in amdkfd's own notifier, or there + * is a bug. Unfortunately, there is no way to tell... + */ hash_for_each_rcu(kfd_processes_table, i, p, kfd_processes) - if (p->pasid == pasid) - break; + if (p->pasid == pasid) {
- srcu_read_unlock(&kfd_processes_srcu, idx); + srcu_read_unlock(&kfd_processes_srcu, idx);
- BUG_ON(p->pasid != pasid); + pr_debug("Unbinding process %d from IOMMU\n", pasid);
- mutex_lock(&p->mutex); + mutex_lock(&p->mutex);
- if ((dev->dbgmgr) && (dev->dbgmgr->pasid == p->pasid)) - kfd_dbgmgr_destroy(dev->dbgmgr); + if ((dev->dbgmgr) && (dev->dbgmgr->pasid == p->pasid)) + kfd_dbgmgr_destroy(dev->dbgmgr);
- pqm_uninit(&p->pqm); + pqm_uninit(&p->pqm);
- pdd = kfd_get_process_device_data(dev, p); + pdd = kfd_get_process_device_data(dev, p);
- if (!pdd) { - mutex_unlock(&p->mutex); - return; - } + if (!pdd) { + mutex_unlock(&p->mutex); + return; + }
- if (pdd->reset_wavefronts) { - dbgdev_wave_reset_wavefronts(pdd->dev, p); - pdd->reset_wavefronts = false; - } + if (pdd->reset_wavefronts) { + dbgdev_wave_reset_wavefronts(pdd->dev, p); + pdd->reset_wavefronts = false; + }
- /* - * Just mark pdd as unbound, because we still need it to call - * amd_iommu_unbind_pasid() in when the process exits. - * We don't call amd_iommu_unbind_pasid() here - * because the IOMMU called us. - */ - pdd->bound = false; + /* + * Just mark pdd as unbound, because we still need it + * to call amd_iommu_unbind_pasid() in when the + * process exits. + * We don't call amd_iommu_unbind_pasid() here + * because the IOMMU called us. + */ + pdd->bound = false;
- mutex_unlock(&p->mutex); + mutex_unlock(&p->mutex); + + return; + } + + srcu_read_unlock(&kfd_processes_srcu, idx); }
struct kfd_process_device *kfd_get_first_process_device_data(struct kfd_process *p)
amdkfd need to destroy the debug manager in case amdkfd's notifier function is called before the unbind function, because in that case, the unbind function will exit without destroying debug manager.
Signed-off-by: Oded Gabbay oded.gabbay@gmail.com CC: Stable stable@vger.kernel.org --- drivers/gpu/drm/amd/amdkfd/kfd_process.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index a64bc61..7708d90 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -242,13 +242,19 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn, pqm_uninit(&p->pqm);
/* Iterate over all process device data structure and check - * if we should reset all wavefronts */ - list_for_each_entry(pdd, &p->per_device_data, per_device_list) + * if we should delete debug managers and reset all wavefronts + */ + list_for_each_entry(pdd, &p->per_device_data, per_device_list) { + if ((pdd->dev->dbgmgr) && + (pdd->dev->dbgmgr->pasid == p->pasid)) + kfd_dbgmgr_destroy(pdd->dev->dbgmgr); + if (pdd->reset_wavefronts) { pr_warn("amdkfd: Resetting all wave fronts\n"); dbgdev_wave_reset_wavefronts(pdd->dev, p); pdd->reset_wavefronts = false; } + }
mutex_unlock(&p->mutex);
dri-devel@lists.freedesktop.org