The KFD API is quite inflexible: it only allows mapping entire BOs, and only at the same virtual address on all GPUs. This is incompatible with newer CUDA memory management APIs (see https://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__VA.html).
Instead of inventing more KFD APIs to fix this, the goal of this patch series is to enable ROCr to use the DRM render node API for more flexible VA mappings. It builds on the DMABuf export API that is being proposed for RDMA. DMABuf handles exported by KFD can be imported by libdrm-amdgpu and then used with amdgpu_bo_va_op.
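For reference, the intended userspace flow with libdrm-amdgpu might look roughly like this. This is a sketch only: "dmabuf_fd" stands in for whatever fd the proposed KFD export API will return, and the helper name is made up; the amdgpu_bo_import and amdgpu_bo_va_op calls are the existing libdrm-amdgpu API.

#include <amdgpu.h>
#include <amdgpu_drm.h>
#include <stdint.h>

/* Sketch: map a KFD-exported DMABuf at an arbitrary GPU VA on one
 * device. "dev" is an initialized amdgpu_device_handle for the render
 * node; "dmabuf_fd" stands in for the fd returned by the proposed KFD
 * export API.
 */
static int map_kfd_dmabuf(amdgpu_device_handle dev, uint32_t dmabuf_fd,
                          uint64_t gpu_va, uint64_t size)
{
        struct amdgpu_bo_import_result import = {0};
        int r;

        r = amdgpu_bo_import(dev, amdgpu_bo_handle_type_dma_buf_fd,
                             dmabuf_fd, &import);
        if (r)
                return r;

        /* Per-GPU VA mapping, independent of where other GPUs map the
         * same BO. amdgpu_bo_va_op applies default RWX mapping flags.
         */
        return amdgpu_bo_va_op(import.buf_handle, 0, size, gpu_va, 0,
                               AMDGPU_VA_OP_MAP);
}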
This is a first proof of concept and request for comment.
A complete solution will likely need minor tweaks to the KFD API to allow memory allocation and import without a pre-determined virtual address.
Other options that were considered but rejected:

- Using the GEM API to create BOs in KFD VMs. This would require significant
  plumbing to get those BOs registered with the KFD eviction fence mechanism
  and the "no overcommitment" memory limits.
- A variation of the above using AMDGPU_GEM_CREATE_VM_ALWAYS_VALID to
  simplify validation. Doesn't work because it doesn't allow DMABuf exports.
- Creating KFD BOs with GEM handles. Doesn't help because there is no way to
  import GEM handles into libdrm-amdgpu.
Felix Kuehling (4):
  drm/amdkfd: Improve amdgpu_vm_handle_moved
  drm/amdgpu: Attach eviction fence on alloc
  drm/amdgpu: update mappings not managed by KFD
  drm/amdgpu: Do bo_va ref counting for KFD BOs
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 99 ++++++++++++-------
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c       |  6 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c  |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c       | 18 +++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h       |  3 +-
 5 files changed, 86 insertions(+), 42 deletions(-)
Let amdgpu_vm_handle_moved update all BO VA mappings of BOs reserved by the caller. This will be useful for handling extra BO VA mappings in KFD VMs that are managed through the render node API.
TODO: This may also allow simplification of amdgpu_cs_vm_handling. See the TODO comment in the code.
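For illustration, a hedged sketch of how a caller outside CS might use the new ticket parameter — the helper name here is made up, only the amdgpu_vm_* calls are real:

/* Illustrative helper (not part of this patch): update mappings of BOs
 * that the caller already reserved under "ticket", e.g. via
 * ttm_eu_reserve_buffers(). With the ticket, amdgpu_vm_handle_moved can
 * update those BOs in place instead of clearing their PTEs.
 */
static int update_reserved_mappings(struct amdgpu_device *adev,
                                    struct amdgpu_vm *vm,
                                    struct ww_acquire_ctx *ticket)
{
        int r = amdgpu_vm_clear_freed(adev, vm, NULL);

        if (r)
                return r;
        return amdgpu_vm_handle_moved(adev, vm, ticket);
}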
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c      |  6 +++++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c      | 18 +++++++++++++-----
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h      |  3 ++-
 4 files changed, 21 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index d162243d8e78..10941f0d8dde 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -826,6 +826,10 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
 		return r;
 	}
 
+	/* TODO: Is this loop still needed, or could this be handled by
+	 * amdgpu_vm_handle_moved, now that it can handle all BOs that are
+	 * reserved under p->ticket?
+	 */
 	amdgpu_bo_list_for_each_entry(e, p->bo_list) {
 		/* ignore duplicates */
 		bo = ttm_to_amdgpu_bo(e->tv.bo);
@@ -845,7 +849,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
 			return r;
 	}
 
-	r = amdgpu_vm_handle_moved(adev, vm);
+	r = amdgpu_vm_handle_moved(adev, vm, &p->ticket);
 	if (r)
 		return r;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
index 579adfafe4d0..50805613c38c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
@@ -414,7 +414,7 @@ amdgpu_dma_buf_move_notify(struct dma_buf_attachment *attach)
 
 		r = amdgpu_vm_clear_freed(adev, vm, NULL);
 		if (!r)
-			r = amdgpu_vm_handle_moved(adev, vm);
+			r = amdgpu_vm_handle_moved(adev, vm, ticket);
 
 		if (r && r != -EBUSY)
 			DRM_ERROR("Failed to invalidate VM page tables (%d))\n",
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index fc4563cf2828..726b42c6d606 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -2190,11 +2190,12 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
  * PTs have to be reserved!
  */
 int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
-			   struct amdgpu_vm *vm)
+			   struct amdgpu_vm *vm,
+			   struct ww_acquire_ctx *ticket)
 {
 	struct amdgpu_bo_va *bo_va, *tmp;
 	struct dma_resv *resv;
-	bool clear;
+	bool clear, unlock;
 	int r;
 
 	list_for_each_entry_safe(bo_va, tmp, &vm->moved, base.vm_status) {
@@ -2212,17 +2213,24 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
 		spin_unlock(&vm->invalidated_lock);
 
 		/* Try to reserve the BO to avoid clearing its ptes */
-		if (!amdgpu_vm_debug && dma_resv_trylock(resv))
+		if (!amdgpu_vm_debug && dma_resv_trylock(resv)) {
 			clear = false;
+			unlock = true;
+		/* The caller is already holding the reservation lock */
+		} else if (ticket && dma_resv_locking_ctx(resv) == ticket) {
+			clear = false;
+			unlock = false;
 		/* Somebody else is using the BO right now */
-		else
+		} else {
 			clear = true;
+			unlock = false;
+		}
 
 		r = amdgpu_vm_bo_update(adev, bo_va, clear, NULL);
 		if (r)
 			return r;
 
-		if (!clear)
+		if (unlock)
 			dma_resv_unlock(resv);
 		spin_lock(&vm->invalidated_lock);
 	}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index a40a6a993bb0..120a76aaae75 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -396,7 +396,8 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
 			  struct amdgpu_vm *vm,
 			  struct dma_fence **fence);
 int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
-			   struct amdgpu_vm *vm);
+			   struct amdgpu_vm *vm,
+			   struct ww_acquire_ctx *ticket);
 int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 				struct amdgpu_device *bo_adev,
 				struct amdgpu_vm *vm, bool immediate,
On 2022-03-17 01:20, Felix Kuehling wrote:
> Let amdgpu_vm_handle_moved update all BO VA mappings of BOs reserved by the caller. This will be useful for handling extra BO VA mappings in KFD VMs that are managed through the render node API.

Yes, that change has been on my TODO list for quite a while as well.

> TODO: This may also allow simplification of amdgpu_cs_vm_handling. See the TODO comment in the code.

No, that won't work just yet.

We need to change the TLB flush detection for that, but I'm already working on those changes as well.

> Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>

Please update the TODO. With that done:

Reviewed-by: Christian König <christian.koenig@amd.com>
On 2022-03-17 04:21, Christian König wrote:
> On 2022-03-17 01:20, Felix Kuehling wrote:
>> Let amdgpu_vm_handle_moved update all BO VA mappings of BOs reserved by the caller. This will be useful for handling extra BO VA mappings in KFD VMs that are managed through the render node API.
>
> Yes, that change has been on my TODO list for quite a while as well.
>
>> TODO: This may also allow simplification of amdgpu_cs_vm_handling. See the TODO comment in the code.
>
> No, that won't work just yet.
>
> We need to change the TLB flush detection for that, but I'm already working on those changes as well.

Your TLB flushing patch series looks good to me.

There is one other issue, though: amdgpu_vm_handle_moved doesn't update the sync object, so I couldn't figure out how I can wait for all the page table updates to finish.

Regards,
  Felix
On 2022-03-17 20:11, Felix Kuehling wrote:
> On 2022-03-17 04:21, Christian König wrote:
>> On 2022-03-17 01:20, Felix Kuehling wrote:
>>> Let amdgpu_vm_handle_moved update all BO VA mappings of BOs reserved by the caller. This will be useful for handling extra BO VA mappings in KFD VMs that are managed through the render node API.
>>
>> Yes, that change has been on my TODO list for quite a while as well.
>>
>>> TODO: This may also allow simplification of amdgpu_cs_vm_handling. See the TODO comment in the code.
>>
>> No, that won't work just yet.
>>
>> We need to change the TLB flush detection for that, but I'm already working on those changes as well.
>
> Your TLB flushing patch series looks good to me.
>
> There is one other issue, though: amdgpu_vm_handle_moved doesn't update the sync object, so I couldn't figure out how I can wait for all the page table updates to finish.

Yes, and inside the CS we still need to go over all the BOs and gather the VM updates to wait for.

Not sure if you can do that in the KFD code as well. How exactly do you want to use it?

Regards,
  Christian.

> Regards,
>   Felix
On 2022-03-18 08:38, Christian König wrote:
> On 2022-03-17 20:11, Felix Kuehling wrote:
>> On 2022-03-17 04:21, Christian König wrote:
>>> On 2022-03-17 01:20, Felix Kuehling wrote:
>>>> Let amdgpu_vm_handle_moved update all BO VA mappings of BOs reserved by the caller. This will be useful for handling extra BO VA mappings in KFD VMs that are managed through the render node API.
>>>
>>> Yes, that change has been on my TODO list for quite a while as well.
>>>
>>>> TODO: This may also allow simplification of amdgpu_cs_vm_handling. See the TODO comment in the code.
>>>
>>> No, that won't work just yet.
>>>
>>> We need to change the TLB flush detection for that, but I'm already working on those changes as well.
>>
>> Your TLB flushing patch series looks good to me.
>>
>> There is one other issue, though: amdgpu_vm_handle_moved doesn't update the sync object, so I couldn't figure out how I can wait for all the page table updates to finish.
>
> Yes, and inside the CS we still need to go over all the BOs and gather the VM updates to wait for.
>
> Not sure if you can do that in the KFD code as well. How exactly do you want to use it?

Before resuming user mode queues after an eviction, KFD currently updates all the BOs and their mappings that it knows about. But it doesn't know about the mappings made using the render node API. So my plan was to use amdgpu_vm_handle_moved for that. But I don't get any fences for the page table operations queued by amdgpu_vm_handle_moved. I think amdgpu_cs has the same problem. So how do I reliably wait for those to finish before I resume user mode queues?
If amdgpu_vm_handle_moved were able to update the sync object, then I also wouldn't need explicit amdgpu_vm_bo_update calls any more, similar to what I suggested in the TODO comment in amdgpu_cs_vm_handling.
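To make the missing piece concrete, here is a rough sketch of what the restore path would need — and it assumes peer_vm->last_update covers the page table updates queued by amdgpu_vm_handle_moved, which is exactly what is unclear:

/* Sketch only: gather the VM's last page-table update fence into the
 * restore sync object before resuming user mode queues. This assumes
 * peer_vm->last_update covers the updates queued by
 * amdgpu_vm_handle_moved, which is the open question here.
 */
ret = amdgpu_vm_handle_moved(adev, peer_vm, &ctx.ticket);
if (ret)
	goto validate_map_fail;
ret = amdgpu_sync_fence(&sync_obj, peer_vm->last_update);
if (ret)
	goto validate_map_fail;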
Regards,
  Felix

> Regards,
>   Christian.
>
>> Regards,
>>   Felix
Instead of attaching the eviction fence when a KFD BO is first mapped, attach it when it is allocated or imported. This is in preparation for allowing KFD BOs to be mapped using the render node API.
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
---
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 62 +++++++++----------
 1 file changed, 31 insertions(+), 31 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index d23fdebd2552..019e6e363fd2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -361,6 +361,23 @@ static int amdgpu_amdkfd_bo_validate(struct amdgpu_bo *bo, uint32_t domain,
 	return ret;
 }
 
+static int amdgpu_amdkfd_bo_validate_and_fence(struct amdgpu_bo *bo,
+					       uint32_t domain,
+					       struct dma_fence *fence)
+{
+	int ret = amdgpu_bo_reserve(bo, false);
+
+	if (ret)
+		return ret;
+
+	ret = amdgpu_amdkfd_bo_validate(bo, domain, true);
+	if (!ret)
+		amdgpu_bo_fence(bo, fence, true);
+	amdgpu_bo_unreserve(bo);
+
+	return ret;
+}
+
 static int amdgpu_amdkfd_validate_vm_bo(void *_unused, struct amdgpu_bo *bo)
 {
 	return amdgpu_amdkfd_bo_validate(bo, bo->allowed_domains, false);
@@ -1621,6 +1638,11 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
 		}
 		bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT;
 		bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT;
+	} else {
+		ret = amdgpu_amdkfd_bo_validate_and_fence(bo, domain,
+				&avm->process_info->eviction_fence->base);
+		if (ret)
+			goto err_validate_bo;
 	}
 
 	if (offset)
@@ -1630,6 +1652,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
 
 allocate_init_user_pages_failed:
 err_pin_bo:
+err_validate_bo:
 	remove_kgd_mem_from_kfd_bo_list(*mem, avm->process_info);
 	drm_vma_node_revoke(&gobj->vma_node, drm_priv);
 err_node_allow:
@@ -1699,10 +1722,6 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
 	if (unlikely(ret))
 		return ret;
 
-	/* The eviction fence should be removed by the last unmap.
-	 * TODO: Log an error condition if the bo still has the eviction fence
-	 * attached
-	 */
 	amdgpu_amdkfd_remove_eviction_fence(mem->bo,
 					process_info->eviction_fence);
 	pr_debug("Release VA 0x%llx - 0x%llx\n", mem->va,
@@ -1819,19 +1838,6 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
 	if (unlikely(ret))
 		goto out_unreserve;
 
-	if (mem->mapped_to_gpu_memory == 0 &&
-	    !amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) {
-		/* Validate BO only once. The eviction fence gets added to BO
-		 * the first time it is mapped. Validate will wait for all
-		 * background evictions to complete.
-		 */
-		ret = amdgpu_amdkfd_bo_validate(bo, domain, true);
-		if (ret) {
-			pr_debug("Validate failed\n");
-			goto out_unreserve;
-		}
-	}
-
 	list_for_each_entry(entry, &mem->attachments, list) {
 		if (entry->bo_va->base.vm != avm || entry->is_mapped)
 			continue;
@@ -1858,10 +1864,6 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
 			 mem->mapped_to_gpu_memory);
 	}
 
-	if (!amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) && !bo->tbo.pin_count)
-		amdgpu_bo_fence(bo,
-			&avm->process_info->eviction_fence->base,
-			true);
 	ret = unreserve_bo_and_vms(&ctx, false, false);
 
 	goto out;
@@ -1878,7 +1880,6 @@ int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
 		struct amdgpu_device *adev, struct kgd_mem *mem, void *drm_priv)
 {
 	struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv);
-	struct amdkfd_process_info *process_info = avm->process_info;
 	unsigned long bo_size = mem->bo->tbo.base.size;
 	struct kfd_mem_attachment *entry;
 	struct bo_vm_reservation_context ctx;
@@ -1919,15 +1920,6 @@ int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
 			 mem->mapped_to_gpu_memory);
 	}
 
-	/* If BO is unmapped from all VMs, unfence it. It can be evicted if
-	 * required.
-	 */
-	if (mem->mapped_to_gpu_memory == 0 &&
-	    !amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm) &&
-	    !mem->bo->tbo.pin_count)
-		amdgpu_amdkfd_remove_eviction_fence(mem->bo,
-				process_info->eviction_fence);
-
 unreserve_out:
 	unreserve_bo_and_vms(&ctx, false, false);
 out:
@@ -2090,8 +2082,16 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct amdgpu_device *adev,
 	amdgpu_sync_create(&(*mem)->sync);
 	(*mem)->is_imported = true;
 
+	ret = amdgpu_amdkfd_bo_validate_and_fence(bo, (*mem)->domain,
+			&avm->process_info->eviction_fence->base);
+	if (ret)
+		goto err_remove_mem;
+
 	return 0;
 
+err_remove_mem:
+	remove_kgd_mem_from_kfd_bo_list(*mem, avm->process_info);
+	drm_vma_node_revoke(&obj->vma_node, drm_priv);
 err_free_mem:
 	kfree(mem);
 err_put_obj:
When restoring after an eviction, use amdgpu_vm_handle_moved to update BO VA mappings in KFD VMs that are not managed through the KFD API. This should allow using the render node API to create more flexible memory mappings in KFD VMs.
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 019e6e363fd2..6f90ff4b485d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -2535,6 +2535,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
 			continue;
 
 		kfd_mem_dmaunmap_attachment(mem, attachment);
+		/* TODO: Could amdgpu_vm_handle_moved do this? */
 		ret = update_gpuvm_pte(mem, attachment, &sync_obj, NULL);
 		if (ret) {
 			pr_debug("Memory eviction: update PTE failed. Try again\n");
@@ -2546,6 +2547,20 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
 	if (failed_size)
 		pr_debug("0x%lx/0x%lx in system\n", failed_size, total_size);
 
+	/* Update mappings not managed by KFD */
+	list_for_each_entry(peer_vm, &process_info->vm_list_head,
+			    vm_list_node) {
+		struct amdgpu_device *adev = amdgpu_ttm_adev(
+			peer_vm->root.bo->tbo.bdev);
+
+		ret = amdgpu_vm_handle_moved(adev, peer_vm, &ctx.ticket);
+		if (ret) {
+			pr_debug("Memory eviction: handle moved failed. Try again\n");
+			goto validate_map_fail;
+		}
+		/* TODO: how to update the sync object? */
+	}
+
 	/* Update page directories */
 	ret = process_update_pds(process_info, &sync_obj);
 	if (ret) {
This is needed to correctly handle BOs imported into the GEM API, which would otherwise get added twice to the same VM.
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
---
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 26 +++++++++++++++----
 1 file changed, 21 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 6f90ff4b485d..bf90b2fa2738 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -371,8 +371,16 @@ static int amdgpu_amdkfd_bo_validate_and_fence(struct amdgpu_bo *bo,
 		return ret;
 
 	ret = amdgpu_amdkfd_bo_validate(bo, domain, true);
-	if (!ret)
-		amdgpu_bo_fence(bo, fence, true);
+	if (ret)
+		goto unreserve_out;
+
+	ret = dma_resv_reserve_shared(bo->tbo.base.resv, 1);
+	if (ret)
+		goto unreserve_out;
+
+	amdgpu_bo_fence(bo, fence, true);
+
+unreserve_out:
 	amdgpu_bo_unreserve(bo);
 
 	return ret;
@@ -716,6 +724,7 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem,
 	uint64_t va = mem->va;
 	struct kfd_mem_attachment *attachment[2] = {NULL, NULL};
 	struct amdgpu_bo *bo[2] = {NULL, NULL};
+	struct amdgpu_bo_va *bo_va;
 	int i, ret;
 
 	if (!va) {
@@ -779,7 +788,12 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem,
 			pr_debug("Unable to reserve BO during memory attach");
 			goto unwind;
 		}
-		attachment[i]->bo_va = amdgpu_vm_bo_add(adev, vm, bo[i]);
+		bo_va = amdgpu_vm_bo_find(vm, bo[i]);
+		if (!bo_va)
+			bo_va = amdgpu_vm_bo_add(adev, vm, bo[i]);
+		else
+			++bo_va->ref_count;
+		attachment[i]->bo_va = bo_va;
 		amdgpu_bo_unreserve(bo[i]);
 		if (unlikely(!attachment[i]->bo_va)) {
 			ret = -ENOMEM;
@@ -803,7 +817,8 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem,
 			continue;
 		if (attachment[i]->bo_va) {
 			amdgpu_bo_reserve(bo[i], true);
-			amdgpu_vm_bo_del(adev, attachment[i]->bo_va);
+			if (--attachment[i]->bo_va->ref_count == 0)
+				amdgpu_vm_bo_del(adev, attachment[i]->bo_va);
 			amdgpu_bo_unreserve(bo[i]);
 			list_del(&attachment[i]->list);
 		}
@@ -820,7 +835,8 @@ static void kfd_mem_detach(struct kfd_mem_attachment *attachment)
 
 	pr_debug("\t remove VA 0x%llx in entry %p\n",
 		 attachment->va, attachment);
-	amdgpu_vm_bo_del(attachment->adev, attachment->bo_va);
+	if (--attachment->bo_va->ref_count == 0)
+		amdgpu_vm_bo_del(attachment->adev, attachment->bo_va);
 	drm_gem_object_put(&bo->tbo.base);
 	list_del(&attachment->list);
 	kfree(attachment);