Based on Dmitry's work, but splitting the code into page directory and page table handling makes it far more readable and (hopefully) more reliable.
Allocations of page tables are made from the SA on demand; that should still work fine, since all page tables are of the same size.
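(For illustration only: the fixed page-table size falls out of the constants. A minimal stand-alone sketch of the math, assuming RADEON_VM_BLOCK_SIZE is 9; the value lives in radeon.h and is not shown in this patch.)

#include <stdio.h>

#define RADEON_VM_BLOCK_SIZE 9                         /* assumption */
#define RADEON_VM_PTE_COUNT  (1 << RADEON_VM_BLOCK_SIZE)

int main(void)
{
	unsigned max_pfn = 1 << 20;     /* example: 4 GiB of VM space */
	unsigned num_pdes = max_pfn >> RADEON_VM_BLOCK_SIZE;

	/* every page table covers PTE_COUNT pages, one 8-byte PTE each,
	 * so each table is exactly one 4 KiB GPU page */
	printf("page table size: %u bytes\n", RADEON_VM_PTE_COUNT * 8);
	printf("page directory: %u entries (%u bytes)\n",
	       num_pdes, num_pdes * 8);
	return 0;
}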
Also use the fact that allocations from the SA are mostly contiguous (except for end-of-buffer wraps and under very high memory pressure) to group the updates sent to the chipset-specific code into larger chunks.
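To make the grouping concrete, here is a minimal, hypothetical sketch of the run-length batching pattern the new update functions use; flush_range() and update_range() are made-up names standing in for radeon_asic_vm_set_page() and the driver loops:

#include <stdint.h>
#include <stdio.h>

/* stand-in for radeon_asic_vm_set_page() */
static void flush_range(uint64_t pe, uint64_t addr, unsigned count,
			uint32_t incr)
{
	printf("set_page: pe=0x%llx addr=0x%llx count=%u incr=%u\n",
	       (unsigned long long)pe, (unsigned long long)addr,
	       count, incr);
}

static void update_range(const uint64_t *dst, uint64_t pe_start,
			 unsigned n, uint32_t incr)
{
	uint64_t last_pe = ~0ULL, last_addr = ~0ULL;
	unsigned count = 0, i;

	for (i = 0; i < n; i++) {
		uint64_t pe = pe_start + i * 8;	/* 8 bytes per entry */

		/* start a new run unless this entry extends the old one */
		if (pe != last_pe + 8 * count ||
		    dst[i] != last_addr + (uint64_t)incr * count) {
			if (count)
				flush_range(last_pe, last_addr, count, incr);
			count = 1;
			last_pe = pe;
			last_addr = dst[i];
		} else {
			++count;
		}
	}
	if (count)
		flush_range(last_pe, last_addr, count, incr);
}

int main(void)
{
	/* three contiguous targets, then a gap: two chipset calls */
	uint64_t dst[] = { 0x1000, 0x2000, 0x3000, 0x9000 };

	update_range(dst, 0x100000, 4, 0x1000);
	return 0;
}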
v3: mostly a rewrite of Dmitry's previous patch.
v4: fix some typos and coding style.
Signed-off-by: Dmitry Cherkasov <Dmitrii.Cherkasov@amd.com>
Signed-off-by: Christian König <deathsimple@vodafone.de>
Tested-by: Michel Dänzer <michel.daenzer@amd.com>
---
 drivers/gpu/drm/radeon/ni.c          |    2 +-
 drivers/gpu/drm/radeon/radeon.h      |   11 +-
 drivers/gpu/drm/radeon/radeon_gart.c |  322 ++++++++++++++++++++++++++--------
 3 files changed, 262 insertions(+), 73 deletions(-)
diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c
index 9a46f7d..48e2337 100644
--- a/drivers/gpu/drm/radeon/ni.c
+++ b/drivers/gpu/drm/radeon/ni.c
@@ -1576,7 +1576,7 @@ void cayman_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
 	radeon_ring_write(ring, 0);

 	radeon_ring_write(ring, PACKET0(VM_CONTEXT0_PAGE_TABLE_END_ADDR + (vm->id << 2), 0));
-	radeon_ring_write(ring, vm->last_pfn);
+	radeon_ring_write(ring, rdev->vm_manager.max_pfn);

 	radeon_ring_write(ring, PACKET0(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2), 0));
 	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);

diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index b04c064..bc6b56b 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -663,9 +663,14 @@ struct radeon_vm {
 	struct list_head	list;
 	struct list_head	va;
 	unsigned		id;
-	unsigned		last_pfn;
-	u64			pd_gpu_addr;
-	struct radeon_sa_bo	*sa_bo;
+
+	/* contains the page directory */
+	struct radeon_sa_bo	*page_directory;
+	uint64_t		pd_gpu_addr;
+
+	/* array of page tables, one for each page directory entry */
+	struct radeon_sa_bo	**page_tables;
+
 	struct mutex		mutex;
 	/* last fence for cs using this vm */
 	struct radeon_fence	*fence;

diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c
index 753b7ca..b36b615 100644
--- a/drivers/gpu/drm/radeon/radeon_gart.c
+++ b/drivers/gpu/drm/radeon/radeon_gart.c
@@ -423,6 +423,18 @@ void radeon_gart_fini(struct radeon_device *rdev)
  */

 /**
+ * radeon_vm_num_pde - return the number of page directory entries
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Calculate the number of page directory entries (cayman+).
+ */
+static unsigned radeon_vm_num_pdes(struct radeon_device *rdev)
+{
+	return rdev->vm_manager.max_pfn >> RADEON_VM_BLOCK_SIZE;
+}
+
+/**
  * radeon_vm_directory_size - returns the size of the page directory in bytes
  *
  * @rdev: radeon_device pointer
@@ -431,7 +443,7 @@ void radeon_gart_fini(struct radeon_device *rdev)
  */
 static unsigned radeon_vm_directory_size(struct radeon_device *rdev)
 {
-	return (rdev->vm_manager.max_pfn >> RADEON_VM_BLOCK_SIZE) * 8;
+	return RADEON_GPU_PAGE_ALIGN(radeon_vm_num_pdes(rdev) * 8);
 }

 /**
@@ -451,11 +463,11 @@ int radeon_vm_manager_init(struct radeon_device *rdev)

 	if (!rdev->vm_manager.enabled) {
 		/* allocate enough for 2 full VM pts */
-		size = RADEON_GPU_PAGE_ALIGN(radeon_vm_directory_size(rdev));
-		size += RADEON_GPU_PAGE_ALIGN(rdev->vm_manager.max_pfn * 8);
+		size = radeon_vm_directory_size(rdev);
+		size += rdev->vm_manager.max_pfn * 8;
 		size *= 2;
 		r = radeon_sa_bo_manager_init(rdev, &rdev->vm_manager.sa_manager,
-					      size,
+					      RADEON_GPU_PAGE_ALIGN(size),
 					      RADEON_GEM_DOMAIN_VRAM);
 		if (r) {
 			dev_err(rdev->dev, "failed to allocate vm bo (%dKB)\n",
@@ -476,7 +488,7 @@ int radeon_vm_manager_init(struct radeon_device *rdev)
 	/* restore page table */
 	list_for_each_entry(vm, &rdev->vm_manager.lru_vm, list) {
-		if (vm->sa_bo == NULL)
+		if (vm->page_directory == NULL)
 			continue;

 		list_for_each_entry(bo_va, &vm->va, vm_list) {
@@ -500,16 +512,25 @@ static void radeon_vm_free_pt(struct radeon_device *rdev,
 			       struct radeon_vm *vm)
 {
 	struct radeon_bo_va *bo_va;
+	int i;

-	if (!vm->sa_bo)
+	if (!vm->page_directory)
 		return;

 	list_del_init(&vm->list);
-	radeon_sa_bo_free(rdev, &vm->sa_bo, vm->fence);
+	radeon_sa_bo_free(rdev, &vm->page_directory, vm->fence);

 	list_for_each_entry(bo_va, &vm->va, vm_list) {
 		bo_va->valid = false;
 	}
+
+	if (vm->page_tables == NULL)
+		return;
+
+	for (i = 0; i < radeon_vm_num_pdes(rdev); i++)
+		radeon_sa_bo_free(rdev, &vm->page_tables[i], vm->fence);
+
+	kfree(vm->page_tables);
 }

 /**
@@ -546,6 +567,35 @@ void radeon_vm_manager_fini(struct radeon_device *rdev)
 }

 /**
+ * radeon_vm_evict - evict page table to make room for new one
+ *
+ * @rdev: radeon_device pointer
+ * @vm: VM we want to allocate something for
+ *
+ * Evict a VM from the lru, making sure that it isn't @vm. (cayman+).
+ * Returns 0 for success, -ENOMEM for failure.
+ *
+ * Global and local mutex must be locked!
+ */
+int radeon_vm_evict(struct radeon_device *rdev, struct radeon_vm *vm)
+{
+	struct radeon_vm *vm_evict;
+
+	if (list_empty(&rdev->vm_manager.lru_vm))
+		return -ENOMEM;
+
+	vm_evict = list_first_entry(&rdev->vm_manager.lru_vm,
+				    struct radeon_vm, list);
+	if (vm_evict == vm)
+		return -ENOMEM;
+
+	mutex_lock(&vm_evict->mutex);
+	radeon_vm_free_pt(rdev, vm_evict);
+	mutex_unlock(&vm_evict->mutex);
+	return 0;
+}
+
+/**
  * radeon_vm_alloc_pt - allocates a page table for a VM
  *
  * @rdev: radeon_device pointer
@@ -559,20 +609,15 @@ void radeon_vm_manager_fini(struct radeon_device *rdev)
  */
 int radeon_vm_alloc_pt(struct radeon_device *rdev, struct radeon_vm *vm)
 {
-	struct radeon_vm *vm_evict;
-	int r;
+	unsigned pd_size, pts_size;
 	u64 *pd_addr;
-	int tables_size;
+	int r;

 	if (vm == NULL) {
 		return -EINVAL;
 	}

-	/* allocate enough to cover the current VM size */
-	tables_size = RADEON_GPU_PAGE_ALIGN(radeon_vm_directory_size(rdev));
-	tables_size += RADEON_GPU_PAGE_ALIGN(vm->last_pfn * 8);
-
-	if (vm->sa_bo != NULL) {
+	if (vm->page_directory != NULL) {
 		/* update lru */
 		list_del_init(&vm->list);
 		list_add_tail(&vm->list, &rdev->vm_manager.lru_vm);
@@ -580,25 +625,34 @@ int radeon_vm_alloc_pt(struct radeon_device *rdev, struct radeon_vm *vm)
 	}

 retry:
-	r = radeon_sa_bo_new(rdev, &rdev->vm_manager.sa_manager, &vm->sa_bo,
-			     tables_size, RADEON_GPU_PAGE_SIZE, false);
+	pd_size = RADEON_GPU_PAGE_ALIGN(radeon_vm_directory_size(rdev));
+	r = radeon_sa_bo_new(rdev, &rdev->vm_manager.sa_manager,
+			     &vm->page_directory, pd_size,
+			     RADEON_GPU_PAGE_SIZE, false);
 	if (r == -ENOMEM) {
-		if (list_empty(&rdev->vm_manager.lru_vm)) {
+		r = radeon_vm_evict(rdev, vm);
+		if (r)
 			return r;
-		}
-		vm_evict = list_first_entry(&rdev->vm_manager.lru_vm, struct radeon_vm, list);
-		mutex_lock(&vm_evict->mutex);
-		radeon_vm_free_pt(rdev, vm_evict);
-		mutex_unlock(&vm_evict->mutex);
 		goto retry;
 	} else if (r) {
 		return r;
 	}

-	pd_addr = radeon_sa_bo_cpu_addr(vm->sa_bo);
-	vm->pd_gpu_addr = radeon_sa_bo_gpu_addr(vm->sa_bo);
-	memset(pd_addr, 0, tables_size);
+	vm->pd_gpu_addr = radeon_sa_bo_gpu_addr(vm->page_directory);
+
+	/* Initially clear the page directory */
+	pd_addr = radeon_sa_bo_cpu_addr(vm->page_directory);
+	memset(pd_addr, 0, pd_size);
+
+	pts_size = radeon_vm_num_pdes(rdev) * sizeof(struct radeon_sa_bo *);
+	vm->page_tables = kzalloc(pts_size, GFP_KERNEL);
+
+	if (vm->page_tables == NULL) {
+		DRM_ERROR("Cannot allocate memory for page table array\n");
+		radeon_sa_bo_free(rdev, &vm->page_directory, vm->fence);
+		return -ENOMEM;
+	}

 	list_add_tail(&vm->list, &rdev->vm_manager.lru_vm);
 	return radeon_vm_bo_update_pte(rdev, vm, rdev->ring_tmp_bo.bo,
 				       &rdev->ring_tmp_bo.bo->tbo.mem);
@@ -793,20 +847,6 @@ int radeon_vm_bo_set_addr(struct radeon_device *rdev,
 	}
 	mutex_lock(&vm->mutex);
-	if (last_pfn > vm->last_pfn) {
-		/* release mutex and lock in right order */
-		mutex_unlock(&vm->mutex);
-		mutex_lock(&rdev->vm_manager.lock);
-		mutex_lock(&vm->mutex);
-		/* and check again */
-		if (last_pfn > vm->last_pfn) {
-			/* grow va space 32M by 32M */
-			unsigned align = ((32 << 20) >> 12) - 1;
-			radeon_vm_free_pt(rdev, vm);
-			vm->last_pfn = (last_pfn + align) & ~align;
-		}
-		mutex_unlock(&rdev->vm_manager.lock);
-	}
 	head = &vm->va;
 	last_offset = 0;
 	list_for_each_entry(tmp, &vm->va, vm_list) {
@@ -865,6 +905,155 @@ uint64_t radeon_vm_map_gart(struct radeon_device *rdev, uint64_t addr)
 }

 /**
+ * radeon_vm_update_pdes - make sure that page directory is valid
+ *
+ * @rdev: radeon_device pointer
+ * @vm: requested vm
+ * @start: start of GPU address range
+ * @end: end of GPU address range
+ *
+ * Allocates new page tables if necessary
+ * and updates the page directory (cayman+).
+ * Returns 0 for success, error for failure.
+ *
+ * Global and local mutex must be locked!
+ */
+static int radeon_vm_update_pdes(struct radeon_device *rdev,
+				 struct radeon_vm *vm,
+				 uint64_t start, uint64_t end)
+{
+	static const uint32_t incr = RADEON_VM_PTE_COUNT * 8;
+
+	uint64_t last_pde = ~0, last_pt = ~0;
+	unsigned count = 0;
+	uint64_t pt_idx;
+	int r;
+
+	start = (start / RADEON_GPU_PAGE_SIZE) >> RADEON_VM_BLOCK_SIZE;
+	end = (end / RADEON_GPU_PAGE_SIZE) >> RADEON_VM_BLOCK_SIZE;
+
+	/* walk over the address space and update the page directory */
+	for (pt_idx = start; pt_idx <= end; ++pt_idx) {
+		uint64_t pde, pt;
+
+		if (vm->page_tables[pt_idx])
+			continue;
+
+retry:
+		r = radeon_sa_bo_new(rdev, &rdev->vm_manager.sa_manager,
+				     &vm->page_tables[pt_idx],
+				     RADEON_VM_PTE_COUNT * 8,
+				     RADEON_GPU_PAGE_SIZE, false);
+
+		if (r == -ENOMEM) {
+			r = radeon_vm_evict(rdev, vm);
+			if (r)
+				return r;
+			goto retry;
+		} else if (r) {
+			return r;
+		}
+
+		pde = vm->pd_gpu_addr + pt_idx * 8;
+
+		pt = radeon_sa_bo_gpu_addr(vm->page_tables[pt_idx]);
+
+		if (((last_pde + 8 * count) != pde) ||
+		    ((last_pt + incr * count) != pt)) {
+
+			if (count) {
+				radeon_asic_vm_set_page(rdev, last_pde,
+							last_pt, count, incr,
+							RADEON_VM_PAGE_VALID);
+			}
+
+			count = 1;
+			last_pde = pde;
+			last_pt = pt;
+		} else {
+			++count;
+		}
+	}
+
+	if (count) {
+		radeon_asic_vm_set_page(rdev, last_pde, last_pt, count,
+					incr, RADEON_VM_PAGE_VALID);
+	}
+
+	return 0;
+}
+
+/**
+ * radeon_vm_update_ptes - make sure that page tables are valid
+ *
+ * @rdev: radeon_device pointer
+ * @vm: requested vm
+ * @start: start of GPU address range
+ * @end: end of GPU address range
+ * @dst: destination address to map to
+ * @flags: mapping flags
+ *
+ * Update the page tables in the range @start - @end (cayman+).
+ *
+ * Global and local mutex must be locked!
+ */
+static void radeon_vm_update_ptes(struct radeon_device *rdev,
+				  struct radeon_vm *vm,
+				  uint64_t start, uint64_t end,
+				  uint64_t dst, uint32_t flags)
+{
+	static const uint64_t mask = RADEON_VM_PTE_COUNT - 1;
+
+	uint64_t last_pte = ~0, last_dst = ~0;
+	unsigned count = 0;
+	uint64_t addr;
+
+	start = start / RADEON_GPU_PAGE_SIZE;
+	end = end / RADEON_GPU_PAGE_SIZE;
+
+	/* walk over the address space and update the page tables */
+	for (addr = start; addr < end; ) {
+		uint64_t pt_idx = addr >> RADEON_VM_BLOCK_SIZE;
+		unsigned nptes;
+		uint64_t pte;
+
+		if ((addr & ~mask) == (end & ~mask))
+			nptes = end - addr;
+		else
+			nptes = RADEON_VM_PTE_COUNT - (addr & mask);
+
+		pte = radeon_sa_bo_gpu_addr(vm->page_tables[pt_idx]);
+		pte += (addr & mask) * 8;
+
+		if (((last_pte + 8 * count) != pte) ||
+		    ((count + nptes) > 1 << 11)) {
+
+			if (count) {
+				radeon_asic_vm_set_page(rdev, last_pte,
+							last_dst, count,
+							RADEON_GPU_PAGE_SIZE,
+							flags);
+			}
+
+			count = nptes;
+			last_pte = pte;
+			last_dst = dst;
+		} else {
+			count += nptes;
+		}
+
+		addr += nptes;
+		dst += nptes * RADEON_GPU_PAGE_SIZE;
+	}
+
+	if (count) {
+		radeon_asic_vm_set_page(rdev, last_pte, last_dst, count,
+					RADEON_GPU_PAGE_SIZE, flags);
+	}
+}
+
+/**
  * radeon_vm_bo_update_pte - map a bo into the vm page table
  *
  * @rdev: radeon_device pointer
@@ -887,12 +1076,11 @@ int radeon_vm_bo_update_pte(struct radeon_device *rdev,
 	struct radeon_semaphore *sem = NULL;
 	struct radeon_bo_va *bo_va;
 	unsigned nptes, npdes, ndw;
-	uint64_t pe, addr;
-	uint64_t pfn;
+	uint64_t addr;
 	int r;
 	/* nothing to do if vm isn't bound */
-	if (vm->sa_bo == NULL)
+	if (vm->page_directory == NULL)
 		return 0;

 	bo_va = radeon_vm_bo_find(vm, bo);
@@ -939,25 +1127,29 @@ int radeon_vm_bo_update_pte(struct radeon_device *rdev,
 		}
 	}

-	/* estimate number of dw needed */
-	/* reserve space for 32-bit padding */
-	ndw = 32;
-
 	nptes = radeon_bo_ngpu_pages(bo);

-	pfn = (bo_va->soffset / RADEON_GPU_PAGE_SIZE);
+	/* assume two extra pdes in case the mapping overlaps the borders */
+	npdes = (nptes >> RADEON_VM_BLOCK_SIZE) + 2;
+
+	/* estimate number of dw needed */
+	/* semaphore, fence and padding */
+	ndw = 32;

-	/* handle cases where a bo spans several pdes */
-	npdes = (ALIGN(pfn + nptes, RADEON_VM_PTE_COUNT) -
-		 (pfn & ~(RADEON_VM_PTE_COUNT - 1))) >> RADEON_VM_BLOCK_SIZE;
+	if (RADEON_VM_BLOCK_SIZE > 11)
+		/* reserve space for one header for every 2k dwords */
+		ndw += (nptes >> 11) * 3;
+	else
+		/* reserve space for one header for
+		    every (1 << BLOCK_SIZE) entries */
+		ndw += (nptes >> RADEON_VM_BLOCK_SIZE) * 3;

-	/* reserve space for one header for every 2k dwords */
-	ndw += (nptes >> 11) * 3;
 	/* reserve space for pte addresses */
 	ndw += nptes * 2;

 	/* reserve space for one header for every 2k dwords */
 	ndw += (npdes >> 11) * 3;
+
 	/* reserve space for pde addresses */
 	ndw += npdes * 2;
@@ -971,22 +1163,14 @@ int radeon_vm_bo_update_pte(struct radeon_device *rdev,
 		radeon_fence_note_sync(vm->fence, ridx);
 	}

-	/* update page table entries */
-	pe = vm->pd_gpu_addr;
-	pe += radeon_vm_directory_size(rdev);
-	pe += (bo_va->soffset / RADEON_GPU_PAGE_SIZE) * 8;
-
-	radeon_asic_vm_set_page(rdev, pe, addr, nptes,
-				RADEON_GPU_PAGE_SIZE, bo_va->flags);
-
-	/* update page directory entries */
-	addr = pe;
-
-	pe = vm->pd_gpu_addr;
-	pe += ((bo_va->soffset / RADEON_GPU_PAGE_SIZE) >> RADEON_VM_BLOCK_SIZE) * 8;
+	r = radeon_vm_update_pdes(rdev, vm, bo_va->soffset, bo_va->eoffset);
+	if (r) {
+		radeon_ring_unlock_undo(rdev, ring);
+		return r;
+	}

-	radeon_asic_vm_set_page(rdev, pe, addr, npdes,
-				RADEON_VM_PTE_COUNT * 8, RADEON_VM_PAGE_VALID);
+	radeon_vm_update_ptes(rdev, vm, bo_va->soffset, bo_va->eoffset,
+			      addr, bo_va->flags);

 	radeon_fence_unref(&vm->fence);
 	r = radeon_fence_emit(rdev, &vm->fence, ridx);
@@ -997,6 +1181,7 @@ int radeon_vm_bo_update_pte(struct radeon_device *rdev,
 	radeon_ring_unlock_commit(rdev, ring);
 	radeon_semaphore_free(rdev, &sem, vm->fence);
 	radeon_fence_unref(&vm->last_flush);
+
 	return 0;
 }
@@ -1068,7 +1253,6 @@ int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm)

 	vm->id = 0;
 	vm->fence = NULL;
-	vm->last_pfn = 0;
 	mutex_init(&vm->mutex);
 	INIT_LIST_HEAD(&vm->list);
 	INIT_LIST_HEAD(&vm->va);
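As a rough sanity check of the ndw estimate in the hunk above, a hypothetical worked example for a 1 MiB BO, again assuming RADEON_VM_BLOCK_SIZE is 9 (so the else branch applies):

#include <stdio.h>

int main(void)
{
	unsigned nptes = 256;			/* 1 MiB / 4 KiB pages */
	unsigned npdes = (nptes >> 9) + 2;	/* two border pdes -> 2 */
	unsigned ndw = 32;			/* semaphore, fence, padding */

	ndw += (nptes >> 9) * 3;	/* pte headers: 0 here */
	ndw += nptes * 2;		/* pte addresses: 512 */
	ndw += (npdes >> 11) * 3;	/* pde headers: 0 */
	ndw += npdes * 2;		/* pde addresses: 4 */

	printf("ndw = %u\n", ndw);	/* 548 dwords reserved */
	return 0;
}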
We want to use VMs without the IB pool in the future.
Signed-off-by: Christian König <deathsimple@vodafone.de>
---
 drivers/gpu/drm/radeon/radeon.h      |    2 +-
 drivers/gpu/drm/radeon/radeon_gart.c |   23 +++--------------------
 drivers/gpu/drm/radeon/radeon_kms.c  |   11 ++++++++++-
 3 files changed, 14 insertions(+), 22 deletions(-)
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index bc6b56b..54cf9b5 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -1848,7 +1848,7 @@ extern void radeon_ttm_set_active_vram_size(struct radeon_device *rdev, u64 size
  */
 int radeon_vm_manager_init(struct radeon_device *rdev);
 void radeon_vm_manager_fini(struct radeon_device *rdev);
-int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm);
+void radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm);
 void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm);
 int radeon_vm_alloc_pt(struct radeon_device *rdev, struct radeon_vm *vm);
 struct radeon_fence *radeon_vm_grab_id(struct radeon_device *rdev,

diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c
index b36b615..3c4929b 100644
--- a/drivers/gpu/drm/radeon/radeon_gart.c
+++ b/drivers/gpu/drm/radeon/radeon_gart.c
@@ -602,7 +602,6 @@ int radeon_vm_evict(struct radeon_device *rdev, struct radeon_vm *vm)
  * @vm: vm to bind
  *
  * Allocate a page table for the requested vm (cayman+).
- * Also starts to populate the page table.
  * Returns 0 for success, error for failure.
  *
  * Global and local mutex must be locked!
@@ -655,8 +654,7 @@ retry:
 	}

 	list_add_tail(&vm->list, &rdev->vm_manager.lru_vm);
-	return radeon_vm_bo_update_pte(rdev, vm, rdev->ring_tmp_bo.bo,
-				       &rdev->ring_tmp_bo.bo->tbo.mem);
+	return 0;
 }

 /**
@@ -1241,30 +1239,15 @@ void radeon_vm_bo_invalidate(struct radeon_device *rdev,
  * @rdev: radeon_device pointer
  * @vm: requested vm
  *
- * Init @vm (cayman+).
- * Map the IB pool and any other shared objects into the VM
- * by default as it's used by all VMs.
- * Returns 0 for success, error for failure.
+ * Init @vm fields (cayman+).
  */
-int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm)
+void radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm)
 {
-	struct radeon_bo_va *bo_va;
-	int r;
-
 	vm->id = 0;
 	vm->fence = NULL;
 	mutex_init(&vm->mutex);
 	INIT_LIST_HEAD(&vm->list);
 	INIT_LIST_HEAD(&vm->va);
-
-	/* map the ib pool buffer at 0 in virtual address space, set
-	 * read only
-	 */
-	bo_va = radeon_vm_bo_add(rdev, vm, rdev->ring_tmp_bo.bo);
-	r = radeon_vm_bo_set_addr(rdev, bo_va, RADEON_VA_IB_OFFSET,
-				  RADEON_VM_PAGE_READABLE |
-				  RADEON_VM_PAGE_SNOOPED);
-	return r;
 }
 /**

diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c
index cb8e94d..065980f 100644
--- a/drivers/gpu/drm/radeon/radeon_kms.c
+++ b/drivers/gpu/drm/radeon/radeon_kms.c
@@ -420,6 +420,7 @@ int radeon_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
 	/* new gpu have virtual address space support */
 	if (rdev->family >= CHIP_CAYMAN) {
 		struct radeon_fpriv *fpriv;
+		struct radeon_bo_va *bo_va;
 		int r;

 		fpriv = kzalloc(sizeof(*fpriv), GFP_KERNEL);
@@ -427,7 +428,15 @@ int radeon_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
 			return -ENOMEM;
 		}

-		r = radeon_vm_init(rdev, &fpriv->vm);
+		radeon_vm_init(rdev, &fpriv->vm);
+
+		/* map the ib pool buffer read only into
+		 * virtual address space */
+		bo_va = radeon_vm_bo_add(rdev, &fpriv->vm,
+					 rdev->ring_tmp_bo.bo);
+		r = radeon_vm_bo_set_addr(rdev, bo_va, RADEON_VA_IB_OFFSET,
+					  RADEON_VM_PAGE_READABLE |
+					  RADEON_VM_PAGE_SNOOPED);
 		if (r) {
 			radeon_vm_fini(rdev, &fpriv->vm);
 			kfree(fpriv);
Make it possible to allocate a persistent page table.
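The resulting calling convention, sketched below under the assumption that the caller holds both the global and the local mutex (as radeon_cs_ib_vm_chunk() does); use_vm() is a made-up illustration, not part of the patch:

/* Hypothetical sketch: since radeon_vm_alloc_pt() no longer touches the
 * LRU, a caller that wants the page table to stay resident simply never
 * calls radeon_vm_add_to_lru(), while the CS path adds it back as the
 * radeon_cs.c hunk below does. */
static int use_vm(struct radeon_device *rdev, struct radeon_vm *vm,
		  bool persistent)
{
	int r;

	mutex_lock(&rdev->vm_manager.lock);
	mutex_lock(&vm->mutex);

	r = radeon_vm_alloc_pt(rdev, vm);	/* allocate only */
	if (!r && !persistent)
		radeon_vm_add_to_lru(rdev, vm);	/* evictable again */

	mutex_unlock(&vm->mutex);
	mutex_unlock(&rdev->vm_manager.lock);
	return r;
}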
Signed-off-by: Christian König <deathsimple@vodafone.de>
---
 drivers/gpu/drm/radeon/radeon.h      |    1 +
 drivers/gpu/drm/radeon/radeon_cs.c   |    1 +
 drivers/gpu/drm/radeon/radeon_gart.c |   20 ++++++++++++++++----
 3 files changed, 18 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 54cf9b5..8c42d54 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -1851,6 +1851,7 @@ void radeon_vm_manager_fini(struct radeon_device *rdev);
 void radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm);
 void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm);
 int radeon_vm_alloc_pt(struct radeon_device *rdev, struct radeon_vm *vm);
+void radeon_vm_add_to_lru(struct radeon_device *rdev, struct radeon_vm *vm);
 struct radeon_fence *radeon_vm_grab_id(struct radeon_device *rdev,
 				       struct radeon_vm *vm, int ring);
 void radeon_vm_fence(struct radeon_device *rdev,

diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c
index d59eb59..023c67d 100644
--- a/drivers/gpu/drm/radeon/radeon_cs.c
+++ b/drivers/gpu/drm/radeon/radeon_cs.c
@@ -478,6 +478,7 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev,
 	}

 out:
+	radeon_vm_add_to_lru(rdev, vm);
 	mutex_unlock(&vm->mutex);
 	mutex_unlock(&rdev->vm_manager.lock);
 	return r;

diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c
index 3c4929b..878a54d 100644
--- a/drivers/gpu/drm/radeon/radeon_gart.c
+++ b/drivers/gpu/drm/radeon/radeon_gart.c
@@ -617,9 +617,6 @@ int radeon_vm_alloc_pt(struct radeon_device *rdev, struct radeon_vm *vm)
 	}
 	if (vm->page_directory != NULL) {
-		/* update lru */
-		list_del_init(&vm->list);
-		list_add_tail(&vm->list, &rdev->vm_manager.lru_vm);
 		return 0;
 	}

@@ -653,11 +650,26 @@ retry:
 		return -ENOMEM;
 	}

-	list_add_tail(&vm->list, &rdev->vm_manager.lru_vm);
 	return 0;
 }

 /**
+ * radeon_vm_add_to_lru - add VMs page table to LRU list
+ *
+ * @rdev: radeon_device pointer
+ * @vm: vm to add to LRU
+ *
+ * Add the allocated page table to the LRU list (cayman+).
+ *
+ * Global mutex must be locked!
+ */
+void radeon_vm_add_to_lru(struct radeon_device *rdev, struct radeon_vm *vm)
+{
+	list_del_init(&vm->list);
+	list_add_tail(&vm->list, &rdev->vm_manager.lru_vm);
+}
+
+/**
  * radeon_vm_grab_id - allocate the next free VMID
  *
  * @rdev: radeon_device pointer
On Mon, Oct 8, 2012 at 11:55 AM, Christian König <deathsimple@vodafone.de> wrote:
> Based on Dmitry's work, but splitting the code into page directory and
> page table handling makes it far more readable and (hopefully) more
> reliable.
>
> [...]
For the series:
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>