From: Christian König <christian.koenig@amd.com>
Both are complex enough on their own.
Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/radeon/Makefile      |   2 +-
 drivers/gpu/drm/radeon/radeon_gart.c | 958 ----------------------------------
 drivers/gpu/drm/radeon/radeon_vm.c   | 981 +++++++++++++++++++++++++++++++++++
 3 files changed, 982 insertions(+), 959 deletions(-)
 create mode 100644 drivers/gpu/drm/radeon/radeon_vm.c
diff --git a/drivers/gpu/drm/radeon/Makefile b/drivers/gpu/drm/radeon/Makefile
index ed60caa..0943353 100644
--- a/drivers/gpu/drm/radeon/Makefile
+++ b/drivers/gpu/drm/radeon/Makefile
@@ -80,7 +80,7 @@ radeon-y += radeon_device.o radeon_asic.o radeon_kms.o \
 	r600_dpm.o rs780_dpm.o rv6xx_dpm.o rv770_dpm.o rv730_dpm.o rv740_dpm.o \
 	rv770_smc.o cypress_dpm.o btc_dpm.o sumo_dpm.o sumo_smc.o trinity_dpm.o \
 	trinity_smc.o ni_dpm.o si_smc.o si_dpm.o kv_smc.o kv_dpm.o ci_smc.o \
-	ci_dpm.o dce6_afmt.o
+	ci_dpm.o dce6_afmt.o radeon_vm.o

 # add async DMA block
 radeon-y += \

diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c
index a8f9b46..2e72365 100644
--- a/drivers/gpu/drm/radeon/radeon_gart.c
+++ b/drivers/gpu/drm/radeon/radeon_gart.c
@@ -28,8 +28,6 @@
 #include <drm/drmP.h>
 #include <drm/radeon_drm.h>
 #include "radeon.h"
-#include "radeon_reg.h"
-#include "radeon_trace.h"
/* * GART @@ -394,959 +392,3 @@ void radeon_gart_fini(struct radeon_device *rdev)
radeon_dummy_page_fini(rdev); } - -/* - * GPUVM - * GPUVM is similar to the legacy gart on older asics, however - * rather than there being a single global gart table - * for the entire GPU, there are multiple VM page tables active - * at any given time. The VM page tables can contain a mix - * vram pages and system memory pages and system memory pages - * can be mapped as snooped (cached system pages) or unsnooped - * (uncached system pages). - * Each VM has an ID associated with it and there is a page table - * associated with each VMID. When execting a command buffer, - * the kernel tells the the ring what VMID to use for that command - * buffer. VMIDs are allocated dynamically as commands are submitted. - * The userspace drivers maintain their own address space and the kernel - * sets up their pages tables accordingly when they submit their - * command buffers and a VMID is assigned. - * Cayman/Trinity support up to 8 active VMs at any given time; - * SI supports 16. - */ - -/* - * vm helpers - * - * TODO bind a default page at vm initialization for default address - */ - -/** - * radeon_vm_num_pde - return the number of page directory entries - * - * @rdev: radeon_device pointer - * - * Calculate the number of page directory entries (cayman+). - */ -static unsigned radeon_vm_num_pdes(struct radeon_device *rdev) -{ - return rdev->vm_manager.max_pfn >> RADEON_VM_BLOCK_SIZE; -} - -/** - * radeon_vm_directory_size - returns the size of the page directory in bytes - * - * @rdev: radeon_device pointer - * - * Calculate the size of the page directory in bytes (cayman+). - */ -static unsigned radeon_vm_directory_size(struct radeon_device *rdev) -{ - return RADEON_GPU_PAGE_ALIGN(radeon_vm_num_pdes(rdev) * 8); -} - -/** - * radeon_vm_manager_init - init the vm manager - * - * @rdev: radeon_device pointer - * - * Init the vm manager (cayman+). - * Returns 0 for success, error for failure. - */ -int radeon_vm_manager_init(struct radeon_device *rdev) -{ - struct radeon_vm *vm; - struct radeon_bo_va *bo_va; - int r; - unsigned size; - - if (!rdev->vm_manager.enabled) { - /* allocate enough for 2 full VM pts */ - size = radeon_vm_directory_size(rdev); - size += rdev->vm_manager.max_pfn * 8; - size *= 2; - r = radeon_sa_bo_manager_init(rdev, &rdev->vm_manager.sa_manager, - RADEON_GPU_PAGE_ALIGN(size), - RADEON_VM_PTB_ALIGN_SIZE, - RADEON_GEM_DOMAIN_VRAM); - if (r) { - dev_err(rdev->dev, "failed to allocate vm bo (%dKB)\n", - (rdev->vm_manager.max_pfn * 8) >> 10); - return r; - } - - r = radeon_asic_vm_init(rdev); - if (r) - return r; - - rdev->vm_manager.enabled = true; - - r = radeon_sa_bo_manager_start(rdev, &rdev->vm_manager.sa_manager); - if (r) - return r; - } - - /* restore page table */ - list_for_each_entry(vm, &rdev->vm_manager.lru_vm, list) { - if (vm->page_directory == NULL) - continue; - - list_for_each_entry(bo_va, &vm->va, vm_list) { - bo_va->valid = false; - } - } - return 0; -} - -/** - * radeon_vm_free_pt - free the page table for a specific vm - * - * @rdev: radeon_device pointer - * @vm: vm to unbind - * - * Free the page table of a specific vm (cayman+). - * - * Global and local mutex must be lock! 
- */ -static void radeon_vm_free_pt(struct radeon_device *rdev, - struct radeon_vm *vm) -{ - struct radeon_bo_va *bo_va; - int i; - - if (!vm->page_directory) - return; - - list_del_init(&vm->list); - radeon_sa_bo_free(rdev, &vm->page_directory, vm->fence); - - list_for_each_entry(bo_va, &vm->va, vm_list) { - bo_va->valid = false; - } - - if (vm->page_tables == NULL) - return; - - for (i = 0; i < radeon_vm_num_pdes(rdev); i++) - radeon_sa_bo_free(rdev, &vm->page_tables[i], vm->fence); - - kfree(vm->page_tables); -} - -/** - * radeon_vm_manager_fini - tear down the vm manager - * - * @rdev: radeon_device pointer - * - * Tear down the VM manager (cayman+). - */ -void radeon_vm_manager_fini(struct radeon_device *rdev) -{ - struct radeon_vm *vm, *tmp; - int i; - - if (!rdev->vm_manager.enabled) - return; - - mutex_lock(&rdev->vm_manager.lock); - /* free all allocated page tables */ - list_for_each_entry_safe(vm, tmp, &rdev->vm_manager.lru_vm, list) { - mutex_lock(&vm->mutex); - radeon_vm_free_pt(rdev, vm); - mutex_unlock(&vm->mutex); - } - for (i = 0; i < RADEON_NUM_VM; ++i) { - radeon_fence_unref(&rdev->vm_manager.active[i]); - } - radeon_asic_vm_fini(rdev); - mutex_unlock(&rdev->vm_manager.lock); - - radeon_sa_bo_manager_suspend(rdev, &rdev->vm_manager.sa_manager); - radeon_sa_bo_manager_fini(rdev, &rdev->vm_manager.sa_manager); - rdev->vm_manager.enabled = false; -} - -/** - * radeon_vm_evict - evict page table to make room for new one - * - * @rdev: radeon_device pointer - * @vm: VM we want to allocate something for - * - * Evict a VM from the lru, making sure that it isn't @vm. (cayman+). - * Returns 0 for success, -ENOMEM for failure. - * - * Global and local mutex must be locked! - */ -static int radeon_vm_evict(struct radeon_device *rdev, struct radeon_vm *vm) -{ - struct radeon_vm *vm_evict; - - if (list_empty(&rdev->vm_manager.lru_vm)) - return -ENOMEM; - - vm_evict = list_first_entry(&rdev->vm_manager.lru_vm, - struct radeon_vm, list); - if (vm_evict == vm) - return -ENOMEM; - - mutex_lock(&vm_evict->mutex); - radeon_vm_free_pt(rdev, vm_evict); - mutex_unlock(&vm_evict->mutex); - return 0; -} - -/** - * radeon_vm_alloc_pt - allocates a page table for a VM - * - * @rdev: radeon_device pointer - * @vm: vm to bind - * - * Allocate a page table for the requested vm (cayman+). - * Returns 0 for success, error for failure. - * - * Global and local mutex must be locked! 
- */ -int radeon_vm_alloc_pt(struct radeon_device *rdev, struct radeon_vm *vm) -{ - unsigned pd_size, pd_entries, pts_size; - struct radeon_ib ib; - int r; - - if (vm == NULL) { - return -EINVAL; - } - - if (vm->page_directory != NULL) { - return 0; - } - - pd_size = radeon_vm_directory_size(rdev); - pd_entries = radeon_vm_num_pdes(rdev); - -retry: - r = radeon_sa_bo_new(rdev, &rdev->vm_manager.sa_manager, - &vm->page_directory, pd_size, - RADEON_VM_PTB_ALIGN_SIZE, false); - if (r == -ENOMEM) { - r = radeon_vm_evict(rdev, vm); - if (r) - return r; - goto retry; - - } else if (r) { - return r; - } - - vm->pd_gpu_addr = radeon_sa_bo_gpu_addr(vm->page_directory); - - /* Initially clear the page directory */ - r = radeon_ib_get(rdev, R600_RING_TYPE_DMA_INDEX, &ib, - NULL, pd_entries * 2 + 64); - if (r) { - radeon_sa_bo_free(rdev, &vm->page_directory, vm->fence); - return r; - } - - ib.length_dw = 0; - - radeon_asic_vm_set_page(rdev, &ib, vm->pd_gpu_addr, - 0, pd_entries, 0, 0); - - radeon_semaphore_sync_to(ib.semaphore, vm->fence); - r = radeon_ib_schedule(rdev, &ib, NULL); - if (r) { - radeon_ib_free(rdev, &ib); - radeon_sa_bo_free(rdev, &vm->page_directory, vm->fence); - return r; - } - radeon_fence_unref(&vm->fence); - vm->fence = radeon_fence_ref(ib.fence); - radeon_ib_free(rdev, &ib); - radeon_fence_unref(&vm->last_flush); - - /* allocate page table array */ - pts_size = radeon_vm_num_pdes(rdev) * sizeof(struct radeon_sa_bo *); - vm->page_tables = kzalloc(pts_size, GFP_KERNEL); - - if (vm->page_tables == NULL) { - DRM_ERROR("Cannot allocate memory for page table array\n"); - radeon_sa_bo_free(rdev, &vm->page_directory, vm->fence); - return -ENOMEM; - } - - return 0; -} - -/** - * radeon_vm_add_to_lru - add VMs page table to LRU list - * - * @rdev: radeon_device pointer - * @vm: vm to add to LRU - * - * Add the allocated page table to the LRU list (cayman+). - * - * Global mutex must be locked! - */ -void radeon_vm_add_to_lru(struct radeon_device *rdev, struct radeon_vm *vm) -{ - list_del_init(&vm->list); - list_add_tail(&vm->list, &rdev->vm_manager.lru_vm); -} - -/** - * radeon_vm_grab_id - allocate the next free VMID - * - * @rdev: radeon_device pointer - * @vm: vm to allocate id for - * @ring: ring we want to submit job to - * - * Allocate an id for the vm (cayman+). - * Returns the fence we need to sync to (if any). - * - * Global and local mutex must be locked! - */ -struct radeon_fence *radeon_vm_grab_id(struct radeon_device *rdev, - struct radeon_vm *vm, int ring) -{ - struct radeon_fence *best[RADEON_NUM_RINGS] = {}; - unsigned choices[2] = {}; - unsigned i; - - /* check if the id is still valid */ - if (vm->last_id_use && vm->last_id_use == rdev->vm_manager.active[vm->id]) - return NULL; - - /* we definately need to flush */ - radeon_fence_unref(&vm->last_flush); - - /* skip over VMID 0, since it is the system VM */ - for (i = 1; i < rdev->vm_manager.nvm; ++i) { - struct radeon_fence *fence = rdev->vm_manager.active[i]; - - if (fence == NULL) { - /* found a free one */ - vm->id = i; - trace_radeon_vm_grab_id(vm->id, ring); - return NULL; - } - - if (radeon_fence_is_earlier(fence, best[fence->ring])) { - best[fence->ring] = fence; - choices[fence->ring == ring ? 
0 : 1] = i; - } - } - - for (i = 0; i < 2; ++i) { - if (choices[i]) { - vm->id = choices[i]; - trace_radeon_vm_grab_id(vm->id, ring); - return rdev->vm_manager.active[choices[i]]; - } - } - - /* should never happen */ - BUG(); - return NULL; -} - -/** - * radeon_vm_fence - remember fence for vm - * - * @rdev: radeon_device pointer - * @vm: vm we want to fence - * @fence: fence to remember - * - * Fence the vm (cayman+). - * Set the fence used to protect page table and id. - * - * Global and local mutex must be locked! - */ -void radeon_vm_fence(struct radeon_device *rdev, - struct radeon_vm *vm, - struct radeon_fence *fence) -{ - radeon_fence_unref(&rdev->vm_manager.active[vm->id]); - rdev->vm_manager.active[vm->id] = radeon_fence_ref(fence); - - radeon_fence_unref(&vm->fence); - vm->fence = radeon_fence_ref(fence); - - radeon_fence_unref(&vm->last_id_use); - vm->last_id_use = radeon_fence_ref(fence); -} - -/** - * radeon_vm_bo_find - find the bo_va for a specific vm & bo - * - * @vm: requested vm - * @bo: requested buffer object - * - * Find @bo inside the requested vm (cayman+). - * Search inside the @bos vm list for the requested vm - * Returns the found bo_va or NULL if none is found - * - * Object has to be reserved! - */ -struct radeon_bo_va *radeon_vm_bo_find(struct radeon_vm *vm, - struct radeon_bo *bo) -{ - struct radeon_bo_va *bo_va; - - list_for_each_entry(bo_va, &bo->va, bo_list) { - if (bo_va->vm == vm) { - return bo_va; - } - } - return NULL; -} - -/** - * radeon_vm_bo_add - add a bo to a specific vm - * - * @rdev: radeon_device pointer - * @vm: requested vm - * @bo: radeon buffer object - * - * Add @bo into the requested vm (cayman+). - * Add @bo to the list of bos associated with the vm - * Returns newly added bo_va or NULL for failure - * - * Object has to be reserved! - */ -struct radeon_bo_va *radeon_vm_bo_add(struct radeon_device *rdev, - struct radeon_vm *vm, - struct radeon_bo *bo) -{ - struct radeon_bo_va *bo_va; - - bo_va = kzalloc(sizeof(struct radeon_bo_va), GFP_KERNEL); - if (bo_va == NULL) { - return NULL; - } - bo_va->vm = vm; - bo_va->bo = bo; - bo_va->soffset = 0; - bo_va->eoffset = 0; - bo_va->flags = 0; - bo_va->valid = false; - bo_va->ref_count = 1; - INIT_LIST_HEAD(&bo_va->bo_list); - INIT_LIST_HEAD(&bo_va->vm_list); - - mutex_lock(&vm->mutex); - list_add(&bo_va->vm_list, &vm->va); - list_add_tail(&bo_va->bo_list, &bo->va); - mutex_unlock(&vm->mutex); - - return bo_va; -} - -/** - * radeon_vm_bo_set_addr - set bos virtual address inside a vm - * - * @rdev: radeon_device pointer - * @bo_va: bo_va to store the address - * @soffset: requested offset of the buffer in the VM address space - * @flags: attributes of pages (read/write/valid/etc.) - * - * Set offset of @bo_va (cayman+). - * Validate and set the offset requested within the vm address space. - * Returns 0 for success, error for failure. - * - * Object has to be reserved! 
- */ -int radeon_vm_bo_set_addr(struct radeon_device *rdev, - struct radeon_bo_va *bo_va, - uint64_t soffset, - uint32_t flags) -{ - uint64_t size = radeon_bo_size(bo_va->bo); - uint64_t eoffset, last_offset = 0; - struct radeon_vm *vm = bo_va->vm; - struct radeon_bo_va *tmp; - struct list_head *head; - unsigned last_pfn; - - if (soffset) { - /* make sure object fit at this offset */ - eoffset = soffset + size; - if (soffset >= eoffset) { - return -EINVAL; - } - - last_pfn = eoffset / RADEON_GPU_PAGE_SIZE; - if (last_pfn > rdev->vm_manager.max_pfn) { - dev_err(rdev->dev, "va above limit (0x%08X > 0x%08X)\n", - last_pfn, rdev->vm_manager.max_pfn); - return -EINVAL; - } - - } else { - eoffset = last_pfn = 0; - } - - mutex_lock(&vm->mutex); - head = &vm->va; - last_offset = 0; - list_for_each_entry(tmp, &vm->va, vm_list) { - if (bo_va == tmp) { - /* skip over currently modified bo */ - continue; - } - - if (soffset >= last_offset && eoffset <= tmp->soffset) { - /* bo can be added before this one */ - break; - } - if (eoffset > tmp->soffset && soffset < tmp->eoffset) { - /* bo and tmp overlap, invalid offset */ - dev_err(rdev->dev, "bo %p va 0x%08X conflict with (bo %p 0x%08X 0x%08X)\n", - bo_va->bo, (unsigned)bo_va->soffset, tmp->bo, - (unsigned)tmp->soffset, (unsigned)tmp->eoffset); - mutex_unlock(&vm->mutex); - return -EINVAL; - } - last_offset = tmp->eoffset; - head = &tmp->vm_list; - } - - bo_va->soffset = soffset; - bo_va->eoffset = eoffset; - bo_va->flags = flags; - bo_va->valid = false; - list_move(&bo_va->vm_list, head); - - mutex_unlock(&vm->mutex); - return 0; -} - -/** - * radeon_vm_map_gart - get the physical address of a gart page - * - * @rdev: radeon_device pointer - * @addr: the unmapped addr - * - * Look up the physical address of the page that the pte resolves - * to (cayman+). - * Returns the physical address of the page. - */ -uint64_t radeon_vm_map_gart(struct radeon_device *rdev, uint64_t addr) -{ - uint64_t result; - - /* page table offset */ - result = rdev->gart.pages_addr[addr >> PAGE_SHIFT]; - - /* in case cpu page size != gpu page size*/ - result |= addr & (~PAGE_MASK); - - return result; -} - -/** - * radeon_vm_page_flags - translate page flags to what the hw uses - * - * @flags: flags comming from userspace - * - * Translate the flags the userspace ABI uses to hw flags. - */ -static uint32_t radeon_vm_page_flags(uint32_t flags) -{ - uint32_t hw_flags = 0; - hw_flags |= (flags & RADEON_VM_PAGE_VALID) ? R600_PTE_VALID : 0; - hw_flags |= (flags & RADEON_VM_PAGE_READABLE) ? R600_PTE_READABLE : 0; - hw_flags |= (flags & RADEON_VM_PAGE_WRITEABLE) ? R600_PTE_WRITEABLE : 0; - if (flags & RADEON_VM_PAGE_SYSTEM) { - hw_flags |= R600_PTE_SYSTEM; - hw_flags |= (flags & RADEON_VM_PAGE_SNOOPED) ? R600_PTE_SNOOPED : 0; - } - return hw_flags; -} - -/** - * radeon_vm_update_pdes - make sure that page directory is valid - * - * @rdev: radeon_device pointer - * @vm: requested vm - * @start: start of GPU address range - * @end: end of GPU address range - * - * Allocates new page tables if necessary - * and updates the page directory (cayman+). - * Returns 0 for success, error for failure. - * - * Global and local mutex must be locked! 
- */ -static int radeon_vm_update_pdes(struct radeon_device *rdev, - struct radeon_vm *vm, - struct radeon_ib *ib, - uint64_t start, uint64_t end) -{ - static const uint32_t incr = RADEON_VM_PTE_COUNT * 8; - - uint64_t last_pde = ~0, last_pt = ~0; - unsigned count = 0; - uint64_t pt_idx; - int r; - - start = (start / RADEON_GPU_PAGE_SIZE) >> RADEON_VM_BLOCK_SIZE; - end = (end / RADEON_GPU_PAGE_SIZE) >> RADEON_VM_BLOCK_SIZE; - - /* walk over the address space and update the page directory */ - for (pt_idx = start; pt_idx <= end; ++pt_idx) { - uint64_t pde, pt; - - if (vm->page_tables[pt_idx]) - continue; - -retry: - r = radeon_sa_bo_new(rdev, &rdev->vm_manager.sa_manager, - &vm->page_tables[pt_idx], - RADEON_VM_PTE_COUNT * 8, - RADEON_GPU_PAGE_SIZE, false); - - if (r == -ENOMEM) { - r = radeon_vm_evict(rdev, vm); - if (r) - return r; - goto retry; - } else if (r) { - return r; - } - - pde = vm->pd_gpu_addr + pt_idx * 8; - - pt = radeon_sa_bo_gpu_addr(vm->page_tables[pt_idx]); - - if (((last_pde + 8 * count) != pde) || - ((last_pt + incr * count) != pt)) { - - if (count) { - radeon_asic_vm_set_page(rdev, ib, last_pde, - last_pt, count, incr, - R600_PTE_VALID); - - count *= RADEON_VM_PTE_COUNT; - radeon_asic_vm_set_page(rdev, ib, last_pt, 0, - count, 0, 0); - } - - count = 1; - last_pde = pde; - last_pt = pt; - } else { - ++count; - } - } - - if (count) { - radeon_asic_vm_set_page(rdev, ib, last_pde, last_pt, count, - incr, R600_PTE_VALID); - - count *= RADEON_VM_PTE_COUNT; - radeon_asic_vm_set_page(rdev, ib, last_pt, 0, - count, 0, 0); - } - - return 0; -} - -/** - * radeon_vm_update_ptes - make sure that page tables are valid - * - * @rdev: radeon_device pointer - * @vm: requested vm - * @start: start of GPU address range - * @end: end of GPU address range - * @dst: destination address to map to - * @flags: mapping flags - * - * Update the page tables in the range @start - @end (cayman+). - * - * Global and local mutex must be locked! - */ -static void radeon_vm_update_ptes(struct radeon_device *rdev, - struct radeon_vm *vm, - struct radeon_ib *ib, - uint64_t start, uint64_t end, - uint64_t dst, uint32_t flags) -{ - static const uint64_t mask = RADEON_VM_PTE_COUNT - 1; - - uint64_t last_pte = ~0, last_dst = ~0; - unsigned count = 0; - uint64_t addr; - - start = start / RADEON_GPU_PAGE_SIZE; - end = end / RADEON_GPU_PAGE_SIZE; - - /* walk over the address space and update the page tables */ - for (addr = start; addr < end; ) { - uint64_t pt_idx = addr >> RADEON_VM_BLOCK_SIZE; - unsigned nptes; - uint64_t pte; - - if ((addr & ~mask) == (end & ~mask)) - nptes = end - addr; - else - nptes = RADEON_VM_PTE_COUNT - (addr & mask); - - pte = radeon_sa_bo_gpu_addr(vm->page_tables[pt_idx]); - pte += (addr & mask) * 8; - - if ((last_pte + 8 * count) != pte) { - - if (count) { - radeon_asic_vm_set_page(rdev, ib, last_pte, - last_dst, count, - RADEON_GPU_PAGE_SIZE, - flags); - } - - count = nptes; - last_pte = pte; - last_dst = dst; - } else { - count += nptes; - } - - addr += nptes; - dst += nptes * RADEON_GPU_PAGE_SIZE; - } - - if (count) { - radeon_asic_vm_set_page(rdev, ib, last_pte, - last_dst, count, - RADEON_GPU_PAGE_SIZE, flags); - } -} - -/** - * radeon_vm_bo_update - map a bo into the vm page table - * - * @rdev: radeon_device pointer - * @vm: requested vm - * @bo: radeon buffer object - * @mem: ttm mem - * - * Fill in the page table entries for @bo (cayman+). - * Returns 0 for success, -EINVAL for failure. - * - * Object have to be reserved & global and local mutex must be locked! 
- */ -int radeon_vm_bo_update(struct radeon_device *rdev, - struct radeon_vm *vm, - struct radeon_bo *bo, - struct ttm_mem_reg *mem) -{ - struct radeon_ib ib; - struct radeon_bo_va *bo_va; - unsigned nptes, npdes, ndw; - uint64_t addr; - int r; - - /* nothing to do if vm isn't bound */ - if (vm->page_directory == NULL) - return 0; - - bo_va = radeon_vm_bo_find(vm, bo); - if (bo_va == NULL) { - dev_err(rdev->dev, "bo %p not in vm %p\n", bo, vm); - return -EINVAL; - } - - if (!bo_va->soffset) { - dev_err(rdev->dev, "bo %p don't has a mapping in vm %p\n", - bo, vm); - return -EINVAL; - } - - if ((bo_va->valid && mem) || (!bo_va->valid && mem == NULL)) - return 0; - - bo_va->flags &= ~RADEON_VM_PAGE_VALID; - bo_va->flags &= ~RADEON_VM_PAGE_SYSTEM; - if (mem) { - addr = mem->start << PAGE_SHIFT; - if (mem->mem_type != TTM_PL_SYSTEM) { - bo_va->flags |= RADEON_VM_PAGE_VALID; - bo_va->valid = true; - } - if (mem->mem_type == TTM_PL_TT) { - bo_va->flags |= RADEON_VM_PAGE_SYSTEM; - } else { - addr += rdev->vm_manager.vram_base_offset; - } - } else { - addr = 0; - bo_va->valid = false; - } - - trace_radeon_vm_bo_update(bo_va); - - nptes = radeon_bo_ngpu_pages(bo); - - /* assume two extra pdes in case the mapping overlaps the borders */ - npdes = (nptes >> RADEON_VM_BLOCK_SIZE) + 2; - - /* padding, etc. */ - ndw = 64; - - if (RADEON_VM_BLOCK_SIZE > 11) - /* reserve space for one header for every 2k dwords */ - ndw += (nptes >> 11) * 4; - else - /* reserve space for one header for - every (1 << BLOCK_SIZE) entries */ - ndw += (nptes >> RADEON_VM_BLOCK_SIZE) * 4; - - /* reserve space for pte addresses */ - ndw += nptes * 2; - - /* reserve space for one header for every 2k dwords */ - ndw += (npdes >> 11) * 4; - - /* reserve space for pde addresses */ - ndw += npdes * 2; - - /* reserve space for clearing new page tables */ - ndw += npdes * 2 * RADEON_VM_PTE_COUNT; - - /* update too big for an IB */ - if (ndw > 0xfffff) - return -ENOMEM; - - r = radeon_ib_get(rdev, R600_RING_TYPE_DMA_INDEX, &ib, NULL, ndw * 4); - if (r) - return r; - ib.length_dw = 0; - - r = radeon_vm_update_pdes(rdev, vm, &ib, bo_va->soffset, bo_va->eoffset); - if (r) { - radeon_ib_free(rdev, &ib); - return r; - } - - radeon_vm_update_ptes(rdev, vm, &ib, bo_va->soffset, bo_va->eoffset, - addr, radeon_vm_page_flags(bo_va->flags)); - - radeon_semaphore_sync_to(ib.semaphore, vm->fence); - r = radeon_ib_schedule(rdev, &ib, NULL); - if (r) { - radeon_ib_free(rdev, &ib); - return r; - } - radeon_fence_unref(&vm->fence); - vm->fence = radeon_fence_ref(ib.fence); - radeon_ib_free(rdev, &ib); - radeon_fence_unref(&vm->last_flush); - - return 0; -} - -/** - * radeon_vm_bo_rmv - remove a bo to a specific vm - * - * @rdev: radeon_device pointer - * @bo_va: requested bo_va - * - * Remove @bo_va->bo from the requested vm (cayman+). - * Remove @bo_va->bo from the list of bos associated with the bo_va->vm and - * remove the ptes for @bo_va in the page table. - * Returns 0 for success. - * - * Object have to be reserved! 
- */ -int radeon_vm_bo_rmv(struct radeon_device *rdev, - struct radeon_bo_va *bo_va) -{ - int r = 0; - - mutex_lock(&rdev->vm_manager.lock); - mutex_lock(&bo_va->vm->mutex); - if (bo_va->soffset) { - r = radeon_vm_bo_update(rdev, bo_va->vm, bo_va->bo, NULL); - } - mutex_unlock(&rdev->vm_manager.lock); - list_del(&bo_va->vm_list); - mutex_unlock(&bo_va->vm->mutex); - list_del(&bo_va->bo_list); - - kfree(bo_va); - return r; -} - -/** - * radeon_vm_bo_invalidate - mark the bo as invalid - * - * @rdev: radeon_device pointer - * @vm: requested vm - * @bo: radeon buffer object - * - * Mark @bo as invalid (cayman+). - */ -void radeon_vm_bo_invalidate(struct radeon_device *rdev, - struct radeon_bo *bo) -{ - struct radeon_bo_va *bo_va; - - list_for_each_entry(bo_va, &bo->va, bo_list) { - bo_va->valid = false; - } -} - -/** - * radeon_vm_init - initialize a vm instance - * - * @rdev: radeon_device pointer - * @vm: requested vm - * - * Init @vm fields (cayman+). - */ -void radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm) -{ - vm->id = 0; - vm->fence = NULL; - vm->last_flush = NULL; - vm->last_id_use = NULL; - mutex_init(&vm->mutex); - INIT_LIST_HEAD(&vm->list); - INIT_LIST_HEAD(&vm->va); -} - -/** - * radeon_vm_fini - tear down a vm instance - * - * @rdev: radeon_device pointer - * @vm: requested vm - * - * Tear down @vm (cayman+). - * Unbind the VM and remove all bos from the vm bo list - */ -void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm) -{ - struct radeon_bo_va *bo_va, *tmp; - int r; - - mutex_lock(&rdev->vm_manager.lock); - mutex_lock(&vm->mutex); - radeon_vm_free_pt(rdev, vm); - mutex_unlock(&rdev->vm_manager.lock); - - if (!list_empty(&vm->va)) { - dev_err(rdev->dev, "still active bo inside vm\n"); - } - list_for_each_entry_safe(bo_va, tmp, &vm->va, vm_list) { - list_del_init(&bo_va->vm_list); - r = radeon_bo_reserve(bo_va->bo, false); - if (!r) { - list_del_init(&bo_va->bo_list); - radeon_bo_unreserve(bo_va->bo); - kfree(bo_va); - } - } - radeon_fence_unref(&vm->fence); - radeon_fence_unref(&vm->last_flush); - radeon_fence_unref(&vm->last_id_use); - mutex_unlock(&vm->mutex); -} diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c new file mode 100644 index 0000000..433b1eb --- /dev/null +++ b/drivers/gpu/drm/radeon/radeon_vm.c @@ -0,0 +1,981 @@ +/* + * Copyright 2008 Advanced Micro Devices, Inc. + * Copyright 2008 Red Hat Inc. + * Copyright 2009 Jerome Glisse. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: Dave Airlie + * Alex Deucher + * Jerome Glisse + */ +#include <drm/drmP.h> +#include <drm/radeon_drm.h> +#include "radeon.h" +#include "radeon_trace.h" + +/* + * GPUVM + * GPUVM is similar to the legacy gart on older asics, however + * rather than there being a single global gart table + * for the entire GPU, there are multiple VM page tables active + * at any given time. The VM page tables can contain a mix + * vram pages and system memory pages and system memory pages + * can be mapped as snooped (cached system pages) or unsnooped + * (uncached system pages). + * Each VM has an ID associated with it and there is a page table + * associated with each VMID. When execting a command buffer, + * the kernel tells the the ring what VMID to use for that command + * buffer. VMIDs are allocated dynamically as commands are submitted. + * The userspace drivers maintain their own address space and the kernel + * sets up their pages tables accordingly when they submit their + * command buffers and a VMID is assigned. + * Cayman/Trinity support up to 8 active VMs at any given time; + * SI supports 16. + */ + +/** + * radeon_vm_num_pde - return the number of page directory entries + * + * @rdev: radeon_device pointer + * + * Calculate the number of page directory entries (cayman+). + */ +static unsigned radeon_vm_num_pdes(struct radeon_device *rdev) +{ + return rdev->vm_manager.max_pfn >> RADEON_VM_BLOCK_SIZE; +} + +/** + * radeon_vm_directory_size - returns the size of the page directory in bytes + * + * @rdev: radeon_device pointer + * + * Calculate the size of the page directory in bytes (cayman+). + */ +static unsigned radeon_vm_directory_size(struct radeon_device *rdev) +{ + return RADEON_GPU_PAGE_ALIGN(radeon_vm_num_pdes(rdev) * 8); +} + +/** + * radeon_vm_manager_init - init the vm manager + * + * @rdev: radeon_device pointer + * + * Init the vm manager (cayman+). + * Returns 0 for success, error for failure. + */ +int radeon_vm_manager_init(struct radeon_device *rdev) +{ + struct radeon_vm *vm; + struct radeon_bo_va *bo_va; + int r; + unsigned size; + + if (!rdev->vm_manager.enabled) { + /* allocate enough for 2 full VM pts */ + size = radeon_vm_directory_size(rdev); + size += rdev->vm_manager.max_pfn * 8; + size *= 2; + r = radeon_sa_bo_manager_init(rdev, &rdev->vm_manager.sa_manager, + RADEON_GPU_PAGE_ALIGN(size), + RADEON_VM_PTB_ALIGN_SIZE, + RADEON_GEM_DOMAIN_VRAM); + if (r) { + dev_err(rdev->dev, "failed to allocate vm bo (%dKB)\n", + (rdev->vm_manager.max_pfn * 8) >> 10); + return r; + } + + r = radeon_asic_vm_init(rdev); + if (r) + return r; + + rdev->vm_manager.enabled = true; + + r = radeon_sa_bo_manager_start(rdev, &rdev->vm_manager.sa_manager); + if (r) + return r; + } + + /* restore page table */ + list_for_each_entry(vm, &rdev->vm_manager.lru_vm, list) { + if (vm->page_directory == NULL) + continue; + + list_for_each_entry(bo_va, &vm->va, vm_list) { + bo_va->valid = false; + } + } + return 0; +} + +/** + * radeon_vm_free_pt - free the page table for a specific vm + * + * @rdev: radeon_device pointer + * @vm: vm to unbind + * + * Free the page table of a specific vm (cayman+). + * + * Global and local mutex must be lock! 
+ */ +static void radeon_vm_free_pt(struct radeon_device *rdev, + struct radeon_vm *vm) +{ + struct radeon_bo_va *bo_va; + int i; + + if (!vm->page_directory) + return; + + list_del_init(&vm->list); + radeon_sa_bo_free(rdev, &vm->page_directory, vm->fence); + + list_for_each_entry(bo_va, &vm->va, vm_list) { + bo_va->valid = false; + } + + if (vm->page_tables == NULL) + return; + + for (i = 0; i < radeon_vm_num_pdes(rdev); i++) + radeon_sa_bo_free(rdev, &vm->page_tables[i], vm->fence); + + kfree(vm->page_tables); +} + +/** + * radeon_vm_manager_fini - tear down the vm manager + * + * @rdev: radeon_device pointer + * + * Tear down the VM manager (cayman+). + */ +void radeon_vm_manager_fini(struct radeon_device *rdev) +{ + struct radeon_vm *vm, *tmp; + int i; + + if (!rdev->vm_manager.enabled) + return; + + mutex_lock(&rdev->vm_manager.lock); + /* free all allocated page tables */ + list_for_each_entry_safe(vm, tmp, &rdev->vm_manager.lru_vm, list) { + mutex_lock(&vm->mutex); + radeon_vm_free_pt(rdev, vm); + mutex_unlock(&vm->mutex); + } + for (i = 0; i < RADEON_NUM_VM; ++i) { + radeon_fence_unref(&rdev->vm_manager.active[i]); + } + radeon_asic_vm_fini(rdev); + mutex_unlock(&rdev->vm_manager.lock); + + radeon_sa_bo_manager_suspend(rdev, &rdev->vm_manager.sa_manager); + radeon_sa_bo_manager_fini(rdev, &rdev->vm_manager.sa_manager); + rdev->vm_manager.enabled = false; +} + +/** + * radeon_vm_evict - evict page table to make room for new one + * + * @rdev: radeon_device pointer + * @vm: VM we want to allocate something for + * + * Evict a VM from the lru, making sure that it isn't @vm. (cayman+). + * Returns 0 for success, -ENOMEM for failure. + * + * Global and local mutex must be locked! + */ +static int radeon_vm_evict(struct radeon_device *rdev, struct radeon_vm *vm) +{ + struct radeon_vm *vm_evict; + + if (list_empty(&rdev->vm_manager.lru_vm)) + return -ENOMEM; + + vm_evict = list_first_entry(&rdev->vm_manager.lru_vm, + struct radeon_vm, list); + if (vm_evict == vm) + return -ENOMEM; + + mutex_lock(&vm_evict->mutex); + radeon_vm_free_pt(rdev, vm_evict); + mutex_unlock(&vm_evict->mutex); + return 0; +} + +/** + * radeon_vm_alloc_pt - allocates a page table for a VM + * + * @rdev: radeon_device pointer + * @vm: vm to bind + * + * Allocate a page table for the requested vm (cayman+). + * Returns 0 for success, error for failure. + * + * Global and local mutex must be locked! 
+ */ +int radeon_vm_alloc_pt(struct radeon_device *rdev, struct radeon_vm *vm) +{ + unsigned pd_size, pd_entries, pts_size; + struct radeon_ib ib; + int r; + + if (vm == NULL) { + return -EINVAL; + } + + if (vm->page_directory != NULL) { + return 0; + } + + pd_size = radeon_vm_directory_size(rdev); + pd_entries = radeon_vm_num_pdes(rdev); + +retry: + r = radeon_sa_bo_new(rdev, &rdev->vm_manager.sa_manager, + &vm->page_directory, pd_size, + RADEON_VM_PTB_ALIGN_SIZE, false); + if (r == -ENOMEM) { + r = radeon_vm_evict(rdev, vm); + if (r) + return r; + goto retry; + + } else if (r) { + return r; + } + + vm->pd_gpu_addr = radeon_sa_bo_gpu_addr(vm->page_directory); + + /* Initially clear the page directory */ + r = radeon_ib_get(rdev, R600_RING_TYPE_DMA_INDEX, &ib, + NULL, pd_entries * 2 + 64); + if (r) { + radeon_sa_bo_free(rdev, &vm->page_directory, vm->fence); + return r; + } + + ib.length_dw = 0; + + radeon_asic_vm_set_page(rdev, &ib, vm->pd_gpu_addr, + 0, pd_entries, 0, 0); + + radeon_semaphore_sync_to(ib.semaphore, vm->fence); + r = radeon_ib_schedule(rdev, &ib, NULL); + if (r) { + radeon_ib_free(rdev, &ib); + radeon_sa_bo_free(rdev, &vm->page_directory, vm->fence); + return r; + } + radeon_fence_unref(&vm->fence); + vm->fence = radeon_fence_ref(ib.fence); + radeon_ib_free(rdev, &ib); + radeon_fence_unref(&vm->last_flush); + + /* allocate page table array */ + pts_size = radeon_vm_num_pdes(rdev) * sizeof(struct radeon_sa_bo *); + vm->page_tables = kzalloc(pts_size, GFP_KERNEL); + + if (vm->page_tables == NULL) { + DRM_ERROR("Cannot allocate memory for page table array\n"); + radeon_sa_bo_free(rdev, &vm->page_directory, vm->fence); + return -ENOMEM; + } + + return 0; +} + +/** + * radeon_vm_add_to_lru - add VMs page table to LRU list + * + * @rdev: radeon_device pointer + * @vm: vm to add to LRU + * + * Add the allocated page table to the LRU list (cayman+). + * + * Global mutex must be locked! + */ +void radeon_vm_add_to_lru(struct radeon_device *rdev, struct radeon_vm *vm) +{ + list_del_init(&vm->list); + list_add_tail(&vm->list, &rdev->vm_manager.lru_vm); +} + +/** + * radeon_vm_grab_id - allocate the next free VMID + * + * @rdev: radeon_device pointer + * @vm: vm to allocate id for + * @ring: ring we want to submit job to + * + * Allocate an id for the vm (cayman+). + * Returns the fence we need to sync to (if any). + * + * Global and local mutex must be locked! + */ +struct radeon_fence *radeon_vm_grab_id(struct radeon_device *rdev, + struct radeon_vm *vm, int ring) +{ + struct radeon_fence *best[RADEON_NUM_RINGS] = {}; + unsigned choices[2] = {}; + unsigned i; + + /* check if the id is still valid */ + if (vm->last_id_use && vm->last_id_use == rdev->vm_manager.active[vm->id]) + return NULL; + + /* we definately need to flush */ + radeon_fence_unref(&vm->last_flush); + + /* skip over VMID 0, since it is the system VM */ + for (i = 1; i < rdev->vm_manager.nvm; ++i) { + struct radeon_fence *fence = rdev->vm_manager.active[i]; + + if (fence == NULL) { + /* found a free one */ + vm->id = i; + trace_radeon_vm_grab_id(vm->id, ring); + return NULL; + } + + if (radeon_fence_is_earlier(fence, best[fence->ring])) { + best[fence->ring] = fence; + choices[fence->ring == ring ? 
0 : 1] = i; + } + } + + for (i = 0; i < 2; ++i) { + if (choices[i]) { + vm->id = choices[i]; + trace_radeon_vm_grab_id(vm->id, ring); + return rdev->vm_manager.active[choices[i]]; + } + } + + /* should never happen */ + BUG(); + return NULL; +} + +/** + * radeon_vm_fence - remember fence for vm + * + * @rdev: radeon_device pointer + * @vm: vm we want to fence + * @fence: fence to remember + * + * Fence the vm (cayman+). + * Set the fence used to protect page table and id. + * + * Global and local mutex must be locked! + */ +void radeon_vm_fence(struct radeon_device *rdev, + struct radeon_vm *vm, + struct radeon_fence *fence) +{ + radeon_fence_unref(&rdev->vm_manager.active[vm->id]); + rdev->vm_manager.active[vm->id] = radeon_fence_ref(fence); + + radeon_fence_unref(&vm->fence); + vm->fence = radeon_fence_ref(fence); + + radeon_fence_unref(&vm->last_id_use); + vm->last_id_use = radeon_fence_ref(fence); +} + +/** + * radeon_vm_bo_find - find the bo_va for a specific vm & bo + * + * @vm: requested vm + * @bo: requested buffer object + * + * Find @bo inside the requested vm (cayman+). + * Search inside the @bos vm list for the requested vm + * Returns the found bo_va or NULL if none is found + * + * Object has to be reserved! + */ +struct radeon_bo_va *radeon_vm_bo_find(struct radeon_vm *vm, + struct radeon_bo *bo) +{ + struct radeon_bo_va *bo_va; + + list_for_each_entry(bo_va, &bo->va, bo_list) { + if (bo_va->vm == vm) { + return bo_va; + } + } + return NULL; +} + +/** + * radeon_vm_bo_add - add a bo to a specific vm + * + * @rdev: radeon_device pointer + * @vm: requested vm + * @bo: radeon buffer object + * + * Add @bo into the requested vm (cayman+). + * Add @bo to the list of bos associated with the vm + * Returns newly added bo_va or NULL for failure + * + * Object has to be reserved! + */ +struct radeon_bo_va *radeon_vm_bo_add(struct radeon_device *rdev, + struct radeon_vm *vm, + struct radeon_bo *bo) +{ + struct radeon_bo_va *bo_va; + + bo_va = kzalloc(sizeof(struct radeon_bo_va), GFP_KERNEL); + if (bo_va == NULL) { + return NULL; + } + bo_va->vm = vm; + bo_va->bo = bo; + bo_va->soffset = 0; + bo_va->eoffset = 0; + bo_va->flags = 0; + bo_va->valid = false; + bo_va->ref_count = 1; + INIT_LIST_HEAD(&bo_va->bo_list); + INIT_LIST_HEAD(&bo_va->vm_list); + + mutex_lock(&vm->mutex); + list_add(&bo_va->vm_list, &vm->va); + list_add_tail(&bo_va->bo_list, &bo->va); + mutex_unlock(&vm->mutex); + + return bo_va; +} + +/** + * radeon_vm_bo_set_addr - set bos virtual address inside a vm + * + * @rdev: radeon_device pointer + * @bo_va: bo_va to store the address + * @soffset: requested offset of the buffer in the VM address space + * @flags: attributes of pages (read/write/valid/etc.) + * + * Set offset of @bo_va (cayman+). + * Validate and set the offset requested within the vm address space. + * Returns 0 for success, error for failure. + * + * Object has to be reserved! 
+ */ +int radeon_vm_bo_set_addr(struct radeon_device *rdev, + struct radeon_bo_va *bo_va, + uint64_t soffset, + uint32_t flags) +{ + uint64_t size = radeon_bo_size(bo_va->bo); + uint64_t eoffset, last_offset = 0; + struct radeon_vm *vm = bo_va->vm; + struct radeon_bo_va *tmp; + struct list_head *head; + unsigned last_pfn; + + if (soffset) { + /* make sure object fit at this offset */ + eoffset = soffset + size; + if (soffset >= eoffset) { + return -EINVAL; + } + + last_pfn = eoffset / RADEON_GPU_PAGE_SIZE; + if (last_pfn > rdev->vm_manager.max_pfn) { + dev_err(rdev->dev, "va above limit (0x%08X > 0x%08X)\n", + last_pfn, rdev->vm_manager.max_pfn); + return -EINVAL; + } + + } else { + eoffset = last_pfn = 0; + } + + mutex_lock(&vm->mutex); + head = &vm->va; + last_offset = 0; + list_for_each_entry(tmp, &vm->va, vm_list) { + if (bo_va == tmp) { + /* skip over currently modified bo */ + continue; + } + + if (soffset >= last_offset && eoffset <= tmp->soffset) { + /* bo can be added before this one */ + break; + } + if (eoffset > tmp->soffset && soffset < tmp->eoffset) { + /* bo and tmp overlap, invalid offset */ + dev_err(rdev->dev, "bo %p va 0x%08X conflict with (bo %p 0x%08X 0x%08X)\n", + bo_va->bo, (unsigned)bo_va->soffset, tmp->bo, + (unsigned)tmp->soffset, (unsigned)tmp->eoffset); + mutex_unlock(&vm->mutex); + return -EINVAL; + } + last_offset = tmp->eoffset; + head = &tmp->vm_list; + } + + bo_va->soffset = soffset; + bo_va->eoffset = eoffset; + bo_va->flags = flags; + bo_va->valid = false; + list_move(&bo_va->vm_list, head); + + mutex_unlock(&vm->mutex); + return 0; +} + +/** + * radeon_vm_map_gart - get the physical address of a gart page + * + * @rdev: radeon_device pointer + * @addr: the unmapped addr + * + * Look up the physical address of the page that the pte resolves + * to (cayman+). + * Returns the physical address of the page. + */ +uint64_t radeon_vm_map_gart(struct radeon_device *rdev, uint64_t addr) +{ + uint64_t result; + + /* page table offset */ + result = rdev->gart.pages_addr[addr >> PAGE_SHIFT]; + + /* in case cpu page size != gpu page size*/ + result |= addr & (~PAGE_MASK); + + return result; +} + +/** + * radeon_vm_page_flags - translate page flags to what the hw uses + * + * @flags: flags comming from userspace + * + * Translate the flags the userspace ABI uses to hw flags. + */ +static uint32_t radeon_vm_page_flags(uint32_t flags) +{ + uint32_t hw_flags = 0; + hw_flags |= (flags & RADEON_VM_PAGE_VALID) ? R600_PTE_VALID : 0; + hw_flags |= (flags & RADEON_VM_PAGE_READABLE) ? R600_PTE_READABLE : 0; + hw_flags |= (flags & RADEON_VM_PAGE_WRITEABLE) ? R600_PTE_WRITEABLE : 0; + if (flags & RADEON_VM_PAGE_SYSTEM) { + hw_flags |= R600_PTE_SYSTEM; + hw_flags |= (flags & RADEON_VM_PAGE_SNOOPED) ? R600_PTE_SNOOPED : 0; + } + return hw_flags; +} + +/** + * radeon_vm_update_pdes - make sure that page directory is valid + * + * @rdev: radeon_device pointer + * @vm: requested vm + * @start: start of GPU address range + * @end: end of GPU address range + * + * Allocates new page tables if necessary + * and updates the page directory (cayman+). + * Returns 0 for success, error for failure. + * + * Global and local mutex must be locked! 
+ */ +static int radeon_vm_update_pdes(struct radeon_device *rdev, + struct radeon_vm *vm, + struct radeon_ib *ib, + uint64_t start, uint64_t end) +{ + static const uint32_t incr = RADEON_VM_PTE_COUNT * 8; + + uint64_t last_pde = ~0, last_pt = ~0; + unsigned count = 0; + uint64_t pt_idx; + int r; + + start = (start / RADEON_GPU_PAGE_SIZE) >> RADEON_VM_BLOCK_SIZE; + end = (end / RADEON_GPU_PAGE_SIZE) >> RADEON_VM_BLOCK_SIZE; + + /* walk over the address space and update the page directory */ + for (pt_idx = start; pt_idx <= end; ++pt_idx) { + uint64_t pde, pt; + + if (vm->page_tables[pt_idx]) + continue; + +retry: + r = radeon_sa_bo_new(rdev, &rdev->vm_manager.sa_manager, + &vm->page_tables[pt_idx], + RADEON_VM_PTE_COUNT * 8, + RADEON_GPU_PAGE_SIZE, false); + + if (r == -ENOMEM) { + r = radeon_vm_evict(rdev, vm); + if (r) + return r; + goto retry; + } else if (r) { + return r; + } + + pde = vm->pd_gpu_addr + pt_idx * 8; + + pt = radeon_sa_bo_gpu_addr(vm->page_tables[pt_idx]); + + if (((last_pde + 8 * count) != pde) || + ((last_pt + incr * count) != pt)) { + + if (count) { + radeon_asic_vm_set_page(rdev, ib, last_pde, + last_pt, count, incr, + R600_PTE_VALID); + + count *= RADEON_VM_PTE_COUNT; + radeon_asic_vm_set_page(rdev, ib, last_pt, 0, + count, 0, 0); + } + + count = 1; + last_pde = pde; + last_pt = pt; + } else { + ++count; + } + } + + if (count) { + radeon_asic_vm_set_page(rdev, ib, last_pde, last_pt, count, + incr, R600_PTE_VALID); + + count *= RADEON_VM_PTE_COUNT; + radeon_asic_vm_set_page(rdev, ib, last_pt, 0, + count, 0, 0); + } + + return 0; +} + +/** + * radeon_vm_update_ptes - make sure that page tables are valid + * + * @rdev: radeon_device pointer + * @vm: requested vm + * @start: start of GPU address range + * @end: end of GPU address range + * @dst: destination address to map to + * @flags: mapping flags + * + * Update the page tables in the range @start - @end (cayman+). + * + * Global and local mutex must be locked! + */ +static void radeon_vm_update_ptes(struct radeon_device *rdev, + struct radeon_vm *vm, + struct radeon_ib *ib, + uint64_t start, uint64_t end, + uint64_t dst, uint32_t flags) +{ + static const uint64_t mask = RADEON_VM_PTE_COUNT - 1; + + uint64_t last_pte = ~0, last_dst = ~0; + unsigned count = 0; + uint64_t addr; + + start = start / RADEON_GPU_PAGE_SIZE; + end = end / RADEON_GPU_PAGE_SIZE; + + /* walk over the address space and update the page tables */ + for (addr = start; addr < end; ) { + uint64_t pt_idx = addr >> RADEON_VM_BLOCK_SIZE; + unsigned nptes; + uint64_t pte; + + if ((addr & ~mask) == (end & ~mask)) + nptes = end - addr; + else + nptes = RADEON_VM_PTE_COUNT - (addr & mask); + + pte = radeon_sa_bo_gpu_addr(vm->page_tables[pt_idx]); + pte += (addr & mask) * 8; + + if ((last_pte + 8 * count) != pte) { + + if (count) { + radeon_asic_vm_set_page(rdev, ib, last_pte, + last_dst, count, + RADEON_GPU_PAGE_SIZE, + flags); + } + + count = nptes; + last_pte = pte; + last_dst = dst; + } else { + count += nptes; + } + + addr += nptes; + dst += nptes * RADEON_GPU_PAGE_SIZE; + } + + if (count) { + radeon_asic_vm_set_page(rdev, ib, last_pte, + last_dst, count, + RADEON_GPU_PAGE_SIZE, flags); + } +} + +/** + * radeon_vm_bo_update - map a bo into the vm page table + * + * @rdev: radeon_device pointer + * @vm: requested vm + * @bo: radeon buffer object + * @mem: ttm mem + * + * Fill in the page table entries for @bo (cayman+). + * Returns 0 for success, -EINVAL for failure. + * + * Object have to be reserved & global and local mutex must be locked! 
+ */ +int radeon_vm_bo_update(struct radeon_device *rdev, + struct radeon_vm *vm, + struct radeon_bo *bo, + struct ttm_mem_reg *mem) +{ + struct radeon_ib ib; + struct radeon_bo_va *bo_va; + unsigned nptes, npdes, ndw; + uint64_t addr; + int r; + + /* nothing to do if vm isn't bound */ + if (vm->page_directory == NULL) + return 0; + + bo_va = radeon_vm_bo_find(vm, bo); + if (bo_va == NULL) { + dev_err(rdev->dev, "bo %p not in vm %p\n", bo, vm); + return -EINVAL; + } + + if (!bo_va->soffset) { + dev_err(rdev->dev, "bo %p don't has a mapping in vm %p\n", + bo, vm); + return -EINVAL; + } + + if ((bo_va->valid && mem) || (!bo_va->valid && mem == NULL)) + return 0; + + bo_va->flags &= ~RADEON_VM_PAGE_VALID; + bo_va->flags &= ~RADEON_VM_PAGE_SYSTEM; + if (mem) { + addr = mem->start << PAGE_SHIFT; + if (mem->mem_type != TTM_PL_SYSTEM) { + bo_va->flags |= RADEON_VM_PAGE_VALID; + bo_va->valid = true; + } + if (mem->mem_type == TTM_PL_TT) { + bo_va->flags |= RADEON_VM_PAGE_SYSTEM; + } else { + addr += rdev->vm_manager.vram_base_offset; + } + } else { + addr = 0; + bo_va->valid = false; + } + + trace_radeon_vm_bo_update(bo_va); + + nptes = radeon_bo_ngpu_pages(bo); + + /* assume two extra pdes in case the mapping overlaps the borders */ + npdes = (nptes >> RADEON_VM_BLOCK_SIZE) + 2; + + /* padding, etc. */ + ndw = 64; + + if (RADEON_VM_BLOCK_SIZE > 11) + /* reserve space for one header for every 2k dwords */ + ndw += (nptes >> 11) * 4; + else + /* reserve space for one header for + every (1 << BLOCK_SIZE) entries */ + ndw += (nptes >> RADEON_VM_BLOCK_SIZE) * 4; + + /* reserve space for pte addresses */ + ndw += nptes * 2; + + /* reserve space for one header for every 2k dwords */ + ndw += (npdes >> 11) * 4; + + /* reserve space for pde addresses */ + ndw += npdes * 2; + + /* reserve space for clearing new page tables */ + ndw += npdes * 2 * RADEON_VM_PTE_COUNT; + + /* update too big for an IB */ + if (ndw > 0xfffff) + return -ENOMEM; + + r = radeon_ib_get(rdev, R600_RING_TYPE_DMA_INDEX, &ib, NULL, ndw * 4); + if (r) + return r; + ib.length_dw = 0; + + r = radeon_vm_update_pdes(rdev, vm, &ib, bo_va->soffset, bo_va->eoffset); + if (r) { + radeon_ib_free(rdev, &ib); + return r; + } + + radeon_vm_update_ptes(rdev, vm, &ib, bo_va->soffset, bo_va->eoffset, + addr, radeon_vm_page_flags(bo_va->flags)); + + radeon_semaphore_sync_to(ib.semaphore, vm->fence); + r = radeon_ib_schedule(rdev, &ib, NULL); + if (r) { + radeon_ib_free(rdev, &ib); + return r; + } + radeon_fence_unref(&vm->fence); + vm->fence = radeon_fence_ref(ib.fence); + radeon_ib_free(rdev, &ib); + radeon_fence_unref(&vm->last_flush); + + return 0; +} + +/** + * radeon_vm_bo_rmv - remove a bo to a specific vm + * + * @rdev: radeon_device pointer + * @bo_va: requested bo_va + * + * Remove @bo_va->bo from the requested vm (cayman+). + * Remove @bo_va->bo from the list of bos associated with the bo_va->vm and + * remove the ptes for @bo_va in the page table. + * Returns 0 for success. + * + * Object have to be reserved! 
+ */ +int radeon_vm_bo_rmv(struct radeon_device *rdev, + struct radeon_bo_va *bo_va) +{ + int r = 0; + + mutex_lock(&rdev->vm_manager.lock); + mutex_lock(&bo_va->vm->mutex); + if (bo_va->soffset) { + r = radeon_vm_bo_update(rdev, bo_va->vm, bo_va->bo, NULL); + } + mutex_unlock(&rdev->vm_manager.lock); + list_del(&bo_va->vm_list); + mutex_unlock(&bo_va->vm->mutex); + list_del(&bo_va->bo_list); + + kfree(bo_va); + return r; +} + +/** + * radeon_vm_bo_invalidate - mark the bo as invalid + * + * @rdev: radeon_device pointer + * @vm: requested vm + * @bo: radeon buffer object + * + * Mark @bo as invalid (cayman+). + */ +void radeon_vm_bo_invalidate(struct radeon_device *rdev, + struct radeon_bo *bo) +{ + struct radeon_bo_va *bo_va; + + list_for_each_entry(bo_va, &bo->va, bo_list) { + bo_va->valid = false; + } +} + +/** + * radeon_vm_init - initialize a vm instance + * + * @rdev: radeon_device pointer + * @vm: requested vm + * + * Init @vm fields (cayman+). + */ +void radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm) +{ + vm->id = 0; + vm->fence = NULL; + vm->last_flush = NULL; + vm->last_id_use = NULL; + mutex_init(&vm->mutex); + INIT_LIST_HEAD(&vm->list); + INIT_LIST_HEAD(&vm->va); +} + +/** + * radeon_vm_fini - tear down a vm instance + * + * @rdev: radeon_device pointer + * @vm: requested vm + * + * Tear down @vm (cayman+). + * Unbind the VM and remove all bos from the vm bo list + */ +void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm) +{ + struct radeon_bo_va *bo_va, *tmp; + int r; + + mutex_lock(&rdev->vm_manager.lock); + mutex_lock(&vm->mutex); + radeon_vm_free_pt(rdev, vm); + mutex_unlock(&rdev->vm_manager.lock); + + if (!list_empty(&vm->va)) { + dev_err(rdev->dev, "still active bo inside vm\n"); + } + list_for_each_entry_safe(bo_va, tmp, &vm->va, vm_list) { + list_del_init(&bo_va->vm_list); + r = radeon_bo_reserve(bo_va->bo, false); + if (!r) { + list_del_init(&bo_va->bo_list); + radeon_bo_unreserve(bo_va->bo); + kfree(bo_va); + } + } + radeon_fence_unref(&vm->fence); + radeon_fence_unref(&vm->last_flush); + radeon_fence_unref(&vm->last_id_use); + mutex_unlock(&vm->mutex); +}
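For reference, the address-space walk that radeon_vm_update_pdes()/radeon_vm_update_ptes() perform boils down to a two-level split of the GPU virtual address: a page-directory index selecting one page table, and an entry index within that table. The following minimal, self-contained sketch shows that split; the constants are illustrative stand-ins for RADEON_GPU_PAGE_SIZE, RADEON_VM_BLOCK_SIZE and RADEON_VM_PTE_COUNT, and the code is not part of the patch.

#include <stdint.h>
#include <stdio.h>

/* Illustrative stand-ins for the driver constants (not part of the patch). */
#define GPU_PAGE_SIZE   4096ULL                  /* ~ RADEON_GPU_PAGE_SIZE  */
#define VM_BLOCK_SIZE   9                        /* ~ RADEON_VM_BLOCK_SIZE  */
#define VM_PTE_COUNT    (1ULL << VM_BLOCK_SIZE)  /* PTEs per page table     */

/* Split a GPU virtual address into a page-directory index and a page-table
 * index, mirroring the arithmetic in the pde/pte update helpers. */
static void vm_addr_split(uint64_t va, uint64_t *pde_idx, uint64_t *pte_idx)
{
	uint64_t pfn = va / GPU_PAGE_SIZE;       /* page frame number        */

	*pde_idx = pfn >> VM_BLOCK_SIZE;         /* which page table         */
	*pte_idx = pfn & (VM_PTE_COUNT - 1);     /* entry within that table  */
}

int main(void)
{
	uint64_t pde, pte;

	vm_addr_split(0x12345000ULL, &pde, &pte);
	printf("pde %llu, pte %llu\n",
	       (unsigned long long)pde, (unsigned long long)pte);
	return 0;
}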
From: Christian König <christian.koenig@amd.com>
Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/radeon/radeon.h      |  3 +++
 drivers/gpu/drm/radeon/radeon_cs.c   |  4 ----
 drivers/gpu/drm/radeon/radeon_ring.c | 16 +++++++---------
 drivers/gpu/drm/radeon/radeon_vm.c   | 31 ++++++++++++++++++++++++++++---
 4 files changed, 38 insertions(+), 16 deletions(-)
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 7337b35..0c03bcb 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -2796,6 +2796,9 @@ int radeon_vm_alloc_pt(struct radeon_device *rdev, struct radeon_vm *vm); void radeon_vm_add_to_lru(struct radeon_device *rdev, struct radeon_vm *vm); struct radeon_fence *radeon_vm_grab_id(struct radeon_device *rdev, struct radeon_vm *vm, int ring); +void radeon_vm_flush(struct radeon_device *rdev, + struct radeon_vm *vm, + int ring); void radeon_vm_fence(struct radeon_device *rdev, struct radeon_vm *vm, struct radeon_fence *fence); diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c index f28a8d8..0b3ba38 100644 --- a/drivers/gpu/drm/radeon/radeon_cs.c +++ b/drivers/gpu/drm/radeon/radeon_cs.c @@ -430,10 +430,6 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev, r = radeon_ib_schedule(rdev, &parser->ib, NULL); }
- if (!r) { - radeon_vm_fence(rdev, vm, parser->ib.fence); - } - out: radeon_vm_add_to_lru(rdev, vm); mutex_unlock(&vm->mutex); diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c index fa14011..665591a 100644 --- a/drivers/gpu/drm/radeon/radeon_ring.c +++ b/drivers/gpu/drm/radeon/radeon_ring.c @@ -153,11 +153,9 @@ int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib, return r; }
- /* if we can't remember our last VM flush then flush now! */ - /* XXX figure out why we have to flush for every IB */ - if (ib->vm /*&& !ib->vm->last_flush*/) { - radeon_ring_vm_flush(rdev, ib->ring, ib->vm); - } + if (ib->vm) + radeon_vm_flush(rdev, ib->vm, ib->ring); + if (const_ib) { radeon_ring_ib_execute(rdev, const_ib->ring, const_ib); radeon_semaphore_free(rdev, &const_ib->semaphore, NULL); @@ -172,10 +170,10 @@ int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib, if (const_ib) { const_ib->fence = radeon_fence_ref(ib->fence); } - /* we just flushed the VM, remember that */ - if (ib->vm && !ib->vm->last_flush) { - ib->vm->last_flush = radeon_fence_ref(ib->fence); - } + + if (ib->vm) + radeon_vm_fence(rdev, ib->vm, ib->fence); + radeon_ring_unlock_commit(rdev, ring); return 0; } diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c index 433b1eb..5160176 100644 --- a/drivers/gpu/drm/radeon/radeon_vm.c +++ b/drivers/gpu/drm/radeon/radeon_vm.c @@ -379,6 +379,27 @@ struct radeon_fence *radeon_vm_grab_id(struct radeon_device *rdev, }
/** + * radeon_vm_flush - hardware flush the vm + * + * @rdev: radeon_device pointer + * @vm: vm we want to flush + * @ring: ring to use for flush + * + * Flush the vm (cayman+). + * + * Global and local mutex must be locked! + */ +void radeon_vm_flush(struct radeon_device *rdev, + struct radeon_vm *vm, + int ring) +{ + /* if we can't remember our last VM flush then flush now! */ + /* XXX figure out why we have to flush all the time */ + if (!vm->last_flush || true) + radeon_ring_vm_flush(rdev, ring, vm); +} + +/** * radeon_vm_fence - remember fence for vm * * @rdev: radeon_device pointer @@ -394,14 +415,18 @@ void radeon_vm_fence(struct radeon_device *rdev, struct radeon_vm *vm, struct radeon_fence *fence) { - radeon_fence_unref(&rdev->vm_manager.active[vm->id]); - rdev->vm_manager.active[vm->id] = radeon_fence_ref(fence); - radeon_fence_unref(&vm->fence); vm->fence = radeon_fence_ref(fence);
+ radeon_fence_unref(&rdev->vm_manager.active[vm->id]); + rdev->vm_manager.active[vm->id] = radeon_fence_ref(fence); + radeon_fence_unref(&vm->last_id_use); vm->last_id_use = radeon_fence_ref(fence); + + /* we just flushed the VM, remember that */ + if (!vm->last_flush) + vm->last_flush = radeon_fence_ref(fence); }
/**
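With radeon_vm_flush() and radeon_vm_fence() now called from radeon_ib_schedule(), the submission ordering conceptually becomes: flush the VM before the IB runs, then remember the submission fence for both the VMID and the last flush. A rough, self-contained sketch of that ordering (stub types and helpers only, not the real driver API) could look like this:

/* Stub types standing in for the driver structures (illustration only). */
struct vm    { void *last_flush; };
struct fence { int ring; };
struct ib    { struct vm *vm; struct fence *fence; int ring; };

static void hw_vm_flush(struct vm *vm, int ring) { (void)vm; (void)ring; } /* ~ radeon_ring_vm_flush()   */
static void hw_ib_execute(struct ib *ib) { (void)ib; }                     /* ~ radeon_ring_ib_execute() */
static struct fence emitted;
static struct fence *hw_fence_emit(int ring)                               /* ~ radeon_fence_emit()      */
{
	emitted.ring = ring;
	return &emitted;
}
static void vm_remember_fence(struct vm *vm, struct fence *f)              /* ~ radeon_vm_fence()        */
{
	if (!vm->last_flush)
		vm->last_flush = f;  /* "we just flushed the VM, remember that" */
}

/* Ordering after this patch: flush the VM before the IB runs, then record
 * the submission fence for the VMID and the last flush. */
static void ib_schedule_sketch(struct ib *ib)
{
	if (ib->vm)
		hw_vm_flush(ib->vm, ib->ring);

	hw_ib_execute(ib);
	ib->fence = hw_fence_emit(ib->ring);

	if (ib->vm)
		vm_remember_fence(ib->vm, ib->fence);
}

int main(void)
{
	struct vm my_vm = { 0 };
	struct ib my_ib = { &my_vm, 0, 0 };

	ib_schedule_sketch(&my_ib);
	return 0;
}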
From: Christian König <christian.koenig@amd.com>
No need to make it more complicated than necessary: just allocate the page directory as a normal BO and flush whenever its address changes.
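The "flush whenever the address changes" rule can be sketched as follows; the structure and helper below are placeholders for illustration only and do not appear in the patch. The VM remembers the page-directory address it was last flushed with and only skips the flush when that address is unchanged.

#include <stdint.h>
#include <stdbool.h>

/* Illustration only: placeholder structure, not the driver's radeon_vm. */
struct vm_sketch {
	uint64_t pd_gpu_addr;   /* address the hardware currently uses     */
	bool     flushed_once;  /* has any flush happened for this VMID?   */
};

/* Return true when the hardware must be flushed for this submission:
 * either we never flushed, or the page directory moved to a new address. */
static bool vm_needs_flush(struct vm_sketch *vm, uint64_t current_pd_addr)
{
	if (!vm->flushed_once || vm->pd_gpu_addr != current_pd_addr) {
		vm->pd_gpu_addr = current_pd_addr;
		vm->flushed_once = true;
		return true;
	}
	return false;
}

int main(void)
{
	struct vm_sketch vm = { 0, false };

	vm_needs_flush(&vm, 0x100000);  /* first use: flush        */
	vm_needs_flush(&vm, 0x100000);  /* same address: skip      */
	vm_needs_flush(&vm, 0x200000);  /* moved: flush again      */
	return 0;
}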
Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/radeon/radeon.h     |   8 +-
 drivers/gpu/drm/radeon/radeon_cs.c  |  36 +++++---
 drivers/gpu/drm/radeon/radeon_kms.c |   4 +-
 drivers/gpu/drm/radeon/radeon_vm.c  | 172 +++++++++++++++++++++++-------------
 4 files changed, 143 insertions(+), 77 deletions(-)
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 0c03bcb..f4fb984 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -860,7 +860,7 @@ struct radeon_vm { unsigned id;
/* contains the page directory */ - struct radeon_sa_bo *page_directory; + struct radeon_bo *page_directory; uint64_t pd_gpu_addr;
/* array of page tables, one for each page directory entry */ @@ -1008,6 +1008,7 @@ struct radeon_cs_parser { unsigned nrelocs; struct radeon_cs_reloc *relocs; struct radeon_cs_reloc **relocs_ptr; + struct radeon_bo_list *vm_bos; struct list_head validated; unsigned dma_reloc_idx; /* indices of various chunks */ @@ -2790,9 +2791,12 @@ extern void radeon_program_register_sequence(struct radeon_device *rdev, */ int radeon_vm_manager_init(struct radeon_device *rdev); void radeon_vm_manager_fini(struct radeon_device *rdev); -void radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm); +int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm); void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm); int radeon_vm_alloc_pt(struct radeon_device *rdev, struct radeon_vm *vm); +struct radeon_bo_list *radeon_vm_add_bos(struct radeon_device *rdev, + struct radeon_vm *vm, + struct list_head *head); void radeon_vm_add_to_lru(struct radeon_device *rdev, struct radeon_vm *vm); struct radeon_fence *radeon_vm_grab_id(struct radeon_device *rdev, struct radeon_vm *vm, int ring); diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c index 0b3ba38..335cc84 100644 --- a/drivers/gpu/drm/radeon/radeon_cs.c +++ b/drivers/gpu/drm/radeon/radeon_cs.c @@ -109,6 +109,11 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p) radeon_bo_list_add_object(&p->relocs[i].lobj, &p->validated); } + + if (p->cs_flags & RADEON_CS_USE_VM) + p->vm_bos = radeon_vm_add_bos(p->rdev, p->ib.vm, + &p->validated); + return radeon_bo_list_validate(&p->ticket, &p->validated, p->ring); }
@@ -320,6 +325,7 @@ static void radeon_cs_parser_fini(struct radeon_cs_parser *parser, int error, bo kfree(parser->track); kfree(parser->relocs); kfree(parser->relocs_ptr); + kfree(parser->vm_bos); for (i = 0; i < parser->nchunks; i++) drm_free_large(parser->chunks[i].kdata); kfree(parser->chunks); @@ -359,24 +365,28 @@ static int radeon_cs_ib_chunk(struct radeon_device *rdev, return r; }
-static int radeon_bo_vm_update_pte(struct radeon_cs_parser *parser, +static int radeon_bo_vm_update_pte(struct radeon_cs_parser *p, struct radeon_vm *vm) { - struct radeon_device *rdev = parser->rdev; - struct radeon_bo_list *lobj; - struct radeon_bo *bo; - int r; + struct radeon_device *rdev = p->rdev; + int i, r;
- r = radeon_vm_bo_update(rdev, vm, rdev->ring_tmp_bo.bo, &rdev->ring_tmp_bo.bo->tbo.mem); - if (r) { + r = radeon_vm_bo_update(rdev, vm, rdev->ring_tmp_bo.bo, + &rdev->ring_tmp_bo.bo->tbo.mem); + if (r) return r; - } - list_for_each_entry(lobj, &parser->validated, tv.head) { - bo = lobj->bo; - r = radeon_vm_bo_update(parser->rdev, vm, bo, &bo->tbo.mem); - if (r) { + + for (i = 0; i < p->nrelocs; i++) { + struct radeon_bo *bo; + + /* ignore duplicates */ + if (p->relocs_ptr[i] != &p->relocs[i]) + continue; + + bo = p->relocs[i].robj; + r = radeon_vm_bo_update(rdev, vm, bo, &bo->tbo.mem); + if (r) return r; - } } return 0; } diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c index eea70b0..980bea8 100644 --- a/drivers/gpu/drm/radeon/radeon_kms.c +++ b/drivers/gpu/drm/radeon/radeon_kms.c @@ -544,7 +544,9 @@ int radeon_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) return -ENOMEM; }
- radeon_vm_init(rdev, &fpriv->vm); + r = radeon_vm_init(rdev, &fpriv->vm); + if (r) + return r;
r = radeon_bo_reserve(rdev->ring_tmp_bo.bo, false); if (r) diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c index 5160176..b50e50f 100644 --- a/drivers/gpu/drm/radeon/radeon_vm.c +++ b/drivers/gpu/drm/radeon/radeon_vm.c @@ -117,7 +117,7 @@ int radeon_vm_manager_init(struct radeon_device *rdev)
/* restore page table */ list_for_each_entry(vm, &rdev->vm_manager.lru_vm, list) { - if (vm->page_directory == NULL) + if (vm->page_tables == NULL) continue;
list_for_each_entry(bo_va, &vm->va, vm_list) { @@ -143,12 +143,10 @@ static void radeon_vm_free_pt(struct radeon_device *rdev, struct radeon_bo_va *bo_va; int i;
- if (!vm->page_directory) + if (!vm->page_tables) return;
list_del_init(&vm->list); - radeon_sa_bo_free(rdev, &vm->page_directory, vm->fence); - list_for_each_entry(bo_va, &vm->va, vm_list) { bo_va->valid = false; } @@ -237,69 +235,19 @@ static int radeon_vm_evict(struct radeon_device *rdev, struct radeon_vm *vm) */ int radeon_vm_alloc_pt(struct radeon_device *rdev, struct radeon_vm *vm) { - unsigned pd_size, pd_entries, pts_size; - struct radeon_ib ib; - int r; + unsigned pts_size;
- if (vm == NULL) { + if (vm == NULL) return -EINVAL; - }
- if (vm->page_directory != NULL) { + if (vm->page_tables != NULL) return 0; - } - - pd_size = radeon_vm_directory_size(rdev); - pd_entries = radeon_vm_num_pdes(rdev); - -retry: - r = radeon_sa_bo_new(rdev, &rdev->vm_manager.sa_manager, - &vm->page_directory, pd_size, - RADEON_VM_PTB_ALIGN_SIZE, false); - if (r == -ENOMEM) { - r = radeon_vm_evict(rdev, vm); - if (r) - return r; - goto retry; - - } else if (r) { - return r; - } - - vm->pd_gpu_addr = radeon_sa_bo_gpu_addr(vm->page_directory); - - /* Initially clear the page directory */ - r = radeon_ib_get(rdev, R600_RING_TYPE_DMA_INDEX, &ib, - NULL, pd_entries * 2 + 64); - if (r) { - radeon_sa_bo_free(rdev, &vm->page_directory, vm->fence); - return r; - } - - ib.length_dw = 0; - - radeon_asic_vm_set_page(rdev, &ib, vm->pd_gpu_addr, - 0, pd_entries, 0, 0); - - radeon_semaphore_sync_to(ib.semaphore, vm->fence); - r = radeon_ib_schedule(rdev, &ib, NULL); - if (r) { - radeon_ib_free(rdev, &ib); - radeon_sa_bo_free(rdev, &vm->page_directory, vm->fence); - return r; - } - radeon_fence_unref(&vm->fence); - vm->fence = radeon_fence_ref(ib.fence); - radeon_ib_free(rdev, &ib); - radeon_fence_unref(&vm->last_flush);
/* allocate page table array */ pts_size = radeon_vm_num_pdes(rdev) * sizeof(struct radeon_sa_bo *); vm->page_tables = kzalloc(pts_size, GFP_KERNEL); - if (vm->page_tables == NULL) { DRM_ERROR("Cannot allocate memory for page table array\n"); - radeon_sa_bo_free(rdev, &vm->page_directory, vm->fence); return -ENOMEM; }
@@ -323,6 +271,33 @@ void radeon_vm_add_to_lru(struct radeon_device *rdev, struct radeon_vm *vm) }
/** + * radeon_vm_add_bos - add the vm BOs to a CS list + * + * @vm: vm providing the BOs + * @head: head of CS validation list + */ +struct radeon_bo_list *radeon_vm_add_bos(struct radeon_device *rdev, + struct radeon_vm *vm, + struct list_head *head) +{ + struct radeon_bo_list *list; + + list = kmalloc(sizeof(struct radeon_bo_list), GFP_KERNEL); + if (!list) + return NULL; + + /* add the vm page table to the list */ + list[0].bo = vm->page_directory; + list[0].written = 1; + list[0].domain = RADEON_GEM_DOMAIN_VRAM; + list[0].alt_domain = RADEON_GEM_DOMAIN_VRAM; + list[0].tv.bo = &vm->page_directory->tbo; + radeon_bo_list_add_object(&list[0], head); + + return list; +} + +/** * radeon_vm_grab_id - allocate the next free VMID * * @rdev: radeon_device pointer @@ -393,10 +368,14 @@ void radeon_vm_flush(struct radeon_device *rdev, struct radeon_vm *vm, int ring) { + uint64_t pd_addr = radeon_bo_gpu_offset(vm->page_directory); + /* if we can't remember our last VM flush then flush now! */ /* XXX figure out why we have to flush all the time */ - if (!vm->last_flush || true) + if (!vm->last_flush || true || pd_addr != vm->pd_gpu_addr) { + vm->pd_gpu_addr = pd_addr; radeon_ring_vm_flush(rdev, ring, vm); + } }
/** @@ -668,7 +647,7 @@ retry: return r; }
- pde = vm->pd_gpu_addr + pt_idx * 8; + pde = radeon_bo_gpu_offset(vm->page_directory) + pt_idx * 8;
pt = radeon_sa_bo_gpu_addr(vm->page_tables[pt_idx]);
@@ -800,7 +779,7 @@ int radeon_vm_bo_update(struct radeon_device *rdev, int r;
/* nothing to do if vm isn't bound */ - if (vm->page_directory == NULL) + if (vm->page_tables == NULL) return 0;
bo_va = radeon_vm_bo_find(vm, bo); @@ -957,8 +936,15 @@ void radeon_vm_bo_invalidate(struct radeon_device *rdev, * * Init @vm fields (cayman+). */ -void radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm) +int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm) { + unsigned pd_size, pd_entries; + struct ttm_validate_buffer tv; + struct ww_acquire_ctx ticket; + struct list_head head; + struct radeon_ib ib; + int r; + vm->id = 0; vm->fence = NULL; vm->last_flush = NULL; @@ -966,6 +952,68 @@ void radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm) mutex_init(&vm->mutex); INIT_LIST_HEAD(&vm->list); INIT_LIST_HEAD(&vm->va); + + pd_size = radeon_vm_directory_size(rdev); + pd_entries = radeon_vm_num_pdes(rdev); + + r = radeon_bo_create(rdev, pd_size, RADEON_VM_PTB_ALIGN_SIZE, false, + RADEON_GEM_DOMAIN_VRAM, NULL, + &vm->page_directory); + if (r) + return r; + + /* Initially clear the page directory */ + + memset(&tv, 0, sizeof(tv)); + tv.bo = &vm->page_directory->tbo; + + INIT_LIST_HEAD(&head); + list_add(&tv.head, &head); + + r = ttm_eu_reserve_buffers(&ticket, &head); + if (r) + goto error_free; + + r = ttm_bo_validate(&vm->page_directory->tbo, + &vm->page_directory->placement, + true, false); + if (r) + goto error_unreserve; + + vm->pd_gpu_addr = radeon_bo_gpu_offset(vm->page_directory); + + r = radeon_ib_get(rdev, R600_RING_TYPE_DMA_INDEX, &ib, + NULL, pd_entries * 2 + 64); + if (r) + goto error_unreserve; + + ib.length_dw = 0; + + radeon_asic_vm_set_page(rdev, &ib, vm->pd_gpu_addr, + 0, pd_entries, 0, 0); + + radeon_semaphore_sync_to(ib.semaphore, vm->last_id_use); + r = radeon_ib_schedule(rdev, &ib, NULL); + if (r) + goto error_unreserve; + + ttm_eu_fence_buffer_objects(&ticket, &head, ib.fence); + radeon_fence_unref(&vm->fence); + vm->fence = radeon_fence_ref(ib.fence); + radeon_fence_unref(&vm->last_flush); + + radeon_ib_free(rdev, &ib); + + return 0; + +error_unreserve: + ttm_eu_backoff_reservation(&ticket, &head); + +error_free: + radeon_bo_unref(&vm->page_directory); + vm->page_directory = NULL; + return r; + }
/** @@ -1003,4 +1051,6 @@ void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm) radeon_fence_unref(&vm->last_flush); radeon_fence_unref(&vm->last_id_use); mutex_unlock(&vm->mutex); + + radeon_bo_unref(&vm->page_directory); }
From: Christian König christian.koenig@amd.com
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/radeon/radeon.h | 12 +- drivers/gpu/drm/radeon/radeon_cs.c | 5 - drivers/gpu/drm/radeon/radeon_device.c | 1 - drivers/gpu/drm/radeon/radeon_vm.c | 389 ++++++++++++--------------------- 4 files changed, 141 insertions(+), 266 deletions(-)
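The page tables themselves move to normal BOs as well: the per-VM array now holds one small bookkeeping entry per page directory entry, carrying the backing BO (allocated on demand in radeon_vm_bo_set_addr) and the address last written into that PDE, so radeon_vm_update_pdes can skip entries that are still current. A sketch of that check (names beyond those in the diff below are only illustrative):

	struct radeon_vm_pt {
		struct radeon_bo	*bo;	/* backing BO of this page table */
		uint64_t		addr;	/* address last written into the PDE */
	};

	static bool example_pde_needs_update(struct radeon_vm_pt *entry)
	{
		uint64_t pt = radeon_bo_gpu_offset(entry->bo);

		/* nothing to do if the directory already points here */
		if (entry->addr == pt)
			return false;

		entry->addr = pt;
		return true;
	}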
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index f4fb984..375dbf5 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -854,8 +854,12 @@ struct radeon_mec { #define R600_PTE_READABLE (1 << 5) #define R600_PTE_WRITEABLE (1 << 6)
+struct radeon_vm_pt { + struct radeon_bo *bo; + uint64_t addr; +}; + struct radeon_vm { - struct list_head list; struct list_head va; unsigned id;
@@ -864,7 +868,7 @@ struct radeon_vm { uint64_t pd_gpu_addr;
/* array of page tables, one for each page directory entry */ - struct radeon_sa_bo **page_tables; + struct radeon_vm_pt *page_tables;
struct mutex mutex; /* last fence for cs using this vm */ @@ -877,9 +881,7 @@ struct radeon_vm {
struct radeon_vm_manager { struct mutex lock; - struct list_head lru_vm; struct radeon_fence *active[RADEON_NUM_VM]; - struct radeon_sa_manager sa_manager; uint32_t max_pfn; /* number of VMIDs */ unsigned nvm; @@ -2793,11 +2795,9 @@ int radeon_vm_manager_init(struct radeon_device *rdev); void radeon_vm_manager_fini(struct radeon_device *rdev); int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm); void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm); -int radeon_vm_alloc_pt(struct radeon_device *rdev, struct radeon_vm *vm); struct radeon_bo_list *radeon_vm_add_bos(struct radeon_device *rdev, struct radeon_vm *vm, struct list_head *head); -void radeon_vm_add_to_lru(struct radeon_device *rdev, struct radeon_vm *vm); struct radeon_fence *radeon_vm_grab_id(struct radeon_device *rdev, struct radeon_vm *vm, int ring); void radeon_vm_flush(struct radeon_device *rdev, diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c index 335cc84..849f997 100644 --- a/drivers/gpu/drm/radeon/radeon_cs.c +++ b/drivers/gpu/drm/radeon/radeon_cs.c @@ -420,10 +420,6 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev,
mutex_lock(&rdev->vm_manager.lock); mutex_lock(&vm->mutex); - r = radeon_vm_alloc_pt(rdev, vm); - if (r) { - goto out; - } r = radeon_bo_vm_update_pte(parser, vm); if (r) { goto out; @@ -441,7 +437,6 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev, }
out: - radeon_vm_add_to_lru(rdev, vm); mutex_unlock(&vm->mutex); mutex_unlock(&rdev->vm_manager.lock); return r; diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index fa7841b..e58dbab 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -1198,7 +1198,6 @@ int radeon_device_init(struct radeon_device *rdev, * Max GPUVM size for cayman and SI is 40 bits. */ rdev->vm_manager.max_pfn = 1 << 20; - INIT_LIST_HEAD(&rdev->vm_manager.lru_vm);
/* Set asic functions */ r = radeon_asic_init(rdev); diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c index b50e50f..5441a6a 100644 --- a/drivers/gpu/drm/radeon/radeon_vm.c +++ b/drivers/gpu/drm/radeon/radeon_vm.c @@ -84,83 +84,19 @@ static unsigned radeon_vm_directory_size(struct radeon_device *rdev) */ int radeon_vm_manager_init(struct radeon_device *rdev) { - struct radeon_vm *vm; - struct radeon_bo_va *bo_va; int r; - unsigned size;
if (!rdev->vm_manager.enabled) { - /* allocate enough for 2 full VM pts */ - size = radeon_vm_directory_size(rdev); - size += rdev->vm_manager.max_pfn * 8; - size *= 2; - r = radeon_sa_bo_manager_init(rdev, &rdev->vm_manager.sa_manager, - RADEON_GPU_PAGE_ALIGN(size), - RADEON_VM_PTB_ALIGN_SIZE, - RADEON_GEM_DOMAIN_VRAM); - if (r) { - dev_err(rdev->dev, "failed to allocate vm bo (%dKB)\n", - (rdev->vm_manager.max_pfn * 8) >> 10); - return r; - } - r = radeon_asic_vm_init(rdev); if (r) return r;
rdev->vm_manager.enabled = true; - - r = radeon_sa_bo_manager_start(rdev, &rdev->vm_manager.sa_manager); - if (r) - return r; - } - - /* restore page table */ - list_for_each_entry(vm, &rdev->vm_manager.lru_vm, list) { - if (vm->page_tables == NULL) - continue; - - list_for_each_entry(bo_va, &vm->va, vm_list) { - bo_va->valid = false; - } } return 0; }
/** - * radeon_vm_free_pt - free the page table for a specific vm - * - * @rdev: radeon_device pointer - * @vm: vm to unbind - * - * Free the page table of a specific vm (cayman+). - * - * Global and local mutex must be lock! - */ -static void radeon_vm_free_pt(struct radeon_device *rdev, - struct radeon_vm *vm) -{ - struct radeon_bo_va *bo_va; - int i; - - if (!vm->page_tables) - return; - - list_del_init(&vm->list); - list_for_each_entry(bo_va, &vm->va, vm_list) { - bo_va->valid = false; - } - - if (vm->page_tables == NULL) - return; - - for (i = 0; i < radeon_vm_num_pdes(rdev); i++) - radeon_sa_bo_free(rdev, &vm->page_tables[i], vm->fence); - - kfree(vm->page_tables); -} - -/** * radeon_vm_manager_fini - tear down the vm manager * * @rdev: radeon_device pointer @@ -169,105 +105,17 @@ static void radeon_vm_free_pt(struct radeon_device *rdev, */ void radeon_vm_manager_fini(struct radeon_device *rdev) { - struct radeon_vm *vm, *tmp; int i;
if (!rdev->vm_manager.enabled) return;
mutex_lock(&rdev->vm_manager.lock); - /* free all allocated page tables */ - list_for_each_entry_safe(vm, tmp, &rdev->vm_manager.lru_vm, list) { - mutex_lock(&vm->mutex); - radeon_vm_free_pt(rdev, vm); - mutex_unlock(&vm->mutex); - } - for (i = 0; i < RADEON_NUM_VM; ++i) { + for (i = 0; i < RADEON_NUM_VM; ++i) radeon_fence_unref(&rdev->vm_manager.active[i]); - } radeon_asic_vm_fini(rdev); - mutex_unlock(&rdev->vm_manager.lock); - - radeon_sa_bo_manager_suspend(rdev, &rdev->vm_manager.sa_manager); - radeon_sa_bo_manager_fini(rdev, &rdev->vm_manager.sa_manager); rdev->vm_manager.enabled = false; -} - -/** - * radeon_vm_evict - evict page table to make room for new one - * - * @rdev: radeon_device pointer - * @vm: VM we want to allocate something for - * - * Evict a VM from the lru, making sure that it isn't @vm. (cayman+). - * Returns 0 for success, -ENOMEM for failure. - * - * Global and local mutex must be locked! - */ -static int radeon_vm_evict(struct radeon_device *rdev, struct radeon_vm *vm) -{ - struct radeon_vm *vm_evict; - - if (list_empty(&rdev->vm_manager.lru_vm)) - return -ENOMEM; - - vm_evict = list_first_entry(&rdev->vm_manager.lru_vm, - struct radeon_vm, list); - if (vm_evict == vm) - return -ENOMEM; - - mutex_lock(&vm_evict->mutex); - radeon_vm_free_pt(rdev, vm_evict); - mutex_unlock(&vm_evict->mutex); - return 0; -} - -/** - * radeon_vm_alloc_pt - allocates a page table for a VM - * - * @rdev: radeon_device pointer - * @vm: vm to bind - * - * Allocate a page table for the requested vm (cayman+). - * Returns 0 for success, error for failure. - * - * Global and local mutex must be locked! - */ -int radeon_vm_alloc_pt(struct radeon_device *rdev, struct radeon_vm *vm) -{ - unsigned pts_size; - - if (vm == NULL) - return -EINVAL; - - if (vm->page_tables != NULL) - return 0; - - /* allocate page table array */ - pts_size = radeon_vm_num_pdes(rdev) * sizeof(struct radeon_sa_bo *); - vm->page_tables = kzalloc(pts_size, GFP_KERNEL); - if (vm->page_tables == NULL) { - DRM_ERROR("Cannot allocate memory for page table array\n"); - return -ENOMEM; - } - - return 0; -} - -/** - * radeon_vm_add_to_lru - add VMs page table to LRU list - * - * @rdev: radeon_device pointer - * @vm: vm to add to LRU - * - * Add the allocated page table to the LRU list (cayman+). - * - * Global mutex must be locked! - */ -void radeon_vm_add_to_lru(struct radeon_device *rdev, struct radeon_vm *vm) -{ - list_del_init(&vm->list); - list_add_tail(&vm->list, &rdev->vm_manager.lru_vm); + mutex_unlock(&rdev->vm_manager.lock); }
/** @@ -281,8 +129,10 @@ struct radeon_bo_list *radeon_vm_add_bos(struct radeon_device *rdev, struct list_head *head) { struct radeon_bo_list *list; + unsigned i, idx, size;
- list = kmalloc(sizeof(struct radeon_bo_list), GFP_KERNEL); + size = (radeon_vm_num_pdes(rdev) + 1) * sizeof(struct radeon_bo_list); + list = kmalloc(size, GFP_KERNEL); if (!list) return NULL;
@@ -294,6 +144,18 @@ struct radeon_bo_list *radeon_vm_add_bos(struct radeon_device *rdev, list[0].tv.bo = &vm->page_directory->tbo; radeon_bo_list_add_object(&list[0], head);
+ for (i = 0, idx = 1; i < radeon_vm_num_pdes(rdev); i++) { + if (!vm->page_tables[i].bo) + continue; + + list[idx].bo = vm->page_tables[i].bo; + list[idx].written = 1; + list[idx].domain = RADEON_GEM_DOMAIN_VRAM; + list[idx].alt_domain = RADEON_GEM_DOMAIN_VRAM; + list[idx].tv.bo = &list[idx].bo->tbo; + radeon_bo_list_add_object(&list[idx++], head); + } + return list; }
@@ -475,6 +337,63 @@ struct radeon_bo_va *radeon_vm_bo_add(struct radeon_device *rdev, }
/** + * radeon_vm_clear_bo - initially clear the page dir/table + * + * @rdev: radeon_device pointer + * @bo: bo to clear + */ +static int radeon_vm_clear_bo(struct radeon_device *rdev, + struct radeon_bo *bo) +{ + struct ttm_validate_buffer tv; + struct ww_acquire_ctx ticket; + struct list_head head; + struct radeon_ib ib; + unsigned entries; + uint64_t addr; + int r; + + memset(&tv, 0, sizeof(tv)); + tv.bo = &bo->tbo; + + INIT_LIST_HEAD(&head); + list_add(&tv.head, &head); + + r = ttm_eu_reserve_buffers(&ticket, &head); + if (r) + return r; + + r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false); + if (r) + goto error; + + addr = radeon_bo_gpu_offset(bo); + entries = radeon_bo_size(bo) / 8; + + r = radeon_ib_get(rdev, R600_RING_TYPE_DMA_INDEX, &ib, + NULL, entries * 2 + 64); + if (r) + goto error; + + ib.length_dw = 0; + + radeon_asic_vm_set_page(rdev, &ib, addr, 0, entries, 0, 0); + + r = radeon_ib_schedule(rdev, &ib, NULL); + if (r) + goto error; + + ttm_eu_fence_buffer_objects(&ticket, &head, ib.fence); + radeon_ib_free(rdev, &ib); + + return 0; + +error: + ttm_eu_backoff_reservation(&ticket, &head); + return r; +} + +/** * radeon_vm_bo_set_addr - set bos virtual address inside a vm * * @rdev: radeon_device pointer @@ -498,7 +417,8 @@ int radeon_vm_bo_set_addr(struct radeon_device *rdev, struct radeon_vm *vm = bo_va->vm; struct radeon_bo_va *tmp; struct list_head *head; - unsigned last_pfn; + unsigned last_pfn, pt_idx; + int r;
if (soffset) { /* make sure object fit at this offset */ @@ -549,8 +469,36 @@ int radeon_vm_bo_set_addr(struct radeon_device *rdev, bo_va->valid = false; list_move(&bo_va->vm_list, head);
+ soffset = (soffset / RADEON_GPU_PAGE_SIZE) >> RADEON_VM_BLOCK_SIZE; + eoffset = (eoffset / RADEON_GPU_PAGE_SIZE) >> RADEON_VM_BLOCK_SIZE; + + radeon_bo_unreserve(bo_va->bo); + + /* walk over the address space and allocate the page tables */ + for (pt_idx = soffset; pt_idx <= eoffset; ++pt_idx) { + if (vm->page_tables[pt_idx].bo) + continue; + + vm->page_tables[pt_idx].addr = 0; + r = radeon_bo_create(rdev, RADEON_VM_PTE_COUNT * 8, + RADEON_GPU_PAGE_SIZE, false, + RADEON_GEM_DOMAIN_VRAM, NULL, + &vm->page_tables[pt_idx].bo); + if (r) + return r; + + mutex_unlock(&vm->mutex); + r = radeon_vm_clear_bo(rdev, vm->page_tables[pt_idx].bo); + mutex_lock(&vm->mutex); + if (r) { + radeon_bo_unref(&vm->page_tables[pt_idx].bo); + vm->page_tables[pt_idx].bo = NULL; + return r; + } + } + mutex_unlock(&vm->mutex); - return 0; + return radeon_bo_reserve(bo_va->bo, false); }
/** @@ -620,37 +568,21 @@ static int radeon_vm_update_pdes(struct radeon_device *rdev, uint64_t last_pde = ~0, last_pt = ~0; unsigned count = 0; uint64_t pt_idx; - int r;
start = (start / RADEON_GPU_PAGE_SIZE) >> RADEON_VM_BLOCK_SIZE; end = (end / RADEON_GPU_PAGE_SIZE) >> RADEON_VM_BLOCK_SIZE;
/* walk over the address space and update the page directory */ for (pt_idx = start; pt_idx <= end; ++pt_idx) { - uint64_t pde, pt; + struct radeon_bo *bo = vm->page_tables[pt_idx].bo; + uint64_t pde, pt = radeon_bo_gpu_offset(bo);
- if (vm->page_tables[pt_idx]) + if (vm->page_tables[pt_idx].addr == pt) continue; - -retry: - r = radeon_sa_bo_new(rdev, &rdev->vm_manager.sa_manager, - &vm->page_tables[pt_idx], - RADEON_VM_PTE_COUNT * 8, - RADEON_GPU_PAGE_SIZE, false); - - if (r == -ENOMEM) { - r = radeon_vm_evict(rdev, vm); - if (r) - return r; - goto retry; - } else if (r) { - return r; - } + vm->page_tables[pt_idx].addr = pt;
pde = radeon_bo_gpu_offset(vm->page_directory) + pt_idx * 8;
- pt = radeon_sa_bo_gpu_addr(vm->page_tables[pt_idx]); - if (((last_pde + 8 * count) != pde) || ((last_pt + incr * count) != pt)) {
@@ -658,10 +590,6 @@ retry: radeon_asic_vm_set_page(rdev, ib, last_pde, last_pt, count, incr, R600_PTE_VALID); - - count *= RADEON_VM_PTE_COUNT; - radeon_asic_vm_set_page(rdev, ib, last_pt, 0, - count, 0, 0); }
count = 1; @@ -672,15 +600,10 @@ retry: } }
- if (count) { + if (count) radeon_asic_vm_set_page(rdev, ib, last_pde, last_pt, count, incr, R600_PTE_VALID);
- count *= RADEON_VM_PTE_COUNT; - radeon_asic_vm_set_page(rdev, ib, last_pt, 0, - count, 0, 0); - } - return 0; }
@@ -724,7 +647,7 @@ static void radeon_vm_update_ptes(struct radeon_device *rdev, else nptes = RADEON_VM_PTE_COUNT - (addr & mask);
- pte = radeon_sa_bo_gpu_addr(vm->page_tables[pt_idx]); + pte = radeon_bo_gpu_offset(vm->page_tables[pt_idx].bo); pte += (addr & mask) * 8;
if ((last_pte + 8 * count) != pte) { @@ -778,10 +701,6 @@ int radeon_vm_bo_update(struct radeon_device *rdev, uint64_t addr; int r;
- /* nothing to do if vm isn't bound */ - if (vm->page_tables == NULL) - return 0; - bo_va = radeon_vm_bo_find(vm, bo); if (bo_va == NULL) { dev_err(rdev->dev, "bo %p not in vm %p\n", bo, vm); @@ -938,11 +857,7 @@ void radeon_vm_bo_invalidate(struct radeon_device *rdev, */ int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm) { - unsigned pd_size, pd_entries; - struct ttm_validate_buffer tv; - struct ww_acquire_ctx ticket; - struct list_head head; - struct radeon_ib ib; + unsigned pd_size, pd_entries, pts_size; int r;
vm->id = 0; @@ -950,70 +865,33 @@ int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm) vm->last_flush = NULL; vm->last_id_use = NULL; mutex_init(&vm->mutex); - INIT_LIST_HEAD(&vm->list); INIT_LIST_HEAD(&vm->va);
pd_size = radeon_vm_directory_size(rdev); pd_entries = radeon_vm_num_pdes(rdev);
+ /* allocate page table array */ + pts_size = pd_entries * sizeof(struct radeon_vm_pt); + vm->page_tables = kzalloc(pts_size, GFP_KERNEL); + if (vm->page_tables == NULL) { + DRM_ERROR("Cannot allocate memory for page table array\n"); + return -ENOMEM; + } + r = radeon_bo_create(rdev, pd_size, RADEON_VM_PTB_ALIGN_SIZE, false, RADEON_GEM_DOMAIN_VRAM, NULL, &vm->page_directory); if (r) return r;
- /* Initially clear the page directory */ - - memset(&tv, 0, sizeof(tv)); - tv.bo = &vm->page_directory->tbo; - - INIT_LIST_HEAD(&head); - list_add(&tv.head, &head); - - r = ttm_eu_reserve_buffers(&ticket, &head); - if (r) - goto error_free; - - r = ttm_bo_validate(&vm->page_directory->tbo, - &vm->page_directory->placement, - true, false); - if (r) - goto error_unreserve; - - vm->pd_gpu_addr = radeon_bo_gpu_offset(vm->page_directory); - - r = radeon_ib_get(rdev, R600_RING_TYPE_DMA_INDEX, &ib, - NULL, pd_entries * 2 + 64); - if (r) - goto error_unreserve; - - ib.length_dw = 0; - - radeon_asic_vm_set_page(rdev, &ib, vm->pd_gpu_addr, - 0, pd_entries, 0, 0); - - radeon_semaphore_sync_to(ib.semaphore, vm->last_id_use); - r = radeon_ib_schedule(rdev, &ib, NULL); - if (r) - goto error_unreserve; - - ttm_eu_fence_buffer_objects(&ticket, &head, ib.fence); - radeon_fence_unref(&vm->fence); - vm->fence = radeon_fence_ref(ib.fence); - radeon_fence_unref(&vm->last_flush); - - radeon_ib_free(rdev, &ib); + r = radeon_vm_clear_bo(rdev, vm->page_directory); + if (r) { + radeon_bo_unref(&vm->page_directory); + vm->page_directory = NULL; + return r; + }
return 0; - -error_unreserve: - ttm_eu_backoff_reservation(&ticket, &head); - -error_free: - radeon_bo_unref(&vm->page_directory); - vm->page_directory = NULL; - return r; - }
/** @@ -1028,12 +906,9 @@ error_free: void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm) { struct radeon_bo_va *bo_va, *tmp; - int r; + int i, r;
- mutex_lock(&rdev->vm_manager.lock); mutex_lock(&vm->mutex); - radeon_vm_free_pt(rdev, vm); - mutex_unlock(&rdev->vm_manager.lock);
if (!list_empty(&vm->va)) { dev_err(rdev->dev, "still active bo inside vm\n"); @@ -1047,10 +922,16 @@ void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm) kfree(bo_va); } } + + for (i = 0; i < radeon_vm_num_pdes(rdev); i++) + radeon_bo_unref(&vm->page_tables[i].bo); + kfree(vm->page_tables); + + radeon_bo_unref(&vm->page_directory); + radeon_fence_unref(&vm->fence); radeon_fence_unref(&vm->last_flush); radeon_fence_unref(&vm->last_id_use); - mutex_unlock(&vm->mutex);
- radeon_bo_unref(&vm->page_directory); + mutex_unlock(&vm->mutex); }
From: Christian König christian.koenig@amd.com
Not needed any more.
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/radeon/radeon.h | 1 - drivers/gpu/drm/radeon/radeon_cs.c | 4 ---- drivers/gpu/drm/radeon/radeon_device.c | 3 +-- drivers/gpu/drm/radeon/radeon_ring.c | 7 +++++++ drivers/gpu/drm/radeon/radeon_vm.c | 10 +++------- 5 files changed, 11 insertions(+), 14 deletions(-)
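Alongside dropping the lock, the VMID grab that used to happen in the CS path under it now happens when the IB is actually scheduled. A rough sketch of that moved step (example_grab_vmid is only an illustrative name; the real change is in the radeon_ring.c hunk further down):

	static void example_grab_vmid(struct radeon_device *rdev,
				      struct radeon_ib *ib)
	{
		struct radeon_fence *vm_id_fence;

		if (!ib->vm)
			return;

		/* sync to the fence of whoever used this VMID last */
		vm_id_fence = radeon_vm_grab_id(rdev, ib->vm, ib->ring);
		radeon_semaphore_sync_to(ib->semaphore, vm_id_fence);
	}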
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 375dbf5..daad682 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -880,7 +880,6 @@ struct radeon_vm { };
struct radeon_vm_manager { - struct mutex lock; struct radeon_fence *active[RADEON_NUM_VM]; uint32_t max_pfn; /* number of VMIDs */ diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c index 849f997..7c762a7 100644 --- a/drivers/gpu/drm/radeon/radeon_cs.c +++ b/drivers/gpu/drm/radeon/radeon_cs.c @@ -418,7 +418,6 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev, if (parser->ring == R600_RING_TYPE_UVD_INDEX) radeon_uvd_note_usage(rdev);
- mutex_lock(&rdev->vm_manager.lock); mutex_lock(&vm->mutex); r = radeon_bo_vm_update_pte(parser, vm); if (r) { @@ -426,8 +425,6 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev, } radeon_cs_sync_rings(parser); radeon_semaphore_sync_to(parser->ib.semaphore, vm->fence); - radeon_semaphore_sync_to(parser->ib.semaphore, - radeon_vm_grab_id(rdev, vm, parser->ring));
if ((rdev->family >= CHIP_TAHITI) && (parser->chunk_const_ib_idx != -1)) { @@ -438,7 +435,6 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev,
out: mutex_unlock(&vm->mutex); - mutex_unlock(&rdev->vm_manager.lock); return r; }
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index e58dbab..7db44de 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -1191,8 +1191,7 @@ int radeon_device_init(struct radeon_device *rdev, r = radeon_gem_init(rdev); if (r) return r; - /* initialize vm here */ - mutex_init(&rdev->vm_manager.lock); + /* Adjust VM size here. * Currently set to 4GB ((1 << 20) 4k pages). * Max GPUVM size for cayman and SI is 40 bits. diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c index 665591a..5321e24 100644 --- a/drivers/gpu/drm/radeon/radeon_ring.c +++ b/drivers/gpu/drm/radeon/radeon_ring.c @@ -145,6 +145,13 @@ int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib, return r; }
+ /* grab a vm id if necessary */ + if (ib->vm) { + struct radeon_fence *vm_id_fence; + vm_id_fence = radeon_vm_grab_id(rdev, ib->vm, ib->ring); + radeon_semaphore_sync_to(ib->semaphore, vm_id_fence); + } + /* sync with other rings */ r = radeon_semaphore_sync_rings(rdev, ib->semaphore, ib->ring); if (r) { diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c index 5441a6a..da1a047 100644 --- a/drivers/gpu/drm/radeon/radeon_vm.c +++ b/drivers/gpu/drm/radeon/radeon_vm.c @@ -110,12 +110,10 @@ void radeon_vm_manager_fini(struct radeon_device *rdev) if (!rdev->vm_manager.enabled) return;
- mutex_lock(&rdev->vm_manager.lock); for (i = 0; i < RADEON_NUM_VM; ++i) radeon_fence_unref(&rdev->vm_manager.active[i]); radeon_asic_vm_fini(rdev); rdev->vm_manager.enabled = false; - mutex_unlock(&rdev->vm_manager.lock); }
/** @@ -688,7 +686,7 @@ static void radeon_vm_update_ptes(struct radeon_device *rdev, * Fill in the page table entries for @bo (cayman+). * Returns 0 for success, -EINVAL for failure. * - * Object have to be reserved & global and local mutex must be locked! + * Object have to be reserved and mutex must be locked! */ int radeon_vm_bo_update(struct radeon_device *rdev, struct radeon_vm *vm, @@ -814,12 +812,10 @@ int radeon_vm_bo_rmv(struct radeon_device *rdev, { int r = 0;
- mutex_lock(&rdev->vm_manager.lock); mutex_lock(&bo_va->vm->mutex); - if (bo_va->soffset) { + if (bo_va->soffset) r = radeon_vm_bo_update(rdev, bo_va->vm, bo_va->bo, NULL); - } - mutex_unlock(&rdev->vm_manager.lock); + list_del(&bo_va->vm_list); mutex_unlock(&bo_va->vm->mutex); list_del(&bo_va->bo_list);
On Thu, Feb 20, 2014 at 3:20 PM, Christian König deathsimple@vodafone.de wrote:
From: Christian König christian.koenig@amd.com
Both are complex enough on their own.
Signed-off-by: Christian König christian.koenig@amd.com
For the series:
Reviewed-by: Alex Deucher alexander.deucher@amd.com
drivers/gpu/drm/radeon/Makefile | 2 +- drivers/gpu/drm/radeon/radeon_gart.c | 958 ---------------------------------- drivers/gpu/drm/radeon/radeon_vm.c | 981 +++++++++++++++++++++++++++++++++++ 3 files changed, 982 insertions(+), 959 deletions(-) create mode 100644 drivers/gpu/drm/radeon/radeon_vm.c
diff --git a/drivers/gpu/drm/radeon/Makefile b/drivers/gpu/drm/radeon/Makefile index ed60caa..0943353 100644 --- a/drivers/gpu/drm/radeon/Makefile +++ b/drivers/gpu/drm/radeon/Makefile @@ -80,7 +80,7 @@ radeon-y += radeon_device.o radeon_asic.o radeon_kms.o \ r600_dpm.o rs780_dpm.o rv6xx_dpm.o rv770_dpm.o rv730_dpm.o rv740_dpm.o \ rv770_smc.o cypress_dpm.o btc_dpm.o sumo_dpm.o sumo_smc.o trinity_dpm.o \ trinity_smc.o ni_dpm.o si_smc.o si_dpm.o kv_smc.o kv_dpm.o ci_smc.o \
ci_dpm.o dce6_afmt.o
ci_dpm.o dce6_afmt.o radeon_vm.o
# add async DMA block radeon-y += \ diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c index a8f9b46..2e72365 100644 --- a/drivers/gpu/drm/radeon/radeon_gart.c +++ b/drivers/gpu/drm/radeon/radeon_gart.c @@ -28,8 +28,6 @@ #include <drm/drmP.h> #include <drm/radeon_drm.h> #include "radeon.h" -#include "radeon_reg.h" -#include "radeon_trace.h"
/*
- GART
@@ -394,959 +392,3 @@ void radeon_gart_fini(struct radeon_device *rdev)
radeon_dummy_page_fini(rdev);
}
-/*
- GPUVM
- GPUVM is similar to the legacy gart on older asics, however
- rather than there being a single global gart table
- for the entire GPU, there are multiple VM page tables active
- at any given time. The VM page tables can contain a mix
- vram pages and system memory pages and system memory pages
- can be mapped as snooped (cached system pages) or unsnooped
- (uncached system pages).
- Each VM has an ID associated with it and there is a page table
- associated with each VMID. When execting a command buffer,
- the kernel tells the the ring what VMID to use for that command
- buffer. VMIDs are allocated dynamically as commands are submitted.
- The userspace drivers maintain their own address space and the kernel
- sets up their pages tables accordingly when they submit their
- command buffers and a VMID is assigned.
- Cayman/Trinity support up to 8 active VMs at any given time;
- SI supports 16.
- */
-/*
- vm helpers
- TODO bind a default page at vm initialization for default address
- */
-/**
- radeon_vm_num_pde - return the number of page directory entries
- @rdev: radeon_device pointer
- Calculate the number of page directory entries (cayman+).
- */
-static unsigned radeon_vm_num_pdes(struct radeon_device *rdev) -{
return rdev->vm_manager.max_pfn >> RADEON_VM_BLOCK_SIZE;
-}
-/**
- radeon_vm_directory_size - returns the size of the page directory in bytes
- @rdev: radeon_device pointer
- Calculate the size of the page directory in bytes (cayman+).
- */
-static unsigned radeon_vm_directory_size(struct radeon_device *rdev) -{
return RADEON_GPU_PAGE_ALIGN(radeon_vm_num_pdes(rdev) * 8);
-}
-/**
- radeon_vm_manager_init - init the vm manager
- @rdev: radeon_device pointer
- Init the vm manager (cayman+).
- Returns 0 for success, error for failure.
- */
-int radeon_vm_manager_init(struct radeon_device *rdev) -{
struct radeon_vm *vm;
struct radeon_bo_va *bo_va;
int r;
unsigned size;
if (!rdev->vm_manager.enabled) {
/* allocate enough for 2 full VM pts */
size = radeon_vm_directory_size(rdev);
size += rdev->vm_manager.max_pfn * 8;
size *= 2;
r = radeon_sa_bo_manager_init(rdev, &rdev->vm_manager.sa_manager,
RADEON_GPU_PAGE_ALIGN(size),
RADEON_VM_PTB_ALIGN_SIZE,
RADEON_GEM_DOMAIN_VRAM);
if (r) {
dev_err(rdev->dev, "failed to allocate vm bo (%dKB)\n",
(rdev->vm_manager.max_pfn * 8) >> 10);
return r;
}
r = radeon_asic_vm_init(rdev);
if (r)
return r;
rdev->vm_manager.enabled = true;
r = radeon_sa_bo_manager_start(rdev, &rdev->vm_manager.sa_manager);
if (r)
return r;
}
/* restore page table */
list_for_each_entry(vm, &rdev->vm_manager.lru_vm, list) {
if (vm->page_directory == NULL)
continue;
list_for_each_entry(bo_va, &vm->va, vm_list) {
bo_va->valid = false;
}
}
return 0;
-}
-/**
- radeon_vm_free_pt - free the page table for a specific vm
- @rdev: radeon_device pointer
- @vm: vm to unbind
- Free the page table of a specific vm (cayman+).
- Global and local mutex must be lock!
- */
-static void radeon_vm_free_pt(struct radeon_device *rdev,
struct radeon_vm *vm)
-{
struct radeon_bo_va *bo_va;
int i;
if (!vm->page_directory)
return;
list_del_init(&vm->list);
radeon_sa_bo_free(rdev, &vm->page_directory, vm->fence);
list_for_each_entry(bo_va, &vm->va, vm_list) {
bo_va->valid = false;
}
if (vm->page_tables == NULL)
return;
for (i = 0; i < radeon_vm_num_pdes(rdev); i++)
radeon_sa_bo_free(rdev, &vm->page_tables[i], vm->fence);
kfree(vm->page_tables);
-}
-/**
- radeon_vm_manager_fini - tear down the vm manager
- @rdev: radeon_device pointer
- Tear down the VM manager (cayman+).
- */
-void radeon_vm_manager_fini(struct radeon_device *rdev) -{
struct radeon_vm *vm, *tmp;
int i;
if (!rdev->vm_manager.enabled)
return;
mutex_lock(&rdev->vm_manager.lock);
/* free all allocated page tables */
list_for_each_entry_safe(vm, tmp, &rdev->vm_manager.lru_vm, list) {
mutex_lock(&vm->mutex);
radeon_vm_free_pt(rdev, vm);
mutex_unlock(&vm->mutex);
}
for (i = 0; i < RADEON_NUM_VM; ++i) {
radeon_fence_unref(&rdev->vm_manager.active[i]);
}
radeon_asic_vm_fini(rdev);
mutex_unlock(&rdev->vm_manager.lock);
radeon_sa_bo_manager_suspend(rdev, &rdev->vm_manager.sa_manager);
radeon_sa_bo_manager_fini(rdev, &rdev->vm_manager.sa_manager);
rdev->vm_manager.enabled = false;
-}
-/**
- radeon_vm_evict - evict page table to make room for new one
- @rdev: radeon_device pointer
- @vm: VM we want to allocate something for
- Evict a VM from the lru, making sure that it isn't @vm. (cayman+).
- Returns 0 for success, -ENOMEM for failure.
- Global and local mutex must be locked!
- */
-static int radeon_vm_evict(struct radeon_device *rdev, struct radeon_vm *vm) -{
struct radeon_vm *vm_evict;
if (list_empty(&rdev->vm_manager.lru_vm))
return -ENOMEM;
vm_evict = list_first_entry(&rdev->vm_manager.lru_vm,
struct radeon_vm, list);
if (vm_evict == vm)
return -ENOMEM;
mutex_lock(&vm_evict->mutex);
radeon_vm_free_pt(rdev, vm_evict);
mutex_unlock(&vm_evict->mutex);
return 0;
-}
-/**
- radeon_vm_alloc_pt - allocates a page table for a VM
- @rdev: radeon_device pointer
- @vm: vm to bind
- Allocate a page table for the requested vm (cayman+).
- Returns 0 for success, error for failure.
- Global and local mutex must be locked!
- */
-int radeon_vm_alloc_pt(struct radeon_device *rdev, struct radeon_vm *vm) -{
unsigned pd_size, pd_entries, pts_size;
struct radeon_ib ib;
int r;
if (vm == NULL) {
return -EINVAL;
}
if (vm->page_directory != NULL) {
return 0;
}
pd_size = radeon_vm_directory_size(rdev);
pd_entries = radeon_vm_num_pdes(rdev);
-retry:
r = radeon_sa_bo_new(rdev, &rdev->vm_manager.sa_manager,
&vm->page_directory, pd_size,
RADEON_VM_PTB_ALIGN_SIZE, false);
if (r == -ENOMEM) {
r = radeon_vm_evict(rdev, vm);
if (r)
return r;
goto retry;
} else if (r) {
return r;
}
vm->pd_gpu_addr = radeon_sa_bo_gpu_addr(vm->page_directory);
/* Initially clear the page directory */
r = radeon_ib_get(rdev, R600_RING_TYPE_DMA_INDEX, &ib,
NULL, pd_entries * 2 + 64);
if (r) {
radeon_sa_bo_free(rdev, &vm->page_directory, vm->fence);
return r;
}
ib.length_dw = 0;
radeon_asic_vm_set_page(rdev, &ib, vm->pd_gpu_addr,
0, pd_entries, 0, 0);
radeon_semaphore_sync_to(ib.semaphore, vm->fence);
r = radeon_ib_schedule(rdev, &ib, NULL);
if (r) {
radeon_ib_free(rdev, &ib);
radeon_sa_bo_free(rdev, &vm->page_directory, vm->fence);
return r;
}
radeon_fence_unref(&vm->fence);
vm->fence = radeon_fence_ref(ib.fence);
radeon_ib_free(rdev, &ib);
radeon_fence_unref(&vm->last_flush);
/* allocate page table array */
pts_size = radeon_vm_num_pdes(rdev) * sizeof(struct radeon_sa_bo *);
vm->page_tables = kzalloc(pts_size, GFP_KERNEL);
if (vm->page_tables == NULL) {
DRM_ERROR("Cannot allocate memory for page table array\n");
radeon_sa_bo_free(rdev, &vm->page_directory, vm->fence);
return -ENOMEM;
}
return 0;
-}
-/**
- radeon_vm_add_to_lru - add VMs page table to LRU list
- @rdev: radeon_device pointer
- @vm: vm to add to LRU
- Add the allocated page table to the LRU list (cayman+).
- Global mutex must be locked!
- */
-void radeon_vm_add_to_lru(struct radeon_device *rdev, struct radeon_vm *vm) -{
list_del_init(&vm->list);
list_add_tail(&vm->list, &rdev->vm_manager.lru_vm);
-}
-/**
- radeon_vm_grab_id - allocate the next free VMID
- @rdev: radeon_device pointer
- @vm: vm to allocate id for
- @ring: ring we want to submit job to
- Allocate an id for the vm (cayman+).
- Returns the fence we need to sync to (if any).
- Global and local mutex must be locked!
- */
-struct radeon_fence *radeon_vm_grab_id(struct radeon_device *rdev,
struct radeon_vm *vm, int ring)
-{
struct radeon_fence *best[RADEON_NUM_RINGS] = {};
unsigned choices[2] = {};
unsigned i;
/* check if the id is still valid */
if (vm->last_id_use && vm->last_id_use == rdev->vm_manager.active[vm->id])
return NULL;
/* we definately need to flush */
radeon_fence_unref(&vm->last_flush);
/* skip over VMID 0, since it is the system VM */
for (i = 1; i < rdev->vm_manager.nvm; ++i) {
struct radeon_fence *fence = rdev->vm_manager.active[i];
if (fence == NULL) {
/* found a free one */
vm->id = i;
trace_radeon_vm_grab_id(vm->id, ring);
return NULL;
}
if (radeon_fence_is_earlier(fence, best[fence->ring])) {
best[fence->ring] = fence;
choices[fence->ring == ring ? 0 : 1] = i;
}
}
for (i = 0; i < 2; ++i) {
if (choices[i]) {
vm->id = choices[i];
trace_radeon_vm_grab_id(vm->id, ring);
return rdev->vm_manager.active[choices[i]];
}
}
/* should never happen */
BUG();
return NULL;
-}
-/**
- radeon_vm_fence - remember fence for vm
- @rdev: radeon_device pointer
- @vm: vm we want to fence
- @fence: fence to remember
- Fence the vm (cayman+).
- Set the fence used to protect page table and id.
- Global and local mutex must be locked!
- */
-void radeon_vm_fence(struct radeon_device *rdev,
struct radeon_vm *vm,
struct radeon_fence *fence)
-{
radeon_fence_unref(&rdev->vm_manager.active[vm->id]);
rdev->vm_manager.active[vm->id] = radeon_fence_ref(fence);
radeon_fence_unref(&vm->fence);
vm->fence = radeon_fence_ref(fence);
radeon_fence_unref(&vm->last_id_use);
vm->last_id_use = radeon_fence_ref(fence);
-}
-/**
- radeon_vm_bo_find - find the bo_va for a specific vm & bo
- @vm: requested vm
- @bo: requested buffer object
- Find @bo inside the requested vm (cayman+).
- Search inside the @bos vm list for the requested vm
- Returns the found bo_va or NULL if none is found
- Object has to be reserved!
- */
-struct radeon_bo_va *radeon_vm_bo_find(struct radeon_vm *vm,
struct radeon_bo *bo)
-{
struct radeon_bo_va *bo_va;
list_for_each_entry(bo_va, &bo->va, bo_list) {
if (bo_va->vm == vm) {
return bo_va;
}
}
return NULL;
-}
-/**
- radeon_vm_bo_add - add a bo to a specific vm
- @rdev: radeon_device pointer
- @vm: requested vm
- @bo: radeon buffer object
- Add @bo into the requested vm (cayman+).
- Add @bo to the list of bos associated with the vm
- Returns newly added bo_va or NULL for failure
- Object has to be reserved!
- */
-struct radeon_bo_va *radeon_vm_bo_add(struct radeon_device *rdev,
struct radeon_vm *vm,
struct radeon_bo *bo)
-{
struct radeon_bo_va *bo_va;
bo_va = kzalloc(sizeof(struct radeon_bo_va), GFP_KERNEL);
if (bo_va == NULL) {
return NULL;
}
bo_va->vm = vm;
bo_va->bo = bo;
bo_va->soffset = 0;
bo_va->eoffset = 0;
bo_va->flags = 0;
bo_va->valid = false;
bo_va->ref_count = 1;
INIT_LIST_HEAD(&bo_va->bo_list);
INIT_LIST_HEAD(&bo_va->vm_list);
mutex_lock(&vm->mutex);
list_add(&bo_va->vm_list, &vm->va);
list_add_tail(&bo_va->bo_list, &bo->va);
mutex_unlock(&vm->mutex);
return bo_va;
-}
-/**
- radeon_vm_bo_set_addr - set bos virtual address inside a vm
- @rdev: radeon_device pointer
- @bo_va: bo_va to store the address
- @soffset: requested offset of the buffer in the VM address space
- @flags: attributes of pages (read/write/valid/etc.)
- Set offset of @bo_va (cayman+).
- Validate and set the offset requested within the vm address space.
- Returns 0 for success, error for failure.
- Object has to be reserved!
- */
-int radeon_vm_bo_set_addr(struct radeon_device *rdev,
struct radeon_bo_va *bo_va,
uint64_t soffset,
uint32_t flags)
-{
uint64_t size = radeon_bo_size(bo_va->bo);
uint64_t eoffset, last_offset = 0;
struct radeon_vm *vm = bo_va->vm;
struct radeon_bo_va *tmp;
struct list_head *head;
unsigned last_pfn;
if (soffset) {
/* make sure object fit at this offset */
eoffset = soffset + size;
if (soffset >= eoffset) {
return -EINVAL;
}
last_pfn = eoffset / RADEON_GPU_PAGE_SIZE;
if (last_pfn > rdev->vm_manager.max_pfn) {
dev_err(rdev->dev, "va above limit (0x%08X > 0x%08X)\n",
last_pfn, rdev->vm_manager.max_pfn);
return -EINVAL;
}
} else {
eoffset = last_pfn = 0;
}
mutex_lock(&vm->mutex);
head = &vm->va;
last_offset = 0;
list_for_each_entry(tmp, &vm->va, vm_list) {
if (bo_va == tmp) {
/* skip over currently modified bo */
continue;
}
if (soffset >= last_offset && eoffset <= tmp->soffset) {
/* bo can be added before this one */
break;
}
if (eoffset > tmp->soffset && soffset < tmp->eoffset) {
/* bo and tmp overlap, invalid offset */
dev_err(rdev->dev, "bo %p va 0x%08X conflict with (bo %p 0x%08X 0x%08X)\n",
bo_va->bo, (unsigned)bo_va->soffset, tmp->bo,
(unsigned)tmp->soffset, (unsigned)tmp->eoffset);
mutex_unlock(&vm->mutex);
return -EINVAL;
}
last_offset = tmp->eoffset;
head = &tmp->vm_list;
}
bo_va->soffset = soffset;
bo_va->eoffset = eoffset;
bo_va->flags = flags;
bo_va->valid = false;
list_move(&bo_va->vm_list, head);
mutex_unlock(&vm->mutex);
return 0;
-}
-/**
- radeon_vm_map_gart - get the physical address of a gart page
- @rdev: radeon_device pointer
- @addr: the unmapped addr
- Look up the physical address of the page that the pte resolves
- to (cayman+).
- Returns the physical address of the page.
- */
-uint64_t radeon_vm_map_gart(struct radeon_device *rdev, uint64_t addr) -{
uint64_t result;
/* page table offset */
result = rdev->gart.pages_addr[addr >> PAGE_SHIFT];
/* in case cpu page size != gpu page size*/
result |= addr & (~PAGE_MASK);
return result;
-}
-/**
- radeon_vm_page_flags - translate page flags to what the hw uses
- @flags: flags comming from userspace
- Translate the flags the userspace ABI uses to hw flags.
- */
-static uint32_t radeon_vm_page_flags(uint32_t flags) -{
uint32_t hw_flags = 0;
hw_flags |= (flags & RADEON_VM_PAGE_VALID) ? R600_PTE_VALID : 0;
hw_flags |= (flags & RADEON_VM_PAGE_READABLE) ? R600_PTE_READABLE : 0;
hw_flags |= (flags & RADEON_VM_PAGE_WRITEABLE) ? R600_PTE_WRITEABLE : 0;
if (flags & RADEON_VM_PAGE_SYSTEM) {
hw_flags |= R600_PTE_SYSTEM;
hw_flags |= (flags & RADEON_VM_PAGE_SNOOPED) ? R600_PTE_SNOOPED : 0;
}
return hw_flags;
-}
-/**
- radeon_vm_update_pdes - make sure that page directory is valid
- @rdev: radeon_device pointer
- @vm: requested vm
- @start: start of GPU address range
- @end: end of GPU address range
- Allocates new page tables if necessary
- and updates the page directory (cayman+).
- Returns 0 for success, error for failure.
- Global and local mutex must be locked!
- */
-static int radeon_vm_update_pdes(struct radeon_device *rdev,
struct radeon_vm *vm,
struct radeon_ib *ib,
uint64_t start, uint64_t end)
-{
static const uint32_t incr = RADEON_VM_PTE_COUNT * 8;
uint64_t last_pde = ~0, last_pt = ~0;
unsigned count = 0;
uint64_t pt_idx;
int r;
start = (start / RADEON_GPU_PAGE_SIZE) >> RADEON_VM_BLOCK_SIZE;
end = (end / RADEON_GPU_PAGE_SIZE) >> RADEON_VM_BLOCK_SIZE;
/* walk over the address space and update the page directory */
for (pt_idx = start; pt_idx <= end; ++pt_idx) {
uint64_t pde, pt;
if (vm->page_tables[pt_idx])
continue;
-retry:
r = radeon_sa_bo_new(rdev, &rdev->vm_manager.sa_manager,
&vm->page_tables[pt_idx],
RADEON_VM_PTE_COUNT * 8,
RADEON_GPU_PAGE_SIZE, false);
if (r == -ENOMEM) {
r = radeon_vm_evict(rdev, vm);
if (r)
return r;
goto retry;
} else if (r) {
return r;
}
pde = vm->pd_gpu_addr + pt_idx * 8;
pt = radeon_sa_bo_gpu_addr(vm->page_tables[pt_idx]);
if (((last_pde + 8 * count) != pde) ||
((last_pt + incr * count) != pt)) {
if (count) {
radeon_asic_vm_set_page(rdev, ib, last_pde,
last_pt, count, incr,
R600_PTE_VALID);
count *= RADEON_VM_PTE_COUNT;
radeon_asic_vm_set_page(rdev, ib, last_pt, 0,
count, 0, 0);
}
count = 1;
last_pde = pde;
last_pt = pt;
} else {
++count;
}
}
if (count) {
radeon_asic_vm_set_page(rdev, ib, last_pde, last_pt, count,
incr, R600_PTE_VALID);
count *= RADEON_VM_PTE_COUNT;
radeon_asic_vm_set_page(rdev, ib, last_pt, 0,
count, 0, 0);
}
return 0;
-}
-/**
- radeon_vm_update_ptes - make sure that page tables are valid
- @rdev: radeon_device pointer
- @vm: requested vm
- @start: start of GPU address range
- @end: end of GPU address range
- @dst: destination address to map to
- @flags: mapping flags
- Update the page tables in the range @start - @end (cayman+).
- Global and local mutex must be locked!
- */
-static void radeon_vm_update_ptes(struct radeon_device *rdev,
struct radeon_vm *vm,
struct radeon_ib *ib,
uint64_t start, uint64_t end,
uint64_t dst, uint32_t flags)
-{
static const uint64_t mask = RADEON_VM_PTE_COUNT - 1;
uint64_t last_pte = ~0, last_dst = ~0;
unsigned count = 0;
uint64_t addr;
start = start / RADEON_GPU_PAGE_SIZE;
end = end / RADEON_GPU_PAGE_SIZE;
/* walk over the address space and update the page tables */
for (addr = start; addr < end; ) {
uint64_t pt_idx = addr >> RADEON_VM_BLOCK_SIZE;
unsigned nptes;
uint64_t pte;
if ((addr & ~mask) == (end & ~mask))
nptes = end - addr;
else
nptes = RADEON_VM_PTE_COUNT - (addr & mask);
pte = radeon_sa_bo_gpu_addr(vm->page_tables[pt_idx]);
pte += (addr & mask) * 8;
if ((last_pte + 8 * count) != pte) {
if (count) {
radeon_asic_vm_set_page(rdev, ib, last_pte,
last_dst, count,
RADEON_GPU_PAGE_SIZE,
flags);
}
count = nptes;
last_pte = pte;
last_dst = dst;
} else {
count += nptes;
}
addr += nptes;
dst += nptes * RADEON_GPU_PAGE_SIZE;
}
if (count) {
radeon_asic_vm_set_page(rdev, ib, last_pte,
last_dst, count,
RADEON_GPU_PAGE_SIZE, flags);
}
-}
-/**
- radeon_vm_bo_update - map a bo into the vm page table
- @rdev: radeon_device pointer
- @vm: requested vm
- @bo: radeon buffer object
- @mem: ttm mem
- Fill in the page table entries for @bo (cayman+).
- Returns 0 for success, -EINVAL for failure.
- Object have to be reserved & global and local mutex must be locked!
- */
-int radeon_vm_bo_update(struct radeon_device *rdev,
struct radeon_vm *vm,
struct radeon_bo *bo,
struct ttm_mem_reg *mem)
-{
struct radeon_ib ib;
struct radeon_bo_va *bo_va;
unsigned nptes, npdes, ndw;
uint64_t addr;
int r;
/* nothing to do if vm isn't bound */
if (vm->page_directory == NULL)
return 0;
bo_va = radeon_vm_bo_find(vm, bo);
if (bo_va == NULL) {
dev_err(rdev->dev, "bo %p not in vm %p\n", bo, vm);
return -EINVAL;
}
if (!bo_va->soffset) {
dev_err(rdev->dev, "bo %p don't has a mapping in vm %p\n",
bo, vm);
return -EINVAL;
}
if ((bo_va->valid && mem) || (!bo_va->valid && mem == NULL))
return 0;
bo_va->flags &= ~RADEON_VM_PAGE_VALID;
bo_va->flags &= ~RADEON_VM_PAGE_SYSTEM;
if (mem) {
addr = mem->start << PAGE_SHIFT;
if (mem->mem_type != TTM_PL_SYSTEM) {
bo_va->flags |= RADEON_VM_PAGE_VALID;
bo_va->valid = true;
}
if (mem->mem_type == TTM_PL_TT) {
bo_va->flags |= RADEON_VM_PAGE_SYSTEM;
} else {
addr += rdev->vm_manager.vram_base_offset;
}
} else {
addr = 0;
bo_va->valid = false;
}
trace_radeon_vm_bo_update(bo_va);
nptes = radeon_bo_ngpu_pages(bo);
/* assume two extra pdes in case the mapping overlaps the borders */
npdes = (nptes >> RADEON_VM_BLOCK_SIZE) + 2;
/* padding, etc. */
ndw = 64;
if (RADEON_VM_BLOCK_SIZE > 11)
/* reserve space for one header for every 2k dwords */
ndw += (nptes >> 11) * 4;
else
/* reserve space for one header for
every (1 << BLOCK_SIZE) entries */
ndw += (nptes >> RADEON_VM_BLOCK_SIZE) * 4;
/* reserve space for pte addresses */
ndw += nptes * 2;
/* reserve space for one header for every 2k dwords */
ndw += (npdes >> 11) * 4;
/* reserve space for pde addresses */
ndw += npdes * 2;
/* reserve space for clearing new page tables */
ndw += npdes * 2 * RADEON_VM_PTE_COUNT;
/* update too big for an IB */
if (ndw > 0xfffff)
return -ENOMEM;
r = radeon_ib_get(rdev, R600_RING_TYPE_DMA_INDEX, &ib, NULL, ndw * 4);
if (r)
return r;
ib.length_dw = 0;
r = radeon_vm_update_pdes(rdev, vm, &ib, bo_va->soffset, bo_va->eoffset);
if (r) {
radeon_ib_free(rdev, &ib);
return r;
}
radeon_vm_update_ptes(rdev, vm, &ib, bo_va->soffset, bo_va->eoffset,
addr, radeon_vm_page_flags(bo_va->flags));
radeon_semaphore_sync_to(ib.semaphore, vm->fence);
r = radeon_ib_schedule(rdev, &ib, NULL);
if (r) {
radeon_ib_free(rdev, &ib);
return r;
}
radeon_fence_unref(&vm->fence);
vm->fence = radeon_fence_ref(ib.fence);
radeon_ib_free(rdev, &ib);
radeon_fence_unref(&vm->last_flush);
return 0;
-}
-/**
- radeon_vm_bo_rmv - remove a bo to a specific vm
- @rdev: radeon_device pointer
- @bo_va: requested bo_va
- Remove @bo_va->bo from the requested vm (cayman+).
- Remove @bo_va->bo from the list of bos associated with the bo_va->vm and
- remove the ptes for @bo_va in the page table.
- Returns 0 for success.
- Object have to be reserved!
- */
-int radeon_vm_bo_rmv(struct radeon_device *rdev,
struct radeon_bo_va *bo_va)
-{
int r = 0;
mutex_lock(&rdev->vm_manager.lock);
mutex_lock(&bo_va->vm->mutex);
if (bo_va->soffset) {
r = radeon_vm_bo_update(rdev, bo_va->vm, bo_va->bo, NULL);
}
mutex_unlock(&rdev->vm_manager.lock);
list_del(&bo_va->vm_list);
mutex_unlock(&bo_va->vm->mutex);
list_del(&bo_va->bo_list);
kfree(bo_va);
return r;
-}
-/**
- radeon_vm_bo_invalidate - mark the bo as invalid
- @rdev: radeon_device pointer
- @vm: requested vm
- @bo: radeon buffer object
- Mark @bo as invalid (cayman+).
- */
-void radeon_vm_bo_invalidate(struct radeon_device *rdev,
struct radeon_bo *bo)
-{
struct radeon_bo_va *bo_va;
list_for_each_entry(bo_va, &bo->va, bo_list) {
bo_va->valid = false;
}
-}
-/**
- radeon_vm_init - initialize a vm instance
- @rdev: radeon_device pointer
- @vm: requested vm
- Init @vm fields (cayman+).
- */
-void radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm) -{
vm->id = 0;
vm->fence = NULL;
vm->last_flush = NULL;
vm->last_id_use = NULL;
mutex_init(&vm->mutex);
INIT_LIST_HEAD(&vm->list);
INIT_LIST_HEAD(&vm->va);
-}
-/**
- radeon_vm_fini - tear down a vm instance
- @rdev: radeon_device pointer
- @vm: requested vm
- Tear down @vm (cayman+).
- Unbind the VM and remove all bos from the vm bo list
- */
-void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm) -{
struct radeon_bo_va *bo_va, *tmp;
int r;
mutex_lock(&rdev->vm_manager.lock);
mutex_lock(&vm->mutex);
radeon_vm_free_pt(rdev, vm);
mutex_unlock(&rdev->vm_manager.lock);
if (!list_empty(&vm->va)) {
dev_err(rdev->dev, "still active bo inside vm\n");
}
list_for_each_entry_safe(bo_va, tmp, &vm->va, vm_list) {
list_del_init(&bo_va->vm_list);
r = radeon_bo_reserve(bo_va->bo, false);
if (!r) {
list_del_init(&bo_va->bo_list);
radeon_bo_unreserve(bo_va->bo);
kfree(bo_va);
}
}
radeon_fence_unref(&vm->fence);
radeon_fence_unref(&vm->last_flush);
radeon_fence_unref(&vm->last_id_use);
mutex_unlock(&vm->mutex);
-} diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c new file mode 100644 index 0000000..433b1eb --- /dev/null +++ b/drivers/gpu/drm/radeon/radeon_vm.c @@ -0,0 +1,981 @@ +/*
- Copyright 2008 Advanced Micro Devices, Inc.
- Copyright 2008 Red Hat Inc.
- Copyright 2009 Jerome Glisse.
- Permission is hereby granted, free of charge, to any person obtaining a
- copy of this software and associated documentation files (the "Software"),
- to deal in the Software without restriction, including without limitation
- the rights to use, copy, modify, merge, publish, distribute, sublicense,
- and/or sell copies of the Software, and to permit persons to whom the
- Software is furnished to do so, subject to the following conditions:
- The above copyright notice and this permission notice shall be included in
- all copies or substantial portions of the Software.
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- OTHER DEALINGS IN THE SOFTWARE.
- Authors: Dave Airlie
Alex Deucher
Jerome Glisse
- */
+#include <drm/drmP.h> +#include <drm/radeon_drm.h> +#include "radeon.h" +#include "radeon_trace.h"
+/*
- GPUVM
- GPUVM is similar to the legacy gart on older asics, however
- rather than there being a single global gart table
- for the entire GPU, there are multiple VM page tables active
- at any given time. The VM page tables can contain a mix
- vram pages and system memory pages and system memory pages
- can be mapped as snooped (cached system pages) or unsnooped
- (uncached system pages).
- Each VM has an ID associated with it and there is a page table
- associated with each VMID. When execting a command buffer,
- the kernel tells the the ring what VMID to use for that command
- buffer. VMIDs are allocated dynamically as commands are submitted.
- The userspace drivers maintain their own address space and the kernel
- sets up their pages tables accordingly when they submit their
- command buffers and a VMID is assigned.
- Cayman/Trinity support up to 8 active VMs at any given time;
- SI supports 16.
- */
+/**
- radeon_vm_num_pde - return the number of page directory entries
- @rdev: radeon_device pointer
- Calculate the number of page directory entries (cayman+).
- */
+static unsigned radeon_vm_num_pdes(struct radeon_device *rdev) +{
return rdev->vm_manager.max_pfn >> RADEON_VM_BLOCK_SIZE;
+}
+/**
- radeon_vm_directory_size - returns the size of the page directory in bytes
- @rdev: radeon_device pointer
- Calculate the size of the page directory in bytes (cayman+).
- */
+static unsigned radeon_vm_directory_size(struct radeon_device *rdev) +{
return RADEON_GPU_PAGE_ALIGN(radeon_vm_num_pdes(rdev) * 8);
+}
+/**
- radeon_vm_manager_init - init the vm manager
- @rdev: radeon_device pointer
- Init the vm manager (cayman+).
- Returns 0 for success, error for failure.
- */
+int radeon_vm_manager_init(struct radeon_device *rdev) +{
struct radeon_vm *vm;
struct radeon_bo_va *bo_va;
int r;
unsigned size;
if (!rdev->vm_manager.enabled) {
/* allocate enough for 2 full VM pts */
size = radeon_vm_directory_size(rdev);
size += rdev->vm_manager.max_pfn * 8;
size *= 2;
r = radeon_sa_bo_manager_init(rdev, &rdev->vm_manager.sa_manager,
RADEON_GPU_PAGE_ALIGN(size),
RADEON_VM_PTB_ALIGN_SIZE,
RADEON_GEM_DOMAIN_VRAM);
if (r) {
dev_err(rdev->dev, "failed to allocate vm bo (%dKB)\n",
(rdev->vm_manager.max_pfn * 8) >> 10);
return r;
}
r = radeon_asic_vm_init(rdev);
if (r)
return r;
rdev->vm_manager.enabled = true;
r = radeon_sa_bo_manager_start(rdev, &rdev->vm_manager.sa_manager);
if (r)
return r;
}
/* restore page table */
list_for_each_entry(vm, &rdev->vm_manager.lru_vm, list) {
if (vm->page_directory == NULL)
continue;
list_for_each_entry(bo_va, &vm->va, vm_list) {
bo_va->valid = false;
}
}
return 0;
+}
+/**
+ * radeon_vm_free_pt - free the page table for a specific vm
+ *
+ * @rdev: radeon_device pointer
+ * @vm: vm to unbind
+ *
+ * Free the page table of a specific vm (cayman+).
+ *
+ * Global and local mutex must be locked!
+ */
+static void radeon_vm_free_pt(struct radeon_device *rdev,
struct radeon_vm *vm)
+{
struct radeon_bo_va *bo_va;
int i;
if (!vm->page_directory)
return;
list_del_init(&vm->list);
radeon_sa_bo_free(rdev, &vm->page_directory, vm->fence);
list_for_each_entry(bo_va, &vm->va, vm_list) {
bo_va->valid = false;
}
if (vm->page_tables == NULL)
return;
for (i = 0; i < radeon_vm_num_pdes(rdev); i++)
radeon_sa_bo_free(rdev, &vm->page_tables[i], vm->fence);
kfree(vm->page_tables);
+}
+/**
+ * radeon_vm_manager_fini - tear down the vm manager
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Tear down the VM manager (cayman+).
+ */
+void radeon_vm_manager_fini(struct radeon_device *rdev)
+{
struct radeon_vm *vm, *tmp;
int i;
if (!rdev->vm_manager.enabled)
return;
mutex_lock(&rdev->vm_manager.lock);
/* free all allocated page tables */
list_for_each_entry_safe(vm, tmp, &rdev->vm_manager.lru_vm, list) {
mutex_lock(&vm->mutex);
radeon_vm_free_pt(rdev, vm);
mutex_unlock(&vm->mutex);
}
for (i = 0; i < RADEON_NUM_VM; ++i) {
radeon_fence_unref(&rdev->vm_manager.active[i]);
}
radeon_asic_vm_fini(rdev);
mutex_unlock(&rdev->vm_manager.lock);
radeon_sa_bo_manager_suspend(rdev, &rdev->vm_manager.sa_manager);
radeon_sa_bo_manager_fini(rdev, &rdev->vm_manager.sa_manager);
rdev->vm_manager.enabled = false;
+}
+/**
+ * radeon_vm_evict - evict a page table to make room for a new one
+ *
+ * @rdev: radeon_device pointer
+ * @vm: VM we want to allocate something for
+ *
+ * Evict a VM from the lru, making sure that it isn't @vm (cayman+).
+ * Returns 0 for success, -ENOMEM for failure.
+ *
+ * Global and local mutex must be locked!
+ */
+static int radeon_vm_evict(struct radeon_device *rdev, struct radeon_vm *vm)
+{
struct radeon_vm *vm_evict;
if (list_empty(&rdev->vm_manager.lru_vm))
return -ENOMEM;
vm_evict = list_first_entry(&rdev->vm_manager.lru_vm,
struct radeon_vm, list);
if (vm_evict == vm)
return -ENOMEM;
mutex_lock(&vm_evict->mutex);
radeon_vm_free_pt(rdev, vm_evict);
mutex_unlock(&vm_evict->mutex);
return 0;
+}
+/**
+ * radeon_vm_alloc_pt - allocates a page table for a VM
+ *
+ * @rdev: radeon_device pointer
+ * @vm: vm to bind
+ *
+ * Allocate a page table for the requested vm (cayman+).
+ * Returns 0 for success, error for failure.
+ *
+ * Global and local mutex must be locked!
+ */
+int radeon_vm_alloc_pt(struct radeon_device *rdev, struct radeon_vm *vm)
+{
unsigned pd_size, pd_entries, pts_size;
struct radeon_ib ib;
int r;
if (vm == NULL) {
return -EINVAL;
}
if (vm->page_directory != NULL) {
return 0;
}
pd_size = radeon_vm_directory_size(rdev);
pd_entries = radeon_vm_num_pdes(rdev);
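/* if the backing SA manager is full, evict the least recently used
 * VM's tables and retry until the allocation succeeds or there is
 * nothing left to evict */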
+retry:
r = radeon_sa_bo_new(rdev, &rdev->vm_manager.sa_manager,
&vm->page_directory, pd_size,
RADEON_VM_PTB_ALIGN_SIZE, false);
if (r == -ENOMEM) {
r = radeon_vm_evict(rdev, vm);
if (r)
return r;
goto retry;
} else if (r) {
return r;
}
vm->pd_gpu_addr = radeon_sa_bo_gpu_addr(vm->page_directory);
/* Initially clear the page directory */
r = radeon_ib_get(rdev, R600_RING_TYPE_DMA_INDEX, &ib,
NULL, pd_entries * 2 + 64);
if (r) {
radeon_sa_bo_free(rdev, &vm->page_directory, vm->fence);
return r;
}
ib.length_dw = 0;
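/* a single set_page call writes pd_entries zeroed entries (addr 0,
 * flags 0), marking every page directory entry invalid */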
radeon_asic_vm_set_page(rdev, &ib, vm->pd_gpu_addr,
0, pd_entries, 0, 0);
radeon_semaphore_sync_to(ib.semaphore, vm->fence);
r = radeon_ib_schedule(rdev, &ib, NULL);
if (r) {
radeon_ib_free(rdev, &ib);
radeon_sa_bo_free(rdev, &vm->page_directory, vm->fence);
return r;
}
radeon_fence_unref(&vm->fence);
vm->fence = radeon_fence_ref(ib.fence);
radeon_ib_free(rdev, &ib);
radeon_fence_unref(&vm->last_flush);
/* allocate page table array */
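/* only the pointer array is allocated here; the page tables themselves
 * are allocated on demand in radeon_vm_update_pdes() */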
pts_size = radeon_vm_num_pdes(rdev) * sizeof(struct radeon_sa_bo *);
vm->page_tables = kzalloc(pts_size, GFP_KERNEL);
if (vm->page_tables == NULL) {
DRM_ERROR("Cannot allocate memory for page table array\n");
radeon_sa_bo_free(rdev, &vm->page_directory, vm->fence);
return -ENOMEM;
}
return 0;
+}
+/**
+ * radeon_vm_add_to_lru - add a VM's page table to the LRU list
+ *
+ * @rdev: radeon_device pointer
+ * @vm: vm to add to LRU
+ *
+ * Add the allocated page table to the LRU list (cayman+).
+ *
+ * Global mutex must be locked!
+ */
+void radeon_vm_add_to_lru(struct radeon_device *rdev, struct radeon_vm *vm)
+{
list_del_init(&vm->list);
list_add_tail(&vm->list, &rdev->vm_manager.lru_vm);
+}
+/**
+ * radeon_vm_grab_id - allocate the next free VMID
+ *
+ * @rdev: radeon_device pointer
+ * @vm: vm to allocate id for
+ * @ring: ring we want to submit job to
+ *
+ * Allocate an id for the vm (cayman+).
+ * Returns the fence we need to sync to (if any).
+ *
+ * Global and local mutex must be locked!
+ */
+struct radeon_fence *radeon_vm_grab_id(struct radeon_device *rdev,
struct radeon_vm *vm, int ring)
+{
struct radeon_fence *best[RADEON_NUM_RINGS] = {};
unsigned choices[2] = {};
unsigned i;
/* check if the id is still valid */
if (vm->last_id_use && vm->last_id_use == rdev->vm_manager.active[vm->id])
return NULL;
/* we definitely need to flush */
radeon_fence_unref(&vm->last_flush);
/* skip over VMID 0, since it is the system VM */
for (i = 1; i < rdev->vm_manager.nvm; ++i) {
struct radeon_fence *fence = rdev->vm_manager.active[i];
if (fence == NULL) {
/* found a free one */
vm->id = i;
trace_radeon_vm_grab_id(vm->id, ring);
return NULL;
}
if (radeon_fence_is_earlier(fence, best[fence->ring])) {
best[fence->ring] = fence;
choices[fence->ring == ring ? 0 : 1] = i;
}
}
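/* prefer a VMID whose last use was on the requested ring (choices[0])
 * so the required sync stays on that ring; otherwise fall back to one
 * last used on another ring (choices[1]) */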
for (i = 0; i < 2; ++i) {
if (choices[i]) {
vm->id = choices[i];
trace_radeon_vm_grab_id(vm->id, ring);
return rdev->vm_manager.active[choices[i]];
}
}
/* should never happen */
BUG();
return NULL;
+}
+/**
+ * radeon_vm_fence - remember fence for vm
+ *
+ * @rdev: radeon_device pointer
+ * @vm: vm we want to fence
+ * @fence: fence to remember
+ *
+ * Fence the vm (cayman+).
+ * Set the fence used to protect page table and id.
+ *
+ * Global and local mutex must be locked!
+ */
+void radeon_vm_fence(struct radeon_device *rdev,
struct radeon_vm *vm,
struct radeon_fence *fence)
+{
radeon_fence_unref(&rdev->vm_manager.active[vm->id]);
rdev->vm_manager.active[vm->id] = radeon_fence_ref(fence);
radeon_fence_unref(&vm->fence);
vm->fence = radeon_fence_ref(fence);
radeon_fence_unref(&vm->last_id_use);
vm->last_id_use = radeon_fence_ref(fence);
+}
+/**
+ * radeon_vm_bo_find - find the bo_va for a specific vm & bo
+ *
+ * @vm: requested vm
+ * @bo: requested buffer object
+ *
+ * Find @bo inside the requested vm (cayman+).
+ * Search inside the @bo's vm list for the requested vm.
+ * Returns the found bo_va or NULL if none is found.
+ *
+ * Object has to be reserved!
+ */
+struct radeon_bo_va *radeon_vm_bo_find(struct radeon_vm *vm,
struct radeon_bo *bo)
+{
struct radeon_bo_va *bo_va;
list_for_each_entry(bo_va, &bo->va, bo_list) {
if (bo_va->vm == vm) {
return bo_va;
}
}
return NULL;
+}
+/**
+ * radeon_vm_bo_add - add a bo to a specific vm
+ *
+ * @rdev: radeon_device pointer
+ * @vm: requested vm
+ * @bo: radeon buffer object
+ *
+ * Add @bo into the requested vm (cayman+).
+ * Add @bo to the list of bos associated with the vm.
+ * Returns newly added bo_va or NULL for failure.
+ *
+ * Object has to be reserved!
+ */
+struct radeon_bo_va *radeon_vm_bo_add(struct radeon_device *rdev,
struct radeon_vm *vm,
struct radeon_bo *bo)
+{
struct radeon_bo_va *bo_va;
bo_va = kzalloc(sizeof(struct radeon_bo_va), GFP_KERNEL);
if (bo_va == NULL) {
return NULL;
}
bo_va->vm = vm;
bo_va->bo = bo;
bo_va->soffset = 0;
bo_va->eoffset = 0;
bo_va->flags = 0;
bo_va->valid = false;
bo_va->ref_count = 1;
INIT_LIST_HEAD(&bo_va->bo_list);
INIT_LIST_HEAD(&bo_va->vm_list);
mutex_lock(&vm->mutex);
list_add(&bo_va->vm_list, &vm->va);
list_add_tail(&bo_va->bo_list, &bo->va);
mutex_unlock(&vm->mutex);
return bo_va;
+}
+/**
+ * radeon_vm_bo_set_addr - set the bo's virtual address inside a vm
+ *
+ * @rdev: radeon_device pointer
+ * @bo_va: bo_va to store the address
+ * @soffset: requested offset of the buffer in the VM address space
+ * @flags: attributes of pages (read/write/valid/etc.)
+ *
+ * Set offset of @bo_va (cayman+).
+ * Validate and set the offset requested within the vm address space.
+ * Returns 0 for success, error for failure.
+ *
+ * Object has to be reserved!
+ */
+int radeon_vm_bo_set_addr(struct radeon_device *rdev,
struct radeon_bo_va *bo_va,
uint64_t soffset,
uint32_t flags)
+{
uint64_t size = radeon_bo_size(bo_va->bo);
uint64_t eoffset, last_offset = 0;
struct radeon_vm *vm = bo_va->vm;
struct radeon_bo_va *tmp;
struct list_head *head;
unsigned last_pfn;
if (soffset) {
/* make sure object fit at this offset */
eoffset = soffset + size;
if (soffset >= eoffset) {
return -EINVAL;
}
last_pfn = eoffset / RADEON_GPU_PAGE_SIZE;
if (last_pfn > rdev->vm_manager.max_pfn) {
dev_err(rdev->dev, "va above limit (0x%08X > 0x%08X)\n",
last_pfn, rdev->vm_manager.max_pfn);
return -EINVAL;
}
} else {
eoffset = last_pfn = 0;
}
mutex_lock(&vm->mutex);
head = &vm->va;
last_offset = 0;
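/* the per-vm va list is kept sorted by offset; walk it to find the
 * insertion point and make sure [soffset, eoffset) does not overlap
 * an existing mapping */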
list_for_each_entry(tmp, &vm->va, vm_list) {
if (bo_va == tmp) {
/* skip over currently modified bo */
continue;
}
if (soffset >= last_offset && eoffset <= tmp->soffset) {
/* bo can be added before this one */
break;
}
if (eoffset > tmp->soffset && soffset < tmp->eoffset) {
/* bo and tmp overlap, invalid offset */
dev_err(rdev->dev, "bo %p va 0x%08X conflict with (bo %p 0x%08X 0x%08X)\n",
bo_va->bo, (unsigned)bo_va->soffset, tmp->bo,
(unsigned)tmp->soffset, (unsigned)tmp->eoffset);
mutex_unlock(&vm->mutex);
return -EINVAL;
}
last_offset = tmp->eoffset;
head = &tmp->vm_list;
}
bo_va->soffset = soffset;
bo_va->eoffset = eoffset;
bo_va->flags = flags;
bo_va->valid = false;
list_move(&bo_va->vm_list, head);
mutex_unlock(&vm->mutex);
return 0;
+}
+/**
+ * radeon_vm_map_gart - get the physical address of a gart page
+ *
+ * @rdev: radeon_device pointer
+ * @addr: the unmapped addr
+ *
+ * Look up the physical address of the page that the pte resolves
+ * to (cayman+).
+ * Returns the physical address of the page.
+ */
+uint64_t radeon_vm_map_gart(struct radeon_device *rdev, uint64_t addr)
+{
uint64_t result;
/* page table offset */
result = rdev->gart.pages_addr[addr >> PAGE_SHIFT];
/* in case cpu page size != gpu page size */
result |= addr & (~PAGE_MASK);
return result;
+}
+/**
+ * radeon_vm_page_flags - translate page flags to what the hw uses
+ *
+ * @flags: flags coming from userspace
+ *
+ * Translate the flags the userspace ABI uses to hw flags.
+ */
+static uint32_t radeon_vm_page_flags(uint32_t flags)
+{
uint32_t hw_flags = 0;
hw_flags |= (flags & RADEON_VM_PAGE_VALID) ? R600_PTE_VALID : 0;
hw_flags |= (flags & RADEON_VM_PAGE_READABLE) ? R600_PTE_READABLE : 0;
hw_flags |= (flags & RADEON_VM_PAGE_WRITEABLE) ? R600_PTE_WRITEABLE : 0;
if (flags & RADEON_VM_PAGE_SYSTEM) {
hw_flags |= R600_PTE_SYSTEM;
hw_flags |= (flags & RADEON_VM_PAGE_SNOOPED) ? R600_PTE_SNOOPED : 0;
}
return hw_flags;
+}
+/**
+ * radeon_vm_update_pdes - make sure that the page directory is valid
+ *
+ * @rdev: radeon_device pointer
+ * @vm: requested vm
+ * @ib: indirect buffer that receives the update commands
+ * @start: start of GPU address range
+ * @end: end of GPU address range
+ *
+ * Allocates new page tables if necessary
+ * and updates the page directory (cayman+).
+ * Returns 0 for success, error for failure.
+ *
+ * Global and local mutex must be locked!
+ */
+static int radeon_vm_update_pdes(struct radeon_device *rdev,
struct radeon_vm *vm,
struct radeon_ib *ib,
uint64_t start, uint64_t end)
+{
static const uint32_t incr = RADEON_VM_PTE_COUNT * 8;
uint64_t last_pde = ~0, last_pt = ~0;
unsigned count = 0;
uint64_t pt_idx;
int r;
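/* convert the GPU byte range into page directory indices */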
start = (start / RADEON_GPU_PAGE_SIZE) >> RADEON_VM_BLOCK_SIZE;
end = (end / RADEON_GPU_PAGE_SIZE) >> RADEON_VM_BLOCK_SIZE;
/* walk over the address space and update the page directory */
for (pt_idx = start; pt_idx <= end; ++pt_idx) {
uint64_t pde, pt;
if (vm->page_tables[pt_idx])
continue;
+retry:
r = radeon_sa_bo_new(rdev, &rdev->vm_manager.sa_manager,
&vm->page_tables[pt_idx],
RADEON_VM_PTE_COUNT * 8,
RADEON_GPU_PAGE_SIZE, false);
if (r == -ENOMEM) {
r = radeon_vm_evict(rdev, vm);
if (r)
return r;
goto retry;
} else if (r) {
return r;
}
pde = vm->pd_gpu_addr + pt_idx * 8;
pt = radeon_sa_bo_gpu_addr(vm->page_tables[pt_idx]);
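/* batch up runs of contiguous PDEs: once the directory or page table
 * address stops being contiguous, write the pending run in one
 * set_page call and zero-fill the page tables it pointed to */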
if (((last_pde + 8 * count) != pde) ||
((last_pt + incr * count) != pt)) {
if (count) {
radeon_asic_vm_set_page(rdev, ib, last_pde,
last_pt, count, incr,
R600_PTE_VALID);
count *= RADEON_VM_PTE_COUNT;
radeon_asic_vm_set_page(rdev, ib, last_pt, 0,
count, 0, 0);
}
count = 1;
last_pde = pde;
last_pt = pt;
} else {
++count;
}
}
if (count) {
radeon_asic_vm_set_page(rdev, ib, last_pde, last_pt, count,
incr, R600_PTE_VALID);
count *= RADEON_VM_PTE_COUNT;
radeon_asic_vm_set_page(rdev, ib, last_pt, 0,
count, 0, 0);
}
return 0;
+}
+/**
+ * radeon_vm_update_ptes - make sure that page tables are valid
+ *
+ * @rdev: radeon_device pointer
+ * @vm: requested vm
+ * @ib: indirect buffer that receives the update commands
+ * @start: start of GPU address range
+ * @end: end of GPU address range
+ * @dst: destination address to map to
+ * @flags: mapping flags
+ *
+ * Update the page tables in the range @start - @end (cayman+).
+ *
+ * Global and local mutex must be locked!
+ */
+static void radeon_vm_update_ptes(struct radeon_device *rdev,
struct radeon_vm *vm,
struct radeon_ib *ib,
uint64_t start, uint64_t end,
uint64_t dst, uint32_t flags)
+{
static const uint64_t mask = RADEON_VM_PTE_COUNT - 1;
uint64_t last_pte = ~0, last_dst = ~0;
unsigned count = 0;
uint64_t addr;
start = start / RADEON_GPU_PAGE_SIZE;
end = end / RADEON_GPU_PAGE_SIZE;
/* walk over the address space and update the page tables */
for (addr = start; addr < end; ) {
uint64_t pt_idx = addr >> RADEON_VM_BLOCK_SIZE;
unsigned nptes;
uint64_t pte;
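/* update at most up to the end of the current page table, never
 * crossing a page table boundary in a single step */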
if ((addr & ~mask) == (end & ~mask))
nptes = end - addr;
else
nptes = RADEON_VM_PTE_COUNT - (addr & mask);
pte = radeon_sa_bo_gpu_addr(vm->page_tables[pt_idx]);
pte += (addr & mask) * 8;
if ((last_pte + 8 * count) != pte) {
if (count) {
radeon_asic_vm_set_page(rdev, ib, last_pte,
last_dst, count,
RADEON_GPU_PAGE_SIZE,
flags);
}
count = nptes;
last_pte = pte;
last_dst = dst;
} else {
count += nptes;
}
addr += nptes;
dst += nptes * RADEON_GPU_PAGE_SIZE;
}
if (count) {
radeon_asic_vm_set_page(rdev, ib, last_pte,
last_dst, count,
RADEON_GPU_PAGE_SIZE, flags);
}
+}
+/**
+ * radeon_vm_bo_update - map a bo into the vm page table
+ *
+ * @rdev: radeon_device pointer
+ * @vm: requested vm
+ * @bo: radeon buffer object
+ * @mem: ttm mem
+ *
+ * Fill in the page table entries for @bo (cayman+).
+ * Returns 0 for success, -EINVAL for failure.
+ *
+ * Object has to be reserved & global and local mutex must be locked!
+ */
+int radeon_vm_bo_update(struct radeon_device *rdev,
struct radeon_vm *vm,
struct radeon_bo *bo,
struct ttm_mem_reg *mem)
+{
struct radeon_ib ib;
struct radeon_bo_va *bo_va;
unsigned nptes, npdes, ndw;
uint64_t addr;
int r;
/* nothing to do if vm isn't bound */
if (vm->page_directory == NULL)
return 0;
bo_va = radeon_vm_bo_find(vm, bo);
if (bo_va == NULL) {
dev_err(rdev->dev, "bo %p not in vm %p\n", bo, vm);
return -EINVAL;
}
if (!bo_va->soffset) {
dev_err(rdev->dev, "bo %p don't has a mapping in vm %p\n",
bo, vm);
return -EINVAL;
}
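/* nothing to do if the mapping already matches the requested state:
 * valid and being bound, or invalid and being unbound */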
if ((bo_va->valid && mem) || (!bo_va->valid && mem == NULL))
return 0;
bo_va->flags &= ~RADEON_VM_PAGE_VALID;
bo_va->flags &= ~RADEON_VM_PAGE_SYSTEM;
if (mem) {
addr = mem->start << PAGE_SHIFT;
if (mem->mem_type != TTM_PL_SYSTEM) {
bo_va->flags |= RADEON_VM_PAGE_VALID;
bo_va->valid = true;
}
if (mem->mem_type == TTM_PL_TT) {
bo_va->flags |= RADEON_VM_PAGE_SYSTEM;
} else {
addr += rdev->vm_manager.vram_base_offset;
}
} else {
addr = 0;
bo_va->valid = false;
}
trace_radeon_vm_bo_update(bo_va);
nptes = radeon_bo_ngpu_pages(bo);
/* assume two extra pdes in case the mapping overlaps the borders */
npdes = (nptes >> RADEON_VM_BLOCK_SIZE) + 2;
/* padding, etc. */
ndw = 64;
if (RADEON_VM_BLOCK_SIZE > 11)
/* reserve space for one header for every 2k dwords */
ndw += (nptes >> 11) * 4;
else
/* reserve space for one header for
every (1 << BLOCK_SIZE) entries */
ndw += (nptes >> RADEON_VM_BLOCK_SIZE) * 4;
/* reserve space for pte addresses */
ndw += nptes * 2;
/* reserve space for one header for every 2k dwords */
ndw += (npdes >> 11) * 4;
/* reserve space for pde addresses */
ndw += npdes * 2;
/* reserve space for clearing new page tables */
ndw += npdes * 2 * RADEON_VM_PTE_COUNT;
/* update too big for an IB */
if (ndw > 0xfffff)
return -ENOMEM;
r = radeon_ib_get(rdev, R600_RING_TYPE_DMA_INDEX, &ib, NULL, ndw * 4);
if (r)
return r;
ib.length_dw = 0;
r = radeon_vm_update_pdes(rdev, vm, &ib, bo_va->soffset, bo_va->eoffset);
if (r) {
radeon_ib_free(rdev, &ib);
return r;
}
radeon_vm_update_ptes(rdev, vm, &ib, bo_va->soffset, bo_va->eoffset,
addr, radeon_vm_page_flags(bo_va->flags));
radeon_semaphore_sync_to(ib.semaphore, vm->fence);
r = radeon_ib_schedule(rdev, &ib, NULL);
if (r) {
radeon_ib_free(rdev, &ib);
return r;
}
radeon_fence_unref(&vm->fence);
vm->fence = radeon_fence_ref(ib.fence);
radeon_ib_free(rdev, &ib);
radeon_fence_unref(&vm->last_flush);
return 0;
+}
+/**
+ * radeon_vm_bo_rmv - remove a bo from a specific vm
+ *
+ * @rdev: radeon_device pointer
+ * @bo_va: requested bo_va
+ *
+ * Remove @bo_va->bo from the requested vm (cayman+).
+ * Remove @bo_va->bo from the list of bos associated with the bo_va->vm and
+ * remove the ptes for @bo_va in the page table.
+ * Returns 0 for success.
+ *
+ * Object has to be reserved!
+ */
+int radeon_vm_bo_rmv(struct radeon_device *rdev,
struct radeon_bo_va *bo_va)
+{
int r = 0;
mutex_lock(&rdev->vm_manager.lock);
mutex_lock(&bo_va->vm->mutex);
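/* if the bo_va still has an address assigned, clear its ptes first */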
if (bo_va->soffset) {
r = radeon_vm_bo_update(rdev, bo_va->vm, bo_va->bo, NULL);
}
mutex_unlock(&rdev->vm_manager.lock);
list_del(&bo_va->vm_list);
mutex_unlock(&bo_va->vm->mutex);
list_del(&bo_va->bo_list);
kfree(bo_va);
return r;
+}
+/**
+ * radeon_vm_bo_invalidate - mark the bo as invalid
+ *
+ * @rdev: radeon_device pointer
+ * @bo: radeon buffer object
+ *
+ * Mark @bo as invalid (cayman+).
+ */
+void radeon_vm_bo_invalidate(struct radeon_device *rdev,
struct radeon_bo *bo)
+{
struct radeon_bo_va *bo_va;
list_for_each_entry(bo_va, &bo->va, bo_list) {
bo_va->valid = false;
}
+}
+/**
+ * radeon_vm_init - initialize a vm instance
+ *
+ * @rdev: radeon_device pointer
+ * @vm: requested vm
+ *
+ * Init @vm fields (cayman+).
+ */
+void radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm)
+{
vm->id = 0;
vm->fence = NULL;
vm->last_flush = NULL;
vm->last_id_use = NULL;
mutex_init(&vm->mutex);
INIT_LIST_HEAD(&vm->list);
INIT_LIST_HEAD(&vm->va);
+}
+/**
+ * radeon_vm_fini - tear down a vm instance
+ *
+ * @rdev: radeon_device pointer
+ * @vm: requested vm
+ *
+ * Tear down @vm (cayman+).
+ * Unbind the VM and remove all bos from the vm bo list.
+ */
+void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm)
+{
struct radeon_bo_va *bo_va, *tmp;
int r;
mutex_lock(&rdev->vm_manager.lock);
mutex_lock(&vm->mutex);
radeon_vm_free_pt(rdev, vm);
mutex_unlock(&rdev->vm_manager.lock);
if (!list_empty(&vm->va)) {
dev_err(rdev->dev, "still active bo inside vm\n");
}
list_for_each_entry_safe(bo_va, tmp, &vm->va, vm_list) {
list_del_init(&bo_va->vm_list);
r = radeon_bo_reserve(bo_va->bo, false);
if (!r) {
list_del_init(&bo_va->bo_list);
radeon_bo_unreserve(bo_va->bo);
kfree(bo_va);
}
}
radeon_fence_unref(&vm->fence);
radeon_fence_unref(&vm->last_flush);
radeon_fence_unref(&vm->last_id_use);
mutex_unlock(&vm->mutex);
+}
1.8.3.2
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel