From: Christian König christian.koenig@amd.com
Both are complex enough on their own.
Signed-off-by: Christian König christian.koenig@amd.com Reviewed-by: Alex Deucher alexander.deucher@amd.com --- drivers/gpu/drm/radeon/Makefile | 2 +- drivers/gpu/drm/radeon/radeon_gart.c | 958 ---------------------------------- drivers/gpu/drm/radeon/radeon_vm.c | 981 +++++++++++++++++++++++++++++++++++ 3 files changed, 982 insertions(+), 959 deletions(-) create mode 100644 drivers/gpu/drm/radeon/radeon_vm.c
diff --git a/drivers/gpu/drm/radeon/Makefile b/drivers/gpu/drm/radeon/Makefile index ed60caa..0943353 100644 --- a/drivers/gpu/drm/radeon/Makefile +++ b/drivers/gpu/drm/radeon/Makefile @@ -80,7 +80,7 @@ radeon-y += radeon_device.o radeon_asic.o radeon_kms.o \ r600_dpm.o rs780_dpm.o rv6xx_dpm.o rv770_dpm.o rv730_dpm.o rv740_dpm.o \ rv770_smc.o cypress_dpm.o btc_dpm.o sumo_dpm.o sumo_smc.o trinity_dpm.o \ trinity_smc.o ni_dpm.o si_smc.o si_dpm.o kv_smc.o kv_dpm.o ci_smc.o \ - ci_dpm.o dce6_afmt.o + ci_dpm.o dce6_afmt.o radeon_vm.o
# add async DMA block radeon-y += \ diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c index a8f9b46..2e72365 100644 --- a/drivers/gpu/drm/radeon/radeon_gart.c +++ b/drivers/gpu/drm/radeon/radeon_gart.c @@ -28,8 +28,6 @@ #include <drm/drmP.h> #include <drm/radeon_drm.h> #include "radeon.h" -#include "radeon_reg.h" -#include "radeon_trace.h"
/* * GART @@ -394,959 +392,3 @@ void radeon_gart_fini(struct radeon_device *rdev)
radeon_dummy_page_fini(rdev); } - -/* - * GPUVM - * GPUVM is similar to the legacy gart on older asics, however - * rather than there being a single global gart table - * for the entire GPU, there are multiple VM page tables active - * at any given time. The VM page tables can contain a mix - * vram pages and system memory pages and system memory pages - * can be mapped as snooped (cached system pages) or unsnooped - * (uncached system pages). - * Each VM has an ID associated with it and there is a page table - * associated with each VMID. When execting a command buffer, - * the kernel tells the the ring what VMID to use for that command - * buffer. VMIDs are allocated dynamically as commands are submitted. - * The userspace drivers maintain their own address space and the kernel - * sets up their pages tables accordingly when they submit their - * command buffers and a VMID is assigned. - * Cayman/Trinity support up to 8 active VMs at any given time; - * SI supports 16. - */ - -/* - * vm helpers - * - * TODO bind a default page at vm initialization for default address - */ - -/** - * radeon_vm_num_pde - return the number of page directory entries - * - * @rdev: radeon_device pointer - * - * Calculate the number of page directory entries (cayman+). - */ -static unsigned radeon_vm_num_pdes(struct radeon_device *rdev) -{ - return rdev->vm_manager.max_pfn >> RADEON_VM_BLOCK_SIZE; -} - -/** - * radeon_vm_directory_size - returns the size of the page directory in bytes - * - * @rdev: radeon_device pointer - * - * Calculate the size of the page directory in bytes (cayman+). - */ -static unsigned radeon_vm_directory_size(struct radeon_device *rdev) -{ - return RADEON_GPU_PAGE_ALIGN(radeon_vm_num_pdes(rdev) * 8); -} - -/** - * radeon_vm_manager_init - init the vm manager - * - * @rdev: radeon_device pointer - * - * Init the vm manager (cayman+). - * Returns 0 for success, error for failure. - */ -int radeon_vm_manager_init(struct radeon_device *rdev) -{ - struct radeon_vm *vm; - struct radeon_bo_va *bo_va; - int r; - unsigned size; - - if (!rdev->vm_manager.enabled) { - /* allocate enough for 2 full VM pts */ - size = radeon_vm_directory_size(rdev); - size += rdev->vm_manager.max_pfn * 8; - size *= 2; - r = radeon_sa_bo_manager_init(rdev, &rdev->vm_manager.sa_manager, - RADEON_GPU_PAGE_ALIGN(size), - RADEON_VM_PTB_ALIGN_SIZE, - RADEON_GEM_DOMAIN_VRAM); - if (r) { - dev_err(rdev->dev, "failed to allocate vm bo (%dKB)\n", - (rdev->vm_manager.max_pfn * 8) >> 10); - return r; - } - - r = radeon_asic_vm_init(rdev); - if (r) - return r; - - rdev->vm_manager.enabled = true; - - r = radeon_sa_bo_manager_start(rdev, &rdev->vm_manager.sa_manager); - if (r) - return r; - } - - /* restore page table */ - list_for_each_entry(vm, &rdev->vm_manager.lru_vm, list) { - if (vm->page_directory == NULL) - continue; - - list_for_each_entry(bo_va, &vm->va, vm_list) { - bo_va->valid = false; - } - } - return 0; -} - -/** - * radeon_vm_free_pt - free the page table for a specific vm - * - * @rdev: radeon_device pointer - * @vm: vm to unbind - * - * Free the page table of a specific vm (cayman+). - * - * Global and local mutex must be lock! - */ -static void radeon_vm_free_pt(struct radeon_device *rdev, - struct radeon_vm *vm) -{ - struct radeon_bo_va *bo_va; - int i; - - if (!vm->page_directory) - return; - - list_del_init(&vm->list); - radeon_sa_bo_free(rdev, &vm->page_directory, vm->fence); - - list_for_each_entry(bo_va, &vm->va, vm_list) { - bo_va->valid = false; - } - - if (vm->page_tables == NULL) - return; - - for (i = 0; i < radeon_vm_num_pdes(rdev); i++) - radeon_sa_bo_free(rdev, &vm->page_tables[i], vm->fence); - - kfree(vm->page_tables); -} - -/** - * radeon_vm_manager_fini - tear down the vm manager - * - * @rdev: radeon_device pointer - * - * Tear down the VM manager (cayman+). - */ -void radeon_vm_manager_fini(struct radeon_device *rdev) -{ - struct radeon_vm *vm, *tmp; - int i; - - if (!rdev->vm_manager.enabled) - return; - - mutex_lock(&rdev->vm_manager.lock); - /* free all allocated page tables */ - list_for_each_entry_safe(vm, tmp, &rdev->vm_manager.lru_vm, list) { - mutex_lock(&vm->mutex); - radeon_vm_free_pt(rdev, vm); - mutex_unlock(&vm->mutex); - } - for (i = 0; i < RADEON_NUM_VM; ++i) { - radeon_fence_unref(&rdev->vm_manager.active[i]); - } - radeon_asic_vm_fini(rdev); - mutex_unlock(&rdev->vm_manager.lock); - - radeon_sa_bo_manager_suspend(rdev, &rdev->vm_manager.sa_manager); - radeon_sa_bo_manager_fini(rdev, &rdev->vm_manager.sa_manager); - rdev->vm_manager.enabled = false; -} - -/** - * radeon_vm_evict - evict page table to make room for new one - * - * @rdev: radeon_device pointer - * @vm: VM we want to allocate something for - * - * Evict a VM from the lru, making sure that it isn't @vm. (cayman+). - * Returns 0 for success, -ENOMEM for failure. - * - * Global and local mutex must be locked! - */ -static int radeon_vm_evict(struct radeon_device *rdev, struct radeon_vm *vm) -{ - struct radeon_vm *vm_evict; - - if (list_empty(&rdev->vm_manager.lru_vm)) - return -ENOMEM; - - vm_evict = list_first_entry(&rdev->vm_manager.lru_vm, - struct radeon_vm, list); - if (vm_evict == vm) - return -ENOMEM; - - mutex_lock(&vm_evict->mutex); - radeon_vm_free_pt(rdev, vm_evict); - mutex_unlock(&vm_evict->mutex); - return 0; -} - -/** - * radeon_vm_alloc_pt - allocates a page table for a VM - * - * @rdev: radeon_device pointer - * @vm: vm to bind - * - * Allocate a page table for the requested vm (cayman+). - * Returns 0 for success, error for failure. - * - * Global and local mutex must be locked! - */ -int radeon_vm_alloc_pt(struct radeon_device *rdev, struct radeon_vm *vm) -{ - unsigned pd_size, pd_entries, pts_size; - struct radeon_ib ib; - int r; - - if (vm == NULL) { - return -EINVAL; - } - - if (vm->page_directory != NULL) { - return 0; - } - - pd_size = radeon_vm_directory_size(rdev); - pd_entries = radeon_vm_num_pdes(rdev); - -retry: - r = radeon_sa_bo_new(rdev, &rdev->vm_manager.sa_manager, - &vm->page_directory, pd_size, - RADEON_VM_PTB_ALIGN_SIZE, false); - if (r == -ENOMEM) { - r = radeon_vm_evict(rdev, vm); - if (r) - return r; - goto retry; - - } else if (r) { - return r; - } - - vm->pd_gpu_addr = radeon_sa_bo_gpu_addr(vm->page_directory); - - /* Initially clear the page directory */ - r = radeon_ib_get(rdev, R600_RING_TYPE_DMA_INDEX, &ib, - NULL, pd_entries * 2 + 64); - if (r) { - radeon_sa_bo_free(rdev, &vm->page_directory, vm->fence); - return r; - } - - ib.length_dw = 0; - - radeon_asic_vm_set_page(rdev, &ib, vm->pd_gpu_addr, - 0, pd_entries, 0, 0); - - radeon_semaphore_sync_to(ib.semaphore, vm->fence); - r = radeon_ib_schedule(rdev, &ib, NULL); - if (r) { - radeon_ib_free(rdev, &ib); - radeon_sa_bo_free(rdev, &vm->page_directory, vm->fence); - return r; - } - radeon_fence_unref(&vm->fence); - vm->fence = radeon_fence_ref(ib.fence); - radeon_ib_free(rdev, &ib); - radeon_fence_unref(&vm->last_flush); - - /* allocate page table array */ - pts_size = radeon_vm_num_pdes(rdev) * sizeof(struct radeon_sa_bo *); - vm->page_tables = kzalloc(pts_size, GFP_KERNEL); - - if (vm->page_tables == NULL) { - DRM_ERROR("Cannot allocate memory for page table array\n"); - radeon_sa_bo_free(rdev, &vm->page_directory, vm->fence); - return -ENOMEM; - } - - return 0; -} - -/** - * radeon_vm_add_to_lru - add VMs page table to LRU list - * - * @rdev: radeon_device pointer - * @vm: vm to add to LRU - * - * Add the allocated page table to the LRU list (cayman+). - * - * Global mutex must be locked! - */ -void radeon_vm_add_to_lru(struct radeon_device *rdev, struct radeon_vm *vm) -{ - list_del_init(&vm->list); - list_add_tail(&vm->list, &rdev->vm_manager.lru_vm); -} - -/** - * radeon_vm_grab_id - allocate the next free VMID - * - * @rdev: radeon_device pointer - * @vm: vm to allocate id for - * @ring: ring we want to submit job to - * - * Allocate an id for the vm (cayman+). - * Returns the fence we need to sync to (if any). - * - * Global and local mutex must be locked! - */ -struct radeon_fence *radeon_vm_grab_id(struct radeon_device *rdev, - struct radeon_vm *vm, int ring) -{ - struct radeon_fence *best[RADEON_NUM_RINGS] = {}; - unsigned choices[2] = {}; - unsigned i; - - /* check if the id is still valid */ - if (vm->last_id_use && vm->last_id_use == rdev->vm_manager.active[vm->id]) - return NULL; - - /* we definately need to flush */ - radeon_fence_unref(&vm->last_flush); - - /* skip over VMID 0, since it is the system VM */ - for (i = 1; i < rdev->vm_manager.nvm; ++i) { - struct radeon_fence *fence = rdev->vm_manager.active[i]; - - if (fence == NULL) { - /* found a free one */ - vm->id = i; - trace_radeon_vm_grab_id(vm->id, ring); - return NULL; - } - - if (radeon_fence_is_earlier(fence, best[fence->ring])) { - best[fence->ring] = fence; - choices[fence->ring == ring ? 0 : 1] = i; - } - } - - for (i = 0; i < 2; ++i) { - if (choices[i]) { - vm->id = choices[i]; - trace_radeon_vm_grab_id(vm->id, ring); - return rdev->vm_manager.active[choices[i]]; - } - } - - /* should never happen */ - BUG(); - return NULL; -} - -/** - * radeon_vm_fence - remember fence for vm - * - * @rdev: radeon_device pointer - * @vm: vm we want to fence - * @fence: fence to remember - * - * Fence the vm (cayman+). - * Set the fence used to protect page table and id. - * - * Global and local mutex must be locked! - */ -void radeon_vm_fence(struct radeon_device *rdev, - struct radeon_vm *vm, - struct radeon_fence *fence) -{ - radeon_fence_unref(&rdev->vm_manager.active[vm->id]); - rdev->vm_manager.active[vm->id] = radeon_fence_ref(fence); - - radeon_fence_unref(&vm->fence); - vm->fence = radeon_fence_ref(fence); - - radeon_fence_unref(&vm->last_id_use); - vm->last_id_use = radeon_fence_ref(fence); -} - -/** - * radeon_vm_bo_find - find the bo_va for a specific vm & bo - * - * @vm: requested vm - * @bo: requested buffer object - * - * Find @bo inside the requested vm (cayman+). - * Search inside the @bos vm list for the requested vm - * Returns the found bo_va or NULL if none is found - * - * Object has to be reserved! - */ -struct radeon_bo_va *radeon_vm_bo_find(struct radeon_vm *vm, - struct radeon_bo *bo) -{ - struct radeon_bo_va *bo_va; - - list_for_each_entry(bo_va, &bo->va, bo_list) { - if (bo_va->vm == vm) { - return bo_va; - } - } - return NULL; -} - -/** - * radeon_vm_bo_add - add a bo to a specific vm - * - * @rdev: radeon_device pointer - * @vm: requested vm - * @bo: radeon buffer object - * - * Add @bo into the requested vm (cayman+). - * Add @bo to the list of bos associated with the vm - * Returns newly added bo_va or NULL for failure - * - * Object has to be reserved! - */ -struct radeon_bo_va *radeon_vm_bo_add(struct radeon_device *rdev, - struct radeon_vm *vm, - struct radeon_bo *bo) -{ - struct radeon_bo_va *bo_va; - - bo_va = kzalloc(sizeof(struct radeon_bo_va), GFP_KERNEL); - if (bo_va == NULL) { - return NULL; - } - bo_va->vm = vm; - bo_va->bo = bo; - bo_va->soffset = 0; - bo_va->eoffset = 0; - bo_va->flags = 0; - bo_va->valid = false; - bo_va->ref_count = 1; - INIT_LIST_HEAD(&bo_va->bo_list); - INIT_LIST_HEAD(&bo_va->vm_list); - - mutex_lock(&vm->mutex); - list_add(&bo_va->vm_list, &vm->va); - list_add_tail(&bo_va->bo_list, &bo->va); - mutex_unlock(&vm->mutex); - - return bo_va; -} - -/** - * radeon_vm_bo_set_addr - set bos virtual address inside a vm - * - * @rdev: radeon_device pointer - * @bo_va: bo_va to store the address - * @soffset: requested offset of the buffer in the VM address space - * @flags: attributes of pages (read/write/valid/etc.) - * - * Set offset of @bo_va (cayman+). - * Validate and set the offset requested within the vm address space. - * Returns 0 for success, error for failure. - * - * Object has to be reserved! - */ -int radeon_vm_bo_set_addr(struct radeon_device *rdev, - struct radeon_bo_va *bo_va, - uint64_t soffset, - uint32_t flags) -{ - uint64_t size = radeon_bo_size(bo_va->bo); - uint64_t eoffset, last_offset = 0; - struct radeon_vm *vm = bo_va->vm; - struct radeon_bo_va *tmp; - struct list_head *head; - unsigned last_pfn; - - if (soffset) { - /* make sure object fit at this offset */ - eoffset = soffset + size; - if (soffset >= eoffset) { - return -EINVAL; - } - - last_pfn = eoffset / RADEON_GPU_PAGE_SIZE; - if (last_pfn > rdev->vm_manager.max_pfn) { - dev_err(rdev->dev, "va above limit (0x%08X > 0x%08X)\n", - last_pfn, rdev->vm_manager.max_pfn); - return -EINVAL; - } - - } else { - eoffset = last_pfn = 0; - } - - mutex_lock(&vm->mutex); - head = &vm->va; - last_offset = 0; - list_for_each_entry(tmp, &vm->va, vm_list) { - if (bo_va == tmp) { - /* skip over currently modified bo */ - continue; - } - - if (soffset >= last_offset && eoffset <= tmp->soffset) { - /* bo can be added before this one */ - break; - } - if (eoffset > tmp->soffset && soffset < tmp->eoffset) { - /* bo and tmp overlap, invalid offset */ - dev_err(rdev->dev, "bo %p va 0x%08X conflict with (bo %p 0x%08X 0x%08X)\n", - bo_va->bo, (unsigned)bo_va->soffset, tmp->bo, - (unsigned)tmp->soffset, (unsigned)tmp->eoffset); - mutex_unlock(&vm->mutex); - return -EINVAL; - } - last_offset = tmp->eoffset; - head = &tmp->vm_list; - } - - bo_va->soffset = soffset; - bo_va->eoffset = eoffset; - bo_va->flags = flags; - bo_va->valid = false; - list_move(&bo_va->vm_list, head); - - mutex_unlock(&vm->mutex); - return 0; -} - -/** - * radeon_vm_map_gart - get the physical address of a gart page - * - * @rdev: radeon_device pointer - * @addr: the unmapped addr - * - * Look up the physical address of the page that the pte resolves - * to (cayman+). - * Returns the physical address of the page. - */ -uint64_t radeon_vm_map_gart(struct radeon_device *rdev, uint64_t addr) -{ - uint64_t result; - - /* page table offset */ - result = rdev->gart.pages_addr[addr >> PAGE_SHIFT]; - - /* in case cpu page size != gpu page size*/ - result |= addr & (~PAGE_MASK); - - return result; -} - -/** - * radeon_vm_page_flags - translate page flags to what the hw uses - * - * @flags: flags comming from userspace - * - * Translate the flags the userspace ABI uses to hw flags. - */ -static uint32_t radeon_vm_page_flags(uint32_t flags) -{ - uint32_t hw_flags = 0; - hw_flags |= (flags & RADEON_VM_PAGE_VALID) ? R600_PTE_VALID : 0; - hw_flags |= (flags & RADEON_VM_PAGE_READABLE) ? R600_PTE_READABLE : 0; - hw_flags |= (flags & RADEON_VM_PAGE_WRITEABLE) ? R600_PTE_WRITEABLE : 0; - if (flags & RADEON_VM_PAGE_SYSTEM) { - hw_flags |= R600_PTE_SYSTEM; - hw_flags |= (flags & RADEON_VM_PAGE_SNOOPED) ? R600_PTE_SNOOPED : 0; - } - return hw_flags; -} - -/** - * radeon_vm_update_pdes - make sure that page directory is valid - * - * @rdev: radeon_device pointer - * @vm: requested vm - * @start: start of GPU address range - * @end: end of GPU address range - * - * Allocates new page tables if necessary - * and updates the page directory (cayman+). - * Returns 0 for success, error for failure. - * - * Global and local mutex must be locked! - */ -static int radeon_vm_update_pdes(struct radeon_device *rdev, - struct radeon_vm *vm, - struct radeon_ib *ib, - uint64_t start, uint64_t end) -{ - static const uint32_t incr = RADEON_VM_PTE_COUNT * 8; - - uint64_t last_pde = ~0, last_pt = ~0; - unsigned count = 0; - uint64_t pt_idx; - int r; - - start = (start / RADEON_GPU_PAGE_SIZE) >> RADEON_VM_BLOCK_SIZE; - end = (end / RADEON_GPU_PAGE_SIZE) >> RADEON_VM_BLOCK_SIZE; - - /* walk over the address space and update the page directory */ - for (pt_idx = start; pt_idx <= end; ++pt_idx) { - uint64_t pde, pt; - - if (vm->page_tables[pt_idx]) - continue; - -retry: - r = radeon_sa_bo_new(rdev, &rdev->vm_manager.sa_manager, - &vm->page_tables[pt_idx], - RADEON_VM_PTE_COUNT * 8, - RADEON_GPU_PAGE_SIZE, false); - - if (r == -ENOMEM) { - r = radeon_vm_evict(rdev, vm); - if (r) - return r; - goto retry; - } else if (r) { - return r; - } - - pde = vm->pd_gpu_addr + pt_idx * 8; - - pt = radeon_sa_bo_gpu_addr(vm->page_tables[pt_idx]); - - if (((last_pde + 8 * count) != pde) || - ((last_pt + incr * count) != pt)) { - - if (count) { - radeon_asic_vm_set_page(rdev, ib, last_pde, - last_pt, count, incr, - R600_PTE_VALID); - - count *= RADEON_VM_PTE_COUNT; - radeon_asic_vm_set_page(rdev, ib, last_pt, 0, - count, 0, 0); - } - - count = 1; - last_pde = pde; - last_pt = pt; - } else { - ++count; - } - } - - if (count) { - radeon_asic_vm_set_page(rdev, ib, last_pde, last_pt, count, - incr, R600_PTE_VALID); - - count *= RADEON_VM_PTE_COUNT; - radeon_asic_vm_set_page(rdev, ib, last_pt, 0, - count, 0, 0); - } - - return 0; -} - -/** - * radeon_vm_update_ptes - make sure that page tables are valid - * - * @rdev: radeon_device pointer - * @vm: requested vm - * @start: start of GPU address range - * @end: end of GPU address range - * @dst: destination address to map to - * @flags: mapping flags - * - * Update the page tables in the range @start - @end (cayman+). - * - * Global and local mutex must be locked! - */ -static void radeon_vm_update_ptes(struct radeon_device *rdev, - struct radeon_vm *vm, - struct radeon_ib *ib, - uint64_t start, uint64_t end, - uint64_t dst, uint32_t flags) -{ - static const uint64_t mask = RADEON_VM_PTE_COUNT - 1; - - uint64_t last_pte = ~0, last_dst = ~0; - unsigned count = 0; - uint64_t addr; - - start = start / RADEON_GPU_PAGE_SIZE; - end = end / RADEON_GPU_PAGE_SIZE; - - /* walk over the address space and update the page tables */ - for (addr = start; addr < end; ) { - uint64_t pt_idx = addr >> RADEON_VM_BLOCK_SIZE; - unsigned nptes; - uint64_t pte; - - if ((addr & ~mask) == (end & ~mask)) - nptes = end - addr; - else - nptes = RADEON_VM_PTE_COUNT - (addr & mask); - - pte = radeon_sa_bo_gpu_addr(vm->page_tables[pt_idx]); - pte += (addr & mask) * 8; - - if ((last_pte + 8 * count) != pte) { - - if (count) { - radeon_asic_vm_set_page(rdev, ib, last_pte, - last_dst, count, - RADEON_GPU_PAGE_SIZE, - flags); - } - - count = nptes; - last_pte = pte; - last_dst = dst; - } else { - count += nptes; - } - - addr += nptes; - dst += nptes * RADEON_GPU_PAGE_SIZE; - } - - if (count) { - radeon_asic_vm_set_page(rdev, ib, last_pte, - last_dst, count, - RADEON_GPU_PAGE_SIZE, flags); - } -} - -/** - * radeon_vm_bo_update - map a bo into the vm page table - * - * @rdev: radeon_device pointer - * @vm: requested vm - * @bo: radeon buffer object - * @mem: ttm mem - * - * Fill in the page table entries for @bo (cayman+). - * Returns 0 for success, -EINVAL for failure. - * - * Object have to be reserved & global and local mutex must be locked! - */ -int radeon_vm_bo_update(struct radeon_device *rdev, - struct radeon_vm *vm, - struct radeon_bo *bo, - struct ttm_mem_reg *mem) -{ - struct radeon_ib ib; - struct radeon_bo_va *bo_va; - unsigned nptes, npdes, ndw; - uint64_t addr; - int r; - - /* nothing to do if vm isn't bound */ - if (vm->page_directory == NULL) - return 0; - - bo_va = radeon_vm_bo_find(vm, bo); - if (bo_va == NULL) { - dev_err(rdev->dev, "bo %p not in vm %p\n", bo, vm); - return -EINVAL; - } - - if (!bo_va->soffset) { - dev_err(rdev->dev, "bo %p don't has a mapping in vm %p\n", - bo, vm); - return -EINVAL; - } - - if ((bo_va->valid && mem) || (!bo_va->valid && mem == NULL)) - return 0; - - bo_va->flags &= ~RADEON_VM_PAGE_VALID; - bo_va->flags &= ~RADEON_VM_PAGE_SYSTEM; - if (mem) { - addr = mem->start << PAGE_SHIFT; - if (mem->mem_type != TTM_PL_SYSTEM) { - bo_va->flags |= RADEON_VM_PAGE_VALID; - bo_va->valid = true; - } - if (mem->mem_type == TTM_PL_TT) { - bo_va->flags |= RADEON_VM_PAGE_SYSTEM; - } else { - addr += rdev->vm_manager.vram_base_offset; - } - } else { - addr = 0; - bo_va->valid = false; - } - - trace_radeon_vm_bo_update(bo_va); - - nptes = radeon_bo_ngpu_pages(bo); - - /* assume two extra pdes in case the mapping overlaps the borders */ - npdes = (nptes >> RADEON_VM_BLOCK_SIZE) + 2; - - /* padding, etc. */ - ndw = 64; - - if (RADEON_VM_BLOCK_SIZE > 11) - /* reserve space for one header for every 2k dwords */ - ndw += (nptes >> 11) * 4; - else - /* reserve space for one header for - every (1 << BLOCK_SIZE) entries */ - ndw += (nptes >> RADEON_VM_BLOCK_SIZE) * 4; - - /* reserve space for pte addresses */ - ndw += nptes * 2; - - /* reserve space for one header for every 2k dwords */ - ndw += (npdes >> 11) * 4; - - /* reserve space for pde addresses */ - ndw += npdes * 2; - - /* reserve space for clearing new page tables */ - ndw += npdes * 2 * RADEON_VM_PTE_COUNT; - - /* update too big for an IB */ - if (ndw > 0xfffff) - return -ENOMEM; - - r = radeon_ib_get(rdev, R600_RING_TYPE_DMA_INDEX, &ib, NULL, ndw * 4); - if (r) - return r; - ib.length_dw = 0; - - r = radeon_vm_update_pdes(rdev, vm, &ib, bo_va->soffset, bo_va->eoffset); - if (r) { - radeon_ib_free(rdev, &ib); - return r; - } - - radeon_vm_update_ptes(rdev, vm, &ib, bo_va->soffset, bo_va->eoffset, - addr, radeon_vm_page_flags(bo_va->flags)); - - radeon_semaphore_sync_to(ib.semaphore, vm->fence); - r = radeon_ib_schedule(rdev, &ib, NULL); - if (r) { - radeon_ib_free(rdev, &ib); - return r; - } - radeon_fence_unref(&vm->fence); - vm->fence = radeon_fence_ref(ib.fence); - radeon_ib_free(rdev, &ib); - radeon_fence_unref(&vm->last_flush); - - return 0; -} - -/** - * radeon_vm_bo_rmv - remove a bo to a specific vm - * - * @rdev: radeon_device pointer - * @bo_va: requested bo_va - * - * Remove @bo_va->bo from the requested vm (cayman+). - * Remove @bo_va->bo from the list of bos associated with the bo_va->vm and - * remove the ptes for @bo_va in the page table. - * Returns 0 for success. - * - * Object have to be reserved! - */ -int radeon_vm_bo_rmv(struct radeon_device *rdev, - struct radeon_bo_va *bo_va) -{ - int r = 0; - - mutex_lock(&rdev->vm_manager.lock); - mutex_lock(&bo_va->vm->mutex); - if (bo_va->soffset) { - r = radeon_vm_bo_update(rdev, bo_va->vm, bo_va->bo, NULL); - } - mutex_unlock(&rdev->vm_manager.lock); - list_del(&bo_va->vm_list); - mutex_unlock(&bo_va->vm->mutex); - list_del(&bo_va->bo_list); - - kfree(bo_va); - return r; -} - -/** - * radeon_vm_bo_invalidate - mark the bo as invalid - * - * @rdev: radeon_device pointer - * @vm: requested vm - * @bo: radeon buffer object - * - * Mark @bo as invalid (cayman+). - */ -void radeon_vm_bo_invalidate(struct radeon_device *rdev, - struct radeon_bo *bo) -{ - struct radeon_bo_va *bo_va; - - list_for_each_entry(bo_va, &bo->va, bo_list) { - bo_va->valid = false; - } -} - -/** - * radeon_vm_init - initialize a vm instance - * - * @rdev: radeon_device pointer - * @vm: requested vm - * - * Init @vm fields (cayman+). - */ -void radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm) -{ - vm->id = 0; - vm->fence = NULL; - vm->last_flush = NULL; - vm->last_id_use = NULL; - mutex_init(&vm->mutex); - INIT_LIST_HEAD(&vm->list); - INIT_LIST_HEAD(&vm->va); -} - -/** - * radeon_vm_fini - tear down a vm instance - * - * @rdev: radeon_device pointer - * @vm: requested vm - * - * Tear down @vm (cayman+). - * Unbind the VM and remove all bos from the vm bo list - */ -void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm) -{ - struct radeon_bo_va *bo_va, *tmp; - int r; - - mutex_lock(&rdev->vm_manager.lock); - mutex_lock(&vm->mutex); - radeon_vm_free_pt(rdev, vm); - mutex_unlock(&rdev->vm_manager.lock); - - if (!list_empty(&vm->va)) { - dev_err(rdev->dev, "still active bo inside vm\n"); - } - list_for_each_entry_safe(bo_va, tmp, &vm->va, vm_list) { - list_del_init(&bo_va->vm_list); - r = radeon_bo_reserve(bo_va->bo, false); - if (!r) { - list_del_init(&bo_va->bo_list); - radeon_bo_unreserve(bo_va->bo); - kfree(bo_va); - } - } - radeon_fence_unref(&vm->fence); - radeon_fence_unref(&vm->last_flush); - radeon_fence_unref(&vm->last_id_use); - mutex_unlock(&vm->mutex); -} diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c new file mode 100644 index 0000000..433b1eb --- /dev/null +++ b/drivers/gpu/drm/radeon/radeon_vm.c @@ -0,0 +1,981 @@ +/* + * Copyright 2008 Advanced Micro Devices, Inc. + * Copyright 2008 Red Hat Inc. + * Copyright 2009 Jerome Glisse. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: Dave Airlie + * Alex Deucher + * Jerome Glisse + */ +#include <drm/drmP.h> +#include <drm/radeon_drm.h> +#include "radeon.h" +#include "radeon_trace.h" + +/* + * GPUVM + * GPUVM is similar to the legacy gart on older asics, however + * rather than there being a single global gart table + * for the entire GPU, there are multiple VM page tables active + * at any given time. The VM page tables can contain a mix + * vram pages and system memory pages and system memory pages + * can be mapped as snooped (cached system pages) or unsnooped + * (uncached system pages). + * Each VM has an ID associated with it and there is a page table + * associated with each VMID. When execting a command buffer, + * the kernel tells the the ring what VMID to use for that command + * buffer. VMIDs are allocated dynamically as commands are submitted. + * The userspace drivers maintain their own address space and the kernel + * sets up their pages tables accordingly when they submit their + * command buffers and a VMID is assigned. + * Cayman/Trinity support up to 8 active VMs at any given time; + * SI supports 16. + */ + +/** + * radeon_vm_num_pde - return the number of page directory entries + * + * @rdev: radeon_device pointer + * + * Calculate the number of page directory entries (cayman+). + */ +static unsigned radeon_vm_num_pdes(struct radeon_device *rdev) +{ + return rdev->vm_manager.max_pfn >> RADEON_VM_BLOCK_SIZE; +} + +/** + * radeon_vm_directory_size - returns the size of the page directory in bytes + * + * @rdev: radeon_device pointer + * + * Calculate the size of the page directory in bytes (cayman+). + */ +static unsigned radeon_vm_directory_size(struct radeon_device *rdev) +{ + return RADEON_GPU_PAGE_ALIGN(radeon_vm_num_pdes(rdev) * 8); +} + +/** + * radeon_vm_manager_init - init the vm manager + * + * @rdev: radeon_device pointer + * + * Init the vm manager (cayman+). + * Returns 0 for success, error for failure. + */ +int radeon_vm_manager_init(struct radeon_device *rdev) +{ + struct radeon_vm *vm; + struct radeon_bo_va *bo_va; + int r; + unsigned size; + + if (!rdev->vm_manager.enabled) { + /* allocate enough for 2 full VM pts */ + size = radeon_vm_directory_size(rdev); + size += rdev->vm_manager.max_pfn * 8; + size *= 2; + r = radeon_sa_bo_manager_init(rdev, &rdev->vm_manager.sa_manager, + RADEON_GPU_PAGE_ALIGN(size), + RADEON_VM_PTB_ALIGN_SIZE, + RADEON_GEM_DOMAIN_VRAM); + if (r) { + dev_err(rdev->dev, "failed to allocate vm bo (%dKB)\n", + (rdev->vm_manager.max_pfn * 8) >> 10); + return r; + } + + r = radeon_asic_vm_init(rdev); + if (r) + return r; + + rdev->vm_manager.enabled = true; + + r = radeon_sa_bo_manager_start(rdev, &rdev->vm_manager.sa_manager); + if (r) + return r; + } + + /* restore page table */ + list_for_each_entry(vm, &rdev->vm_manager.lru_vm, list) { + if (vm->page_directory == NULL) + continue; + + list_for_each_entry(bo_va, &vm->va, vm_list) { + bo_va->valid = false; + } + } + return 0; +} + +/** + * radeon_vm_free_pt - free the page table for a specific vm + * + * @rdev: radeon_device pointer + * @vm: vm to unbind + * + * Free the page table of a specific vm (cayman+). + * + * Global and local mutex must be lock! + */ +static void radeon_vm_free_pt(struct radeon_device *rdev, + struct radeon_vm *vm) +{ + struct radeon_bo_va *bo_va; + int i; + + if (!vm->page_directory) + return; + + list_del_init(&vm->list); + radeon_sa_bo_free(rdev, &vm->page_directory, vm->fence); + + list_for_each_entry(bo_va, &vm->va, vm_list) { + bo_va->valid = false; + } + + if (vm->page_tables == NULL) + return; + + for (i = 0; i < radeon_vm_num_pdes(rdev); i++) + radeon_sa_bo_free(rdev, &vm->page_tables[i], vm->fence); + + kfree(vm->page_tables); +} + +/** + * radeon_vm_manager_fini - tear down the vm manager + * + * @rdev: radeon_device pointer + * + * Tear down the VM manager (cayman+). + */ +void radeon_vm_manager_fini(struct radeon_device *rdev) +{ + struct radeon_vm *vm, *tmp; + int i; + + if (!rdev->vm_manager.enabled) + return; + + mutex_lock(&rdev->vm_manager.lock); + /* free all allocated page tables */ + list_for_each_entry_safe(vm, tmp, &rdev->vm_manager.lru_vm, list) { + mutex_lock(&vm->mutex); + radeon_vm_free_pt(rdev, vm); + mutex_unlock(&vm->mutex); + } + for (i = 0; i < RADEON_NUM_VM; ++i) { + radeon_fence_unref(&rdev->vm_manager.active[i]); + } + radeon_asic_vm_fini(rdev); + mutex_unlock(&rdev->vm_manager.lock); + + radeon_sa_bo_manager_suspend(rdev, &rdev->vm_manager.sa_manager); + radeon_sa_bo_manager_fini(rdev, &rdev->vm_manager.sa_manager); + rdev->vm_manager.enabled = false; +} + +/** + * radeon_vm_evict - evict page table to make room for new one + * + * @rdev: radeon_device pointer + * @vm: VM we want to allocate something for + * + * Evict a VM from the lru, making sure that it isn't @vm. (cayman+). + * Returns 0 for success, -ENOMEM for failure. + * + * Global and local mutex must be locked! + */ +static int radeon_vm_evict(struct radeon_device *rdev, struct radeon_vm *vm) +{ + struct radeon_vm *vm_evict; + + if (list_empty(&rdev->vm_manager.lru_vm)) + return -ENOMEM; + + vm_evict = list_first_entry(&rdev->vm_manager.lru_vm, + struct radeon_vm, list); + if (vm_evict == vm) + return -ENOMEM; + + mutex_lock(&vm_evict->mutex); + radeon_vm_free_pt(rdev, vm_evict); + mutex_unlock(&vm_evict->mutex); + return 0; +} + +/** + * radeon_vm_alloc_pt - allocates a page table for a VM + * + * @rdev: radeon_device pointer + * @vm: vm to bind + * + * Allocate a page table for the requested vm (cayman+). + * Returns 0 for success, error for failure. + * + * Global and local mutex must be locked! + */ +int radeon_vm_alloc_pt(struct radeon_device *rdev, struct radeon_vm *vm) +{ + unsigned pd_size, pd_entries, pts_size; + struct radeon_ib ib; + int r; + + if (vm == NULL) { + return -EINVAL; + } + + if (vm->page_directory != NULL) { + return 0; + } + + pd_size = radeon_vm_directory_size(rdev); + pd_entries = radeon_vm_num_pdes(rdev); + +retry: + r = radeon_sa_bo_new(rdev, &rdev->vm_manager.sa_manager, + &vm->page_directory, pd_size, + RADEON_VM_PTB_ALIGN_SIZE, false); + if (r == -ENOMEM) { + r = radeon_vm_evict(rdev, vm); + if (r) + return r; + goto retry; + + } else if (r) { + return r; + } + + vm->pd_gpu_addr = radeon_sa_bo_gpu_addr(vm->page_directory); + + /* Initially clear the page directory */ + r = radeon_ib_get(rdev, R600_RING_TYPE_DMA_INDEX, &ib, + NULL, pd_entries * 2 + 64); + if (r) { + radeon_sa_bo_free(rdev, &vm->page_directory, vm->fence); + return r; + } + + ib.length_dw = 0; + + radeon_asic_vm_set_page(rdev, &ib, vm->pd_gpu_addr, + 0, pd_entries, 0, 0); + + radeon_semaphore_sync_to(ib.semaphore, vm->fence); + r = radeon_ib_schedule(rdev, &ib, NULL); + if (r) { + radeon_ib_free(rdev, &ib); + radeon_sa_bo_free(rdev, &vm->page_directory, vm->fence); + return r; + } + radeon_fence_unref(&vm->fence); + vm->fence = radeon_fence_ref(ib.fence); + radeon_ib_free(rdev, &ib); + radeon_fence_unref(&vm->last_flush); + + /* allocate page table array */ + pts_size = radeon_vm_num_pdes(rdev) * sizeof(struct radeon_sa_bo *); + vm->page_tables = kzalloc(pts_size, GFP_KERNEL); + + if (vm->page_tables == NULL) { + DRM_ERROR("Cannot allocate memory for page table array\n"); + radeon_sa_bo_free(rdev, &vm->page_directory, vm->fence); + return -ENOMEM; + } + + return 0; +} + +/** + * radeon_vm_add_to_lru - add VMs page table to LRU list + * + * @rdev: radeon_device pointer + * @vm: vm to add to LRU + * + * Add the allocated page table to the LRU list (cayman+). + * + * Global mutex must be locked! + */ +void radeon_vm_add_to_lru(struct radeon_device *rdev, struct radeon_vm *vm) +{ + list_del_init(&vm->list); + list_add_tail(&vm->list, &rdev->vm_manager.lru_vm); +} + +/** + * radeon_vm_grab_id - allocate the next free VMID + * + * @rdev: radeon_device pointer + * @vm: vm to allocate id for + * @ring: ring we want to submit job to + * + * Allocate an id for the vm (cayman+). + * Returns the fence we need to sync to (if any). + * + * Global and local mutex must be locked! + */ +struct radeon_fence *radeon_vm_grab_id(struct radeon_device *rdev, + struct radeon_vm *vm, int ring) +{ + struct radeon_fence *best[RADEON_NUM_RINGS] = {}; + unsigned choices[2] = {}; + unsigned i; + + /* check if the id is still valid */ + if (vm->last_id_use && vm->last_id_use == rdev->vm_manager.active[vm->id]) + return NULL; + + /* we definately need to flush */ + radeon_fence_unref(&vm->last_flush); + + /* skip over VMID 0, since it is the system VM */ + for (i = 1; i < rdev->vm_manager.nvm; ++i) { + struct radeon_fence *fence = rdev->vm_manager.active[i]; + + if (fence == NULL) { + /* found a free one */ + vm->id = i; + trace_radeon_vm_grab_id(vm->id, ring); + return NULL; + } + + if (radeon_fence_is_earlier(fence, best[fence->ring])) { + best[fence->ring] = fence; + choices[fence->ring == ring ? 0 : 1] = i; + } + } + + for (i = 0; i < 2; ++i) { + if (choices[i]) { + vm->id = choices[i]; + trace_radeon_vm_grab_id(vm->id, ring); + return rdev->vm_manager.active[choices[i]]; + } + } + + /* should never happen */ + BUG(); + return NULL; +} + +/** + * radeon_vm_fence - remember fence for vm + * + * @rdev: radeon_device pointer + * @vm: vm we want to fence + * @fence: fence to remember + * + * Fence the vm (cayman+). + * Set the fence used to protect page table and id. + * + * Global and local mutex must be locked! + */ +void radeon_vm_fence(struct radeon_device *rdev, + struct radeon_vm *vm, + struct radeon_fence *fence) +{ + radeon_fence_unref(&rdev->vm_manager.active[vm->id]); + rdev->vm_manager.active[vm->id] = radeon_fence_ref(fence); + + radeon_fence_unref(&vm->fence); + vm->fence = radeon_fence_ref(fence); + + radeon_fence_unref(&vm->last_id_use); + vm->last_id_use = radeon_fence_ref(fence); +} + +/** + * radeon_vm_bo_find - find the bo_va for a specific vm & bo + * + * @vm: requested vm + * @bo: requested buffer object + * + * Find @bo inside the requested vm (cayman+). + * Search inside the @bos vm list for the requested vm + * Returns the found bo_va or NULL if none is found + * + * Object has to be reserved! + */ +struct radeon_bo_va *radeon_vm_bo_find(struct radeon_vm *vm, + struct radeon_bo *bo) +{ + struct radeon_bo_va *bo_va; + + list_for_each_entry(bo_va, &bo->va, bo_list) { + if (bo_va->vm == vm) { + return bo_va; + } + } + return NULL; +} + +/** + * radeon_vm_bo_add - add a bo to a specific vm + * + * @rdev: radeon_device pointer + * @vm: requested vm + * @bo: radeon buffer object + * + * Add @bo into the requested vm (cayman+). + * Add @bo to the list of bos associated with the vm + * Returns newly added bo_va or NULL for failure + * + * Object has to be reserved! + */ +struct radeon_bo_va *radeon_vm_bo_add(struct radeon_device *rdev, + struct radeon_vm *vm, + struct radeon_bo *bo) +{ + struct radeon_bo_va *bo_va; + + bo_va = kzalloc(sizeof(struct radeon_bo_va), GFP_KERNEL); + if (bo_va == NULL) { + return NULL; + } + bo_va->vm = vm; + bo_va->bo = bo; + bo_va->soffset = 0; + bo_va->eoffset = 0; + bo_va->flags = 0; + bo_va->valid = false; + bo_va->ref_count = 1; + INIT_LIST_HEAD(&bo_va->bo_list); + INIT_LIST_HEAD(&bo_va->vm_list); + + mutex_lock(&vm->mutex); + list_add(&bo_va->vm_list, &vm->va); + list_add_tail(&bo_va->bo_list, &bo->va); + mutex_unlock(&vm->mutex); + + return bo_va; +} + +/** + * radeon_vm_bo_set_addr - set bos virtual address inside a vm + * + * @rdev: radeon_device pointer + * @bo_va: bo_va to store the address + * @soffset: requested offset of the buffer in the VM address space + * @flags: attributes of pages (read/write/valid/etc.) + * + * Set offset of @bo_va (cayman+). + * Validate and set the offset requested within the vm address space. + * Returns 0 for success, error for failure. + * + * Object has to be reserved! + */ +int radeon_vm_bo_set_addr(struct radeon_device *rdev, + struct radeon_bo_va *bo_va, + uint64_t soffset, + uint32_t flags) +{ + uint64_t size = radeon_bo_size(bo_va->bo); + uint64_t eoffset, last_offset = 0; + struct radeon_vm *vm = bo_va->vm; + struct radeon_bo_va *tmp; + struct list_head *head; + unsigned last_pfn; + + if (soffset) { + /* make sure object fit at this offset */ + eoffset = soffset + size; + if (soffset >= eoffset) { + return -EINVAL; + } + + last_pfn = eoffset / RADEON_GPU_PAGE_SIZE; + if (last_pfn > rdev->vm_manager.max_pfn) { + dev_err(rdev->dev, "va above limit (0x%08X > 0x%08X)\n", + last_pfn, rdev->vm_manager.max_pfn); + return -EINVAL; + } + + } else { + eoffset = last_pfn = 0; + } + + mutex_lock(&vm->mutex); + head = &vm->va; + last_offset = 0; + list_for_each_entry(tmp, &vm->va, vm_list) { + if (bo_va == tmp) { + /* skip over currently modified bo */ + continue; + } + + if (soffset >= last_offset && eoffset <= tmp->soffset) { + /* bo can be added before this one */ + break; + } + if (eoffset > tmp->soffset && soffset < tmp->eoffset) { + /* bo and tmp overlap, invalid offset */ + dev_err(rdev->dev, "bo %p va 0x%08X conflict with (bo %p 0x%08X 0x%08X)\n", + bo_va->bo, (unsigned)bo_va->soffset, tmp->bo, + (unsigned)tmp->soffset, (unsigned)tmp->eoffset); + mutex_unlock(&vm->mutex); + return -EINVAL; + } + last_offset = tmp->eoffset; + head = &tmp->vm_list; + } + + bo_va->soffset = soffset; + bo_va->eoffset = eoffset; + bo_va->flags = flags; + bo_va->valid = false; + list_move(&bo_va->vm_list, head); + + mutex_unlock(&vm->mutex); + return 0; +} + +/** + * radeon_vm_map_gart - get the physical address of a gart page + * + * @rdev: radeon_device pointer + * @addr: the unmapped addr + * + * Look up the physical address of the page that the pte resolves + * to (cayman+). + * Returns the physical address of the page. + */ +uint64_t radeon_vm_map_gart(struct radeon_device *rdev, uint64_t addr) +{ + uint64_t result; + + /* page table offset */ + result = rdev->gart.pages_addr[addr >> PAGE_SHIFT]; + + /* in case cpu page size != gpu page size*/ + result |= addr & (~PAGE_MASK); + + return result; +} + +/** + * radeon_vm_page_flags - translate page flags to what the hw uses + * + * @flags: flags comming from userspace + * + * Translate the flags the userspace ABI uses to hw flags. + */ +static uint32_t radeon_vm_page_flags(uint32_t flags) +{ + uint32_t hw_flags = 0; + hw_flags |= (flags & RADEON_VM_PAGE_VALID) ? R600_PTE_VALID : 0; + hw_flags |= (flags & RADEON_VM_PAGE_READABLE) ? R600_PTE_READABLE : 0; + hw_flags |= (flags & RADEON_VM_PAGE_WRITEABLE) ? R600_PTE_WRITEABLE : 0; + if (flags & RADEON_VM_PAGE_SYSTEM) { + hw_flags |= R600_PTE_SYSTEM; + hw_flags |= (flags & RADEON_VM_PAGE_SNOOPED) ? R600_PTE_SNOOPED : 0; + } + return hw_flags; +} + +/** + * radeon_vm_update_pdes - make sure that page directory is valid + * + * @rdev: radeon_device pointer + * @vm: requested vm + * @start: start of GPU address range + * @end: end of GPU address range + * + * Allocates new page tables if necessary + * and updates the page directory (cayman+). + * Returns 0 for success, error for failure. + * + * Global and local mutex must be locked! + */ +static int radeon_vm_update_pdes(struct radeon_device *rdev, + struct radeon_vm *vm, + struct radeon_ib *ib, + uint64_t start, uint64_t end) +{ + static const uint32_t incr = RADEON_VM_PTE_COUNT * 8; + + uint64_t last_pde = ~0, last_pt = ~0; + unsigned count = 0; + uint64_t pt_idx; + int r; + + start = (start / RADEON_GPU_PAGE_SIZE) >> RADEON_VM_BLOCK_SIZE; + end = (end / RADEON_GPU_PAGE_SIZE) >> RADEON_VM_BLOCK_SIZE; + + /* walk over the address space and update the page directory */ + for (pt_idx = start; pt_idx <= end; ++pt_idx) { + uint64_t pde, pt; + + if (vm->page_tables[pt_idx]) + continue; + +retry: + r = radeon_sa_bo_new(rdev, &rdev->vm_manager.sa_manager, + &vm->page_tables[pt_idx], + RADEON_VM_PTE_COUNT * 8, + RADEON_GPU_PAGE_SIZE, false); + + if (r == -ENOMEM) { + r = radeon_vm_evict(rdev, vm); + if (r) + return r; + goto retry; + } else if (r) { + return r; + } + + pde = vm->pd_gpu_addr + pt_idx * 8; + + pt = radeon_sa_bo_gpu_addr(vm->page_tables[pt_idx]); + + if (((last_pde + 8 * count) != pde) || + ((last_pt + incr * count) != pt)) { + + if (count) { + radeon_asic_vm_set_page(rdev, ib, last_pde, + last_pt, count, incr, + R600_PTE_VALID); + + count *= RADEON_VM_PTE_COUNT; + radeon_asic_vm_set_page(rdev, ib, last_pt, 0, + count, 0, 0); + } + + count = 1; + last_pde = pde; + last_pt = pt; + } else { + ++count; + } + } + + if (count) { + radeon_asic_vm_set_page(rdev, ib, last_pde, last_pt, count, + incr, R600_PTE_VALID); + + count *= RADEON_VM_PTE_COUNT; + radeon_asic_vm_set_page(rdev, ib, last_pt, 0, + count, 0, 0); + } + + return 0; +} + +/** + * radeon_vm_update_ptes - make sure that page tables are valid + * + * @rdev: radeon_device pointer + * @vm: requested vm + * @start: start of GPU address range + * @end: end of GPU address range + * @dst: destination address to map to + * @flags: mapping flags + * + * Update the page tables in the range @start - @end (cayman+). + * + * Global and local mutex must be locked! + */ +static void radeon_vm_update_ptes(struct radeon_device *rdev, + struct radeon_vm *vm, + struct radeon_ib *ib, + uint64_t start, uint64_t end, + uint64_t dst, uint32_t flags) +{ + static const uint64_t mask = RADEON_VM_PTE_COUNT - 1; + + uint64_t last_pte = ~0, last_dst = ~0; + unsigned count = 0; + uint64_t addr; + + start = start / RADEON_GPU_PAGE_SIZE; + end = end / RADEON_GPU_PAGE_SIZE; + + /* walk over the address space and update the page tables */ + for (addr = start; addr < end; ) { + uint64_t pt_idx = addr >> RADEON_VM_BLOCK_SIZE; + unsigned nptes; + uint64_t pte; + + if ((addr & ~mask) == (end & ~mask)) + nptes = end - addr; + else + nptes = RADEON_VM_PTE_COUNT - (addr & mask); + + pte = radeon_sa_bo_gpu_addr(vm->page_tables[pt_idx]); + pte += (addr & mask) * 8; + + if ((last_pte + 8 * count) != pte) { + + if (count) { + radeon_asic_vm_set_page(rdev, ib, last_pte, + last_dst, count, + RADEON_GPU_PAGE_SIZE, + flags); + } + + count = nptes; + last_pte = pte; + last_dst = dst; + } else { + count += nptes; + } + + addr += nptes; + dst += nptes * RADEON_GPU_PAGE_SIZE; + } + + if (count) { + radeon_asic_vm_set_page(rdev, ib, last_pte, + last_dst, count, + RADEON_GPU_PAGE_SIZE, flags); + } +} + +/** + * radeon_vm_bo_update - map a bo into the vm page table + * + * @rdev: radeon_device pointer + * @vm: requested vm + * @bo: radeon buffer object + * @mem: ttm mem + * + * Fill in the page table entries for @bo (cayman+). + * Returns 0 for success, -EINVAL for failure. + * + * Object have to be reserved & global and local mutex must be locked! + */ +int radeon_vm_bo_update(struct radeon_device *rdev, + struct radeon_vm *vm, + struct radeon_bo *bo, + struct ttm_mem_reg *mem) +{ + struct radeon_ib ib; + struct radeon_bo_va *bo_va; + unsigned nptes, npdes, ndw; + uint64_t addr; + int r; + + /* nothing to do if vm isn't bound */ + if (vm->page_directory == NULL) + return 0; + + bo_va = radeon_vm_bo_find(vm, bo); + if (bo_va == NULL) { + dev_err(rdev->dev, "bo %p not in vm %p\n", bo, vm); + return -EINVAL; + } + + if (!bo_va->soffset) { + dev_err(rdev->dev, "bo %p don't has a mapping in vm %p\n", + bo, vm); + return -EINVAL; + } + + if ((bo_va->valid && mem) || (!bo_va->valid && mem == NULL)) + return 0; + + bo_va->flags &= ~RADEON_VM_PAGE_VALID; + bo_va->flags &= ~RADEON_VM_PAGE_SYSTEM; + if (mem) { + addr = mem->start << PAGE_SHIFT; + if (mem->mem_type != TTM_PL_SYSTEM) { + bo_va->flags |= RADEON_VM_PAGE_VALID; + bo_va->valid = true; + } + if (mem->mem_type == TTM_PL_TT) { + bo_va->flags |= RADEON_VM_PAGE_SYSTEM; + } else { + addr += rdev->vm_manager.vram_base_offset; + } + } else { + addr = 0; + bo_va->valid = false; + } + + trace_radeon_vm_bo_update(bo_va); + + nptes = radeon_bo_ngpu_pages(bo); + + /* assume two extra pdes in case the mapping overlaps the borders */ + npdes = (nptes >> RADEON_VM_BLOCK_SIZE) + 2; + + /* padding, etc. */ + ndw = 64; + + if (RADEON_VM_BLOCK_SIZE > 11) + /* reserve space for one header for every 2k dwords */ + ndw += (nptes >> 11) * 4; + else + /* reserve space for one header for + every (1 << BLOCK_SIZE) entries */ + ndw += (nptes >> RADEON_VM_BLOCK_SIZE) * 4; + + /* reserve space for pte addresses */ + ndw += nptes * 2; + + /* reserve space for one header for every 2k dwords */ + ndw += (npdes >> 11) * 4; + + /* reserve space for pde addresses */ + ndw += npdes * 2; + + /* reserve space for clearing new page tables */ + ndw += npdes * 2 * RADEON_VM_PTE_COUNT; + + /* update too big for an IB */ + if (ndw > 0xfffff) + return -ENOMEM; + + r = radeon_ib_get(rdev, R600_RING_TYPE_DMA_INDEX, &ib, NULL, ndw * 4); + if (r) + return r; + ib.length_dw = 0; + + r = radeon_vm_update_pdes(rdev, vm, &ib, bo_va->soffset, bo_va->eoffset); + if (r) { + radeon_ib_free(rdev, &ib); + return r; + } + + radeon_vm_update_ptes(rdev, vm, &ib, bo_va->soffset, bo_va->eoffset, + addr, radeon_vm_page_flags(bo_va->flags)); + + radeon_semaphore_sync_to(ib.semaphore, vm->fence); + r = radeon_ib_schedule(rdev, &ib, NULL); + if (r) { + radeon_ib_free(rdev, &ib); + return r; + } + radeon_fence_unref(&vm->fence); + vm->fence = radeon_fence_ref(ib.fence); + radeon_ib_free(rdev, &ib); + radeon_fence_unref(&vm->last_flush); + + return 0; +} + +/** + * radeon_vm_bo_rmv - remove a bo to a specific vm + * + * @rdev: radeon_device pointer + * @bo_va: requested bo_va + * + * Remove @bo_va->bo from the requested vm (cayman+). + * Remove @bo_va->bo from the list of bos associated with the bo_va->vm and + * remove the ptes for @bo_va in the page table. + * Returns 0 for success. + * + * Object have to be reserved! + */ +int radeon_vm_bo_rmv(struct radeon_device *rdev, + struct radeon_bo_va *bo_va) +{ + int r = 0; + + mutex_lock(&rdev->vm_manager.lock); + mutex_lock(&bo_va->vm->mutex); + if (bo_va->soffset) { + r = radeon_vm_bo_update(rdev, bo_va->vm, bo_va->bo, NULL); + } + mutex_unlock(&rdev->vm_manager.lock); + list_del(&bo_va->vm_list); + mutex_unlock(&bo_va->vm->mutex); + list_del(&bo_va->bo_list); + + kfree(bo_va); + return r; +} + +/** + * radeon_vm_bo_invalidate - mark the bo as invalid + * + * @rdev: radeon_device pointer + * @vm: requested vm + * @bo: radeon buffer object + * + * Mark @bo as invalid (cayman+). + */ +void radeon_vm_bo_invalidate(struct radeon_device *rdev, + struct radeon_bo *bo) +{ + struct radeon_bo_va *bo_va; + + list_for_each_entry(bo_va, &bo->va, bo_list) { + bo_va->valid = false; + } +} + +/** + * radeon_vm_init - initialize a vm instance + * + * @rdev: radeon_device pointer + * @vm: requested vm + * + * Init @vm fields (cayman+). + */ +void radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm) +{ + vm->id = 0; + vm->fence = NULL; + vm->last_flush = NULL; + vm->last_id_use = NULL; + mutex_init(&vm->mutex); + INIT_LIST_HEAD(&vm->list); + INIT_LIST_HEAD(&vm->va); +} + +/** + * radeon_vm_fini - tear down a vm instance + * + * @rdev: radeon_device pointer + * @vm: requested vm + * + * Tear down @vm (cayman+). + * Unbind the VM and remove all bos from the vm bo list + */ +void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm) +{ + struct radeon_bo_va *bo_va, *tmp; + int r; + + mutex_lock(&rdev->vm_manager.lock); + mutex_lock(&vm->mutex); + radeon_vm_free_pt(rdev, vm); + mutex_unlock(&rdev->vm_manager.lock); + + if (!list_empty(&vm->va)) { + dev_err(rdev->dev, "still active bo inside vm\n"); + } + list_for_each_entry_safe(bo_va, tmp, &vm->va, vm_list) { + list_del_init(&bo_va->vm_list); + r = radeon_bo_reserve(bo_va->bo, false); + if (!r) { + list_del_init(&bo_va->bo_list); + radeon_bo_unreserve(bo_va->bo); + kfree(bo_va); + } + } + radeon_fence_unref(&vm->fence); + radeon_fence_unref(&vm->last_flush); + radeon_fence_unref(&vm->last_id_use); + mutex_unlock(&vm->mutex); +}
From: Christian König christian.koenig@amd.com
Signed-off-by: Christian König christian.koenig@amd.com Reviewed-by: Alex Deucher alexander.deucher@amd.com --- drivers/gpu/drm/radeon/radeon.h | 3 +++ drivers/gpu/drm/radeon/radeon_cs.c | 4 ---- drivers/gpu/drm/radeon/radeon_ring.c | 16 +++++++--------- drivers/gpu/drm/radeon/radeon_vm.c | 31 ++++++++++++++++++++++++++++--- 4 files changed, 38 insertions(+), 16 deletions(-)
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 7337b35..0c03bcb 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -2796,6 +2796,9 @@ int radeon_vm_alloc_pt(struct radeon_device *rdev, struct radeon_vm *vm); void radeon_vm_add_to_lru(struct radeon_device *rdev, struct radeon_vm *vm); struct radeon_fence *radeon_vm_grab_id(struct radeon_device *rdev, struct radeon_vm *vm, int ring); +void radeon_vm_flush(struct radeon_device *rdev, + struct radeon_vm *vm, + int ring); void radeon_vm_fence(struct radeon_device *rdev, struct radeon_vm *vm, struct radeon_fence *fence); diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c index f28a8d8..0b3ba38 100644 --- a/drivers/gpu/drm/radeon/radeon_cs.c +++ b/drivers/gpu/drm/radeon/radeon_cs.c @@ -430,10 +430,6 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev, r = radeon_ib_schedule(rdev, &parser->ib, NULL); }
- if (!r) { - radeon_vm_fence(rdev, vm, parser->ib.fence); - } - out: radeon_vm_add_to_lru(rdev, vm); mutex_unlock(&vm->mutex); diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c index fa14011..665591a 100644 --- a/drivers/gpu/drm/radeon/radeon_ring.c +++ b/drivers/gpu/drm/radeon/radeon_ring.c @@ -153,11 +153,9 @@ int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib, return r; }
- /* if we can't remember our last VM flush then flush now! */ - /* XXX figure out why we have to flush for every IB */ - if (ib->vm /*&& !ib->vm->last_flush*/) { - radeon_ring_vm_flush(rdev, ib->ring, ib->vm); - } + if (ib->vm) + radeon_vm_flush(rdev, ib->vm, ib->ring); + if (const_ib) { radeon_ring_ib_execute(rdev, const_ib->ring, const_ib); radeon_semaphore_free(rdev, &const_ib->semaphore, NULL); @@ -172,10 +170,10 @@ int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib, if (const_ib) { const_ib->fence = radeon_fence_ref(ib->fence); } - /* we just flushed the VM, remember that */ - if (ib->vm && !ib->vm->last_flush) { - ib->vm->last_flush = radeon_fence_ref(ib->fence); - } + + if (ib->vm) + radeon_vm_fence(rdev, ib->vm, ib->fence); + radeon_ring_unlock_commit(rdev, ring); return 0; } diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c index 433b1eb..5160176 100644 --- a/drivers/gpu/drm/radeon/radeon_vm.c +++ b/drivers/gpu/drm/radeon/radeon_vm.c @@ -379,6 +379,27 @@ struct radeon_fence *radeon_vm_grab_id(struct radeon_device *rdev, }
/** + * radeon_vm_flush - hardware flush the vm + * + * @rdev: radeon_device pointer + * @vm: vm we want to flush + * @ring: ring to use for flush + * + * Flush the vm (cayman+). + * + * Global and local mutex must be locked! + */ +void radeon_vm_flush(struct radeon_device *rdev, + struct radeon_vm *vm, + int ring) +{ + /* if we can't remember our last VM flush then flush now! */ + /* XXX figure out why we have to flush all the time */ + if (!vm->last_flush || true) + radeon_ring_vm_flush(rdev, ring, vm); +} + +/** * radeon_vm_fence - remember fence for vm * * @rdev: radeon_device pointer @@ -394,14 +415,18 @@ void radeon_vm_fence(struct radeon_device *rdev, struct radeon_vm *vm, struct radeon_fence *fence) { - radeon_fence_unref(&rdev->vm_manager.active[vm->id]); - rdev->vm_manager.active[vm->id] = radeon_fence_ref(fence); - radeon_fence_unref(&vm->fence); vm->fence = radeon_fence_ref(fence);
+ radeon_fence_unref(&rdev->vm_manager.active[vm->id]); + rdev->vm_manager.active[vm->id] = radeon_fence_ref(fence); + radeon_fence_unref(&vm->last_id_use); vm->last_id_use = radeon_fence_ref(fence); + + /* we just flushed the VM, remember that */ + if (!vm->last_flush) + vm->last_flush = radeon_fence_ref(fence); }
/**
From: Christian König christian.koenig@amd.com
No need to make it more complicated than necessary, just allocate the page tables as normal BO and flush whenever the address change.
v2: update comments and function name v3: squash bug fixes, page directory and tables patch
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/radeon/radeon.h | 23 +- drivers/gpu/drm/radeon/radeon_cs.c | 45 +-- drivers/gpu/drm/radeon/radeon_device.c | 1 - drivers/gpu/drm/radeon/radeon_kms.c | 4 +- drivers/gpu/drm/radeon/radeon_vm.c | 494 +++++++++++++++------------------ 5 files changed, 272 insertions(+), 295 deletions(-)
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 0c03bcb..259cd20 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -854,17 +854,22 @@ struct radeon_mec { #define R600_PTE_READABLE (1 << 5) #define R600_PTE_WRITEABLE (1 << 6)
+struct radeon_vm_pt { + struct radeon_bo *bo; + uint64_t addr; +}; + struct radeon_vm { - struct list_head list; struct list_head va; unsigned id;
/* contains the page directory */ - struct radeon_sa_bo *page_directory; + struct radeon_bo *page_directory; uint64_t pd_gpu_addr; + unsigned max_pde_used;
/* array of page tables, one for each page directory entry */ - struct radeon_sa_bo **page_tables; + struct radeon_vm_pt *page_tables;
struct mutex mutex; /* last fence for cs using this vm */ @@ -877,9 +882,7 @@ struct radeon_vm {
struct radeon_vm_manager { struct mutex lock; - struct list_head lru_vm; struct radeon_fence *active[RADEON_NUM_VM]; - struct radeon_sa_manager sa_manager; uint32_t max_pfn; /* number of VMIDs */ unsigned nvm; @@ -1008,6 +1011,7 @@ struct radeon_cs_parser { unsigned nrelocs; struct radeon_cs_reloc *relocs; struct radeon_cs_reloc **relocs_ptr; + struct radeon_bo_list *vm_bos; struct list_head validated; unsigned dma_reloc_idx; /* indices of various chunks */ @@ -2790,10 +2794,11 @@ extern void radeon_program_register_sequence(struct radeon_device *rdev, */ int radeon_vm_manager_init(struct radeon_device *rdev); void radeon_vm_manager_fini(struct radeon_device *rdev); -void radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm); +int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm); void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm); -int radeon_vm_alloc_pt(struct radeon_device *rdev, struct radeon_vm *vm); -void radeon_vm_add_to_lru(struct radeon_device *rdev, struct radeon_vm *vm); +struct radeon_bo_list *radeon_vm_get_bos(struct radeon_device *rdev, + struct radeon_vm *vm, + struct list_head *head); struct radeon_fence *radeon_vm_grab_id(struct radeon_device *rdev, struct radeon_vm *vm, int ring); void radeon_vm_flush(struct radeon_device *rdev, @@ -2803,6 +2808,8 @@ void radeon_vm_fence(struct radeon_device *rdev, struct radeon_vm *vm, struct radeon_fence *fence); uint64_t radeon_vm_map_gart(struct radeon_device *rdev, uint64_t addr); +int radeon_vm_update_page_directory(struct radeon_device *rdev, + struct radeon_vm *vm); int radeon_vm_bo_update(struct radeon_device *rdev, struct radeon_vm *vm, struct radeon_bo *bo, diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c index 0b3ba38..dd3c71d 100644 --- a/drivers/gpu/drm/radeon/radeon_cs.c +++ b/drivers/gpu/drm/radeon/radeon_cs.c @@ -109,6 +109,11 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p) radeon_bo_list_add_object(&p->relocs[i].lobj, &p->validated); } + + if (p->cs_flags & RADEON_CS_USE_VM) + p->vm_bos = radeon_vm_get_bos(p->rdev, p->ib.vm, + &p->validated); + return radeon_bo_list_validate(&p->ticket, &p->validated, p->ring); }
@@ -320,6 +325,7 @@ static void radeon_cs_parser_fini(struct radeon_cs_parser *parser, int error, bo kfree(parser->track); kfree(parser->relocs); kfree(parser->relocs_ptr); + kfree(parser->vm_bos); for (i = 0; i < parser->nchunks; i++) drm_free_large(parser->chunks[i].kdata); kfree(parser->chunks); @@ -359,24 +365,32 @@ static int radeon_cs_ib_chunk(struct radeon_device *rdev, return r; }
-static int radeon_bo_vm_update_pte(struct radeon_cs_parser *parser, +static int radeon_bo_vm_update_pte(struct radeon_cs_parser *p, struct radeon_vm *vm) { - struct radeon_device *rdev = parser->rdev; - struct radeon_bo_list *lobj; - struct radeon_bo *bo; - int r; + struct radeon_device *rdev = p->rdev; + int i, r;
- r = radeon_vm_bo_update(rdev, vm, rdev->ring_tmp_bo.bo, &rdev->ring_tmp_bo.bo->tbo.mem); - if (r) { + r = radeon_vm_update_page_directory(rdev, vm); + if (r) return r; - } - list_for_each_entry(lobj, &parser->validated, tv.head) { - bo = lobj->bo; - r = radeon_vm_bo_update(parser->rdev, vm, bo, &bo->tbo.mem); - if (r) { + + r = radeon_vm_bo_update(rdev, vm, rdev->ring_tmp_bo.bo, + &rdev->ring_tmp_bo.bo->tbo.mem); + if (r) + return r; + + for (i = 0; i < p->nrelocs; i++) { + struct radeon_bo *bo; + + /* ignore duplicates */ + if (p->relocs_ptr[i] != &p->relocs[i]) + continue; + + bo = p->relocs[i].robj; + r = radeon_vm_bo_update(rdev, vm, bo, &bo->tbo.mem); + if (r) return r; - } } return 0; } @@ -410,10 +424,6 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev,
mutex_lock(&rdev->vm_manager.lock); mutex_lock(&vm->mutex); - r = radeon_vm_alloc_pt(rdev, vm); - if (r) { - goto out; - } r = radeon_bo_vm_update_pte(parser, vm); if (r) { goto out; @@ -431,7 +441,6 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev, }
out: - radeon_vm_add_to_lru(rdev, vm); mutex_unlock(&vm->mutex); mutex_unlock(&rdev->vm_manager.lock); return r; diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index fa7841b..e58dbab 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -1198,7 +1198,6 @@ int radeon_device_init(struct radeon_device *rdev, * Max GPUVM size for cayman and SI is 40 bits. */ rdev->vm_manager.max_pfn = 1 << 20; - INIT_LIST_HEAD(&rdev->vm_manager.lru_vm);
/* Set asic functions */ r = radeon_asic_init(rdev); diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c index eea70b0..980bea8 100644 --- a/drivers/gpu/drm/radeon/radeon_kms.c +++ b/drivers/gpu/drm/radeon/radeon_kms.c @@ -544,7 +544,9 @@ int radeon_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) return -ENOMEM; }
- radeon_vm_init(rdev, &fpriv->vm); + r = radeon_vm_init(rdev, &fpriv->vm); + if (r) + return r;
r = radeon_bo_reserve(rdev->ring_tmp_bo.bo, false); if (r) diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c index 5160176..99fc229 100644 --- a/drivers/gpu/drm/radeon/radeon_vm.c +++ b/drivers/gpu/drm/radeon/radeon_vm.c @@ -84,85 +84,19 @@ static unsigned radeon_vm_directory_size(struct radeon_device *rdev) */ int radeon_vm_manager_init(struct radeon_device *rdev) { - struct radeon_vm *vm; - struct radeon_bo_va *bo_va; int r; - unsigned size;
if (!rdev->vm_manager.enabled) { - /* allocate enough for 2 full VM pts */ - size = radeon_vm_directory_size(rdev); - size += rdev->vm_manager.max_pfn * 8; - size *= 2; - r = radeon_sa_bo_manager_init(rdev, &rdev->vm_manager.sa_manager, - RADEON_GPU_PAGE_ALIGN(size), - RADEON_VM_PTB_ALIGN_SIZE, - RADEON_GEM_DOMAIN_VRAM); - if (r) { - dev_err(rdev->dev, "failed to allocate vm bo (%dKB)\n", - (rdev->vm_manager.max_pfn * 8) >> 10); - return r; - } - r = radeon_asic_vm_init(rdev); if (r) return r;
rdev->vm_manager.enabled = true; - - r = radeon_sa_bo_manager_start(rdev, &rdev->vm_manager.sa_manager); - if (r) - return r; - } - - /* restore page table */ - list_for_each_entry(vm, &rdev->vm_manager.lru_vm, list) { - if (vm->page_directory == NULL) - continue; - - list_for_each_entry(bo_va, &vm->va, vm_list) { - bo_va->valid = false; - } } return 0; }
/** - * radeon_vm_free_pt - free the page table for a specific vm - * - * @rdev: radeon_device pointer - * @vm: vm to unbind - * - * Free the page table of a specific vm (cayman+). - * - * Global and local mutex must be lock! - */ -static void radeon_vm_free_pt(struct radeon_device *rdev, - struct radeon_vm *vm) -{ - struct radeon_bo_va *bo_va; - int i; - - if (!vm->page_directory) - return; - - list_del_init(&vm->list); - radeon_sa_bo_free(rdev, &vm->page_directory, vm->fence); - - list_for_each_entry(bo_va, &vm->va, vm_list) { - bo_va->valid = false; - } - - if (vm->page_tables == NULL) - return; - - for (i = 0; i < radeon_vm_num_pdes(rdev); i++) - radeon_sa_bo_free(rdev, &vm->page_tables[i], vm->fence); - - kfree(vm->page_tables); -} - -/** * radeon_vm_manager_fini - tear down the vm manager * * @rdev: radeon_device pointer @@ -171,155 +105,61 @@ static void radeon_vm_free_pt(struct radeon_device *rdev, */ void radeon_vm_manager_fini(struct radeon_device *rdev) { - struct radeon_vm *vm, *tmp; int i;
if (!rdev->vm_manager.enabled) return;
mutex_lock(&rdev->vm_manager.lock); - /* free all allocated page tables */ - list_for_each_entry_safe(vm, tmp, &rdev->vm_manager.lru_vm, list) { - mutex_lock(&vm->mutex); - radeon_vm_free_pt(rdev, vm); - mutex_unlock(&vm->mutex); - } - for (i = 0; i < RADEON_NUM_VM; ++i) { + for (i = 0; i < RADEON_NUM_VM; ++i) radeon_fence_unref(&rdev->vm_manager.active[i]); - } radeon_asic_vm_fini(rdev); - mutex_unlock(&rdev->vm_manager.lock); - - radeon_sa_bo_manager_suspend(rdev, &rdev->vm_manager.sa_manager); - radeon_sa_bo_manager_fini(rdev, &rdev->vm_manager.sa_manager); rdev->vm_manager.enabled = false; + mutex_unlock(&rdev->vm_manager.lock); }
/** - * radeon_vm_evict - evict page table to make room for new one - * - * @rdev: radeon_device pointer - * @vm: VM we want to allocate something for + * radeon_vm_get_bos - add the vm BOs to a validation list * - * Evict a VM from the lru, making sure that it isn't @vm. (cayman+). - * Returns 0 for success, -ENOMEM for failure. + * @vm: vm providing the BOs + * @head: head of validation list * - * Global and local mutex must be locked! + * Add the page directory to the list of BOs to + * validate for command submission (cayman+). */ -static int radeon_vm_evict(struct radeon_device *rdev, struct radeon_vm *vm) +struct radeon_bo_list *radeon_vm_get_bos(struct radeon_device *rdev, + struct radeon_vm *vm, + struct list_head *head) { - struct radeon_vm *vm_evict; + struct radeon_bo_list *list; + unsigned i, idx, size;
- if (list_empty(&rdev->vm_manager.lru_vm)) - return -ENOMEM; - - vm_evict = list_first_entry(&rdev->vm_manager.lru_vm, - struct radeon_vm, list); - if (vm_evict == vm) - return -ENOMEM; - - mutex_lock(&vm_evict->mutex); - radeon_vm_free_pt(rdev, vm_evict); - mutex_unlock(&vm_evict->mutex); - return 0; -} - -/** - * radeon_vm_alloc_pt - allocates a page table for a VM - * - * @rdev: radeon_device pointer - * @vm: vm to bind - * - * Allocate a page table for the requested vm (cayman+). - * Returns 0 for success, error for failure. - * - * Global and local mutex must be locked! - */ -int radeon_vm_alloc_pt(struct radeon_device *rdev, struct radeon_vm *vm) -{ - unsigned pd_size, pd_entries, pts_size; - struct radeon_ib ib; - int r; - - if (vm == NULL) { - return -EINVAL; - } - - if (vm->page_directory != NULL) { - return 0; - } - - pd_size = radeon_vm_directory_size(rdev); - pd_entries = radeon_vm_num_pdes(rdev); - -retry: - r = radeon_sa_bo_new(rdev, &rdev->vm_manager.sa_manager, - &vm->page_directory, pd_size, - RADEON_VM_PTB_ALIGN_SIZE, false); - if (r == -ENOMEM) { - r = radeon_vm_evict(rdev, vm); - if (r) - return r; - goto retry; - - } else if (r) { - return r; - } - - vm->pd_gpu_addr = radeon_sa_bo_gpu_addr(vm->page_directory); - - /* Initially clear the page directory */ - r = radeon_ib_get(rdev, R600_RING_TYPE_DMA_INDEX, &ib, - NULL, pd_entries * 2 + 64); - if (r) { - radeon_sa_bo_free(rdev, &vm->page_directory, vm->fence); - return r; - } - - ib.length_dw = 0; + size = (radeon_vm_num_pdes(rdev) + 1) * sizeof(struct radeon_bo_list); + list = kmalloc(size, GFP_KERNEL); + if (!list) + return NULL;
- radeon_asic_vm_set_page(rdev, &ib, vm->pd_gpu_addr, - 0, pd_entries, 0, 0); + /* add the vm page table to the list */ + list[0].bo = vm->page_directory; + list[0].written = 1; + list[0].domain = RADEON_GEM_DOMAIN_VRAM; + list[0].alt_domain = RADEON_GEM_DOMAIN_VRAM; + list[0].tv.bo = &vm->page_directory->tbo; + radeon_bo_list_add_object(&list[0], head);
- radeon_semaphore_sync_to(ib.semaphore, vm->fence); - r = radeon_ib_schedule(rdev, &ib, NULL); - if (r) { - radeon_ib_free(rdev, &ib); - radeon_sa_bo_free(rdev, &vm->page_directory, vm->fence); - return r; - } - radeon_fence_unref(&vm->fence); - vm->fence = radeon_fence_ref(ib.fence); - radeon_ib_free(rdev, &ib); - radeon_fence_unref(&vm->last_flush); - - /* allocate page table array */ - pts_size = radeon_vm_num_pdes(rdev) * sizeof(struct radeon_sa_bo *); - vm->page_tables = kzalloc(pts_size, GFP_KERNEL); + for (i = 0, idx = 1; i <= vm->max_pde_used; i++) { + if (!vm->page_tables[i].bo) + continue;
- if (vm->page_tables == NULL) { - DRM_ERROR("Cannot allocate memory for page table array\n"); - radeon_sa_bo_free(rdev, &vm->page_directory, vm->fence); - return -ENOMEM; + list[idx].bo = vm->page_tables[i].bo; + list[idx].written = 1; + list[idx].domain = RADEON_GEM_DOMAIN_VRAM; + list[idx].alt_domain = RADEON_GEM_DOMAIN_VRAM; + list[idx].tv.bo = &list[idx].bo->tbo; + radeon_bo_list_add_object(&list[idx++], head); }
- return 0; -} - -/** - * radeon_vm_add_to_lru - add VMs page table to LRU list - * - * @rdev: radeon_device pointer - * @vm: vm to add to LRU - * - * Add the allocated page table to the LRU list (cayman+). - * - * Global mutex must be locked! - */ -void radeon_vm_add_to_lru(struct radeon_device *rdev, struct radeon_vm *vm) -{ - list_del_init(&vm->list); - list_add_tail(&vm->list, &rdev->vm_manager.lru_vm); + return list; }
/** @@ -393,10 +233,14 @@ void radeon_vm_flush(struct radeon_device *rdev, struct radeon_vm *vm, int ring) { + uint64_t pd_addr = radeon_bo_gpu_offset(vm->page_directory); + /* if we can't remember our last VM flush then flush now! */ /* XXX figure out why we have to flush all the time */ - if (!vm->last_flush || true) + if (!vm->last_flush || true || pd_addr != vm->pd_gpu_addr) { + vm->pd_gpu_addr = pd_addr; radeon_ring_vm_flush(rdev, ring, vm); + } }
/** @@ -496,6 +340,63 @@ struct radeon_bo_va *radeon_vm_bo_add(struct radeon_device *rdev, }
/** + * radeon_vm_clear_bo - initially clear the page dir/table + * + * @rdev: radeon_device pointer + * @bo: bo to clear + */ +static int radeon_vm_clear_bo(struct radeon_device *rdev, + struct radeon_bo *bo) +{ + struct ttm_validate_buffer tv; + struct ww_acquire_ctx ticket; + struct list_head head; + struct radeon_ib ib; + unsigned entries; + uint64_t addr; + int r; + + memset(&tv, 0, sizeof(tv)); + tv.bo = &bo->tbo; + + INIT_LIST_HEAD(&head); + list_add(&tv.head, &head); + + r = ttm_eu_reserve_buffers(&ticket, &head); + if (r) + return r; + + r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false); + if (r) + goto error; + + addr = radeon_bo_gpu_offset(bo); + entries = radeon_bo_size(bo) / 8; + + r = radeon_ib_get(rdev, R600_RING_TYPE_DMA_INDEX, &ib, + NULL, entries * 2 + 64); + if (r) + goto error; + + ib.length_dw = 0; + + radeon_asic_vm_set_page(rdev, &ib, addr, 0, entries, 0, 0); + + r = radeon_ib_schedule(rdev, &ib, NULL); + if (r) + goto error; + + ttm_eu_fence_buffer_objects(&ticket, &head, ib.fence); + radeon_ib_free(rdev, &ib); + + return 0; + +error: + ttm_eu_backoff_reservation(&ticket, &head); + return r; +} + +/** * radeon_vm_bo_set_addr - set bos virtual address inside a vm * * @rdev: radeon_device pointer @@ -519,7 +420,8 @@ int radeon_vm_bo_set_addr(struct radeon_device *rdev, struct radeon_vm *vm = bo_va->vm; struct radeon_bo_va *tmp; struct list_head *head; - unsigned last_pfn; + unsigned last_pfn, pt_idx; + int r;
if (soffset) { /* make sure object fit at this offset */ @@ -570,8 +472,53 @@ int radeon_vm_bo_set_addr(struct radeon_device *rdev, bo_va->valid = false; list_move(&bo_va->vm_list, head);
+ soffset = (soffset / RADEON_GPU_PAGE_SIZE) >> RADEON_VM_BLOCK_SIZE; + eoffset = (eoffset / RADEON_GPU_PAGE_SIZE) >> RADEON_VM_BLOCK_SIZE; + + if (eoffset > vm->max_pde_used) + vm->max_pde_used = eoffset; + + radeon_bo_unreserve(bo_va->bo); + + /* walk over the address space and allocate the page tables */ + for (pt_idx = soffset; pt_idx <= eoffset; ++pt_idx) { + struct radeon_bo *pt; + + if (vm->page_tables[pt_idx].bo) + continue; + + /* drop mutex to allocate and clear page table */ + mutex_unlock(&vm->mutex); + + r = radeon_bo_create(rdev, RADEON_VM_PTE_COUNT * 8, + RADEON_GPU_PAGE_SIZE, false, + RADEON_GEM_DOMAIN_VRAM, NULL, &pt); + if (r) + return r; + + r = radeon_vm_clear_bo(rdev, pt); + if (r) { + radeon_bo_unref(&pt); + radeon_bo_reserve(bo_va->bo, false); + return r; + } + + /* aquire mutex again */ + mutex_lock(&vm->mutex); + if (vm->page_tables[pt_idx].bo) { + /* someone else allocated the pt in the meantime */ + mutex_unlock(&vm->mutex); + radeon_bo_unref(&pt); + mutex_lock(&vm->mutex); + continue; + } + + vm->page_tables[pt_idx].addr = 0; + vm->page_tables[pt_idx].bo = pt; + } + mutex_unlock(&vm->mutex); - return 0; + return radeon_bo_reserve(bo_va->bo, false); }
/** @@ -631,58 +578,53 @@ static uint32_t radeon_vm_page_flags(uint32_t flags) * * Global and local mutex must be locked! */ -static int radeon_vm_update_pdes(struct radeon_device *rdev, - struct radeon_vm *vm, - struct radeon_ib *ib, - uint64_t start, uint64_t end) +int radeon_vm_update_page_directory(struct radeon_device *rdev, + struct radeon_vm *vm) { static const uint32_t incr = RADEON_VM_PTE_COUNT * 8;
+ uint64_t pd_addr = radeon_bo_gpu_offset(vm->page_directory); uint64_t last_pde = ~0, last_pt = ~0; - unsigned count = 0; - uint64_t pt_idx; + unsigned count = 0, pt_idx, ndw; + struct radeon_ib ib; int r;
- start = (start / RADEON_GPU_PAGE_SIZE) >> RADEON_VM_BLOCK_SIZE; - end = (end / RADEON_GPU_PAGE_SIZE) >> RADEON_VM_BLOCK_SIZE; + /* padding, etc. */ + ndw = 64; + + /* assume the worst case */ + ndw += vm->max_pde_used * 12; + + /* update too big for an IB */ + if (ndw > 0xfffff) + return -ENOMEM; + + r = radeon_ib_get(rdev, R600_RING_TYPE_DMA_INDEX, &ib, NULL, ndw * 4); + if (r) + return r; + ib.length_dw = 0;
/* walk over the address space and update the page directory */ - for (pt_idx = start; pt_idx <= end; ++pt_idx) { + for (pt_idx = 0; pt_idx <= vm->max_pde_used; ++pt_idx) { + struct radeon_bo *bo = vm->page_tables[pt_idx].bo; uint64_t pde, pt;
- if (vm->page_tables[pt_idx]) + if (bo == NULL) continue;
-retry: - r = radeon_sa_bo_new(rdev, &rdev->vm_manager.sa_manager, - &vm->page_tables[pt_idx], - RADEON_VM_PTE_COUNT * 8, - RADEON_GPU_PAGE_SIZE, false); - - if (r == -ENOMEM) { - r = radeon_vm_evict(rdev, vm); - if (r) - return r; - goto retry; - } else if (r) { - return r; - } - - pde = vm->pd_gpu_addr + pt_idx * 8; - - pt = radeon_sa_bo_gpu_addr(vm->page_tables[pt_idx]); + pt = radeon_bo_gpu_offset(bo); + if (vm->page_tables[pt_idx].addr == pt) + continue; + vm->page_tables[pt_idx].addr = pt;
+ pde = pd_addr + pt_idx * 8; if (((last_pde + 8 * count) != pde) || ((last_pt + incr * count) != pt)) {
if (count) { - radeon_asic_vm_set_page(rdev, ib, last_pde, + radeon_asic_vm_set_page(rdev, &ib, last_pde, last_pt, count, incr, R600_PTE_VALID); - - count *= RADEON_VM_PTE_COUNT; - radeon_asic_vm_set_page(rdev, ib, last_pt, 0, - count, 0, 0); }
count = 1; @@ -693,14 +635,22 @@ retry: } }
- if (count) { - radeon_asic_vm_set_page(rdev, ib, last_pde, last_pt, count, + if (count) + radeon_asic_vm_set_page(rdev, &ib, last_pde, last_pt, count, incr, R600_PTE_VALID);
- count *= RADEON_VM_PTE_COUNT; - radeon_asic_vm_set_page(rdev, ib, last_pt, 0, - count, 0, 0); + if (ib.length_dw != 0) { + radeon_semaphore_sync_to(ib.semaphore, vm->last_id_use); + r = radeon_ib_schedule(rdev, &ib, NULL); + if (r) { + radeon_ib_free(rdev, &ib); + return r; + } + radeon_fence_unref(&vm->fence); + vm->fence = radeon_fence_ref(ib.fence); + radeon_fence_unref(&vm->last_flush); } + radeon_ib_free(rdev, &ib);
return 0; } @@ -745,7 +695,7 @@ static void radeon_vm_update_ptes(struct radeon_device *rdev, else nptes = RADEON_VM_PTE_COUNT - (addr & mask);
- pte = radeon_sa_bo_gpu_addr(vm->page_tables[pt_idx]); + pte = radeon_bo_gpu_offset(vm->page_tables[pt_idx].bo); pte += (addr & mask) * 8;
if ((last_pte + 8 * count) != pte) { @@ -795,14 +745,10 @@ int radeon_vm_bo_update(struct radeon_device *rdev, { struct radeon_ib ib; struct radeon_bo_va *bo_va; - unsigned nptes, npdes, ndw; + unsigned nptes, ndw; uint64_t addr; int r;
- /* nothing to do if vm isn't bound */ - if (vm->page_directory == NULL) - return 0; - bo_va = radeon_vm_bo_find(vm, bo); if (bo_va == NULL) { dev_err(rdev->dev, "bo %p not in vm %p\n", bo, vm); @@ -840,9 +786,6 @@ int radeon_vm_bo_update(struct radeon_device *rdev,
nptes = radeon_bo_ngpu_pages(bo);
- /* assume two extra pdes in case the mapping overlaps the borders */ - npdes = (nptes >> RADEON_VM_BLOCK_SIZE) + 2; - /* padding, etc. */ ndw = 64;
@@ -857,15 +800,6 @@ int radeon_vm_bo_update(struct radeon_device *rdev, /* reserve space for pte addresses */ ndw += nptes * 2;
- /* reserve space for one header for every 2k dwords */ - ndw += (npdes >> 11) * 4; - - /* reserve space for pde addresses */ - ndw += npdes * 2; - - /* reserve space for clearing new page tables */ - ndw += npdes * 2 * RADEON_VM_PTE_COUNT; - /* update too big for an IB */ if (ndw > 0xfffff) return -ENOMEM; @@ -875,12 +809,6 @@ int radeon_vm_bo_update(struct radeon_device *rdev, return r; ib.length_dw = 0;
- r = radeon_vm_update_pdes(rdev, vm, &ib, bo_va->soffset, bo_va->eoffset); - if (r) { - radeon_ib_free(rdev, &ib); - return r; - } - radeon_vm_update_ptes(rdev, vm, &ib, bo_va->soffset, bo_va->eoffset, addr, radeon_vm_page_flags(bo_va->flags));
@@ -957,15 +885,43 @@ void radeon_vm_bo_invalidate(struct radeon_device *rdev, * * Init @vm fields (cayman+). */ -void radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm) +int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm) { + unsigned pd_size, pd_entries, pts_size; + int r; + vm->id = 0; vm->fence = NULL; vm->last_flush = NULL; vm->last_id_use = NULL; mutex_init(&vm->mutex); - INIT_LIST_HEAD(&vm->list); INIT_LIST_HEAD(&vm->va); + + pd_size = radeon_vm_directory_size(rdev); + pd_entries = radeon_vm_num_pdes(rdev); + + /* allocate page table array */ + pts_size = pd_entries * sizeof(struct radeon_vm_pt); + vm->page_tables = kzalloc(pts_size, GFP_KERNEL); + if (vm->page_tables == NULL) { + DRM_ERROR("Cannot allocate memory for page table array\n"); + return -ENOMEM; + } + + r = radeon_bo_create(rdev, pd_size, RADEON_VM_PTB_ALIGN_SIZE, false, + RADEON_GEM_DOMAIN_VRAM, NULL, + &vm->page_directory); + if (r) + return r; + + r = radeon_vm_clear_bo(rdev, vm->page_directory); + if (r) { + radeon_bo_unref(&vm->page_directory); + vm->page_directory = NULL; + return r; + } + + return 0; }
/** @@ -980,12 +936,7 @@ void radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm) void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm) { struct radeon_bo_va *bo_va, *tmp; - int r; - - mutex_lock(&rdev->vm_manager.lock); - mutex_lock(&vm->mutex); - radeon_vm_free_pt(rdev, vm); - mutex_unlock(&rdev->vm_manager.lock); + int i, r;
if (!list_empty(&vm->va)) { dev_err(rdev->dev, "still active bo inside vm\n"); @@ -999,8 +950,17 @@ void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm) kfree(bo_va); } } + + + for (i = 0; i < radeon_vm_num_pdes(rdev); i++) + radeon_bo_unref(&vm->page_tables[i].bo); + kfree(vm->page_tables); + + radeon_bo_unref(&vm->page_directory); + radeon_fence_unref(&vm->fence); radeon_fence_unref(&vm->last_flush); radeon_fence_unref(&vm->last_id_use); - mutex_unlock(&vm->mutex); + + mutex_destroy(&vm->mutex); }
From: Christian König christian.koenig@amd.com
Not needed any more.
Signed-off-by: Christian König christian.koenig@amd.com Reviewed-by: Alex Deucher alexander.deucher@amd.com --- drivers/gpu/drm/radeon/radeon.h | 1 - drivers/gpu/drm/radeon/radeon_cs.c | 4 ---- drivers/gpu/drm/radeon/radeon_device.c | 3 +-- drivers/gpu/drm/radeon/radeon_ring.c | 7 +++++++ drivers/gpu/drm/radeon/radeon_vm.c | 10 +++------- 5 files changed, 11 insertions(+), 14 deletions(-)
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 259cd20..e5fc015 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -881,7 +881,6 @@ struct radeon_vm { };
struct radeon_vm_manager { - struct mutex lock; struct radeon_fence *active[RADEON_NUM_VM]; uint32_t max_pfn; /* number of VMIDs */ diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c index dd3c71d..f9a632a 100644 --- a/drivers/gpu/drm/radeon/radeon_cs.c +++ b/drivers/gpu/drm/radeon/radeon_cs.c @@ -422,7 +422,6 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev, if (parser->ring == R600_RING_TYPE_UVD_INDEX) radeon_uvd_note_usage(rdev);
- mutex_lock(&rdev->vm_manager.lock); mutex_lock(&vm->mutex); r = radeon_bo_vm_update_pte(parser, vm); if (r) { @@ -430,8 +429,6 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev, } radeon_cs_sync_rings(parser); radeon_semaphore_sync_to(parser->ib.semaphore, vm->fence); - radeon_semaphore_sync_to(parser->ib.semaphore, - radeon_vm_grab_id(rdev, vm, parser->ring));
if ((rdev->family >= CHIP_TAHITI) && (parser->chunk_const_ib_idx != -1)) { @@ -442,7 +439,6 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev,
out: mutex_unlock(&vm->mutex); - mutex_unlock(&rdev->vm_manager.lock); return r; }
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index e58dbab..7db44de 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -1191,8 +1191,7 @@ int radeon_device_init(struct radeon_device *rdev, r = radeon_gem_init(rdev); if (r) return r; - /* initialize vm here */ - mutex_init(&rdev->vm_manager.lock); + /* Adjust VM size here. * Currently set to 4GB ((1 << 20) 4k pages). * Max GPUVM size for cayman and SI is 40 bits. diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c index 665591a..5321e24 100644 --- a/drivers/gpu/drm/radeon/radeon_ring.c +++ b/drivers/gpu/drm/radeon/radeon_ring.c @@ -145,6 +145,13 @@ int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib, return r; }
+ /* grab a vm id if necessary */ + if (ib->vm) { + struct radeon_fence *vm_id_fence; + vm_id_fence = radeon_vm_grab_id(rdev, ib->vm, ib->ring); + radeon_semaphore_sync_to(ib->semaphore, vm_id_fence); + } + /* sync with other rings */ r = radeon_semaphore_sync_rings(rdev, ib->semaphore, ib->ring); if (r) { diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c index 99fc229..22fe985 100644 --- a/drivers/gpu/drm/radeon/radeon_vm.c +++ b/drivers/gpu/drm/radeon/radeon_vm.c @@ -110,12 +110,10 @@ void radeon_vm_manager_fini(struct radeon_device *rdev) if (!rdev->vm_manager.enabled) return;
- mutex_lock(&rdev->vm_manager.lock); for (i = 0; i < RADEON_NUM_VM; ++i) radeon_fence_unref(&rdev->vm_manager.active[i]); radeon_asic_vm_fini(rdev); rdev->vm_manager.enabled = false; - mutex_unlock(&rdev->vm_manager.lock); }
/** @@ -736,7 +734,7 @@ static void radeon_vm_update_ptes(struct radeon_device *rdev, * Fill in the page table entries for @bo (cayman+). * Returns 0 for success, -EINVAL for failure. * - * Object have to be reserved & global and local mutex must be locked! + * Object have to be reserved and mutex must be locked! */ int radeon_vm_bo_update(struct radeon_device *rdev, struct radeon_vm *vm, @@ -844,12 +842,10 @@ int radeon_vm_bo_rmv(struct radeon_device *rdev, { int r = 0;
- mutex_lock(&rdev->vm_manager.lock); mutex_lock(&bo_va->vm->mutex); - if (bo_va->soffset) { + if (bo_va->soffset) r = radeon_vm_bo_update(rdev, bo_va->vm, bo_va->bo, NULL); - } - mutex_unlock(&rdev->vm_manager.lock); + list_del(&bo_va->vm_list); mutex_unlock(&bo_va->vm->mutex); list_del(&bo_va->bo_list);
From: Christian König christian.koenig@amd.com
Not needed any more.
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/radeon/radeon_object.h | 2 +- drivers/gpu/drm/radeon/radeon_ring.c | 2 +- drivers/gpu/drm/radeon/radeon_sa.c | 7 ++----- drivers/gpu/drm/radeon/radeon_semaphore.c | 2 +- 4 files changed, 5 insertions(+), 8 deletions(-)
diff --git a/drivers/gpu/drm/radeon/radeon_object.h b/drivers/gpu/drm/radeon/radeon_object.h index 209b111..211d29e 100644 --- a/drivers/gpu/drm/radeon/radeon_object.h +++ b/drivers/gpu/drm/radeon/radeon_object.h @@ -181,7 +181,7 @@ extern int radeon_sa_bo_manager_suspend(struct radeon_device *rdev, extern int radeon_sa_bo_new(struct radeon_device *rdev, struct radeon_sa_manager *sa_manager, struct radeon_sa_bo **sa_bo, - unsigned size, unsigned align, bool block); + unsigned size, unsigned align); extern void radeon_sa_bo_free(struct radeon_device *rdev, struct radeon_sa_bo **sa_bo, struct radeon_fence *fence); diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c index 5321e24..8b0dfdd 100644 --- a/drivers/gpu/drm/radeon/radeon_ring.c +++ b/drivers/gpu/drm/radeon/radeon_ring.c @@ -63,7 +63,7 @@ int radeon_ib_get(struct radeon_device *rdev, int ring, { int r;
- r = radeon_sa_bo_new(rdev, &rdev->ring_tmp_bo, &ib->sa_bo, size, 256, true); + r = radeon_sa_bo_new(rdev, &rdev->ring_tmp_bo, &ib->sa_bo, size, 256); if (r) { dev_err(rdev->dev, "failed to get a new IB (%d)\n", r); return r; diff --git a/drivers/gpu/drm/radeon/radeon_sa.c b/drivers/gpu/drm/radeon/radeon_sa.c index c062580..adcf3e2 100644 --- a/drivers/gpu/drm/radeon/radeon_sa.c +++ b/drivers/gpu/drm/radeon/radeon_sa.c @@ -312,7 +312,7 @@ static bool radeon_sa_bo_next_hole(struct radeon_sa_manager *sa_manager, int radeon_sa_bo_new(struct radeon_device *rdev, struct radeon_sa_manager *sa_manager, struct radeon_sa_bo **sa_bo, - unsigned size, unsigned align, bool block) + unsigned size, unsigned align) { struct radeon_fence *fences[RADEON_NUM_RINGS]; unsigned tries[RADEON_NUM_RINGS]; @@ -353,14 +353,11 @@ int radeon_sa_bo_new(struct radeon_device *rdev, r = radeon_fence_wait_any(rdev, fences, false); spin_lock(&sa_manager->wq.lock); /* if we have nothing to wait for block */ - if (r == -ENOENT && block) { + if (r == -ENOENT) { r = wait_event_interruptible_locked( sa_manager->wq, radeon_sa_event(sa_manager, size, align) ); - - } else if (r == -ENOENT) { - r = -ENOMEM; }
} while (!r); diff --git a/drivers/gpu/drm/radeon/radeon_semaphore.c b/drivers/gpu/drm/radeon/radeon_semaphore.c index 6140af6..dbd6bcd 100644 --- a/drivers/gpu/drm/radeon/radeon_semaphore.c +++ b/drivers/gpu/drm/radeon/radeon_semaphore.c @@ -42,7 +42,7 @@ int radeon_semaphore_create(struct radeon_device *rdev, return -ENOMEM; } r = radeon_sa_bo_new(rdev, &rdev->ring_tmp_bo, &(*semaphore)->sa_bo, - 8 * RADEON_NUM_SYNCS, 8, true); + 8 * RADEON_NUM_SYNCS, 8); if (r) { kfree(*semaphore); *semaphore = NULL;
dri-devel@lists.freedesktop.org