Second and hopefully last round for this patchset.
v2: Fix suspend/resume, and incorporate Jeromes comments.
Cheers, Christian.
From: Jerome Glisse jglisse@redhat.com
Virtual address need to be fenced to know when we can safely remove it. This patch also properly clear the pagetable. Previously it was serouisly broken.
Kernel 3.5/3.4 need a similar patch but adapted for difference in mutex locking.
v2: For to update pagetable when unbinding bo (don't bailout if bo_va->valid is true). v3: Add kernel 3.5/3.4 comment. v4: Fix compilation warnings.
Signed-off-by: Jerome Glisse jglisse@redhat.com Reviewed-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/radeon/radeon.h | 1 + drivers/gpu/drm/radeon/radeon_cs.c | 32 +++++++++++++++++++++++++++++--- drivers/gpu/drm/radeon/radeon_gart.c | 24 ++++++++++++++++++++++-- drivers/gpu/drm/radeon/radeon_gem.c | 13 ++----------- drivers/gpu/drm/radeon/radeon_object.c | 6 +----- 5 files changed, 55 insertions(+), 21 deletions(-)
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 5431af2..8d75c65 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -300,6 +300,7 @@ struct radeon_bo_va { uint64_t soffset; uint64_t eoffset; uint32_t flags; + struct radeon_fence *fence; bool valid; };
diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c index 8a4c49e..b4a0db24 100644 --- a/drivers/gpu/drm/radeon/radeon_cs.c +++ b/drivers/gpu/drm/radeon/radeon_cs.c @@ -278,6 +278,30 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data) return 0; }
+static void radeon_bo_vm_fence_va(struct radeon_cs_parser *parser, + struct radeon_fence *fence) +{ + struct radeon_fpriv *fpriv = parser->filp->driver_priv; + struct radeon_vm *vm = &fpriv->vm; + struct radeon_bo_list *lobj; + + if (parser->chunk_ib_idx == -1) { + return; + } + if ((parser->cs_flags & RADEON_CS_USE_VM) == 0) { + return; + } + + list_for_each_entry(lobj, &parser->validated, tv.head) { + struct radeon_bo_va *bo_va; + struct radeon_bo *rbo = lobj->bo; + + bo_va = radeon_bo_va(rbo, vm); + radeon_fence_unref(&bo_va->fence); + bo_va->fence = radeon_fence_ref(fence); + } +} + /** * cs_parser_fini() - clean parser states * @parser: parser structure holding parsing context. @@ -290,11 +314,14 @@ static void radeon_cs_parser_fini(struct radeon_cs_parser *parser, int error) { unsigned i;
- if (!error) + if (!error) { + /* fence all bo va before ttm_eu_fence_buffer_objects so bo are still reserved */ + radeon_bo_vm_fence_va(parser, parser->ib.fence); ttm_eu_fence_buffer_objects(&parser->validated, parser->ib.fence); - else + } else { ttm_eu_backoff_reservation(&parser->validated); + }
if (parser->relocs != NULL) { for (i = 0; i < parser->nrelocs; i++) { @@ -388,7 +415,6 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev,
if (parser->chunk_ib_idx == -1) return 0; - if ((parser->cs_flags & RADEON_CS_USE_VM) == 0) return 0;
diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c index b372005..9912182 100644 --- a/drivers/gpu/drm/radeon/radeon_gart.c +++ b/drivers/gpu/drm/radeon/radeon_gart.c @@ -814,7 +814,7 @@ int radeon_vm_bo_update_pte(struct radeon_device *rdev, return -EINVAL; }
- if (bo_va->valid) + if (bo_va->valid && mem) return 0;
ngpu_pages = radeon_bo_ngpu_pages(bo); @@ -859,11 +859,27 @@ int radeon_vm_bo_rmv(struct radeon_device *rdev, struct radeon_bo *bo) { struct radeon_bo_va *bo_va; + int r;
bo_va = radeon_bo_va(bo, vm); if (bo_va == NULL) return 0;
+ /* wait for va use to end */ + while (bo_va->fence) { + r = radeon_fence_wait(bo_va->fence, false); + if (r) { + DRM_ERROR("error while waiting for fence: %d\n", r); + } + if (r == -EDEADLK) { + r = radeon_gpu_reset(rdev); + if (!r) + continue; + } + break; + } + radeon_fence_unref(&bo_va->fence); + mutex_lock(&rdev->vm_manager.lock); mutex_lock(&vm->mutex); radeon_vm_bo_update_pte(rdev, vm, bo, NULL); @@ -952,12 +968,15 @@ void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm) radeon_vm_unbind_locked(rdev, vm); mutex_unlock(&rdev->vm_manager.lock);
- /* remove all bo */ + /* remove all bo at this point non are busy any more because unbind + * waited for the last vm fence to signal + */ r = radeon_bo_reserve(rdev->ring_tmp_bo.bo, false); if (!r) { bo_va = radeon_bo_va(rdev->ring_tmp_bo.bo, vm); list_del_init(&bo_va->bo_list); list_del_init(&bo_va->vm_list); + radeon_fence_unref(&bo_va->fence); radeon_bo_unreserve(rdev->ring_tmp_bo.bo); kfree(bo_va); } @@ -969,6 +988,7 @@ void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm) r = radeon_bo_reserve(bo_va->bo, false); if (!r) { list_del_init(&bo_va->bo_list); + radeon_fence_unref(&bo_va->fence); radeon_bo_unreserve(bo_va->bo); kfree(bo_va); } diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c index 84d0452..1b57b00 100644 --- a/drivers/gpu/drm/radeon/radeon_gem.c +++ b/drivers/gpu/drm/radeon/radeon_gem.c @@ -134,25 +134,16 @@ void radeon_gem_object_close(struct drm_gem_object *obj, struct radeon_device *rdev = rbo->rdev; struct radeon_fpriv *fpriv = file_priv->driver_priv; struct radeon_vm *vm = &fpriv->vm; - struct radeon_bo_va *bo_va, *tmp;
if (rdev->family < CHIP_CAYMAN) { return; }
if (radeon_bo_reserve(rbo, false)) { + dev_err(rdev->dev, "leaking bo va because we fail to reserve bo\n"); return; } - list_for_each_entry_safe(bo_va, tmp, &rbo->va, bo_list) { - if (bo_va->vm == vm) { - /* remove from this vm address space */ - mutex_lock(&vm->mutex); - list_del(&bo_va->vm_list); - mutex_unlock(&vm->mutex); - list_del(&bo_va->bo_list); - kfree(bo_va); - } - } + radeon_vm_bo_rmv(rdev, vm, rbo); radeon_bo_unreserve(rbo); }
diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c index 1f1a4c8..1cb014b 100644 --- a/drivers/gpu/drm/radeon/radeon_object.c +++ b/drivers/gpu/drm/radeon/radeon_object.c @@ -52,11 +52,7 @@ void radeon_bo_clear_va(struct radeon_bo *bo)
list_for_each_entry_safe(bo_va, tmp, &bo->va, bo_list) { /* remove from all vm address space */ - mutex_lock(&bo_va->vm->mutex); - list_del(&bo_va->vm_list); - mutex_unlock(&bo_va->vm->mutex); - list_del(&bo_va->bo_list); - kfree(bo_va); + radeon_vm_bo_rmv(bo->rdev, bo_va->vm, bo); } }
Store a reference to the VM into the IB structure, that makes calculating the IBs address a bit less complicated.
Signed-off-by: Christian König deathsimple@vodafone.de Reviewed-by: Jerome Glisse jglisse@redhat.com --- drivers/gpu/drm/radeon/ni.c | 5 +++-- drivers/gpu/drm/radeon/r100.c | 2 +- drivers/gpu/drm/radeon/r600.c | 2 +- drivers/gpu/drm/radeon/radeon.h | 5 +++-- drivers/gpu/drm/radeon/radeon_cs.c | 18 +++--------------- drivers/gpu/drm/radeon/radeon_ring.c | 14 +++++++++++--- drivers/gpu/drm/radeon/si.c | 5 +++-- 7 files changed, 25 insertions(+), 26 deletions(-)
diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c index 9945d86..efa3ab9 100644 --- a/drivers/gpu/drm/radeon/ni.c +++ b/drivers/gpu/drm/radeon/ni.c @@ -871,12 +871,13 @@ void cayman_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) #endif (ib->gpu_addr & 0xFFFFFFFC)); radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFF); - radeon_ring_write(ring, ib->length_dw | (ib->vm_id << 24)); + radeon_ring_write(ring, ib->length_dw | + (ib->vm ? (ib->vm->id << 24) : 0));
/* flush read cache over gart for this vmid */ radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2); - radeon_ring_write(ring, ib->vm_id); + radeon_ring_write(ring, ib->vm ? ib->vm->id : 0); radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3)); radeon_ring_write(ring, PACKET3_TC_ACTION_ENA | PACKET3_SH_ACTION_ENA); radeon_ring_write(ring, 0xFFFFFFFF); diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c index 8acb34f..5f5f524 100644 --- a/drivers/gpu/drm/radeon/r100.c +++ b/drivers/gpu/drm/radeon/r100.c @@ -3816,7 +3816,7 @@ int r100_ib_test(struct radeon_device *rdev, struct radeon_ring *ring) return r; } WREG32(scratch, 0xCAFEDEAD); - r = radeon_ib_get(rdev, RADEON_RING_TYPE_GFX_INDEX, &ib, 256); + r = radeon_ib_get(rdev, RADEON_RING_TYPE_GFX_INDEX, &ib, NULL, 256); if (r) { return r; } diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index 637280f..75d448d 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -2635,7 +2635,7 @@ int r600_ib_test(struct radeon_device *rdev, struct radeon_ring *ring) return r; } WREG32(scratch, 0xCAFEDEAD); - r = radeon_ib_get(rdev, ring->idx, &ib, 256); + r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256); if (r) { DRM_ERROR("radeon: failed to get ib (%d).\n", r); return r; diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 8d75c65..ef1fdf2 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -611,7 +611,7 @@ struct radeon_ib { uint32_t *ptr; int ring; struct radeon_fence *fence; - unsigned vm_id; + struct radeon_vm *vm; bool is_const_ib; struct radeon_fence *sync_to[RADEON_NUM_RINGS]; struct radeon_semaphore *semaphore; @@ -753,7 +753,8 @@ struct si_rlc { };
int radeon_ib_get(struct radeon_device *rdev, int ring, - struct radeon_ib *ib, unsigned size); + struct radeon_ib *ib, struct radeon_vm *vm, + unsigned size); void radeon_ib_free(struct radeon_device *rdev, struct radeon_ib *ib); int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib, struct radeon_ib *const_ib); diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c index b4a0db24..0a9d1eb 100644 --- a/drivers/gpu/drm/radeon/radeon_cs.c +++ b/drivers/gpu/drm/radeon/radeon_cs.c @@ -363,7 +363,7 @@ static int radeon_cs_ib_chunk(struct radeon_device *rdev, * uncached). */ r = radeon_ib_get(rdev, parser->ring, &parser->ib, - ib_chunk->length_dw * 4); + NULL, ib_chunk->length_dw * 4); if (r) { DRM_ERROR("Failed to get ib !\n"); return r; @@ -380,7 +380,6 @@ static int radeon_cs_ib_chunk(struct radeon_device *rdev, return r; } radeon_cs_sync_rings(parser); - parser->ib.vm_id = 0; r = radeon_ib_schedule(rdev, &parser->ib, NULL); if (r) { DRM_ERROR("Failed to schedule IB !\n"); @@ -426,7 +425,7 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev, return -EINVAL; } r = radeon_ib_get(rdev, parser->ring, &parser->const_ib, - ib_chunk->length_dw * 4); + vm, ib_chunk->length_dw * 4); if (r) { DRM_ERROR("Failed to get const ib !\n"); return r; @@ -450,7 +449,7 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev, return -EINVAL; } r = radeon_ib_get(rdev, parser->ring, &parser->ib, - ib_chunk->length_dw * 4); + vm, ib_chunk->length_dw * 4); if (r) { DRM_ERROR("Failed to get ib !\n"); return r; @@ -478,19 +477,8 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev, } radeon_cs_sync_rings(parser);
- parser->ib.vm_id = vm->id; - /* ib pool is bind at 0 in virtual address space, - * so gpu_addr is the offset inside the pool bo - */ - parser->ib.gpu_addr = parser->ib.sa_bo->soffset; - if ((rdev->family >= CHIP_TAHITI) && (parser->chunk_const_ib_idx != -1)) { - parser->const_ib.vm_id = vm->id; - /* ib pool is bind at 0 in virtual address space, - * so gpu_addr is the offset inside the pool bo - */ - parser->const_ib.gpu_addr = parser->const_ib.sa_bo->soffset; r = radeon_ib_schedule(rdev, &parser->ib, &parser->const_ib); } else { r = radeon_ib_schedule(rdev, &parser->ib, NULL); diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c index ec79b37..9ab3a3c 100644 --- a/drivers/gpu/drm/radeon/radeon_ring.c +++ b/drivers/gpu/drm/radeon/radeon_ring.c @@ -58,7 +58,8 @@ int radeon_debugfs_sa_init(struct radeon_device *rdev); * Returns 0 on success, error on failure. */ int radeon_ib_get(struct radeon_device *rdev, int ring, - struct radeon_ib *ib, unsigned size) + struct radeon_ib *ib, struct radeon_vm *vm, + unsigned size) { int i, r;
@@ -76,8 +77,15 @@ int radeon_ib_get(struct radeon_device *rdev, int ring, ib->ring = ring; ib->fence = NULL; ib->ptr = radeon_sa_bo_cpu_addr(ib->sa_bo); - ib->gpu_addr = radeon_sa_bo_gpu_addr(ib->sa_bo); - ib->vm_id = 0; + ib->vm = vm; + if (vm) { + /* ib pool is bind at 0 in virtual address space, + * so gpu_addr is the offset inside the pool bo + */ + ib->gpu_addr = ib->sa_bo->soffset; + } else { + ib->gpu_addr = radeon_sa_bo_gpu_addr(ib->sa_bo); + } ib->is_const_ib = false; for (i = 0; i < RADEON_NUM_RINGS; ++i) ib->sync_to[i] = NULL; diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c index c053f81..4ebcb33 100644 --- a/drivers/gpu/drm/radeon/si.c +++ b/drivers/gpu/drm/radeon/si.c @@ -1798,13 +1798,14 @@ void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) #endif (ib->gpu_addr & 0xFFFFFFFC)); radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF); - radeon_ring_write(ring, ib->length_dw | (ib->vm_id << 24)); + radeon_ring_write(ring, ib->length_dw | + (ib->vm ? (ib->vm->id << 24) : 0));
if (!ib->is_const_ib) { /* flush read cache over gart for this vmid */ radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2); - radeon_ring_write(ring, ib->vm_id); + radeon_ring_write(ring, ib->vm ? ib->vm->id : 0); radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3)); radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA | PACKET3_TC_ACTION_ENA |
So it looks more like the rest of the driver.
Signed-off-by: Christian König deathsimple@vodafone.de Reviewed-by: Jerome Glisse jglisse@redhat.com --- drivers/gpu/drm/radeon/radeon.h | 35 +++++++++++++----------- drivers/gpu/drm/radeon/radeon_asic.c | 50 ++++++++++++++++++---------------- drivers/gpu/drm/radeon/radeon_gart.c | 16 +++++------ 3 files changed, 54 insertions(+), 47 deletions(-)
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index ef1fdf2..5163346 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -660,28 +660,12 @@ struct radeon_vm { struct radeon_fence *fence; };
-struct radeon_vm_funcs { - int (*init)(struct radeon_device *rdev); - void (*fini)(struct radeon_device *rdev); - /* cs mutex must be lock for schedule_ib */ - int (*bind)(struct radeon_device *rdev, struct radeon_vm *vm, int id); - void (*unbind)(struct radeon_device *rdev, struct radeon_vm *vm); - void (*tlb_flush)(struct radeon_device *rdev, struct radeon_vm *vm); - uint32_t (*page_flags)(struct radeon_device *rdev, - struct radeon_vm *vm, - uint32_t flags); - void (*set_page)(struct radeon_device *rdev, struct radeon_vm *vm, - unsigned pfn, uint64_t addr, uint32_t flags); -}; - struct radeon_vm_manager { struct mutex lock; struct list_head lru_vm; uint32_t use_bitmap; struct radeon_sa_manager sa_manager; uint32_t max_pfn; - /* fields constant after init */ - const struct radeon_vm_funcs *funcs; /* number of VMIDs */ unsigned nvm; /* vram base address for page table entry */ @@ -1147,6 +1131,18 @@ struct radeon_asic { void (*tlb_flush)(struct radeon_device *rdev); int (*set_page)(struct radeon_device *rdev, int i, uint64_t addr); } gart; + struct { + int (*init)(struct radeon_device *rdev); + void (*fini)(struct radeon_device *rdev); + int (*bind)(struct radeon_device *rdev, struct radeon_vm *vm, int id); + void (*unbind)(struct radeon_device *rdev, struct radeon_vm *vm); + void (*tlb_flush)(struct radeon_device *rdev, struct radeon_vm *vm); + uint32_t (*page_flags)(struct radeon_device *rdev, + struct radeon_vm *vm, + uint32_t flags); + void (*set_page)(struct radeon_device *rdev, struct radeon_vm *vm, + unsigned pfn, uint64_t addr, uint32_t flags); + } vm; /* ring specific callbacks */ struct { void (*ib_execute)(struct radeon_device *rdev, struct radeon_ib *ib); @@ -1698,6 +1694,13 @@ void radeon_ring_write(struct radeon_ring *ring, uint32_t v); #define radeon_asic_reset(rdev) (rdev)->asic->asic_reset((rdev)) #define radeon_gart_tlb_flush(rdev) (rdev)->asic->gart.tlb_flush((rdev)) #define radeon_gart_set_page(rdev, i, p) (rdev)->asic->gart.set_page((rdev), (i), (p)) +#define radeon_asic_vm_init(rdev) (rdev)->asic->vm.init((rdev)) +#define radeon_asic_vm_fini(rdev) (rdev)->asic->vm.fini((rdev)) +#define radeon_asic_vm_bind(rdev, v, id) (rdev)->asic->vm.bind((rdev), (v), (id)) +#define radeon_asic_vm_unbind(rdev, v) (rdev)->asic->vm.unbind((rdev), (v)) +#define radeon_asic_vm_tlb_flush(rdev, v) (rdev)->asic->vm.tlb_flush((rdev), (v)) +#define radeon_asic_vm_page_flags(rdev, v, flags) (rdev)->asic->vm.page_flags((rdev), (v), (flags)) +#define radeon_asic_vm_set_page(rdev, v, pfn, addr, flags) (rdev)->asic->vm.set_page((rdev), (v), (pfn), (addr), (flags)) #define radeon_ring_start(rdev, r, cp) (rdev)->asic->ring[(r)].ring_start((rdev), (cp)) #define radeon_ring_test(rdev, r, cp) (rdev)->asic->ring[(r)].ring_test((rdev), (cp)) #define radeon_ib_test(rdev, r, cp) (rdev)->asic->ring[(r)].ib_test((rdev), (cp)) diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c index 973417c..7d29d53 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.c +++ b/drivers/gpu/drm/radeon/radeon_asic.c @@ -1342,16 +1342,6 @@ static struct radeon_asic btc_asic = { }, };
-static const struct radeon_vm_funcs cayman_vm_funcs = { - .init = &cayman_vm_init, - .fini = &cayman_vm_fini, - .bind = &cayman_vm_bind, - .unbind = &cayman_vm_unbind, - .tlb_flush = &cayman_vm_tlb_flush, - .page_flags = &cayman_vm_page_flags, - .set_page = &cayman_vm_set_page, -}; - static struct radeon_asic cayman_asic = { .init = &cayman_init, .fini = &cayman_fini, @@ -1366,6 +1356,15 @@ static struct radeon_asic cayman_asic = { .tlb_flush = &cayman_pcie_gart_tlb_flush, .set_page = &rs600_gart_set_page, }, + .vm = { + .init = &cayman_vm_init, + .fini = &cayman_vm_fini, + .bind = &cayman_vm_bind, + .unbind = &cayman_vm_unbind, + .tlb_flush = &cayman_vm_tlb_flush, + .page_flags = &cayman_vm_page_flags, + .set_page = &cayman_vm_set_page, + }, .ring = { [RADEON_RING_TYPE_GFX_INDEX] = { .ib_execute = &cayman_ring_ib_execute, @@ -1460,6 +1459,15 @@ static struct radeon_asic trinity_asic = { .tlb_flush = &cayman_pcie_gart_tlb_flush, .set_page = &rs600_gart_set_page, }, + .vm = { + .init = &cayman_vm_init, + .fini = &cayman_vm_fini, + .bind = &cayman_vm_bind, + .unbind = &cayman_vm_unbind, + .tlb_flush = &cayman_vm_tlb_flush, + .page_flags = &cayman_vm_page_flags, + .set_page = &cayman_vm_set_page, + }, .ring = { [RADEON_RING_TYPE_GFX_INDEX] = { .ib_execute = &cayman_ring_ib_execute, @@ -1540,16 +1548,6 @@ static struct radeon_asic trinity_asic = { }, };
-static const struct radeon_vm_funcs si_vm_funcs = { - .init = &si_vm_init, - .fini = &si_vm_fini, - .bind = &si_vm_bind, - .unbind = &si_vm_unbind, - .tlb_flush = &si_vm_tlb_flush, - .page_flags = &cayman_vm_page_flags, - .set_page = &cayman_vm_set_page, -}; - static struct radeon_asic si_asic = { .init = &si_init, .fini = &si_fini, @@ -1564,6 +1562,15 @@ static struct radeon_asic si_asic = { .tlb_flush = &si_pcie_gart_tlb_flush, .set_page = &rs600_gart_set_page, }, + .vm = { + .init = &si_vm_init, + .fini = &si_vm_fini, + .bind = &si_vm_bind, + .unbind = &si_vm_unbind, + .tlb_flush = &si_vm_tlb_flush, + .page_flags = &cayman_vm_page_flags, + .set_page = &cayman_vm_set_page, + }, .ring = { [RADEON_RING_TYPE_GFX_INDEX] = { .ib_execute = &si_ring_ib_execute, @@ -1769,13 +1776,11 @@ int radeon_asic_init(struct radeon_device *rdev) rdev->asic = &cayman_asic; /* set num crtcs */ rdev->num_crtc = 6; - rdev->vm_manager.funcs = &cayman_vm_funcs; break; case CHIP_ARUBA: rdev->asic = &trinity_asic; /* set num crtcs */ rdev->num_crtc = 4; - rdev->vm_manager.funcs = &cayman_vm_funcs; break; case CHIP_TAHITI: case CHIP_PITCAIRN: @@ -1783,7 +1788,6 @@ int radeon_asic_init(struct radeon_device *rdev) rdev->asic = &si_asic; /* set num crtcs */ rdev->num_crtc = 6; - rdev->vm_manager.funcs = &si_vm_funcs; break; default: /* FIXME: not supported yet */ diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c index 9912182..2902e5f 100644 --- a/drivers/gpu/drm/radeon/radeon_gart.c +++ b/drivers/gpu/drm/radeon/radeon_gart.c @@ -448,7 +448,7 @@ int radeon_vm_manager_init(struct radeon_device *rdev) return r; }
- r = rdev->vm_manager.funcs->init(rdev); + r = radeon_asic_vm_init(rdev); if (r) return r; @@ -476,7 +476,7 @@ int radeon_vm_manager_init(struct radeon_device *rdev) } }
- r = rdev->vm_manager.funcs->bind(rdev, vm, vm->id); + r = radeon_asic_vm_bind(rdev, vm, vm->id); if (r) { DRM_ERROR("Failed to bind vm %d!\n", vm->id); } @@ -522,7 +522,7 @@ static void radeon_vm_unbind_locked(struct radeon_device *rdev, radeon_fence_unref(&vm->fence);
/* hw unbind */ - rdev->vm_manager.funcs->unbind(rdev, vm); + radeon_asic_vm_unbind(rdev, vm); rdev->vm_manager.use_bitmap &= ~(1 << vm->id); list_del_init(&vm->list); vm->id = -1; @@ -553,7 +553,7 @@ void radeon_vm_manager_fini(struct radeon_device *rdev) list_for_each_entry_safe(vm, tmp, &rdev->vm_manager.lru_vm, list) { radeon_vm_unbind_locked(rdev, vm); } - rdev->vm_manager.funcs->fini(rdev); + radeon_asic_vm_fini(rdev); mutex_unlock(&rdev->vm_manager.lock);
radeon_sa_bo_manager_suspend(rdev, &rdev->vm_manager.sa_manager); @@ -639,7 +639,7 @@ retry_id: }
/* do hw bind */ - r = rdev->vm_manager.funcs->bind(rdev, vm, id); + r = radeon_asic_vm_bind(rdev, vm, id); if (r) { radeon_sa_bo_free(rdev, &vm->sa_bo, NULL); return r; @@ -830,14 +830,14 @@ int radeon_vm_bo_update_pte(struct radeon_device *rdev, } } pfn = bo_va->soffset / RADEON_GPU_PAGE_SIZE; - flags = rdev->vm_manager.funcs->page_flags(rdev, bo_va->vm, bo_va->flags); + flags = radeon_asic_vm_page_flags(rdev, bo_va->vm, bo_va->flags); for (i = 0, addr = 0; i < ngpu_pages; i++) { if (mem && bo_va->valid) { addr = radeon_vm_get_addr(rdev, mem, i); } - rdev->vm_manager.funcs->set_page(rdev, bo_va->vm, i + pfn, addr, flags); + radeon_asic_vm_set_page(rdev, bo_va->vm, i + pfn, addr, flags); } - rdev->vm_manager.funcs->tlb_flush(rdev, bo_va->vm); + radeon_asic_vm_tlb_flush(rdev, bo_va->vm); return 0; }
It actually isn't very useful.
Signed-off-by: Christian König deathsimple@vodafone.de Reviewed-by: Jerome Glisse jglisse@redhat.com --- drivers/gpu/drm/radeon/ni.c | 11 ----------- drivers/gpu/drm/radeon/radeon.h | 2 -- drivers/gpu/drm/radeon/radeon_asic.c | 3 --- drivers/gpu/drm/radeon/radeon_gart.c | 1 - drivers/gpu/drm/radeon/si.c | 12 ------------ 5 files changed, 29 deletions(-)
diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c index efa3ab9..3b1aab3 100644 --- a/drivers/gpu/drm/radeon/ni.c +++ b/drivers/gpu/drm/radeon/ni.c @@ -1501,17 +1501,6 @@ int cayman_vm_bind(struct radeon_device *rdev, struct radeon_vm *vm, int id) return 0; }
-void cayman_vm_unbind(struct radeon_device *rdev, struct radeon_vm *vm) -{ - WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR + (vm->id << 2), 0); - WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR + (vm->id << 2), 0); - WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2), 0); - /* flush hdp cache */ - WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1); - /* bits 0-7 are the VM contexts0-7 */ - WREG32(VM_INVALIDATE_REQUEST, 1 << vm->id); -} - void cayman_vm_tlb_flush(struct radeon_device *rdev, struct radeon_vm *vm) { if (vm->id == -1) diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 5163346..aeb2d1f 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -1135,7 +1135,6 @@ struct radeon_asic { int (*init)(struct radeon_device *rdev); void (*fini)(struct radeon_device *rdev); int (*bind)(struct radeon_device *rdev, struct radeon_vm *vm, int id); - void (*unbind)(struct radeon_device *rdev, struct radeon_vm *vm); void (*tlb_flush)(struct radeon_device *rdev, struct radeon_vm *vm); uint32_t (*page_flags)(struct radeon_device *rdev, struct radeon_vm *vm, @@ -1697,7 +1696,6 @@ void radeon_ring_write(struct radeon_ring *ring, uint32_t v); #define radeon_asic_vm_init(rdev) (rdev)->asic->vm.init((rdev)) #define radeon_asic_vm_fini(rdev) (rdev)->asic->vm.fini((rdev)) #define radeon_asic_vm_bind(rdev, v, id) (rdev)->asic->vm.bind((rdev), (v), (id)) -#define radeon_asic_vm_unbind(rdev, v) (rdev)->asic->vm.unbind((rdev), (v)) #define radeon_asic_vm_tlb_flush(rdev, v) (rdev)->asic->vm.tlb_flush((rdev), (v)) #define radeon_asic_vm_page_flags(rdev, v, flags) (rdev)->asic->vm.page_flags((rdev), (v), (flags)) #define radeon_asic_vm_set_page(rdev, v, pfn, addr, flags) (rdev)->asic->vm.set_page((rdev), (v), (pfn), (addr), (flags)) diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c index 7d29d53..9d5f4f3 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.c +++ b/drivers/gpu/drm/radeon/radeon_asic.c @@ -1360,7 +1360,6 @@ static struct radeon_asic cayman_asic = { .init = &cayman_vm_init, .fini = &cayman_vm_fini, .bind = &cayman_vm_bind, - .unbind = &cayman_vm_unbind, .tlb_flush = &cayman_vm_tlb_flush, .page_flags = &cayman_vm_page_flags, .set_page = &cayman_vm_set_page, @@ -1463,7 +1462,6 @@ static struct radeon_asic trinity_asic = { .init = &cayman_vm_init, .fini = &cayman_vm_fini, .bind = &cayman_vm_bind, - .unbind = &cayman_vm_unbind, .tlb_flush = &cayman_vm_tlb_flush, .page_flags = &cayman_vm_page_flags, .set_page = &cayman_vm_set_page, @@ -1566,7 +1564,6 @@ static struct radeon_asic si_asic = { .init = &si_vm_init, .fini = &si_vm_fini, .bind = &si_vm_bind, - .unbind = &si_vm_unbind, .tlb_flush = &si_vm_tlb_flush, .page_flags = &cayman_vm_page_flags, .set_page = &cayman_vm_set_page, diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c index 2902e5f..18a03ab 100644 --- a/drivers/gpu/drm/radeon/radeon_gart.c +++ b/drivers/gpu/drm/radeon/radeon_gart.c @@ -522,7 +522,6 @@ static void radeon_vm_unbind_locked(struct radeon_device *rdev, radeon_fence_unref(&vm->fence);
/* hw unbind */ - radeon_asic_vm_unbind(rdev, vm); rdev->vm_manager.use_bitmap &= ~(1 << vm->id); list_del_init(&vm->list); vm->id = -1; diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c index 4ebcb33..40a9a85 100644 --- a/drivers/gpu/drm/radeon/si.c +++ b/drivers/gpu/drm/radeon/si.c @@ -2795,18 +2795,6 @@ int si_vm_bind(struct radeon_device *rdev, struct radeon_vm *vm, int id) return 0; }
-void si_vm_unbind(struct radeon_device *rdev, struct radeon_vm *vm) -{ - if (vm->id < 8) - WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2), 0); - else - WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2), 0); - /* flush hdp cache */ - WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1); - /* bits 0-15 are the VM contexts0-15 */ - WREG32(VM_INVALIDATE_REQUEST, 1 << vm->id); -} - void si_vm_tlb_flush(struct radeon_device *rdev, struct radeon_vm *vm) { if (vm->id == -1)
Signed-off-by: Christian König deathsimple@vodafone.de Reviewed-by: Jerome Glisse jglisse@redhat.com --- drivers/gpu/drm/radeon/radeon_cs.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-)
diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c index 0a9d1eb..85a80e4 100644 --- a/drivers/gpu/drm/radeon/radeon_cs.c +++ b/drivers/gpu/drm/radeon/radeon_cs.c @@ -115,19 +115,27 @@ static int radeon_cs_get_ring(struct radeon_cs_parser *p, u32 ring, s32 priority return 0; }
+static void radeon_cs_sync_to(struct radeon_cs_parser *p, + struct radeon_fence *fence) +{ + struct radeon_fence *other; + + if (!fence) + return; + + other = p->ib.sync_to[fence->ring]; + p->ib.sync_to[fence->ring] = radeon_fence_later(fence, other); +} + static void radeon_cs_sync_rings(struct radeon_cs_parser *p) { int i;
for (i = 0; i < p->nrelocs; i++) { - struct radeon_fence *a, *b; - - if (!p->relocs[i].robj || !p->relocs[i].robj->tbo.sync_obj) + if (!p->relocs[i].robj) continue;
- a = p->relocs[i].robj->tbo.sync_obj; - b = p->ib.sync_to[a->ring]; - p->ib.sync_to[a->ring] = radeon_fence_later(a, b); + radeon_cs_sync_to(p, p->relocs[i].robj->tbo.sync_obj); } }
Move flushing the VMs as function into the rings. First step to make VM operations async.
Signed-off-by: Christian König deathsimple@vodafone.de Reviewed-by: Jerome Glisse jglisse@redhat.com --- drivers/gpu/drm/radeon/ni.c | 31 ++++++++++++++++--------------- drivers/gpu/drm/radeon/radeon.h | 6 ++++-- drivers/gpu/drm/radeon/radeon_asic.c | 12 +++++++++--- drivers/gpu/drm/radeon/radeon_asic.h | 3 +-- drivers/gpu/drm/radeon/radeon_cs.c | 1 + drivers/gpu/drm/radeon/radeon_gart.c | 4 +++- drivers/gpu/drm/radeon/radeon_ring.c | 8 ++++++++ drivers/gpu/drm/radeon/si.c | 15 --------------- 8 files changed, 42 insertions(+), 38 deletions(-)
diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c index 3b1aab3..ad337e8 100644 --- a/drivers/gpu/drm/radeon/ni.c +++ b/drivers/gpu/drm/radeon/ni.c @@ -1494,24 +1494,9 @@ int cayman_vm_bind(struct radeon_device *rdev, struct radeon_vm *vm, int id) WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR + (id << 2), 0); WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR + (id << 2), vm->last_pfn); WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (id << 2), vm->pt_gpu_addr >> 12); - /* flush hdp cache */ - WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1); - /* bits 0-7 are the VM contexts0-7 */ - WREG32(VM_INVALIDATE_REQUEST, 1 << id); return 0; }
-void cayman_vm_tlb_flush(struct radeon_device *rdev, struct radeon_vm *vm) -{ - if (vm->id == -1) - return; - - /* flush hdp cache */ - WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1); - /* bits 0-7 are the VM contexts0-7 */ - WREG32(VM_INVALIDATE_REQUEST, 1 << vm->id); -} - #define R600_PTE_VALID (1 << 0) #define R600_PTE_SYSTEM (1 << 1) #define R600_PTE_SNOOPED (1 << 2) @@ -1543,3 +1528,19 @@ void cayman_vm_set_page(struct radeon_device *rdev, struct radeon_vm *vm, addr |= flags; writeq(addr, ptr + (pfn * 8)); } + +void cayman_vm_flush(struct radeon_device *rdev, struct radeon_ib *ib) +{ + struct radeon_ring *ring = &rdev->ring[ib->ring]; + + if (!ib->vm || ib->vm->id == -1) + return; + + /* flush hdp cache */ + radeon_ring_write(ring, PACKET0(HDP_MEM_COHERENCY_FLUSH_CNTL, 0)); + radeon_ring_write(ring, 0x1); + + /* bits 0-7 are the VM contexts0-7 */ + radeon_ring_write(ring, PACKET0(VM_INVALIDATE_REQUEST, 0)); + radeon_ring_write(ring, 1 << ib->vm->id); +} diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index aeb2d1f..1228778 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -658,6 +658,8 @@ struct radeon_vm { struct mutex mutex; /* last fence for cs using this vm */ struct radeon_fence *fence; + /* last flush or NULL if we still need to flush */ + struct radeon_fence *last_flush; };
struct radeon_vm_manager { @@ -1135,7 +1137,6 @@ struct radeon_asic { int (*init)(struct radeon_device *rdev); void (*fini)(struct radeon_device *rdev); int (*bind)(struct radeon_device *rdev, struct radeon_vm *vm, int id); - void (*tlb_flush)(struct radeon_device *rdev, struct radeon_vm *vm); uint32_t (*page_flags)(struct radeon_device *rdev, struct radeon_vm *vm, uint32_t flags); @@ -1154,6 +1155,7 @@ struct radeon_asic { int (*ring_test)(struct radeon_device *rdev, struct radeon_ring *cp); int (*ib_test)(struct radeon_device *rdev, struct radeon_ring *cp); bool (*is_lockup)(struct radeon_device *rdev, struct radeon_ring *cp); + void (*vm_flush)(struct radeon_device *rdev, struct radeon_ib *ib); } ring[RADEON_NUM_RINGS]; /* irqs */ struct { @@ -1696,7 +1698,6 @@ void radeon_ring_write(struct radeon_ring *ring, uint32_t v); #define radeon_asic_vm_init(rdev) (rdev)->asic->vm.init((rdev)) #define radeon_asic_vm_fini(rdev) (rdev)->asic->vm.fini((rdev)) #define radeon_asic_vm_bind(rdev, v, id) (rdev)->asic->vm.bind((rdev), (v), (id)) -#define radeon_asic_vm_tlb_flush(rdev, v) (rdev)->asic->vm.tlb_flush((rdev), (v)) #define radeon_asic_vm_page_flags(rdev, v, flags) (rdev)->asic->vm.page_flags((rdev), (v), (flags)) #define radeon_asic_vm_set_page(rdev, v, pfn, addr, flags) (rdev)->asic->vm.set_page((rdev), (v), (pfn), (addr), (flags)) #define radeon_ring_start(rdev, r, cp) (rdev)->asic->ring[(r)].ring_start((rdev), (cp)) @@ -1705,6 +1706,7 @@ void radeon_ring_write(struct radeon_ring *ring, uint32_t v); #define radeon_ring_ib_execute(rdev, r, ib) (rdev)->asic->ring[(r)].ib_execute((rdev), (ib)) #define radeon_ring_ib_parse(rdev, r, ib) (rdev)->asic->ring[(r)].ib_parse((rdev), (ib)) #define radeon_ring_is_lockup(rdev, r, cp) (rdev)->asic->ring[(r)].is_lockup((rdev), (cp)) +#define radeon_ring_vm_flush(rdev, r, ib) (rdev)->asic->ring[(r)].vm_flush((rdev), (ib)) #define radeon_irq_set(rdev) (rdev)->asic->irq.set((rdev)) #define radeon_irq_process(rdev) (rdev)->asic->irq.process((rdev)) #define radeon_get_vblank_counter(rdev, crtc) (rdev)->asic->display.get_vblank_counter((rdev), (crtc)) diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c index 9d5f4f3..b44ab96 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.c +++ b/drivers/gpu/drm/radeon/radeon_asic.c @@ -1360,7 +1360,6 @@ static struct radeon_asic cayman_asic = { .init = &cayman_vm_init, .fini = &cayman_vm_fini, .bind = &cayman_vm_bind, - .tlb_flush = &cayman_vm_tlb_flush, .page_flags = &cayman_vm_page_flags, .set_page = &cayman_vm_set_page, }, @@ -1374,6 +1373,7 @@ static struct radeon_asic cayman_asic = { .ring_test = &r600_ring_test, .ib_test = &r600_ib_test, .is_lockup = &evergreen_gpu_is_lockup, + .vm_flush = &cayman_vm_flush, }, [CAYMAN_RING_TYPE_CP1_INDEX] = { .ib_execute = &cayman_ring_ib_execute, @@ -1384,6 +1384,7 @@ static struct radeon_asic cayman_asic = { .ring_test = &r600_ring_test, .ib_test = &r600_ib_test, .is_lockup = &evergreen_gpu_is_lockup, + .vm_flush = &cayman_vm_flush, }, [CAYMAN_RING_TYPE_CP2_INDEX] = { .ib_execute = &cayman_ring_ib_execute, @@ -1394,6 +1395,7 @@ static struct radeon_asic cayman_asic = { .ring_test = &r600_ring_test, .ib_test = &r600_ib_test, .is_lockup = &evergreen_gpu_is_lockup, + .vm_flush = &cayman_vm_flush, } }, .irq = { @@ -1462,7 +1464,6 @@ static struct radeon_asic trinity_asic = { .init = &cayman_vm_init, .fini = &cayman_vm_fini, .bind = &cayman_vm_bind, - .tlb_flush = &cayman_vm_tlb_flush, .page_flags = &cayman_vm_page_flags, .set_page = &cayman_vm_set_page, }, @@ -1476,6 +1477,7 @@ static struct radeon_asic trinity_asic = { .ring_test = &r600_ring_test, .ib_test = &r600_ib_test, .is_lockup = &evergreen_gpu_is_lockup, + .vm_flush = &cayman_vm_flush, }, [CAYMAN_RING_TYPE_CP1_INDEX] = { .ib_execute = &cayman_ring_ib_execute, @@ -1486,6 +1488,7 @@ static struct radeon_asic trinity_asic = { .ring_test = &r600_ring_test, .ib_test = &r600_ib_test, .is_lockup = &evergreen_gpu_is_lockup, + .vm_flush = &cayman_vm_flush, }, [CAYMAN_RING_TYPE_CP2_INDEX] = { .ib_execute = &cayman_ring_ib_execute, @@ -1496,6 +1499,7 @@ static struct radeon_asic trinity_asic = { .ring_test = &r600_ring_test, .ib_test = &r600_ib_test, .is_lockup = &evergreen_gpu_is_lockup, + .vm_flush = &cayman_vm_flush, } }, .irq = { @@ -1564,7 +1568,6 @@ static struct radeon_asic si_asic = { .init = &si_vm_init, .fini = &si_vm_fini, .bind = &si_vm_bind, - .tlb_flush = &si_vm_tlb_flush, .page_flags = &cayman_vm_page_flags, .set_page = &cayman_vm_set_page, }, @@ -1578,6 +1581,7 @@ static struct radeon_asic si_asic = { .ring_test = &r600_ring_test, .ib_test = &r600_ib_test, .is_lockup = &si_gpu_is_lockup, + .vm_flush = &cayman_vm_flush, }, [CAYMAN_RING_TYPE_CP1_INDEX] = { .ib_execute = &si_ring_ib_execute, @@ -1588,6 +1592,7 @@ static struct radeon_asic si_asic = { .ring_test = &r600_ring_test, .ib_test = &r600_ib_test, .is_lockup = &si_gpu_is_lockup, + .vm_flush = &cayman_vm_flush, }, [CAYMAN_RING_TYPE_CP2_INDEX] = { .ib_execute = &si_ring_ib_execute, @@ -1598,6 +1603,7 @@ static struct radeon_asic si_asic = { .ring_test = &r600_ring_test, .ib_test = &r600_ib_test, .is_lockup = &si_gpu_is_lockup, + .vm_flush = &cayman_vm_flush, } }, .irq = { diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h index f4af243..87466d3 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.h +++ b/drivers/gpu/drm/radeon/radeon_asic.h @@ -440,7 +440,7 @@ int cayman_vm_init(struct radeon_device *rdev); void cayman_vm_fini(struct radeon_device *rdev); int cayman_vm_bind(struct radeon_device *rdev, struct radeon_vm *vm, int id); void cayman_vm_unbind(struct radeon_device *rdev, struct radeon_vm *vm); -void cayman_vm_tlb_flush(struct radeon_device *rdev, struct radeon_vm *vm); +void cayman_vm_flush(struct radeon_device *rdev, struct radeon_ib *ib); uint32_t cayman_vm_page_flags(struct radeon_device *rdev, struct radeon_vm *vm, uint32_t flags); @@ -470,7 +470,6 @@ int si_vm_init(struct radeon_device *rdev); void si_vm_fini(struct radeon_device *rdev); int si_vm_bind(struct radeon_device *rdev, struct radeon_vm *vm, int id); void si_vm_unbind(struct radeon_device *rdev, struct radeon_vm *vm); -void si_vm_tlb_flush(struct radeon_device *rdev, struct radeon_vm *vm); int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib);
#endif diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c index 85a80e4..d4a804b 100644 --- a/drivers/gpu/drm/radeon/radeon_cs.c +++ b/drivers/gpu/drm/radeon/radeon_cs.c @@ -484,6 +484,7 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev, goto out; } radeon_cs_sync_rings(parser); + radeon_cs_sync_to(parser, vm->last_flush);
if ((rdev->family >= CHIP_TAHITI) && (parser->chunk_const_ib_idx != -1)) { diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c index 18a03ab..a6a3dca 100644 --- a/drivers/gpu/drm/radeon/radeon_gart.c +++ b/drivers/gpu/drm/radeon/radeon_gart.c @@ -520,6 +520,7 @@ static void radeon_vm_unbind_locked(struct radeon_device *rdev, break; } radeon_fence_unref(&vm->fence); + radeon_fence_unref(&vm->last_flush);
/* hw unbind */ rdev->vm_manager.use_bitmap &= ~(1 << vm->id); @@ -639,6 +640,7 @@ retry_id:
/* do hw bind */ r = radeon_asic_vm_bind(rdev, vm, id); + radeon_fence_unref(&vm->last_flush); if (r) { radeon_sa_bo_free(rdev, &vm->sa_bo, NULL); return r; @@ -836,7 +838,7 @@ int radeon_vm_bo_update_pte(struct radeon_device *rdev, } radeon_asic_vm_set_page(rdev, bo_va->vm, i + pfn, addr, flags); } - radeon_asic_vm_tlb_flush(rdev, bo_va->vm); + radeon_fence_unref(&vm->last_flush); return 0; }
diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c index 9ab3a3c..ceb334b 100644 --- a/drivers/gpu/drm/radeon/radeon_ring.c +++ b/drivers/gpu/drm/radeon/radeon_ring.c @@ -160,6 +160,10 @@ int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib, if (!need_sync) { radeon_semaphore_free(rdev, &ib->semaphore, NULL); } + /* if we can't remember our last VM flush then flush now! */ + if (ib->vm && !ib->vm->last_flush) { + radeon_ring_vm_flush(rdev, ib->ring, ib); + } if (const_ib) { radeon_ring_ib_execute(rdev, const_ib->ring, const_ib); radeon_semaphore_free(rdev, &const_ib->semaphore, NULL); @@ -174,6 +178,10 @@ int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib, if (const_ib) { const_ib->fence = radeon_fence_ref(ib->fence); } + /* we just flushed the VM, remember that */ + if (ib->vm && !ib->vm->last_flush) { + ib->vm->last_flush = radeon_fence_ref(ib->fence); + } radeon_ring_unlock_commit(rdev, ring); return 0; } diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c index 40a9a85..7ef16d6 100644 --- a/drivers/gpu/drm/radeon/si.c +++ b/drivers/gpu/drm/radeon/si.c @@ -2788,24 +2788,9 @@ int si_vm_bind(struct radeon_device *rdev, struct radeon_vm *vm, int id) else WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((id - 8) << 2), vm->pt_gpu_addr >> 12); - /* flush hdp cache */ - WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1); - /* bits 0-15 are the VM contexts0-15 */ - WREG32(VM_INVALIDATE_REQUEST, 1 << id); return 0; }
-void si_vm_tlb_flush(struct radeon_device *rdev, struct radeon_vm *vm) -{ - if (vm->id == -1) - return; - - /* flush hdp cache */ - WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1); - /* bits 0-15 are the VM contexts0-15 */ - WREG32(VM_INVALIDATE_REQUEST, 1 << vm->id); -} - /* * RLC */
Move binding onto the ring, simplifying handling a bit.
Signed-off-by: Christian König deathsimple@vodafone.de Reviewed-by: Jerome Glisse jglisse@redhat.com --- drivers/gpu/drm/radeon/ni.c | 20 ++--- drivers/gpu/drm/radeon/radeon.h | 30 +++++++- drivers/gpu/drm/radeon/radeon_asic.c | 9 +-- drivers/gpu/drm/radeon/radeon_asic.h | 4 +- drivers/gpu/drm/radeon/radeon_cs.c | 9 +-- drivers/gpu/drm/radeon/radeon_device.c | 1 - drivers/gpu/drm/radeon/radeon_gart.c | 126 +++++++++++++++++++++----------- drivers/gpu/drm/radeon/si.c | 30 ++++++-- 8 files changed, 153 insertions(+), 76 deletions(-)
diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c index ad337e8..88d5713 100644 --- a/drivers/gpu/drm/radeon/ni.c +++ b/drivers/gpu/drm/radeon/ni.c @@ -1489,14 +1489,6 @@ void cayman_vm_fini(struct radeon_device *rdev) { }
-int cayman_vm_bind(struct radeon_device *rdev, struct radeon_vm *vm, int id) -{ - WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR + (id << 2), 0); - WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR + (id << 2), vm->last_pfn); - WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (id << 2), vm->pt_gpu_addr >> 12); - return 0; -} - #define R600_PTE_VALID (1 << 0) #define R600_PTE_SYSTEM (1 << 1) #define R600_PTE_SNOOPED (1 << 2) @@ -1532,10 +1524,20 @@ void cayman_vm_set_page(struct radeon_device *rdev, struct radeon_vm *vm, void cayman_vm_flush(struct radeon_device *rdev, struct radeon_ib *ib) { struct radeon_ring *ring = &rdev->ring[ib->ring]; + struct radeon_vm *vm = ib->vm;
- if (!ib->vm || ib->vm->id == -1) + if (vm == NULL) return;
+ radeon_ring_write(ring, PACKET0(VM_CONTEXT0_PAGE_TABLE_START_ADDR + (vm->id << 2), 0)); + radeon_ring_write(ring, 0); + + radeon_ring_write(ring, PACKET0(VM_CONTEXT0_PAGE_TABLE_END_ADDR + (vm->id << 2), 0)); + radeon_ring_write(ring, vm->last_pfn); + + radeon_ring_write(ring, PACKET0(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2), 0)); + radeon_ring_write(ring, vm->pt_gpu_addr >> 12); + /* flush hdp cache */ radeon_ring_write(ring, PACKET0(HDP_MEM_COHERENCY_FLUSH_CNTL, 0)); radeon_ring_write(ring, 0x1); diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 1228778..e0c6673 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -268,6 +268,22 @@ static inline struct radeon_fence *radeon_fence_later(struct radeon_fence *a, } }
+static inline bool radeon_fence_is_earlier(struct radeon_fence *a, + struct radeon_fence *b) +{ + if (!a) { + return false; + } + + if (!b) { + return true; + } + + BUG_ON(a->ring != b->ring); + + return a->seq < b->seq; +} + /* * Tiling registers */ @@ -647,10 +663,13 @@ struct radeon_ring { /* * VM */ + +#define RADEON_NUM_VM 16 + struct radeon_vm { struct list_head list; struct list_head va; - int id; + unsigned id; unsigned last_pfn; u64 pt_gpu_addr; u64 *pt; @@ -665,7 +684,7 @@ struct radeon_vm { struct radeon_vm_manager { struct mutex lock; struct list_head lru_vm; - uint32_t use_bitmap; + struct radeon_fence *active[RADEON_NUM_VM]; struct radeon_sa_manager sa_manager; uint32_t max_pfn; /* number of VMIDs */ @@ -1136,7 +1155,6 @@ struct radeon_asic { struct { int (*init)(struct radeon_device *rdev); void (*fini)(struct radeon_device *rdev); - int (*bind)(struct radeon_device *rdev, struct radeon_vm *vm, int id); uint32_t (*page_flags)(struct radeon_device *rdev, struct radeon_vm *vm, uint32_t flags); @@ -1697,7 +1715,6 @@ void radeon_ring_write(struct radeon_ring *ring, uint32_t v); #define radeon_gart_set_page(rdev, i, p) (rdev)->asic->gart.set_page((rdev), (i), (p)) #define radeon_asic_vm_init(rdev) (rdev)->asic->vm.init((rdev)) #define radeon_asic_vm_fini(rdev) (rdev)->asic->vm.fini((rdev)) -#define radeon_asic_vm_bind(rdev, v, id) (rdev)->asic->vm.bind((rdev), (v), (id)) #define radeon_asic_vm_page_flags(rdev, v, flags) (rdev)->asic->vm.page_flags((rdev), (v), (flags)) #define radeon_asic_vm_set_page(rdev, v, pfn, addr, flags) (rdev)->asic->vm.set_page((rdev), (v), (pfn), (addr), (flags)) #define radeon_ring_start(rdev, r, cp) (rdev)->asic->ring[(r)].ring_start((rdev), (cp)) @@ -1779,6 +1796,11 @@ int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm); void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm); int radeon_vm_bind(struct radeon_device *rdev, struct radeon_vm *vm); void radeon_vm_unbind(struct radeon_device *rdev, struct radeon_vm *vm); +struct radeon_fence *radeon_vm_grab_id(struct radeon_device *rdev, + struct radeon_vm *vm, int ring); +void radeon_vm_fence(struct radeon_device *rdev, + struct radeon_vm *vm, + struct radeon_fence *fence); int radeon_vm_bo_update_pte(struct radeon_device *rdev, struct radeon_vm *vm, struct radeon_bo *bo, diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c index b44ab96..5486674 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.c +++ b/drivers/gpu/drm/radeon/radeon_asic.c @@ -1359,7 +1359,6 @@ static struct radeon_asic cayman_asic = { .vm = { .init = &cayman_vm_init, .fini = &cayman_vm_fini, - .bind = &cayman_vm_bind, .page_flags = &cayman_vm_page_flags, .set_page = &cayman_vm_set_page, }, @@ -1463,7 +1462,6 @@ static struct radeon_asic trinity_asic = { .vm = { .init = &cayman_vm_init, .fini = &cayman_vm_fini, - .bind = &cayman_vm_bind, .page_flags = &cayman_vm_page_flags, .set_page = &cayman_vm_set_page, }, @@ -1567,7 +1565,6 @@ static struct radeon_asic si_asic = { .vm = { .init = &si_vm_init, .fini = &si_vm_fini, - .bind = &si_vm_bind, .page_flags = &cayman_vm_page_flags, .set_page = &cayman_vm_set_page, }, @@ -1581,7 +1578,7 @@ static struct radeon_asic si_asic = { .ring_test = &r600_ring_test, .ib_test = &r600_ib_test, .is_lockup = &si_gpu_is_lockup, - .vm_flush = &cayman_vm_flush, + .vm_flush = &si_vm_flush, }, [CAYMAN_RING_TYPE_CP1_INDEX] = { .ib_execute = &si_ring_ib_execute, @@ -1592,7 +1589,7 @@ static struct radeon_asic si_asic = { .ring_test = &r600_ring_test, .ib_test = &r600_ib_test, .is_lockup = &si_gpu_is_lockup, - .vm_flush = &cayman_vm_flush, + .vm_flush = &si_vm_flush, }, [CAYMAN_RING_TYPE_CP2_INDEX] = { .ib_execute = &si_ring_ib_execute, @@ -1603,7 +1600,7 @@ static struct radeon_asic si_asic = { .ring_test = &r600_ring_test, .ib_test = &r600_ib_test, .is_lockup = &si_gpu_is_lockup, - .vm_flush = &cayman_vm_flush, + .vm_flush = &si_vm_flush, } }, .irq = { diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h index 87466d3..6f8002b 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.h +++ b/drivers/gpu/drm/radeon/radeon_asic.h @@ -438,7 +438,6 @@ int cayman_asic_reset(struct radeon_device *rdev); void cayman_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib); int cayman_vm_init(struct radeon_device *rdev); void cayman_vm_fini(struct radeon_device *rdev); -int cayman_vm_bind(struct radeon_device *rdev, struct radeon_vm *vm, int id); void cayman_vm_unbind(struct radeon_device *rdev, struct radeon_vm *vm); void cayman_vm_flush(struct radeon_device *rdev, struct radeon_ib *ib); uint32_t cayman_vm_page_flags(struct radeon_device *rdev, @@ -468,8 +467,7 @@ int si_irq_set(struct radeon_device *rdev); int si_irq_process(struct radeon_device *rdev); int si_vm_init(struct radeon_device *rdev); void si_vm_fini(struct radeon_device *rdev); -int si_vm_bind(struct radeon_device *rdev, struct radeon_vm *vm, int id); -void si_vm_unbind(struct radeon_device *rdev, struct radeon_vm *vm); +void si_vm_flush(struct radeon_device *rdev, struct radeon_ib *ib); int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib);
#endif diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c index d4a804b..dc4554e 100644 --- a/drivers/gpu/drm/radeon/radeon_cs.c +++ b/drivers/gpu/drm/radeon/radeon_cs.c @@ -485,6 +485,7 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev, } radeon_cs_sync_rings(parser); radeon_cs_sync_to(parser, vm->last_flush); + radeon_cs_sync_to(parser, radeon_vm_grab_id(rdev, vm, parser->ring));
if ((rdev->family >= CHIP_TAHITI) && (parser->chunk_const_ib_idx != -1)) { @@ -493,13 +494,11 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev, r = radeon_ib_schedule(rdev, &parser->ib, NULL); }
-out: if (!r) { - if (vm->fence) { - radeon_fence_unref(&vm->fence); - } - vm->fence = radeon_fence_ref(parser->ib.fence); + radeon_vm_fence(rdev, vm, parser->ib.fence); } + +out: mutex_unlock(&vm->mutex); mutex_unlock(&rdev->vm_manager.lock); return r; diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index 742af82..9909d1f 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -1018,7 +1018,6 @@ int radeon_device_init(struct radeon_device *rdev, return r; /* initialize vm here */ mutex_init(&rdev->vm_manager.lock); - rdev->vm_manager.use_bitmap = 1; rdev->vm_manager.max_pfn = 1 << 20; INIT_LIST_HEAD(&rdev->vm_manager.lru_vm);
diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c index a6a3dca..d668733 100644 --- a/drivers/gpu/drm/radeon/radeon_gart.c +++ b/drivers/gpu/drm/radeon/radeon_gart.c @@ -437,7 +437,6 @@ int radeon_vm_manager_init(struct radeon_device *rdev) int r;
if (!rdev->vm_manager.enabled) { - /* mark first vm as always in use, it's the system one */ /* allocate enough for 2 full VM pts */ r = radeon_sa_bo_manager_init(rdev, &rdev->vm_manager.sa_manager, rdev->vm_manager.max_pfn * 8 * 2, @@ -461,7 +460,7 @@ int radeon_vm_manager_init(struct radeon_device *rdev)
/* restore page table */ list_for_each_entry(vm, &rdev->vm_manager.lru_vm, list) { - if (vm->id == -1) + if (vm->sa_bo == NULL) continue;
list_for_each_entry(bo_va, &vm->va, vm_list) { @@ -475,11 +474,6 @@ int radeon_vm_manager_init(struct radeon_device *rdev) DRM_ERROR("Failed to update pte for vm %d!\n", vm->id); } } - - r = radeon_asic_vm_bind(rdev, vm, vm->id); - if (r) { - DRM_ERROR("Failed to bind vm %d!\n", vm->id); - } } return 0; } @@ -500,10 +494,6 @@ static void radeon_vm_unbind_locked(struct radeon_device *rdev, { struct radeon_bo_va *bo_va;
- if (vm->id == -1) { - return; - } - /* wait for vm use to end */ while (vm->fence) { int r; @@ -523,9 +513,7 @@ static void radeon_vm_unbind_locked(struct radeon_device *rdev, radeon_fence_unref(&vm->last_flush);
/* hw unbind */ - rdev->vm_manager.use_bitmap &= ~(1 << vm->id); list_del_init(&vm->list); - vm->id = -1; radeon_sa_bo_free(rdev, &vm->sa_bo, NULL); vm->pt = NULL;
@@ -544,6 +532,7 @@ static void radeon_vm_unbind_locked(struct radeon_device *rdev, void radeon_vm_manager_fini(struct radeon_device *rdev) { struct radeon_vm *vm, *tmp; + int i;
if (!rdev->vm_manager.enabled) return; @@ -553,6 +542,9 @@ void radeon_vm_manager_fini(struct radeon_device *rdev) list_for_each_entry_safe(vm, tmp, &rdev->vm_manager.lru_vm, list) { radeon_vm_unbind_locked(rdev, vm); } + for (i = 0; i < RADEON_NUM_VM; ++i) { + radeon_fence_unref(&rdev->vm_manager.active[i]); + } radeon_asic_vm_fini(rdev); mutex_unlock(&rdev->vm_manager.lock);
@@ -593,14 +585,13 @@ void radeon_vm_unbind(struct radeon_device *rdev, struct radeon_vm *vm) int radeon_vm_bind(struct radeon_device *rdev, struct radeon_vm *vm) { struct radeon_vm *vm_evict; - unsigned i; - int id = -1, r; + int r;
if (vm == NULL) { return -EINVAL; }
- if (vm->id != -1) { + if (vm->sa_bo != NULL) { /* update lru */ list_del_init(&vm->list); list_add_tail(&vm->list, &rdev->vm_manager.lru_vm); @@ -623,33 +614,86 @@ retry: vm->pt_gpu_addr = radeon_sa_bo_gpu_addr(vm->sa_bo); memset(vm->pt, 0, RADEON_GPU_PAGE_ALIGN(vm->last_pfn * 8));
-retry_id: - /* search for free vm */ - for (i = 0; i < rdev->vm_manager.nvm; i++) { - if (!(rdev->vm_manager.use_bitmap & (1 << i))) { - id = i; - break; + list_add_tail(&vm->list, &rdev->vm_manager.lru_vm); + return radeon_vm_bo_update_pte(rdev, vm, rdev->ring_tmp_bo.bo, + &rdev->ring_tmp_bo.bo->tbo.mem); +} + +/** + * radeon_vm_grab_id - allocate the next free VMID + * + * @rdev: radeon_device pointer + * @vm: vm to allocate id for + * @ring: ring we want to submit job to + * + * Allocate an id for the vm (cayman+). + * Returns the fence we need to sync to (if any). + * + * Global and local mutex must be locked! + */ +struct radeon_fence *radeon_vm_grab_id(struct radeon_device *rdev, + struct radeon_vm *vm, int ring) +{ + struct radeon_fence *best[RADEON_NUM_RINGS] = {}; + unsigned choices[2] = {}; + unsigned i; + + /* check if the id is still valid */ + if (vm->fence && vm->fence == rdev->vm_manager.active[vm->id]) + return NULL; + + /* we definately need to flush */ + radeon_fence_unref(&vm->last_flush); + + /* skip over VMID 0, since it is the system VM */ + for (i = 1; i < rdev->vm_manager.nvm; ++i) { + struct radeon_fence *fence = rdev->vm_manager.active[i]; + + if (fence == NULL) { + /* found a free one */ + vm->id = i; + return NULL; + } + + if (radeon_fence_is_earlier(fence, best[fence->ring])) { + best[fence->ring] = fence; + choices[fence->ring == ring ? 0 : 1] = i; } - } - /* evict vm if necessary */ - if (id == -1) { - vm_evict = list_first_entry(&rdev->vm_manager.lru_vm, struct radeon_vm, list); - radeon_vm_unbind(rdev, vm_evict); - goto retry_id; }
- /* do hw bind */ - r = radeon_asic_vm_bind(rdev, vm, id); - radeon_fence_unref(&vm->last_flush); - if (r) { - radeon_sa_bo_free(rdev, &vm->sa_bo, NULL); - return r; + for (i = 0; i < 2; ++i) { + if (choices[i]) { + vm->id = choices[i]; + return rdev->vm_manager.active[choices[i]]; + } } - rdev->vm_manager.use_bitmap |= 1 << id; - vm->id = id; - list_add_tail(&vm->list, &rdev->vm_manager.lru_vm); - return radeon_vm_bo_update_pte(rdev, vm, rdev->ring_tmp_bo.bo, - &rdev->ring_tmp_bo.bo->tbo.mem); + + /* should never happen */ + BUG(); + return NULL; +} + +/** + * radeon_vm_fence - remember fence for vm + * + * @rdev: radeon_device pointer + * @vm: vm we want to fence + * @fence: fence to remember + * + * Fence the vm (cayman+). + * Set the fence used to protect page table and id. + * + * Global and local mutex must be locked! + */ +void radeon_vm_fence(struct radeon_device *rdev, + struct radeon_vm *vm, + struct radeon_fence *fence) +{ + radeon_fence_unref(&rdev->vm_manager.active[vm->id]); + rdev->vm_manager.active[vm->id] = radeon_fence_ref(fence); + + radeon_fence_unref(&vm->fence); + vm->fence = radeon_fence_ref(fence); }
/* object have to be reserved */ @@ -806,7 +850,7 @@ int radeon_vm_bo_update_pte(struct radeon_device *rdev, uint32_t flags;
/* nothing to do if vm isn't bound */ - if (vm->id == -1) + if (vm->sa_bo == NULL) return 0;
bo_va = radeon_bo_va(bo, vm); @@ -928,7 +972,7 @@ int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm) { int r;
- vm->id = -1; + vm->id = 0; vm->fence = NULL; mutex_init(&vm->mutex); INIT_LIST_HEAD(&vm->list); diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c index 7ef16d6..af115b8 100644 --- a/drivers/gpu/drm/radeon/si.c +++ b/drivers/gpu/drm/radeon/si.c @@ -2781,14 +2781,30 @@ void si_vm_fini(struct radeon_device *rdev) { }
-int si_vm_bind(struct radeon_device *rdev, struct radeon_vm *vm, int id) +void si_vm_flush(struct radeon_device *rdev, struct radeon_ib *ib) { - if (id < 8) - WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (id << 2), vm->pt_gpu_addr >> 12); - else - WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((id - 8) << 2), - vm->pt_gpu_addr >> 12); - return 0; + struct radeon_ring *ring = &rdev->ring[ib->ring]; + struct radeon_vm *vm = ib->vm; + + if (vm == NULL) + return; + + if (vm->id < 8) { + radeon_ring_write(ring, PACKET0(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + + (vm->id << 2), 0)); + } else { + radeon_ring_write(ring, PACKET0(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + + ((vm->id - 8) << 2), 0)); + } + radeon_ring_write(ring, vm->pt_gpu_addr >> 12); + + /* flush hdp cache */ + radeon_ring_write(ring, PACKET0(HDP_MEM_COHERENCY_FLUSH_CNTL, 0)); + radeon_ring_write(ring, 0x1); + + /* bits 0-7 are the VM contexts0-7 */ + radeon_ring_write(ring, PACKET0(VM_INVALIDATE_REQUEST, 0)); + radeon_ring_write(ring, 1 << ib->vm->id); }
/*
Removing the need to wait for anything.
Still not ideal, since we need to free pt on va remove.
Signed-off-by: Christian König deathsimple@vodafone.de Reviewed-by: Jerome Glisse jglisse@redhat.com --- drivers/gpu/drm/radeon/radeon.h | 4 +- drivers/gpu/drm/radeon/radeon_cs.c | 28 +-------- drivers/gpu/drm/radeon/radeon_gart.c | 107 +++++++++++----------------------- drivers/gpu/drm/radeon/radeon_sa.c | 20 +++---- 4 files changed, 43 insertions(+), 116 deletions(-)
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index e0c6673..ed0ef17 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -316,7 +316,6 @@ struct radeon_bo_va { uint64_t soffset; uint64_t eoffset; uint32_t flags; - struct radeon_fence *fence; bool valid; };
@@ -1794,8 +1793,7 @@ int radeon_vm_manager_init(struct radeon_device *rdev); void radeon_vm_manager_fini(struct radeon_device *rdev); int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm); void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm); -int radeon_vm_bind(struct radeon_device *rdev, struct radeon_vm *vm); -void radeon_vm_unbind(struct radeon_device *rdev, struct radeon_vm *vm); +int radeon_vm_alloc_pt(struct radeon_device *rdev, struct radeon_vm *vm); struct radeon_fence *radeon_vm_grab_id(struct radeon_device *rdev, struct radeon_vm *vm, int ring); void radeon_vm_fence(struct radeon_device *rdev, diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c index dc4554e..300fc25 100644 --- a/drivers/gpu/drm/radeon/radeon_cs.c +++ b/drivers/gpu/drm/radeon/radeon_cs.c @@ -286,30 +286,6 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data) return 0; }
-static void radeon_bo_vm_fence_va(struct radeon_cs_parser *parser, - struct radeon_fence *fence) -{ - struct radeon_fpriv *fpriv = parser->filp->driver_priv; - struct radeon_vm *vm = &fpriv->vm; - struct radeon_bo_list *lobj; - - if (parser->chunk_ib_idx == -1) { - return; - } - if ((parser->cs_flags & RADEON_CS_USE_VM) == 0) { - return; - } - - list_for_each_entry(lobj, &parser->validated, tv.head) { - struct radeon_bo_va *bo_va; - struct radeon_bo *rbo = lobj->bo; - - bo_va = radeon_bo_va(rbo, vm); - radeon_fence_unref(&bo_va->fence); - bo_va->fence = radeon_fence_ref(fence); - } -} - /** * cs_parser_fini() - clean parser states * @parser: parser structure holding parsing context. @@ -323,8 +299,6 @@ static void radeon_cs_parser_fini(struct radeon_cs_parser *parser, int error) unsigned i;
if (!error) { - /* fence all bo va before ttm_eu_fence_buffer_objects so bo are still reserved */ - radeon_bo_vm_fence_va(parser, parser->ib.fence); ttm_eu_fence_buffer_objects(&parser->validated, parser->ib.fence); } else { @@ -475,7 +449,7 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev,
mutex_lock(&rdev->vm_manager.lock); mutex_lock(&vm->mutex); - r = radeon_vm_bind(rdev, vm); + r = radeon_vm_alloc_pt(rdev, vm); if (r) { goto out; } diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c index d668733..4bce026 100644 --- a/drivers/gpu/drm/radeon/radeon_gart.c +++ b/drivers/gpu/drm/radeon/radeon_gart.c @@ -478,43 +478,26 @@ int radeon_vm_manager_init(struct radeon_device *rdev) return 0; }
-/* global mutex must be lock */ /** - * radeon_vm_unbind_locked - unbind a specific vm + * radeon_vm_free_pt - free the page table for a specific vm * * @rdev: radeon_device pointer * @vm: vm to unbind * - * Unbind the requested vm (cayman+). - * Wait for use of the VM to finish, then unbind the page table, - * and free the page table memory. + * Free the page table of a specific vm (cayman+). + * + * Global and local mutex must be lock! */ -static void radeon_vm_unbind_locked(struct radeon_device *rdev, +static void radeon_vm_free_pt(struct radeon_device *rdev, struct radeon_vm *vm) { struct radeon_bo_va *bo_va;
- /* wait for vm use to end */ - while (vm->fence) { - int r; - r = radeon_fence_wait(vm->fence, false); - if (r) - DRM_ERROR("error while waiting for fence: %d\n", r); - if (r == -EDEADLK) { - mutex_unlock(&rdev->vm_manager.lock); - r = radeon_gpu_reset(rdev); - mutex_lock(&rdev->vm_manager.lock); - if (!r) - continue; - } - break; - } - radeon_fence_unref(&vm->fence); - radeon_fence_unref(&vm->last_flush); + if (!vm->sa_bo) + return;
- /* hw unbind */ list_del_init(&vm->list); - radeon_sa_bo_free(rdev, &vm->sa_bo, NULL); + radeon_sa_bo_free(rdev, &vm->sa_bo, vm->fence); vm->pt = NULL;
list_for_each_entry(bo_va, &vm->va, vm_list) { @@ -538,9 +521,11 @@ void radeon_vm_manager_fini(struct radeon_device *rdev) return;
mutex_lock(&rdev->vm_manager.lock); - /* unbind all active vm */ + /* free all allocated page tables */ list_for_each_entry_safe(vm, tmp, &rdev->vm_manager.lru_vm, list) { - radeon_vm_unbind_locked(rdev, vm); + mutex_lock(&vm->mutex); + radeon_vm_free_pt(rdev, vm); + mutex_unlock(&vm->mutex); } for (i = 0; i < RADEON_NUM_VM; ++i) { radeon_fence_unref(&rdev->vm_manager.active[i]); @@ -553,36 +538,19 @@ void radeon_vm_manager_fini(struct radeon_device *rdev) rdev->vm_manager.enabled = false; }
-/* global mutex must be locked */ -/** - * radeon_vm_unbind - locked version of unbind - * - * @rdev: radeon_device pointer - * @vm: vm to unbind - * - * Locked version that wraps radeon_vm_unbind_locked (cayman+). - */ -void radeon_vm_unbind(struct radeon_device *rdev, struct radeon_vm *vm) -{ - mutex_lock(&vm->mutex); - radeon_vm_unbind_locked(rdev, vm); - mutex_unlock(&vm->mutex); -} - -/* global and local mutex must be locked */ /** - * radeon_vm_bind - bind a page table to a VMID + * radeon_vm_alloc_pt - allocates a page table for a VM * * @rdev: radeon_device pointer * @vm: vm to bind * - * Bind the requested vm (cayman+). - * Suballocate memory for the page table, allocate a VMID - * and bind the page table to it, and finally start to populate - * the page table. + * Allocate a page table for the requested vm (cayman+). + * Also starts to populate the page table. * Returns 0 for success, error for failure. + * + * Global and local mutex must be locked! */ -int radeon_vm_bind(struct radeon_device *rdev, struct radeon_vm *vm) +int radeon_vm_alloc_pt(struct radeon_device *rdev, struct radeon_vm *vm) { struct radeon_vm *vm_evict; int r; @@ -602,14 +570,20 @@ retry: r = radeon_sa_bo_new(rdev, &rdev->vm_manager.sa_manager, &vm->sa_bo, RADEON_GPU_PAGE_ALIGN(vm->last_pfn * 8), RADEON_GPU_PAGE_SIZE, false); - if (r) { + if (r == -ENOMEM) { if (list_empty(&rdev->vm_manager.lru_vm)) { return r; } vm_evict = list_first_entry(&rdev->vm_manager.lru_vm, struct radeon_vm, list); - radeon_vm_unbind(rdev, vm_evict); + mutex_lock(&vm_evict->mutex); + radeon_vm_free_pt(rdev, vm_evict); + mutex_unlock(&vm_evict->mutex); goto retry; + + } else if (r) { + return r; } + vm->pt = radeon_sa_bo_cpu_addr(vm->sa_bo); vm->pt_gpu_addr = radeon_sa_bo_gpu_addr(vm->sa_bo); memset(vm->pt, 0, RADEON_GPU_PAGE_ALIGN(vm->last_pfn * 8)); @@ -758,7 +732,7 @@ int radeon_vm_bo_add(struct radeon_device *rdev, if (last_pfn > vm->last_pfn) { /* grow va space 32M by 32M */ unsigned align = ((32 << 20) >> 12) - 1; - radeon_vm_unbind_locked(rdev, vm); + radeon_vm_free_pt(rdev, vm); vm->last_pfn = (last_pfn + align) & ~align; } mutex_unlock(&rdev->vm_manager.lock); @@ -886,7 +860,6 @@ int radeon_vm_bo_update_pte(struct radeon_device *rdev, return 0; }
-/* object have to be reserved */ /** * radeon_vm_bo_rmv - remove a bo to a specific vm * @@ -898,36 +871,22 @@ int radeon_vm_bo_update_pte(struct radeon_device *rdev, * Remove @bo from the list of bos associated with the vm and * remove the ptes for @bo in the page table. * Returns 0 for success. + * + * Object have to be reserved! */ int radeon_vm_bo_rmv(struct radeon_device *rdev, struct radeon_vm *vm, struct radeon_bo *bo) { struct radeon_bo_va *bo_va; - int r;
bo_va = radeon_bo_va(bo, vm); if (bo_va == NULL) return 0;
- /* wait for va use to end */ - while (bo_va->fence) { - r = radeon_fence_wait(bo_va->fence, false); - if (r) { - DRM_ERROR("error while waiting for fence: %d\n", r); - } - if (r == -EDEADLK) { - r = radeon_gpu_reset(rdev); - if (!r) - continue; - } - break; - } - radeon_fence_unref(&bo_va->fence); - mutex_lock(&rdev->vm_manager.lock); mutex_lock(&vm->mutex); - radeon_vm_bo_update_pte(rdev, vm, bo, NULL); + radeon_vm_free_pt(rdev, vm); mutex_unlock(&rdev->vm_manager.lock); list_del(&bo_va->vm_list); mutex_unlock(&vm->mutex); @@ -1010,7 +969,7 @@ void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm)
mutex_lock(&rdev->vm_manager.lock); mutex_lock(&vm->mutex); - radeon_vm_unbind_locked(rdev, vm); + radeon_vm_free_pt(rdev, vm); mutex_unlock(&rdev->vm_manager.lock);
/* remove all bo at this point non are busy any more because unbind @@ -1021,7 +980,6 @@ void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm) bo_va = radeon_bo_va(rdev->ring_tmp_bo.bo, vm); list_del_init(&bo_va->bo_list); list_del_init(&bo_va->vm_list); - radeon_fence_unref(&bo_va->fence); radeon_bo_unreserve(rdev->ring_tmp_bo.bo); kfree(bo_va); } @@ -1033,10 +991,11 @@ void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm) r = radeon_bo_reserve(bo_va->bo, false); if (!r) { list_del_init(&bo_va->bo_list); - radeon_fence_unref(&bo_va->fence); radeon_bo_unreserve(bo_va->bo); kfree(bo_va); } } + radeon_fence_unref(&vm->fence); + radeon_fence_unref(&vm->last_flush); mutex_unlock(&vm->mutex); } diff --git a/drivers/gpu/drm/radeon/radeon_sa.c b/drivers/gpu/drm/radeon/radeon_sa.c index 4e77124..105fde6 100644 --- a/drivers/gpu/drm/radeon/radeon_sa.c +++ b/drivers/gpu/drm/radeon/radeon_sa.c @@ -316,7 +316,7 @@ int radeon_sa_bo_new(struct radeon_device *rdev, { struct radeon_fence *fences[RADEON_NUM_RINGS]; unsigned tries[RADEON_NUM_RINGS]; - int i, r = -ENOMEM; + int i, r;
BUG_ON(align > RADEON_GPU_PAGE_SIZE); BUG_ON(size > sa_manager->size); @@ -331,7 +331,7 @@ int radeon_sa_bo_new(struct radeon_device *rdev, INIT_LIST_HEAD(&(*sa_bo)->flist);
spin_lock(&sa_manager->wq.lock); - while(1) { + do { for (i = 0; i < RADEON_NUM_RINGS; ++i) { fences[i] = NULL; tries[i] = 0; @@ -349,26 +349,22 @@ int radeon_sa_bo_new(struct radeon_device *rdev, /* see if we can skip over some allocations */ } while (radeon_sa_bo_next_hole(sa_manager, fences, tries));
- if (!block) { - break; - } - spin_unlock(&sa_manager->wq.lock); r = radeon_fence_wait_any(rdev, fences, false); spin_lock(&sa_manager->wq.lock); /* if we have nothing to wait for block */ - if (r == -ENOENT) { + if (r == -ENOENT && block) { r = wait_event_interruptible_locked( sa_manager->wq, radeon_sa_event(sa_manager, size, align) ); + + } else if (r == -ENOENT) { + r = -ENOMEM; } - if (r) { - goto out_err; - } - };
-out_err: + } while (!r); + spin_unlock(&sa_manager->wq.lock); kfree(*sa_bo); *sa_bo = NULL;
Makes it easier to move it into the rings.
Signed-off-by: Christian König deathsimple@vodafone.de Reviewed-by: Jerome Glisse jglisse@redhat.com --- drivers/gpu/drm/radeon/ni.c | 22 +++++++++++++++------- drivers/gpu/drm/radeon/radeon.h | 12 ++++++------ drivers/gpu/drm/radeon/radeon_asic.c | 3 --- drivers/gpu/drm/radeon/radeon_asic.h | 7 +++---- drivers/gpu/drm/radeon/radeon_gart.c | 24 ++++++++++-------------- 5 files changed, 34 insertions(+), 34 deletions(-)
diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c index 88d5713..1fd2e41 100644 --- a/drivers/gpu/drm/radeon/ni.c +++ b/drivers/gpu/drm/radeon/ni.c @@ -1495,9 +1495,7 @@ void cayman_vm_fini(struct radeon_device *rdev) #define R600_PTE_READABLE (1 << 5) #define R600_PTE_WRITEABLE (1 << 6)
-uint32_t cayman_vm_page_flags(struct radeon_device *rdev, - struct radeon_vm *vm, - uint32_t flags) +uint32_t cayman_vm_page_flags(struct radeon_device *rdev, uint32_t flags) { uint32_t r600_flags = 0;
@@ -1512,13 +1510,23 @@ uint32_t cayman_vm_page_flags(struct radeon_device *rdev, }
void cayman_vm_set_page(struct radeon_device *rdev, struct radeon_vm *vm, - unsigned pfn, uint64_t addr, uint32_t flags) + unsigned pfn, struct ttm_mem_reg *mem, + unsigned npages, uint32_t flags) { void __iomem *ptr = (void *)vm->pt; + uint64_t addr; + int i; + + addr = flags = cayman_vm_page_flags(rdev, flags);
- addr = addr & 0xFFFFFFFFFFFFF000ULL; - addr |= flags; - writeq(addr, ptr + (pfn * 8)); + for (i = 0; i < npages; ++i, ++pfn) { + if (mem) { + addr = radeon_vm_get_addr(rdev, mem, i); + addr = addr & 0xFFFFFFFFFFFFF000ULL; + addr |= flags; + } + writeq(addr, ptr + (pfn * 8)); + } }
void cayman_vm_flush(struct radeon_device *rdev, struct radeon_ib *ib) diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index ed0ef17..7d37cb2 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -1154,11 +1154,9 @@ struct radeon_asic { struct { int (*init)(struct radeon_device *rdev); void (*fini)(struct radeon_device *rdev); - uint32_t (*page_flags)(struct radeon_device *rdev, - struct radeon_vm *vm, - uint32_t flags); void (*set_page)(struct radeon_device *rdev, struct radeon_vm *vm, - unsigned pfn, uint64_t addr, uint32_t flags); + unsigned pfn, struct ttm_mem_reg *mem, + unsigned npages, uint32_t flags); } vm; /* ring specific callbacks */ struct { @@ -1714,8 +1712,7 @@ void radeon_ring_write(struct radeon_ring *ring, uint32_t v); #define radeon_gart_set_page(rdev, i, p) (rdev)->asic->gart.set_page((rdev), (i), (p)) #define radeon_asic_vm_init(rdev) (rdev)->asic->vm.init((rdev)) #define radeon_asic_vm_fini(rdev) (rdev)->asic->vm.fini((rdev)) -#define radeon_asic_vm_page_flags(rdev, v, flags) (rdev)->asic->vm.page_flags((rdev), (v), (flags)) -#define radeon_asic_vm_set_page(rdev, v, pfn, addr, flags) (rdev)->asic->vm.set_page((rdev), (v), (pfn), (addr), (flags)) +#define radeon_asic_vm_set_page(rdev, v, pfn, mem, npages, flags) (rdev)->asic->vm.set_page((rdev), (v), (pfn), (mem), (npages), (flags)) #define radeon_ring_start(rdev, r, cp) (rdev)->asic->ring[(r)].ring_start((rdev), (cp)) #define radeon_ring_test(rdev, r, cp) (rdev)->asic->ring[(r)].ring_test((rdev), (cp)) #define radeon_ib_test(rdev, r, cp) (rdev)->asic->ring[(r)].ib_test((rdev), (cp)) @@ -1799,6 +1796,9 @@ struct radeon_fence *radeon_vm_grab_id(struct radeon_device *rdev, void radeon_vm_fence(struct radeon_device *rdev, struct radeon_vm *vm, struct radeon_fence *fence); +u64 radeon_vm_get_addr(struct radeon_device *rdev, + struct ttm_mem_reg *mem, + unsigned pfn); int radeon_vm_bo_update_pte(struct radeon_device *rdev, struct radeon_vm *vm, struct radeon_bo *bo, diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c index 5486674..4b99a24 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.c +++ b/drivers/gpu/drm/radeon/radeon_asic.c @@ -1359,7 +1359,6 @@ static struct radeon_asic cayman_asic = { .vm = { .init = &cayman_vm_init, .fini = &cayman_vm_fini, - .page_flags = &cayman_vm_page_flags, .set_page = &cayman_vm_set_page, }, .ring = { @@ -1462,7 +1461,6 @@ static struct radeon_asic trinity_asic = { .vm = { .init = &cayman_vm_init, .fini = &cayman_vm_fini, - .page_flags = &cayman_vm_page_flags, .set_page = &cayman_vm_set_page, }, .ring = { @@ -1565,7 +1563,6 @@ static struct radeon_asic si_asic = { .vm = { .init = &si_vm_init, .fini = &si_vm_fini, - .page_flags = &cayman_vm_page_flags, .set_page = &cayman_vm_set_page, }, .ring = { diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h index 6f8002b..40462e0 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.h +++ b/drivers/gpu/drm/radeon/radeon_asic.h @@ -440,11 +440,10 @@ int cayman_vm_init(struct radeon_device *rdev); void cayman_vm_fini(struct radeon_device *rdev); void cayman_vm_unbind(struct radeon_device *rdev, struct radeon_vm *vm); void cayman_vm_flush(struct radeon_device *rdev, struct radeon_ib *ib); -uint32_t cayman_vm_page_flags(struct radeon_device *rdev, - struct radeon_vm *vm, - uint32_t flags); +uint32_t cayman_vm_page_flags(struct radeon_device *rdev, uint32_t flags); void cayman_vm_set_page(struct radeon_device *rdev, struct radeon_vm *vm, - unsigned pfn, uint64_t addr, uint32_t flags); + unsigned pfn, struct ttm_mem_reg *mem, + unsigned npages, uint32_t flags); int evergreen_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib);
/* DCE6 - SI */ diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c index 4bce026..bf5378b 100644 --- a/drivers/gpu/drm/radeon/radeon_gart.c +++ b/drivers/gpu/drm/radeon/radeon_gart.c @@ -450,7 +450,7 @@ int radeon_vm_manager_init(struct radeon_device *rdev) r = radeon_asic_vm_init(rdev); if (r) return r; - + rdev->vm_manager.enabled = true;
r = radeon_sa_bo_manager_start(rdev, &rdev->vm_manager.sa_manager); @@ -773,9 +773,9 @@ int radeon_vm_bo_add(struct radeon_device *rdev, * to (cayman+). * Returns the physical address of the page. */ -static u64 radeon_vm_get_addr(struct radeon_device *rdev, - struct ttm_mem_reg *mem, - unsigned pfn) +u64 radeon_vm_get_addr(struct radeon_device *rdev, + struct ttm_mem_reg *mem, + unsigned pfn) { u64 addr = 0;
@@ -819,9 +819,8 @@ int radeon_vm_bo_update_pte(struct radeon_device *rdev, struct ttm_mem_reg *mem) { struct radeon_bo_va *bo_va; - unsigned ngpu_pages, i; - uint64_t addr = 0, pfn; - uint32_t flags; + unsigned ngpu_pages; + uint64_t pfn;
/* nothing to do if vm isn't bound */ if (vm->sa_bo == NULL) @@ -848,14 +847,11 @@ int radeon_vm_bo_update_pte(struct radeon_device *rdev, bo_va->flags |= RADEON_VM_PAGE_SYSTEM; } } - pfn = bo_va->soffset / RADEON_GPU_PAGE_SIZE; - flags = radeon_asic_vm_page_flags(rdev, bo_va->vm, bo_va->flags); - for (i = 0, addr = 0; i < ngpu_pages; i++) { - if (mem && bo_va->valid) { - addr = radeon_vm_get_addr(rdev, mem, i); - } - radeon_asic_vm_set_page(rdev, bo_va->vm, i + pfn, addr, flags); + if (!bo_va->valid) { + mem = NULL; } + pfn = bo_va->soffset / RADEON_GPU_PAGE_SIZE; + radeon_asic_vm_set_page(rdev, bo_va->vm, pfn, mem, ngpu_pages, bo_va->flags); radeon_fence_unref(&vm->last_flush); return 0; }
From: Jerome Glisse jglisse@redhat.com
Make sure that the ib bo is bound and is page table is up to date in the virtual address space.
Signed-off-by: Jerome Glisse jglisse@redhat.com Reviewed-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/radeon/radeon_cs.c | 5 +++++ 1 file changed, 5 insertions(+)
diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c index 300fc25..3ae7c27 100644 --- a/drivers/gpu/drm/radeon/radeon_cs.c +++ b/drivers/gpu/drm/radeon/radeon_cs.c @@ -372,10 +372,15 @@ static int radeon_cs_ib_chunk(struct radeon_device *rdev, static int radeon_bo_vm_update_pte(struct radeon_cs_parser *parser, struct radeon_vm *vm) { + struct radeon_device *rdev = parser->rdev; struct radeon_bo_list *lobj; struct radeon_bo *bo; int r;
+ r = radeon_vm_bo_update_pte(rdev, vm, rdev->ring_tmp_bo.bo, &rdev->ring_tmp_bo.bo->tbo.mem); + if (r) { + return r; + } list_for_each_entry(lobj, &parser->validated, tv.head) { bo = lobj->bo; r = radeon_vm_bo_update_pte(parser->rdev, vm, bo, &bo->tbo.mem);
Currently doing the update with the CP.
v2: Rebased on Jeromes bugfix. Make validity comparison more human readable.
Signed-off-by: Christian König deathsimple@vodafone.de --- drivers/gpu/drm/radeon/ni.c | 20 ++++++---- drivers/gpu/drm/radeon/nid.h | 1 + drivers/gpu/drm/radeon/radeon.h | 2 + drivers/gpu/drm/radeon/radeon_asic.c | 3 ++ drivers/gpu/drm/radeon/radeon_gart.c | 70 +++++++++++++++++++++++++--------- 5 files changed, 71 insertions(+), 25 deletions(-)
diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c index 1fd2e41..78d9cfb 100644 --- a/drivers/gpu/drm/radeon/ni.c +++ b/drivers/gpu/drm/radeon/ni.c @@ -1513,20 +1513,24 @@ void cayman_vm_set_page(struct radeon_device *rdev, struct radeon_vm *vm, unsigned pfn, struct ttm_mem_reg *mem, unsigned npages, uint32_t flags) { - void __iomem *ptr = (void *)vm->pt; - uint64_t addr; + struct radeon_ring *ring = &rdev->ring[rdev->asic->vm.pt_ring_index]; + uint64_t addr, pt = vm->pt_gpu_addr + pfn * 8; int i;
addr = flags = cayman_vm_page_flags(rdev, flags);
- for (i = 0; i < npages; ++i, ++pfn) { - if (mem) { - addr = radeon_vm_get_addr(rdev, mem, i); + radeon_ring_write(ring, PACKET3(PACKET3_ME_WRITE, 1 + npages * 2)); + radeon_ring_write(ring, pt & 0xffffffff); + radeon_ring_write(ring, (pt >> 32) & 0xff); + for (i = 0; i < npages; ++i) { + if (mem) { + addr = radeon_vm_get_addr(rdev, mem, i); addr = addr & 0xFFFFFFFFFFFFF000ULL; addr |= flags; - } - writeq(addr, ptr + (pfn * 8)); - } + } + radeon_ring_write(ring, addr & 0xffffffff); + radeon_ring_write(ring, (addr >> 32) & 0xffffffff); + } }
void cayman_vm_flush(struct radeon_device *rdev, struct radeon_ib *ib) diff --git a/drivers/gpu/drm/radeon/nid.h b/drivers/gpu/drm/radeon/nid.h index 870db34..2423d1b 100644 --- a/drivers/gpu/drm/radeon/nid.h +++ b/drivers/gpu/drm/radeon/nid.h @@ -585,6 +585,7 @@ #define PACKET3_SET_CONTEXT_REG_INDIRECT 0x73 #define PACKET3_SET_RESOURCE_INDIRECT 0x74 #define PACKET3_SET_APPEND_CNT 0x75 +#define PACKET3_ME_WRITE 0x7A
#endif
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 7d37cb2..3de0f08 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -1154,6 +1154,8 @@ struct radeon_asic { struct { int (*init)(struct radeon_device *rdev); void (*fini)(struct radeon_device *rdev); + + u32 pt_ring_index; void (*set_page)(struct radeon_device *rdev, struct radeon_vm *vm, unsigned pfn, struct ttm_mem_reg *mem, unsigned npages, uint32_t flags); diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c index 4b99a24..d0b4e50 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.c +++ b/drivers/gpu/drm/radeon/radeon_asic.c @@ -1359,6 +1359,7 @@ static struct radeon_asic cayman_asic = { .vm = { .init = &cayman_vm_init, .fini = &cayman_vm_fini, + .pt_ring_index = RADEON_RING_TYPE_GFX_INDEX, .set_page = &cayman_vm_set_page, }, .ring = { @@ -1461,6 +1462,7 @@ static struct radeon_asic trinity_asic = { .vm = { .init = &cayman_vm_init, .fini = &cayman_vm_fini, + .pt_ring_index = RADEON_RING_TYPE_GFX_INDEX, .set_page = &cayman_vm_set_page, }, .ring = { @@ -1563,6 +1565,7 @@ static struct radeon_asic si_asic = { .vm = { .init = &si_vm_init, .fini = &si_vm_fini, + .pt_ring_index = RADEON_RING_TYPE_GFX_INDEX, .set_page = &cayman_vm_set_page, }, .ring = { diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c index bf5378b..3ef6698 100644 --- a/drivers/gpu/drm/radeon/radeon_gart.c +++ b/drivers/gpu/drm/radeon/radeon_gart.c @@ -464,15 +464,7 @@ int radeon_vm_manager_init(struct radeon_device *rdev) continue;
list_for_each_entry(bo_va, &vm->va, vm_list) { - struct ttm_mem_reg *mem = NULL; - if (bo_va->valid) - mem = &bo_va->bo->tbo.mem; - bo_va->valid = false; - r = radeon_vm_bo_update_pte(rdev, vm, bo_va->bo, mem); - if (r) { - DRM_ERROR("Failed to update pte for vm %d!\n", vm->id); - } } } return 0; @@ -801,7 +793,6 @@ u64 radeon_vm_get_addr(struct radeon_device *rdev, return addr; }
-/* object have to be reserved & global and local mutex must be locked */ /** * radeon_vm_bo_update_pte - map a bo into the vm page table * @@ -812,15 +803,21 @@ u64 radeon_vm_get_addr(struct radeon_device *rdev, * * Fill in the page table entries for @bo (cayman+). * Returns 0 for success, -EINVAL for failure. + * + * Object have to be reserved & global and local mutex must be locked! */ int radeon_vm_bo_update_pte(struct radeon_device *rdev, struct radeon_vm *vm, struct radeon_bo *bo, struct ttm_mem_reg *mem) { + unsigned ridx = rdev->asic->vm.pt_ring_index; + struct radeon_ring *ring = &rdev->ring[ridx]; + struct radeon_semaphore *sem = NULL; struct radeon_bo_va *bo_va; - unsigned ngpu_pages; + unsigned ngpu_pages, ndw; uint64_t pfn; + int r;
/* nothing to do if vm isn't bound */ if (vm->sa_bo == NULL) @@ -832,7 +829,7 @@ int radeon_vm_bo_update_pte(struct radeon_device *rdev, return -EINVAL; }
- if (bo_va->valid && mem) + if ((bo_va->valid && mem) || (!bo_va->valid && mem == NULL)) return 0;
ngpu_pages = radeon_bo_ngpu_pages(bo); @@ -846,12 +843,50 @@ int radeon_vm_bo_update_pte(struct radeon_device *rdev, if (mem->mem_type == TTM_PL_TT) { bo_va->flags |= RADEON_VM_PAGE_SYSTEM; } - } - if (!bo_va->valid) { - mem = NULL; + if (!bo_va->valid) { + mem = NULL; + } + } else { + bo_va->valid = false; } pfn = bo_va->soffset / RADEON_GPU_PAGE_SIZE; - radeon_asic_vm_set_page(rdev, bo_va->vm, pfn, mem, ngpu_pages, bo_va->flags); + + if (vm->fence && radeon_fence_signaled(vm->fence)) { + radeon_fence_unref(&vm->fence); + } + + if (vm->fence && vm->fence->ring != ridx) { + r = radeon_semaphore_create(rdev, &sem); + if (r) { + return r; + } + } + + /* estimate number of dw needed */ + ndw = 32; + ndw += (ngpu_pages >> 12) * 3; + ndw += ngpu_pages * 2; + + r = radeon_ring_lock(rdev, ring, ndw); + if (r) { + return r; + } + + if (sem && radeon_fence_need_sync(vm->fence, ridx)) { + radeon_semaphore_sync_rings(rdev, sem, vm->fence->ring, ridx); + radeon_fence_note_sync(vm->fence, ridx); + } + + radeon_asic_vm_set_page(rdev, vm, pfn, mem, ngpu_pages, bo_va->flags); + + radeon_fence_unref(&vm->fence); + r = radeon_fence_emit(rdev, &vm->fence, ridx); + if (r) { + radeon_ring_unlock_undo(rdev, ring); + return r; + } + radeon_ring_unlock_commit(rdev, ring); + radeon_semaphore_free(rdev, &sem, vm->fence); radeon_fence_unref(&vm->last_flush); return 0; } @@ -875,6 +910,7 @@ int radeon_vm_bo_rmv(struct radeon_device *rdev, struct radeon_bo *bo) { struct radeon_bo_va *bo_va; + int r;
bo_va = radeon_bo_va(bo, vm); if (bo_va == NULL) @@ -882,14 +918,14 @@ int radeon_vm_bo_rmv(struct radeon_device *rdev,
mutex_lock(&rdev->vm_manager.lock); mutex_lock(&vm->mutex); - radeon_vm_free_pt(rdev, vm); + r = radeon_vm_bo_update_pte(rdev, vm, bo, NULL); mutex_unlock(&rdev->vm_manager.lock); list_del(&bo_va->vm_list); mutex_unlock(&vm->mutex); list_del(&bo_va->bo_list);
kfree(bo_va); - return 0; + return r; }
/**
On Mon, Aug 20, 2012 at 4:08 AM, Christian König deathsimple@vodafone.de wrote:
Second and hopefully last round for this patchset.
v2: Fix suspend/resume, and incorporate Jeromes comments.
Looks good to me. Can you put up a git branch somewhere?
Reviewed-by: Alex Deucher alexander.deucher@amd.com
Cheers, Christian.
dri-devel mailing list dri-devel@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/dri-devel
On 20.08.2012 20:58, Alex Deucher wrote:
On Mon, Aug 20, 2012 at 4:08 AM, Christian König deathsimple@vodafone.de wrote:
Second and hopefully last round for this patchset.
v2: Fix suspend/resume, and incorporate Jeromes comments.
Looks good to me. Can you put up a git branch somewhere?
Sure: git://people.freedesktop.org/~deathsimple/linux git://people.freedesktop.org/%7Edeathsimple/linux wip
Christian.
Reviewed-by: Alex Deucher alexander.deucher@amd.com
Cheers, Christian.
dri-devel mailing list dri-devel@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/dri-devel
dri-devel@lists.freedesktop.org