From: Christian König christian.koenig@amd.com
Use ring structure instead of index and provide vm_id and pd_addr separately.
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/radeon/cik.c | 23 ++++++++++------------- drivers/gpu/drm/radeon/cik_sdma.c | 22 +++++++++------------- drivers/gpu/drm/radeon/ni.c | 14 +++++--------- drivers/gpu/drm/radeon/ni_dma.c | 14 +++++--------- drivers/gpu/drm/radeon/radeon.h | 5 +++-- drivers/gpu/drm/radeon/radeon_asic.h | 18 ++++++++++++------ drivers/gpu/drm/radeon/radeon_vm.c | 3 ++- drivers/gpu/drm/radeon/si.c | 18 +++++++----------- drivers/gpu/drm/radeon/si_dma.c | 19 ++++++++----------- 9 files changed, 61 insertions(+), 75 deletions(-)
diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index 57a359d..d52ead9 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -5967,26 +5967,23 @@ static void cik_vm_decode_fault(struct radeon_device *rdev, * Update the page table base and flush the VM TLB * using the CP (CIK). */ -void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm) +void cik_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring, + unsigned vm_id, uint64_t pd_addr) { - struct radeon_ring *ring = &rdev->ring[ridx]; - int usepfp = (ridx == RADEON_RING_TYPE_GFX_INDEX); - - if (vm == NULL) - return; + int usepfp = (ring->idx == RADEON_RING_TYPE_GFX_INDEX);
radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) | WRITE_DATA_DST_SEL(0))); - if (vm->id < 8) { + if (vm_id < 8) { radeon_ring_write(ring, - (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2); + (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2); } else { radeon_ring_write(ring, - (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2); + (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2); } radeon_ring_write(ring, 0); - radeon_ring_write(ring, vm->pd_gpu_addr >> 12); + radeon_ring_write(ring, pd_addr >> 12);
/* update SH_MEM_* regs */ radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); @@ -5994,7 +5991,7 @@ void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm) WRITE_DATA_DST_SEL(0))); radeon_ring_write(ring, SRBM_GFX_CNTL >> 2); radeon_ring_write(ring, 0); - radeon_ring_write(ring, VMID(vm->id)); + radeon_ring_write(ring, VMID(vm_id));
radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6)); radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) | @@ -6015,7 +6012,7 @@ void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm) radeon_ring_write(ring, VMID(0));
/* HDP flush */ - cik_hdp_flush_cp_ring_emit(rdev, ridx); + cik_hdp_flush_cp_ring_emit(rdev, ring->idx);
/* bits 0-15 are the VM contexts0-15 */ radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); @@ -6023,7 +6020,7 @@ void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm) WRITE_DATA_DST_SEL(0))); radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2); radeon_ring_write(ring, 0); - radeon_ring_write(ring, 1 << vm->id); + radeon_ring_write(ring, 1 << vm_id);
/* compute doesn't have PFP */ if (usepfp) { diff --git a/drivers/gpu/drm/radeon/cik_sdma.c b/drivers/gpu/drm/radeon/cik_sdma.c index 4e8432d..7470a2e 100644 --- a/drivers/gpu/drm/radeon/cik_sdma.c +++ b/drivers/gpu/drm/radeon/cik_sdma.c @@ -901,25 +901,21 @@ void cik_sdma_vm_pad_ib(struct radeon_ib *ib) * Update the page table base and flush the VM TLB * using sDMA (CIK). */ -void cik_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm) +void cik_dma_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring, + unsigned vm_id, uint64_t pd_addr) { - struct radeon_ring *ring = &rdev->ring[ridx]; - - if (vm == NULL) - return; - radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000)); - if (vm->id < 8) { - radeon_ring_write(ring, (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2); + if (vm_id < 8) { + radeon_ring_write(ring, (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2); } else { - radeon_ring_write(ring, (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2); + radeon_ring_write(ring, (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2); } - radeon_ring_write(ring, vm->pd_gpu_addr >> 12); + radeon_ring_write(ring, pd_addr >> 12);
/* update SH_MEM_* regs */ radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000)); radeon_ring_write(ring, SRBM_GFX_CNTL >> 2); - radeon_ring_write(ring, VMID(vm->id)); + radeon_ring_write(ring, VMID(vm_id));
radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000)); radeon_ring_write(ring, SH_MEM_BASES >> 2); @@ -942,11 +938,11 @@ void cik_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm radeon_ring_write(ring, VMID(0));
/* flush HDP */ - cik_sdma_hdp_flush_ring_emit(rdev, ridx); + cik_sdma_hdp_flush_ring_emit(rdev, ring->idx);
/* flush TLB */ radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000)); radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2); - radeon_ring_write(ring, 1 << vm->id); + radeon_ring_write(ring, 1 << vm_id); }
diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c index 3faee58..bee432d 100644 --- a/drivers/gpu/drm/radeon/ni.c +++ b/drivers/gpu/drm/radeon/ni.c @@ -2502,15 +2502,11 @@ void cayman_vm_decode_fault(struct radeon_device *rdev, * Update the page table base and flush the VM TLB * using the CP (cayman-si). */ -void cayman_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm) +void cayman_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring, + unsigned vm_id, uint64_t pd_addr) { - struct radeon_ring *ring = &rdev->ring[ridx]; - - if (vm == NULL) - return; - - radeon_ring_write(ring, PACKET0(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2), 0)); - radeon_ring_write(ring, vm->pd_gpu_addr >> 12); + radeon_ring_write(ring, PACKET0(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2), 0)); + radeon_ring_write(ring, pd_addr >> 12);
/* flush hdp cache */ radeon_ring_write(ring, PACKET0(HDP_MEM_COHERENCY_FLUSH_CNTL, 0)); @@ -2518,7 +2514,7 @@ void cayman_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
/* bits 0-7 are the VM contexts0-7 */ radeon_ring_write(ring, PACKET0(VM_INVALIDATE_REQUEST, 0)); - radeon_ring_write(ring, 1 << vm->id); + radeon_ring_write(ring, 1 << vm_id);
/* sync PFP to ME, otherwise we might get invalid PFP reads */ radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0)); diff --git a/drivers/gpu/drm/radeon/ni_dma.c b/drivers/gpu/drm/radeon/ni_dma.c index f26f0a9..5a72404 100644 --- a/drivers/gpu/drm/radeon/ni_dma.c +++ b/drivers/gpu/drm/radeon/ni_dma.c @@ -446,16 +446,12 @@ void cayman_dma_vm_pad_ib(struct radeon_ib *ib) ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0); }
-void cayman_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm) +void cayman_dma_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring, + unsigned vm_id, uint64_t pd_addr) { - struct radeon_ring *ring = &rdev->ring[ridx]; - - if (vm == NULL) - return; - radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0)); - radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2)); - radeon_ring_write(ring, vm->pd_gpu_addr >> 12); + radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2)); + radeon_ring_write(ring, pd_addr >> 12);
/* flush hdp cache */ radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0)); @@ -465,6 +461,6 @@ void cayman_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm /* bits 0-7 are the VM contexts0-7 */ radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0)); radeon_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST >> 2)); - radeon_ring_write(ring, 1 << vm->id); + radeon_ring_write(ring, 1 << vm_id); }
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 908f349..5d913ef 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -1790,7 +1790,8 @@ struct radeon_asic_ring { void (*hdp_flush)(struct radeon_device *rdev, struct radeon_ring *ring); bool (*emit_semaphore)(struct radeon_device *rdev, struct radeon_ring *cp, struct radeon_semaphore *semaphore, bool emit_wait); - void (*vm_flush)(struct radeon_device *rdev, int ridx, struct radeon_vm *vm); + void (*vm_flush)(struct radeon_device *rdev, struct radeon_ring *ring, + unsigned vm_id, uint64_t pd_addr);
/* testing functions */ int (*ring_test)(struct radeon_device *rdev, struct radeon_ring *cp); @@ -2836,7 +2837,7 @@ static inline void radeon_ring_write(struct radeon_ring *ring, uint32_t v) #define radeon_ring_ib_execute(rdev, r, ib) (rdev)->asic->ring[(r)]->ib_execute((rdev), (ib)) #define radeon_ring_ib_parse(rdev, r, ib) (rdev)->asic->ring[(r)]->ib_parse((rdev), (ib)) #define radeon_ring_is_lockup(rdev, r, cp) (rdev)->asic->ring[(r)]->is_lockup((rdev), (cp)) -#define radeon_ring_vm_flush(rdev, r, vm) (rdev)->asic->ring[(r)]->vm_flush((rdev), (r), (vm)) +#define radeon_ring_vm_flush(rdev, r, vm_id, pd_addr) (rdev)->asic->ring[(r)->idx]->vm_flush((rdev), (r), (vm_id), (pd_addr)) #define radeon_ring_get_rptr(rdev, r) (rdev)->asic->ring[(r)->idx]->get_rptr((rdev), (r)) #define radeon_ring_get_wptr(rdev, r) (rdev)->asic->ring[(r)->idx]->get_wptr((rdev), (r)) #define radeon_ring_set_wptr(rdev, r) (rdev)->asic->ring[(r)->idx]->set_wptr((rdev), (r)) diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h index d8ace5b..2a45d54 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.h +++ b/drivers/gpu/drm/radeon/radeon_asic.h @@ -599,7 +599,8 @@ int cayman_asic_reset(struct radeon_device *rdev); void cayman_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib); int cayman_vm_init(struct radeon_device *rdev); void cayman_vm_fini(struct radeon_device *rdev); -void cayman_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm); +void cayman_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring, + unsigned vm_id, uint64_t pd_addr); uint32_t cayman_vm_page_flags(struct radeon_device *rdev, uint32_t flags); int evergreen_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib); int evergreen_dma_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib); @@ -624,7 +625,8 @@ void cayman_dma_vm_set_pages(struct radeon_device *rdev, uint32_t incr, uint32_t flags); void cayman_dma_vm_pad_ib(struct radeon_ib *ib);
-void cayman_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm); +void cayman_dma_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring, + unsigned vm_id, uint64_t pd_addr);
u32 cayman_gfx_get_rptr(struct radeon_device *rdev, struct radeon_ring *ring); @@ -699,7 +701,8 @@ int si_irq_set(struct radeon_device *rdev); int si_irq_process(struct radeon_device *rdev); int si_vm_init(struct radeon_device *rdev); void si_vm_fini(struct radeon_device *rdev); -void si_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm); +void si_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring, + unsigned vm_id, uint64_t pd_addr); int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib); struct radeon_fence *si_copy_dma(struct radeon_device *rdev, uint64_t src_offset, uint64_t dst_offset, @@ -721,7 +724,8 @@ void si_dma_vm_set_pages(struct radeon_device *rdev, uint64_t addr, unsigned count, uint32_t incr, uint32_t flags);
-void si_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm); +void si_dma_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring, + unsigned vm_id, uint64_t pd_addr); u32 si_get_xclk(struct radeon_device *rdev); uint64_t si_get_gpu_clock_counter(struct radeon_device *rdev); int si_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk); @@ -793,7 +797,8 @@ int cik_irq_set(struct radeon_device *rdev); int cik_irq_process(struct radeon_device *rdev); int cik_vm_init(struct radeon_device *rdev); void cik_vm_fini(struct radeon_device *rdev); -void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm); +void cik_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring, + unsigned vm_id, uint64_t pd_addr);
void cik_sdma_vm_copy_pages(struct radeon_device *rdev, struct radeon_ib *ib, @@ -811,7 +816,8 @@ void cik_sdma_vm_set_pages(struct radeon_device *rdev, uint32_t incr, uint32_t flags); void cik_sdma_vm_pad_ib(struct radeon_ib *ib);
-void cik_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm); +void cik_dma_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring, + unsigned vm_id, uint64_t pd_addr); int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib); u32 cik_gfx_get_rptr(struct radeon_device *rdev, struct radeon_ring *ring); diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c index dfde266..9d0f87b 100644 --- a/drivers/gpu/drm/radeon/radeon_vm.c +++ b/drivers/gpu/drm/radeon/radeon_vm.c @@ -243,7 +243,8 @@ void radeon_vm_flush(struct radeon_device *rdev, if (!vm->last_flush || pd_addr != vm->pd_gpu_addr) { trace_radeon_vm_flush(pd_addr, ring, vm->id); vm->pd_gpu_addr = pd_addr; - radeon_ring_vm_flush(rdev, ring, vm); + radeon_ring_vm_flush(rdev, &rdev->ring[ring], + vm->id, vm->pd_gpu_addr); } }
diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c index eeea5b6..e91968b 100644 --- a/drivers/gpu/drm/radeon/si.c +++ b/drivers/gpu/drm/radeon/si.c @@ -5020,27 +5020,23 @@ static void si_vm_decode_fault(struct radeon_device *rdev, block, mc_id); }
-void si_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm) +void si_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring, + unsigned vm_id, uint64_t pd_addr) { - struct radeon_ring *ring = &rdev->ring[ridx]; - - if (vm == NULL) - return; - /* write new base address */ radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) | WRITE_DATA_DST_SEL(0)));
- if (vm->id < 8) { + if (vm_id < 8) { radeon_ring_write(ring, - (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2); + (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2); } else { radeon_ring_write(ring, - (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2); + (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2); } radeon_ring_write(ring, 0); - radeon_ring_write(ring, vm->pd_gpu_addr >> 12); + radeon_ring_write(ring, pd_addr >> 12);
/* flush hdp cache */ radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); @@ -5056,7 +5052,7 @@ void si_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm) WRITE_DATA_DST_SEL(0))); radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2); radeon_ring_write(ring, 0); - radeon_ring_write(ring, 1 << vm->id); + radeon_ring_write(ring, 1 << vm_id);
/* sync PFP to ME, otherwise we might get invalid PFP reads */ radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0)); diff --git a/drivers/gpu/drm/radeon/si_dma.c b/drivers/gpu/drm/radeon/si_dma.c index b58f12b..e8bc0a5 100644 --- a/drivers/gpu/drm/radeon/si_dma.c +++ b/drivers/gpu/drm/radeon/si_dma.c @@ -185,20 +185,17 @@ void si_dma_vm_set_pages(struct radeon_device *rdev, } }
-void si_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm) -{ - struct radeon_ring *ring = &rdev->ring[ridx]; - - if (vm == NULL) - return; +void si_dma_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring, + unsigned vm_id, uint64_t pd_addr)
+{ radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0)); - if (vm->id < 8) { - radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2)); + if (vm_id < 8) { + radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2)); } else { - radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2)); + radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2)); } - radeon_ring_write(ring, vm->pd_gpu_addr >> 12); + radeon_ring_write(ring, pd_addr >> 12);
/* flush hdp cache */ radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0)); @@ -208,7 +205,7 @@ void si_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm) /* bits 0-7 are the VM contexts0-7 */ radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0)); radeon_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST >> 2)); - radeon_ring_write(ring, 1 << vm->id); + radeon_ring_write(ring, 1 << vm_id); }
/**
From: Christian König christian.koenig@amd.com
That's useless when all callers drop the reservation immediately after calling the function.
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/radeon/radeon_gem.c | 2 +- drivers/gpu/drm/radeon/radeon_kms.c | 2 -- drivers/gpu/drm/radeon/radeon_vm.c | 4 ++-- 3 files changed, 3 insertions(+), 5 deletions(-)
diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c index c194497..f752c7f 100644 --- a/drivers/gpu/drm/radeon/radeon_gem.c +++ b/drivers/gpu/drm/radeon/radeon_gem.c @@ -601,6 +601,7 @@ int radeon_gem_va_ioctl(struct drm_device *dev, void *data, if (bo_va->it.start) { args->operation = RADEON_VA_RESULT_VA_EXIST; args->offset = bo_va->it.start * RADEON_GPU_PAGE_SIZE; + radeon_bo_unreserve(rbo); goto out; } r = radeon_vm_bo_set_addr(rdev, bo_va, args->offset, args->flags); @@ -616,7 +617,6 @@ int radeon_gem_va_ioctl(struct drm_device *dev, void *data, args->operation = RADEON_VA_RESULT_ERROR; } out: - radeon_bo_unreserve(rbo); drm_gem_object_unreference_unlocked(gobj); return r; } diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c index 8309b11..85ee6f7 100644 --- a/drivers/gpu/drm/radeon/radeon_kms.c +++ b/drivers/gpu/drm/radeon/radeon_kms.c @@ -621,8 +621,6 @@ int radeon_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) RADEON_VA_IB_OFFSET, RADEON_VM_PAGE_READABLE | RADEON_VM_PAGE_SNOOPED); - - radeon_bo_unreserve(rdev->ring_tmp_bo.bo); if (r) { radeon_vm_fini(rdev, vm); kfree(fpriv); diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c index 9d0f87b..db0ed3a 100644 --- a/drivers/gpu/drm/radeon/radeon_vm.c +++ b/drivers/gpu/drm/radeon/radeon_vm.c @@ -450,7 +450,7 @@ error: * Validate and set the offset requested within the vm address space. * Returns 0 for success, error for failure. * - * Object has to be reserved! + * Object has to be reserved and gets unreserved by this function! */ int radeon_vm_bo_set_addr(struct radeon_device *rdev, struct radeon_bo_va *bo_va, @@ -576,7 +576,7 @@ int radeon_vm_bo_set_addr(struct radeon_device *rdev, }
mutex_unlock(&vm->mutex); - return radeon_bo_reserve(bo_va->bo, false); + return 0; }
/**
From: Christian König christian.koenig@amd.com
The PD/PTs reservation object now contains everything needed.
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/radeon/radeon_vm.c | 2 -- 1 file changed, 2 deletions(-)
diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c index db0ed3a..d8c49ad 100644 --- a/drivers/gpu/drm/radeon/radeon_vm.c +++ b/drivers/gpu/drm/radeon/radeon_vm.c @@ -701,7 +701,6 @@ int radeon_vm_update_page_directory(struct radeon_device *rdev, radeon_asic_vm_pad_ib(rdev, &ib);
radeon_semaphore_sync_resv(rdev, ib.semaphore, pd->tbo.resv, false); - radeon_semaphore_sync_fence(ib.semaphore, vm->last_id_use); WARN_ON(ib.length_dw > ndw); r = radeon_ib_schedule(rdev, &ib, NULL, false); if (r) { @@ -969,7 +968,6 @@ int radeon_vm_bo_update(struct radeon_device *rdev, radeon_asic_vm_pad_ib(rdev, &ib); WARN_ON(ib.length_dw > ndw);
- radeon_semaphore_sync_fence(ib.semaphore, vm->fence); r = radeon_ib_schedule(rdev, &ib, NULL, false); if (r) { radeon_ib_free(rdev, &ib);
From: Christian König christian.koenig@amd.com
Previously we just allocated space for four hardware semaphores in each software semaphore object. Make software semaphore objects represent only one hardware semaphore address again by splitting the sync code into it's own object.
v2: fix typo in comment
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/radeon/Makefile | 3 +- drivers/gpu/drm/radeon/cik.c | 18 +-- drivers/gpu/drm/radeon/cik_sdma.c | 18 +-- drivers/gpu/drm/radeon/evergreen_dma.c | 18 +-- drivers/gpu/drm/radeon/r600.c | 18 +-- drivers/gpu/drm/radeon/r600_dma.c | 18 +-- drivers/gpu/drm/radeon/radeon.h | 42 +++--- drivers/gpu/drm/radeon/radeon_cs.c | 8 +- drivers/gpu/drm/radeon/radeon_ib.c | 13 +- drivers/gpu/drm/radeon/radeon_semaphore.c | 154 +-------------------- drivers/gpu/drm/radeon/radeon_sync.c | 213 ++++++++++++++++++++++++++++++ drivers/gpu/drm/radeon/radeon_vm.c | 4 +- drivers/gpu/drm/radeon/rv770_dma.c | 18 +-- drivers/gpu/drm/radeon/si_dma.c | 18 +-- 14 files changed, 303 insertions(+), 260 deletions(-) create mode 100644 drivers/gpu/drm/radeon/radeon_sync.c
diff --git a/drivers/gpu/drm/radeon/Makefile b/drivers/gpu/drm/radeon/Makefile index d01b879..a02434a 100644 --- a/drivers/gpu/drm/radeon/Makefile +++ b/drivers/gpu/drm/radeon/Makefile @@ -80,7 +80,8 @@ radeon-y += radeon_device.o radeon_asic.o radeon_kms.o \ r600_dpm.o rs780_dpm.o rv6xx_dpm.o rv770_dpm.o rv730_dpm.o rv740_dpm.o \ rv770_smc.o cypress_dpm.o btc_dpm.o sumo_dpm.o sumo_smc.o trinity_dpm.o \ trinity_smc.o ni_dpm.o si_smc.o si_dpm.o kv_smc.o kv_dpm.o ci_smc.o \ - ci_dpm.o dce6_afmt.o radeon_vm.o radeon_ucode.o radeon_ib.o radeon_mn.o + ci_dpm.o dce6_afmt.o radeon_vm.o radeon_ucode.o radeon_ib.o radeon_mn.o \ + radeon_sync.o
# add async DMA block radeon-y += \ diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index d52ead9..6bb8b84 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -3979,31 +3979,27 @@ struct radeon_fence *cik_copy_cpdma(struct radeon_device *rdev, unsigned num_gpu_pages, struct reservation_object *resv) { - struct radeon_semaphore *sem = NULL; struct radeon_fence *fence; + struct radeon_sync sync; int ring_index = rdev->asic->copy.blit_ring_index; struct radeon_ring *ring = &rdev->ring[ring_index]; u32 size_in_bytes, cur_size_in_bytes, control; int i, num_loops; int r = 0;
- r = radeon_semaphore_create(rdev, &sem); - if (r) { - DRM_ERROR("radeon: moving bo (%d).\n", r); - return ERR_PTR(r); - } + radeon_sync_create(&sync);
size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT); num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff); r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18); if (r) { DRM_ERROR("radeon: moving bo (%d).\n", r); - radeon_semaphore_free(rdev, &sem, NULL); + radeon_sync_free(rdev, &sync, NULL); return ERR_PTR(r); }
- radeon_semaphore_sync_resv(rdev, sem, resv, false); - radeon_semaphore_sync_rings(rdev, sem, ring->idx); + radeon_sync_resv(rdev, &sync, resv, false); + radeon_sync_rings(rdev, &sync, ring->idx);
for (i = 0; i < num_loops; i++) { cur_size_in_bytes = size_in_bytes; @@ -4027,12 +4023,12 @@ struct radeon_fence *cik_copy_cpdma(struct radeon_device *rdev, r = radeon_fence_emit(rdev, &fence, ring->idx); if (r) { radeon_ring_unlock_undo(rdev, ring); - radeon_semaphore_free(rdev, &sem, NULL); + radeon_sync_free(rdev, &sync, NULL); return ERR_PTR(r); }
radeon_ring_unlock_commit(rdev, ring, false); - radeon_semaphore_free(rdev, &sem, fence); + radeon_sync_free(rdev, &sync, fence);
return fence; } diff --git a/drivers/gpu/drm/radeon/cik_sdma.c b/drivers/gpu/drm/radeon/cik_sdma.c index 7470a2e..604e2e7 100644 --- a/drivers/gpu/drm/radeon/cik_sdma.c +++ b/drivers/gpu/drm/radeon/cik_sdma.c @@ -541,31 +541,27 @@ struct radeon_fence *cik_copy_dma(struct radeon_device *rdev, unsigned num_gpu_pages, struct reservation_object *resv) { - struct radeon_semaphore *sem = NULL; struct radeon_fence *fence; + struct radeon_sync sync; int ring_index = rdev->asic->copy.dma_ring_index; struct radeon_ring *ring = &rdev->ring[ring_index]; u32 size_in_bytes, cur_size_in_bytes; int i, num_loops; int r = 0;
- r = radeon_semaphore_create(rdev, &sem); - if (r) { - DRM_ERROR("radeon: moving bo (%d).\n", r); - return ERR_PTR(r); - } + radeon_sync_create(&sync);
size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT); num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff); r = radeon_ring_lock(rdev, ring, num_loops * 7 + 14); if (r) { DRM_ERROR("radeon: moving bo (%d).\n", r); - radeon_semaphore_free(rdev, &sem, NULL); + radeon_sync_free(rdev, &sync, NULL); return ERR_PTR(r); }
- radeon_semaphore_sync_resv(rdev, sem, resv, false); - radeon_semaphore_sync_rings(rdev, sem, ring->idx); + radeon_sync_resv(rdev, &sync, resv, false); + radeon_sync_rings(rdev, &sync, ring->idx);
for (i = 0; i < num_loops; i++) { cur_size_in_bytes = size_in_bytes; @@ -586,12 +582,12 @@ struct radeon_fence *cik_copy_dma(struct radeon_device *rdev, r = radeon_fence_emit(rdev, &fence, ring->idx); if (r) { radeon_ring_unlock_undo(rdev, ring); - radeon_semaphore_free(rdev, &sem, NULL); + radeon_sync_free(rdev, &sync, NULL); return ERR_PTR(r); }
radeon_ring_unlock_commit(rdev, ring, false); - radeon_semaphore_free(rdev, &sem, fence); + radeon_sync_free(rdev, &sync, fence);
return fence; } diff --git a/drivers/gpu/drm/radeon/evergreen_dma.c b/drivers/gpu/drm/radeon/evergreen_dma.c index 66bcfad..96535aa 100644 --- a/drivers/gpu/drm/radeon/evergreen_dma.c +++ b/drivers/gpu/drm/radeon/evergreen_dma.c @@ -110,31 +110,27 @@ struct radeon_fence *evergreen_copy_dma(struct radeon_device *rdev, unsigned num_gpu_pages, struct reservation_object *resv) { - struct radeon_semaphore *sem = NULL; struct radeon_fence *fence; + struct radeon_sync sync; int ring_index = rdev->asic->copy.dma_ring_index; struct radeon_ring *ring = &rdev->ring[ring_index]; u32 size_in_dw, cur_size_in_dw; int i, num_loops; int r = 0;
- r = radeon_semaphore_create(rdev, &sem); - if (r) { - DRM_ERROR("radeon: moving bo (%d).\n", r); - return ERR_PTR(r); - } + radeon_sync_create(&sync);
size_in_dw = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT) / 4; num_loops = DIV_ROUND_UP(size_in_dw, 0xfffff); r = radeon_ring_lock(rdev, ring, num_loops * 5 + 11); if (r) { DRM_ERROR("radeon: moving bo (%d).\n", r); - radeon_semaphore_free(rdev, &sem, NULL); + radeon_sync_free(rdev, &sync, NULL); return ERR_PTR(r); }
- radeon_semaphore_sync_resv(rdev, sem, resv, false); - radeon_semaphore_sync_rings(rdev, sem, ring->idx); + radeon_sync_resv(rdev, &sync, resv, false); + radeon_sync_rings(rdev, &sync, ring->idx);
for (i = 0; i < num_loops; i++) { cur_size_in_dw = size_in_dw; @@ -153,12 +149,12 @@ struct radeon_fence *evergreen_copy_dma(struct radeon_device *rdev, r = radeon_fence_emit(rdev, &fence, ring->idx); if (r) { radeon_ring_unlock_undo(rdev, ring); - radeon_semaphore_free(rdev, &sem, NULL); + radeon_sync_free(rdev, &sync, NULL); return ERR_PTR(r); }
radeon_ring_unlock_commit(rdev, ring, false); - radeon_semaphore_free(rdev, &sem, fence); + radeon_sync_free(rdev, &sync, fence);
return fence; } diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index 56b0292..ef5d606 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -2889,31 +2889,27 @@ struct radeon_fence *r600_copy_cpdma(struct radeon_device *rdev, unsigned num_gpu_pages, struct reservation_object *resv) { - struct radeon_semaphore *sem = NULL; struct radeon_fence *fence; + struct radeon_sync sync; int ring_index = rdev->asic->copy.blit_ring_index; struct radeon_ring *ring = &rdev->ring[ring_index]; u32 size_in_bytes, cur_size_in_bytes, tmp; int i, num_loops; int r = 0;
- r = radeon_semaphore_create(rdev, &sem); - if (r) { - DRM_ERROR("radeon: moving bo (%d).\n", r); - return ERR_PTR(r); - } + radeon_sync_create(&sync);
size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT); num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff); r = radeon_ring_lock(rdev, ring, num_loops * 6 + 24); if (r) { DRM_ERROR("radeon: moving bo (%d).\n", r); - radeon_semaphore_free(rdev, &sem, NULL); + radeon_sync_free(rdev, &sync, NULL); return ERR_PTR(r); }
- radeon_semaphore_sync_resv(rdev, sem, resv, false); - radeon_semaphore_sync_rings(rdev, sem, ring->idx); + radeon_sync_resv(rdev, &sync, resv, false); + radeon_sync_rings(rdev, &sync, ring->idx);
radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); radeon_ring_write(ring, (WAIT_UNTIL - PACKET3_SET_CONFIG_REG_OFFSET) >> 2); @@ -2942,12 +2938,12 @@ struct radeon_fence *r600_copy_cpdma(struct radeon_device *rdev, r = radeon_fence_emit(rdev, &fence, ring->idx); if (r) { radeon_ring_unlock_undo(rdev, ring); - radeon_semaphore_free(rdev, &sem, NULL); + radeon_sync_free(rdev, &sync, NULL); return ERR_PTR(r); }
radeon_ring_unlock_commit(rdev, ring, false); - radeon_semaphore_free(rdev, &sem, fence); + radeon_sync_free(rdev, &sync, fence);
return fence; } diff --git a/drivers/gpu/drm/radeon/r600_dma.c b/drivers/gpu/drm/radeon/r600_dma.c index aabc343..3a58b80 100644 --- a/drivers/gpu/drm/radeon/r600_dma.c +++ b/drivers/gpu/drm/radeon/r600_dma.c @@ -441,31 +441,27 @@ struct radeon_fence *r600_copy_dma(struct radeon_device *rdev, unsigned num_gpu_pages, struct reservation_object *resv) { - struct radeon_semaphore *sem = NULL; struct radeon_fence *fence; + struct radeon_sync sync; int ring_index = rdev->asic->copy.dma_ring_index; struct radeon_ring *ring = &rdev->ring[ring_index]; u32 size_in_dw, cur_size_in_dw; int i, num_loops; int r = 0;
- r = radeon_semaphore_create(rdev, &sem); - if (r) { - DRM_ERROR("radeon: moving bo (%d).\n", r); - return ERR_PTR(r); - } + radeon_sync_create(&sync);
size_in_dw = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT) / 4; num_loops = DIV_ROUND_UP(size_in_dw, 0xFFFE); r = radeon_ring_lock(rdev, ring, num_loops * 4 + 8); if (r) { DRM_ERROR("radeon: moving bo (%d).\n", r); - radeon_semaphore_free(rdev, &sem, NULL); + radeon_sync_free(rdev, &sync, NULL); return ERR_PTR(r); }
- radeon_semaphore_sync_resv(rdev, sem, resv, false); - radeon_semaphore_sync_rings(rdev, sem, ring->idx); + radeon_sync_resv(rdev, &sync, resv, false); + radeon_sync_rings(rdev, &sync, ring->idx);
for (i = 0; i < num_loops; i++) { cur_size_in_dw = size_in_dw; @@ -484,12 +480,12 @@ struct radeon_fence *r600_copy_dma(struct radeon_device *rdev, r = radeon_fence_emit(rdev, &fence, ring->idx); if (r) { radeon_ring_unlock_undo(rdev, ring); - radeon_semaphore_free(rdev, &sem, NULL); + radeon_sync_free(rdev, &sync, NULL); return ERR_PTR(r); }
radeon_ring_unlock_commit(rdev, ring, false); - radeon_semaphore_free(rdev, &sem, fence); + radeon_sync_free(rdev, &sync, fence);
return fence; } diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 5d913ef..5e88e75 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -150,9 +150,6 @@ extern int radeon_backlight; /* number of hw syncs before falling back on blocking */ #define RADEON_NUM_SYNCS 4
-/* number of hw syncs before falling back on blocking */ -#define RADEON_NUM_SYNCS 4 - /* hardcode those limit for now */ #define RADEON_VA_IB_OFFSET (1 << 20) #define RADEON_VA_RESERVED_SIZE (8 << 20) @@ -576,10 +573,9 @@ int radeon_mode_dumb_mmap(struct drm_file *filp, * Semaphores. */ struct radeon_semaphore { - struct radeon_sa_bo *sa_bo; - signed waiters; - uint64_t gpu_addr; - struct radeon_fence *sync_to[RADEON_NUM_RINGS]; + struct radeon_sa_bo *sa_bo; + signed waiters; + uint64_t gpu_addr; };
int radeon_semaphore_create(struct radeon_device *rdev, @@ -588,20 +584,32 @@ bool radeon_semaphore_emit_signal(struct radeon_device *rdev, int ring, struct radeon_semaphore *semaphore); bool radeon_semaphore_emit_wait(struct radeon_device *rdev, int ring, struct radeon_semaphore *semaphore); -void radeon_semaphore_sync_fence(struct radeon_semaphore *semaphore, - struct radeon_fence *fence); -int radeon_semaphore_sync_resv(struct radeon_device *rdev, - struct radeon_semaphore *semaphore, - struct reservation_object *resv, - bool shared); -int radeon_semaphore_sync_rings(struct radeon_device *rdev, - struct radeon_semaphore *semaphore, - int waiting_ring); void radeon_semaphore_free(struct radeon_device *rdev, struct radeon_semaphore **semaphore, struct radeon_fence *fence);
/* + * Synchronization + */ +struct radeon_sync { + struct radeon_semaphore *semaphores[RADEON_NUM_SYNCS]; + struct radeon_fence *sync_to[RADEON_NUM_RINGS]; +}; + +void radeon_sync_create(struct radeon_sync *sync); +void radeon_sync_fence(struct radeon_sync *sync, + struct radeon_fence *fence); +int radeon_sync_resv(struct radeon_device *rdev, + struct radeon_sync *sync, + struct reservation_object *resv, + bool shared); +int radeon_sync_rings(struct radeon_device *rdev, + struct radeon_sync *sync, + int waiting_ring); +void radeon_sync_free(struct radeon_device *rdev, struct radeon_sync *sync, + struct radeon_fence *fence); + +/* * GART structures, functions & helpers */ struct radeon_mc; @@ -814,7 +822,7 @@ struct radeon_ib { struct radeon_fence *fence; struct radeon_vm *vm; bool is_const_ib; - struct radeon_semaphore *semaphore; + struct radeon_sync sync; };
struct radeon_ring { diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c index a3e7aed..30437aa 100644 --- a/drivers/gpu/drm/radeon/radeon_cs.c +++ b/drivers/gpu/drm/radeon/radeon_cs.c @@ -260,8 +260,8 @@ static int radeon_cs_sync_rings(struct radeon_cs_parser *p) continue;
resv = p->relocs[i].robj->tbo.resv; - r = radeon_semaphore_sync_resv(p->rdev, p->ib.semaphore, resv, - p->relocs[i].tv.shared); + r = radeon_sync_resv(p->rdev, &p->ib.sync, resv, + p->relocs[i].tv.shared);
if (r) break; @@ -285,9 +285,7 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data) INIT_LIST_HEAD(&p->validated); p->idx = 0; p->ib.sa_bo = NULL; - p->ib.semaphore = NULL; p->const_ib.sa_bo = NULL; - p->const_ib.semaphore = NULL; p->chunk_ib_idx = -1; p->chunk_relocs_idx = -1; p->chunk_flags_idx = -1; @@ -582,7 +580,7 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev, DRM_ERROR("Failed to sync rings: %i\n", r); goto out; } - radeon_semaphore_sync_fence(parser->ib.semaphore, vm->fence); + radeon_sync_fence(&parser->ib.sync, vm->fence);
if ((rdev->family >= CHIP_TAHITI) && (parser->chunk_const_ib_idx != -1)) { diff --git a/drivers/gpu/drm/radeon/radeon_ib.c b/drivers/gpu/drm/radeon/radeon_ib.c index 3f39fcc..56a1704 100644 --- a/drivers/gpu/drm/radeon/radeon_ib.c +++ b/drivers/gpu/drm/radeon/radeon_ib.c @@ -64,10 +64,7 @@ int radeon_ib_get(struct radeon_device *rdev, int ring, return r; }
- r = radeon_semaphore_create(rdev, &ib->semaphore); - if (r) { - return r; - } + radeon_sync_create(&ib->sync);
ib->ring = ring; ib->fence = NULL; @@ -96,7 +93,7 @@ int radeon_ib_get(struct radeon_device *rdev, int ring, */ void radeon_ib_free(struct radeon_device *rdev, struct radeon_ib *ib) { - radeon_semaphore_free(rdev, &ib->semaphore, ib->fence); + radeon_sync_free(rdev, &ib->sync, ib->fence); radeon_sa_bo_free(rdev, &ib->sa_bo, ib->fence); radeon_fence_unref(&ib->fence); } @@ -145,11 +142,11 @@ int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib, if (ib->vm) { struct radeon_fence *vm_id_fence; vm_id_fence = radeon_vm_grab_id(rdev, ib->vm, ib->ring); - radeon_semaphore_sync_fence(ib->semaphore, vm_id_fence); + radeon_sync_fence(&ib->sync, vm_id_fence); }
/* sync with other rings */ - r = radeon_semaphore_sync_rings(rdev, ib->semaphore, ib->ring); + r = radeon_sync_rings(rdev, &ib->sync, ib->ring); if (r) { dev_err(rdev->dev, "failed to sync rings (%d)\n", r); radeon_ring_unlock_undo(rdev, ring); @@ -161,7 +158,7 @@ int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib,
if (const_ib) { radeon_ring_ib_execute(rdev, const_ib->ring, const_ib); - radeon_semaphore_free(rdev, &const_ib->semaphore, NULL); + radeon_sync_free(rdev, &const_ib->sync, NULL); } radeon_ring_ib_execute(rdev, ib->ring, ib); r = radeon_fence_emit(rdev, &ib->fence, ib->ring); diff --git a/drivers/gpu/drm/radeon/radeon_semaphore.c b/drivers/gpu/drm/radeon/radeon_semaphore.c index 6deb08f..e6ad54c 100644 --- a/drivers/gpu/drm/radeon/radeon_semaphore.c +++ b/drivers/gpu/drm/radeon/radeon_semaphore.c @@ -34,15 +34,14 @@ int radeon_semaphore_create(struct radeon_device *rdev, struct radeon_semaphore **semaphore) { - uint64_t *cpu_addr; - int i, r; + int r;
*semaphore = kmalloc(sizeof(struct radeon_semaphore), GFP_KERNEL); if (*semaphore == NULL) { return -ENOMEM; } - r = radeon_sa_bo_new(rdev, &rdev->ring_tmp_bo, &(*semaphore)->sa_bo, - 8 * RADEON_NUM_SYNCS, 8); + r = radeon_sa_bo_new(rdev, &rdev->ring_tmp_bo, + &(*semaphore)->sa_bo, 8, 8); if (r) { kfree(*semaphore); *semaphore = NULL; @@ -51,12 +50,7 @@ int radeon_semaphore_create(struct radeon_device *rdev, (*semaphore)->waiters = 0; (*semaphore)->gpu_addr = radeon_sa_bo_gpu_addr((*semaphore)->sa_bo);
- cpu_addr = radeon_sa_bo_cpu_addr((*semaphore)->sa_bo); - for (i = 0; i < RADEON_NUM_SYNCS; ++i) - cpu_addr[i] = 0; - - for (i = 0; i < RADEON_NUM_RINGS; ++i) - (*semaphore)->sync_to[i] = NULL; + *((uint64_t *)radeon_sa_bo_cpu_addr((*semaphore)->sa_bo)) = 0;
return 0; } @@ -95,146 +89,6 @@ bool radeon_semaphore_emit_wait(struct radeon_device *rdev, int ridx, return false; }
-/** - * radeon_semaphore_sync_fence - use the semaphore to sync to a fence - * - * @semaphore: semaphore object to add fence to - * @fence: fence to sync to - * - * Sync to the fence using this semaphore object - */ -void radeon_semaphore_sync_fence(struct radeon_semaphore *semaphore, - struct radeon_fence *fence) -{ - struct radeon_fence *other; - - if (!fence) - return; - - other = semaphore->sync_to[fence->ring]; - semaphore->sync_to[fence->ring] = radeon_fence_later(fence, other); -} - -/** - * radeon_semaphore_sync_to - use the semaphore to sync to a reservation object - * - * @sema: semaphore object to add fence from reservation object to - * @resv: reservation object with embedded fence - * @shared: true if we should onyl sync to the exclusive fence - * - * Sync to the fence using this semaphore object - */ -int radeon_semaphore_sync_resv(struct radeon_device *rdev, - struct radeon_semaphore *sema, - struct reservation_object *resv, - bool shared) -{ - struct reservation_object_list *flist; - struct fence *f; - struct radeon_fence *fence; - unsigned i; - int r = 0; - - /* always sync to the exclusive fence */ - f = reservation_object_get_excl(resv); - fence = f ? to_radeon_fence(f) : NULL; - if (fence && fence->rdev == rdev) - radeon_semaphore_sync_fence(sema, fence); - else if (f) - r = fence_wait(f, true); - - flist = reservation_object_get_list(resv); - if (shared || !flist || r) - return r; - - for (i = 0; i < flist->shared_count; ++i) { - f = rcu_dereference_protected(flist->shared[i], - reservation_object_held(resv)); - fence = to_radeon_fence(f); - if (fence && fence->rdev == rdev) - radeon_semaphore_sync_fence(sema, fence); - else - r = fence_wait(f, true); - - if (r) - break; - } - return r; -} - -/** - * radeon_semaphore_sync_rings - sync ring to all registered fences - * - * @rdev: radeon_device pointer - * @semaphore: semaphore object to use for sync - * @ring: ring that needs sync - * - * Ensure that all registered fences are signaled before letting - * the ring continue. The caller must hold the ring lock. - */ -int radeon_semaphore_sync_rings(struct radeon_device *rdev, - struct radeon_semaphore *semaphore, - int ring) -{ - unsigned count = 0; - int i, r; - - for (i = 0; i < RADEON_NUM_RINGS; ++i) { - struct radeon_fence *fence = semaphore->sync_to[i]; - - /* check if we really need to sync */ - if (!radeon_fence_need_sync(fence, ring)) - continue; - - /* prevent GPU deadlocks */ - if (!rdev->ring[i].ready) { - dev_err(rdev->dev, "Syncing to a disabled ring!"); - return -EINVAL; - } - - if (++count > RADEON_NUM_SYNCS) { - /* not enough room, wait manually */ - r = radeon_fence_wait(fence, false); - if (r) - return r; - continue; - } - - /* allocate enough space for sync command */ - r = radeon_ring_alloc(rdev, &rdev->ring[i], 16); - if (r) { - return r; - } - - /* emit the signal semaphore */ - if (!radeon_semaphore_emit_signal(rdev, i, semaphore)) { - /* signaling wasn't successful wait manually */ - radeon_ring_undo(&rdev->ring[i]); - r = radeon_fence_wait(fence, false); - if (r) - return r; - continue; - } - - /* we assume caller has already allocated space on waiters ring */ - if (!radeon_semaphore_emit_wait(rdev, ring, semaphore)) { - /* waiting wasn't successful wait manually */ - radeon_ring_undo(&rdev->ring[i]); - r = radeon_fence_wait(fence, false); - if (r) - return r; - continue; - } - - radeon_ring_commit(rdev, &rdev->ring[i], false); - radeon_fence_note_sync(fence, ring); - - semaphore->gpu_addr += 8; - } - - return 0; -} - void radeon_semaphore_free(struct radeon_device *rdev, struct radeon_semaphore **semaphore, struct radeon_fence *fence) diff --git a/drivers/gpu/drm/radeon/radeon_sync.c b/drivers/gpu/drm/radeon/radeon_sync.c new file mode 100644 index 0000000..ddd88fb --- /dev/null +++ b/drivers/gpu/drm/radeon/radeon_sync.c @@ -0,0 +1,213 @@ +/* + * Copyright 2014 Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + */ +/* + * Authors: + * Christian König christian.koenig@amd.com + */ + +#include <drm/drmP.h> +#include "radeon.h" +#include "radeon_trace.h" + +/** + * radeon_sync_create - zero init sync object + * + * @sync: sync object to initialize + * + * Just clear the sync object for now. + */ +void radeon_sync_create(struct radeon_sync *sync) +{ + unsigned i; + + for (i = 0; i < RADEON_NUM_SYNCS; ++i) + sync->semaphores[i] = NULL; + + for (i = 0; i < RADEON_NUM_RINGS; ++i) + sync->sync_to[i] = NULL; +} + +/** + * radeon_sync_fence - use the semaphore to sync to a fence + * + * @sync: sync object to add fence to + * @fence: fence to sync to + * + * Sync to the fence using the semaphore objects + */ +void radeon_sync_fence(struct radeon_sync *sync, + struct radeon_fence *fence) +{ + struct radeon_fence *other; + + if (!fence) + return; + + other = sync->sync_to[fence->ring]; + sync->sync_to[fence->ring] = radeon_fence_later(fence, other); +} + +/** + * radeon_sync_resv - use the semaphores to sync to a reservation object + * + * @sync: sync object to add fences from reservation object to + * @resv: reservation object with embedded fence + * @shared: true if we should only sync to the exclusive fence + * + * Sync to the fence using the semaphore objects + */ +int radeon_sync_resv(struct radeon_device *rdev, + struct radeon_sync *sync, + struct reservation_object *resv, + bool shared) +{ + struct reservation_object_list *flist; + struct fence *f; + struct radeon_fence *fence; + unsigned i; + int r = 0; + + /* always sync to the exclusive fence */ + f = reservation_object_get_excl(resv); + fence = f ? to_radeon_fence(f) : NULL; + if (fence && fence->rdev == rdev) + radeon_sync_fence(sync, fence); + else if (f) + r = fence_wait(f, true); + + flist = reservation_object_get_list(resv); + if (shared || !flist || r) + return r; + + for (i = 0; i < flist->shared_count; ++i) { + f = rcu_dereference_protected(flist->shared[i], + reservation_object_held(resv)); + fence = to_radeon_fence(f); + if (fence && fence->rdev == rdev) + radeon_sync_fence(sync, fence); + else + r = fence_wait(f, true); + + if (r) + break; + } + return r; +} + +/** + * radeon_sync_rings - sync ring to all registered fences + * + * @rdev: radeon_device pointer + * @sync: sync object to use + * @ring: ring that needs sync + * + * Ensure that all registered fences are signaled before letting + * the ring continue. The caller must hold the ring lock. + */ +int radeon_sync_rings(struct radeon_device *rdev, + struct radeon_sync *sync, + int ring) +{ + unsigned count = 0; + int i, r; + + for (i = 0; i < RADEON_NUM_RINGS; ++i) { + struct radeon_fence *fence = sync->sync_to[i]; + struct radeon_semaphore *semaphore; + + /* check if we really need to sync */ + if (!radeon_fence_need_sync(fence, ring)) + continue; + + /* prevent GPU deadlocks */ + if (!rdev->ring[i].ready) { + dev_err(rdev->dev, "Syncing to a disabled ring!"); + return -EINVAL; + } + + if (count >= RADEON_NUM_SYNCS) { + /* not enough room, wait manually */ + r = radeon_fence_wait(fence, false); + if (r) + return r; + continue; + } + r = radeon_semaphore_create(rdev, &semaphore); + if (r) + return r; + + sync->semaphores[count++] = semaphore; + + /* allocate enough space for sync command */ + r = radeon_ring_alloc(rdev, &rdev->ring[i], 16); + if (r) + return r; + + /* emit the signal semaphore */ + if (!radeon_semaphore_emit_signal(rdev, i, semaphore)) { + /* signaling wasn't successful wait manually */ + radeon_ring_undo(&rdev->ring[i]); + r = radeon_fence_wait(fence, false); + if (r) + return r; + continue; + } + + /* we assume caller has already allocated space on waiters ring */ + if (!radeon_semaphore_emit_wait(rdev, ring, semaphore)) { + /* waiting wasn't successful wait manually */ + radeon_ring_undo(&rdev->ring[i]); + r = radeon_fence_wait(fence, false); + if (r) + return r; + continue; + } + + radeon_ring_commit(rdev, &rdev->ring[i], false); + radeon_fence_note_sync(fence, ring); + } + + return 0; +} + +/** + * radeon_sync_free - free the sync object + * + * @rdev: radeon_device pointer + * @sync: sync object to use + * @fence: fence to use for the free + * + * Free the sync object by freeing all semaphores in it. + */ +void radeon_sync_free(struct radeon_device *rdev, + struct radeon_sync *sync, + struct radeon_fence *fence) +{ + unsigned i; + + for (i = 0; i < RADEON_NUM_SYNCS; ++i) + radeon_semaphore_free(rdev, &sync->semaphores[i], fence); +} diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c index d8c49ad..20ef826 100644 --- a/drivers/gpu/drm/radeon/radeon_vm.c +++ b/drivers/gpu/drm/radeon/radeon_vm.c @@ -700,7 +700,7 @@ int radeon_vm_update_page_directory(struct radeon_device *rdev, if (ib.length_dw != 0) { radeon_asic_vm_pad_ib(rdev, &ib);
- radeon_semaphore_sync_resv(rdev, ib.semaphore, pd->tbo.resv, false); + radeon_sync_resv(rdev, &ib.sync, pd->tbo.resv, false); WARN_ON(ib.length_dw > ndw); r = radeon_ib_schedule(rdev, &ib, NULL, false); if (r) { @@ -826,7 +826,7 @@ static void radeon_vm_update_ptes(struct radeon_device *rdev, unsigned nptes; uint64_t pte;
- radeon_semaphore_sync_resv(rdev, ib->semaphore, pt->tbo.resv, false); + radeon_sync_resv(rdev, &ib->sync, pt->tbo.resv, false);
if ((addr & ~mask) == (end & ~mask)) nptes = end - addr; diff --git a/drivers/gpu/drm/radeon/rv770_dma.c b/drivers/gpu/drm/radeon/rv770_dma.c index 7f34bad..acff6e0 100644 --- a/drivers/gpu/drm/radeon/rv770_dma.c +++ b/drivers/gpu/drm/radeon/rv770_dma.c @@ -44,31 +44,27 @@ struct radeon_fence *rv770_copy_dma(struct radeon_device *rdev, unsigned num_gpu_pages, struct reservation_object *resv) { - struct radeon_semaphore *sem = NULL; struct radeon_fence *fence; + struct radeon_sync sync; int ring_index = rdev->asic->copy.dma_ring_index; struct radeon_ring *ring = &rdev->ring[ring_index]; u32 size_in_dw, cur_size_in_dw; int i, num_loops; int r = 0;
- r = radeon_semaphore_create(rdev, &sem); - if (r) { - DRM_ERROR("radeon: moving bo (%d).\n", r); - return ERR_PTR(r); - } + radeon_sync_create(&sync);
size_in_dw = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT) / 4; num_loops = DIV_ROUND_UP(size_in_dw, 0xFFFF); r = radeon_ring_lock(rdev, ring, num_loops * 5 + 8); if (r) { DRM_ERROR("radeon: moving bo (%d).\n", r); - radeon_semaphore_free(rdev, &sem, NULL); + radeon_sync_free(rdev, &sync, NULL); return ERR_PTR(r); }
- radeon_semaphore_sync_resv(rdev, sem, resv, false); - radeon_semaphore_sync_rings(rdev, sem, ring->idx); + radeon_sync_resv(rdev, &sync, resv, false); + radeon_sync_rings(rdev, &sync, ring->idx);
for (i = 0; i < num_loops; i++) { cur_size_in_dw = size_in_dw; @@ -87,12 +83,12 @@ struct radeon_fence *rv770_copy_dma(struct radeon_device *rdev, r = radeon_fence_emit(rdev, &fence, ring->idx); if (r) { radeon_ring_unlock_undo(rdev, ring); - radeon_semaphore_free(rdev, &sem, NULL); + radeon_sync_free(rdev, &sync, NULL); return ERR_PTR(r); }
radeon_ring_unlock_commit(rdev, ring, false); - radeon_semaphore_free(rdev, &sem, fence); + radeon_sync_free(rdev, &sync, fence);
return fence; } diff --git a/drivers/gpu/drm/radeon/si_dma.c b/drivers/gpu/drm/radeon/si_dma.c index e8bc0a5..f5cc777 100644 --- a/drivers/gpu/drm/radeon/si_dma.c +++ b/drivers/gpu/drm/radeon/si_dma.c @@ -226,31 +226,27 @@ struct radeon_fence *si_copy_dma(struct radeon_device *rdev, unsigned num_gpu_pages, struct reservation_object *resv) { - struct radeon_semaphore *sem = NULL; struct radeon_fence *fence; + struct radeon_sync sync; int ring_index = rdev->asic->copy.dma_ring_index; struct radeon_ring *ring = &rdev->ring[ring_index]; u32 size_in_bytes, cur_size_in_bytes; int i, num_loops; int r = 0;
- r = radeon_semaphore_create(rdev, &sem); - if (r) { - DRM_ERROR("radeon: moving bo (%d).\n", r); - return ERR_PTR(r); - } + radeon_sync_create(&sync);
size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT); num_loops = DIV_ROUND_UP(size_in_bytes, 0xfffff); r = radeon_ring_lock(rdev, ring, num_loops * 5 + 11); if (r) { DRM_ERROR("radeon: moving bo (%d).\n", r); - radeon_semaphore_free(rdev, &sem, NULL); + radeon_sync_free(rdev, &sync, NULL); return ERR_PTR(r); }
- radeon_semaphore_sync_resv(rdev, sem, resv, false); - radeon_semaphore_sync_rings(rdev, sem, ring->idx); + radeon_sync_resv(rdev, &sync, resv, false); + radeon_sync_rings(rdev, &sync, ring->idx);
for (i = 0; i < num_loops; i++) { cur_size_in_bytes = size_in_bytes; @@ -269,12 +265,12 @@ struct radeon_fence *si_copy_dma(struct radeon_device *rdev, r = radeon_fence_emit(rdev, &fence, ring->idx); if (r) { radeon_ring_unlock_undo(rdev, ring); - radeon_semaphore_free(rdev, &sem, NULL); + radeon_sync_free(rdev, &sync, NULL); return ERR_PTR(r); }
radeon_ring_unlock_commit(rdev, ring, false); - radeon_semaphore_free(rdev, &sem, fence); + radeon_sync_free(rdev, &sync, fence);
return fence; }
From: Christian König christian.koenig@amd.com
This allows us to add the real execution fence as shared.
v2: fix typo
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/radeon/radeon_object.c | 19 ++++++++++ drivers/gpu/drm/radeon/radeon_object.h | 2 ++ drivers/gpu/drm/radeon/radeon_vm.c | 65 +++++++++++++++++++++------------- 3 files changed, 62 insertions(+), 24 deletions(-)
diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c index 33e6c7a..686e450 100644 --- a/drivers/gpu/drm/radeon/radeon_object.c +++ b/drivers/gpu/drm/radeon/radeon_object.c @@ -818,3 +818,22 @@ int radeon_bo_wait(struct radeon_bo *bo, u32 *mem_type, bool no_wait) ttm_bo_unreserve(&bo->tbo); return r; } + +/** + * radeon_bo_fence - add fence to buffer object + * + * @bo: buffer object in question + * @fence: fence to add + * @shared: true if fence should be added shared + * + */ +void radeon_bo_fence(struct radeon_bo *bo, struct radeon_fence *fence, + bool shared) +{ + struct reservation_object *resv = bo->tbo.resv; + + if (shared) + reservation_object_add_shared_fence(resv, &fence->base); + else + reservation_object_add_excl_fence(resv, &fence->base); +} diff --git a/drivers/gpu/drm/radeon/radeon_object.h b/drivers/gpu/drm/radeon/radeon_object.h index 1b8ec79..3b0b377 100644 --- a/drivers/gpu/drm/radeon/radeon_object.h +++ b/drivers/gpu/drm/radeon/radeon_object.h @@ -155,6 +155,8 @@ extern void radeon_bo_move_notify(struct ttm_buffer_object *bo, struct ttm_mem_reg *new_mem); extern int radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo); extern int radeon_bo_get_surface_reg(struct radeon_bo *bo); +extern void radeon_bo_fence(struct radeon_bo *bo, struct radeon_fence *fence, + bool shared);
/* * sub allocation diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c index 20ef826..4ca2779 100644 --- a/drivers/gpu/drm/radeon/radeon_vm.c +++ b/drivers/gpu/drm/radeon/radeon_vm.c @@ -143,7 +143,7 @@ struct radeon_cs_reloc *radeon_vm_get_bos(struct radeon_device *rdev, list[0].prefered_domains = RADEON_GEM_DOMAIN_VRAM; list[0].allowed_domains = RADEON_GEM_DOMAIN_VRAM; list[0].tv.bo = &vm->page_directory->tbo; - list[0].tv.shared = false; + list[0].tv.shared = true; list[0].tiling_flags = 0; list[0].handle = 0; list_add(&list[0].tv.head, head); @@ -157,7 +157,7 @@ struct radeon_cs_reloc *radeon_vm_get_bos(struct radeon_device *rdev, list[idx].prefered_domains = RADEON_GEM_DOMAIN_VRAM; list[idx].allowed_domains = RADEON_GEM_DOMAIN_VRAM; list[idx].tv.bo = &list[idx].robj->tbo; - list[idx].tv.shared = false; + list[idx].tv.shared = true; list[idx].tiling_flags = 0; list[idx].handle = 0; list_add(&list[idx++].tv.head, head); @@ -388,35 +388,25 @@ static void radeon_vm_set_pages(struct radeon_device *rdev, static int radeon_vm_clear_bo(struct radeon_device *rdev, struct radeon_bo *bo) { - struct ttm_validate_buffer tv; - struct ww_acquire_ctx ticket; - struct list_head head; struct radeon_ib ib; unsigned entries; uint64_t addr; int r;
- memset(&tv, 0, sizeof(tv)); - tv.bo = &bo->tbo; - tv.shared = false; - - INIT_LIST_HEAD(&head); - list_add(&tv.head, &head); - - r = ttm_eu_reserve_buffers(&ticket, &head, true); - if (r) + r = radeon_bo_reserve(bo, false); + if (r) return r;
- r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false); - if (r) - goto error; + r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false); + if (r) + goto error_unreserve;
addr = radeon_bo_gpu_offset(bo); entries = radeon_bo_size(bo) / 8;
r = radeon_ib_get(rdev, R600_RING_TYPE_DMA_INDEX, &ib, NULL, 256); if (r) - goto error; + goto error_unreserve;
ib.length_dw = 0;
@@ -426,15 +416,15 @@ static int radeon_vm_clear_bo(struct radeon_device *rdev,
r = radeon_ib_schedule(rdev, &ib, NULL, false); if (r) - goto error; + goto error_free;
- ttm_eu_fence_buffer_objects(&ticket, &head, &ib.fence->base); - radeon_ib_free(rdev, &ib); + radeon_bo_fence(bo, ib.fence, false);
- return 0; +error_free: + radeon_ib_free(rdev, &ib);
-error: - ttm_eu_backoff_reservation(&ticket, &head); +error_unreserve: + radeon_bo_unreserve(bo); return r; }
@@ -707,6 +697,7 @@ int radeon_vm_update_page_directory(struct radeon_device *rdev, radeon_ib_free(rdev, &ib); return r; } + radeon_bo_fence(pd, ib.fence, false); radeon_fence_unref(&vm->fence); vm->fence = radeon_fence_ref(ib.fence); radeon_fence_unref(&vm->last_flush); @@ -863,6 +854,31 @@ static void radeon_vm_update_ptes(struct radeon_device *rdev, }
/** + * radeon_vm_fence_pts - fence page tables after an update + * + * @vm: requested vm + * @start: start of GPU address range + * @end: end of GPU address range + * @fence: fence to use + * + * Fence the page tables in the range @start - @end (cayman+). + * + * Global and local mutex must be locked! + */ +static void radeon_vm_fence_pts(struct radeon_vm *vm, + uint64_t start, uint64_t end, + struct radeon_fence *fence) +{ + unsigned i; + + start >>= radeon_vm_block_size; + end >>= radeon_vm_block_size; + + for (i = start; i <= end; ++i) + radeon_bo_fence(vm->page_tables[i].bo, fence, false); +} + +/** * radeon_vm_bo_update - map a bo into the vm page table * * @rdev: radeon_device pointer @@ -973,6 +989,7 @@ int radeon_vm_bo_update(struct radeon_device *rdev, radeon_ib_free(rdev, &ib); return r; } + radeon_vm_fence_pts(vm, bo_va->it.start, bo_va->it.last + 1, ib.fence); radeon_fence_unref(&vm->fence); vm->fence = radeon_fence_ref(ib.fence); radeon_ib_free(rdev, &ib);
From: Christian König christian.koenig@amd.com
Note for each fence if it's a VM page table update or not. This allows us to determine the last VM update in a sync object and so to figure out if we need to flush the TLB or not.
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/radeon/radeon.h | 18 ++++++++++-------- drivers/gpu/drm/radeon/radeon_fence.c | 1 + drivers/gpu/drm/radeon/radeon_ib.c | 3 ++- drivers/gpu/drm/radeon/radeon_sync.c | 7 +++++++ drivers/gpu/drm/radeon/radeon_vm.c | 25 +++++++++++++------------ 5 files changed, 33 insertions(+), 21 deletions(-)
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 5e88e75..ac4660a 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -360,14 +360,15 @@ struct radeon_fence_driver { };
struct radeon_fence { - struct fence base; + struct fence base;
- struct radeon_device *rdev; - uint64_t seq; + struct radeon_device *rdev; + uint64_t seq; /* RB, DMA, etc. */ - unsigned ring; + unsigned ring; + bool is_vm_update;
- wait_queue_t fence_wake; + wait_queue_t fence_wake; };
int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring); @@ -594,6 +595,7 @@ void radeon_semaphore_free(struct radeon_device *rdev, struct radeon_sync { struct radeon_semaphore *semaphores[RADEON_NUM_SYNCS]; struct radeon_fence *sync_to[RADEON_NUM_RINGS]; + struct radeon_fence *last_vm_update; };
void radeon_sync_create(struct radeon_sync *sync); @@ -922,8 +924,8 @@ struct radeon_vm { struct mutex mutex; /* last fence for cs using this vm */ struct radeon_fence *fence; - /* last flush or NULL if we still need to flush */ - struct radeon_fence *last_flush; + /* last flushed PD/PT update */ + struct radeon_fence *flushed_updates; /* last use of vmid */ struct radeon_fence *last_id_use; }; @@ -2961,7 +2963,7 @@ struct radeon_fence *radeon_vm_grab_id(struct radeon_device *rdev, struct radeon_vm *vm, int ring); void radeon_vm_flush(struct radeon_device *rdev, struct radeon_vm *vm, - int ring); + int ring, struct radeon_fence *fence); void radeon_vm_fence(struct radeon_device *rdev, struct radeon_vm *vm, struct radeon_fence *fence); diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c index 9951670..d13d1b5 100644 --- a/drivers/gpu/drm/radeon/radeon_fence.c +++ b/drivers/gpu/drm/radeon/radeon_fence.c @@ -140,6 +140,7 @@ int radeon_fence_emit(struct radeon_device *rdev, (*fence)->rdev = rdev; (*fence)->seq = seq; (*fence)->ring = ring; + (*fence)->is_vm_update = false; fence_init(&(*fence)->base, &radeon_fence_ops, &rdev->fence_queue.lock, rdev->fence_context + ring, seq); radeon_fence_ring_emit(rdev, ring, *fence); diff --git a/drivers/gpu/drm/radeon/radeon_ib.c b/drivers/gpu/drm/radeon/radeon_ib.c index 56a1704..c39ce1f 100644 --- a/drivers/gpu/drm/radeon/radeon_ib.c +++ b/drivers/gpu/drm/radeon/radeon_ib.c @@ -154,7 +154,8 @@ int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib, }
if (ib->vm) - radeon_vm_flush(rdev, ib->vm, ib->ring); + radeon_vm_flush(rdev, ib->vm, ib->ring, + ib->sync.last_vm_update);
if (const_ib) { radeon_ring_ib_execute(rdev, const_ib->ring, const_ib); diff --git a/drivers/gpu/drm/radeon/radeon_sync.c b/drivers/gpu/drm/radeon/radeon_sync.c index ddd88fb..02ac8a1 100644 --- a/drivers/gpu/drm/radeon/radeon_sync.c +++ b/drivers/gpu/drm/radeon/radeon_sync.c @@ -48,6 +48,8 @@ void radeon_sync_create(struct radeon_sync *sync)
for (i = 0; i < RADEON_NUM_RINGS; ++i) sync->sync_to[i] = NULL; + + sync->last_vm_update = NULL; }
/** @@ -68,6 +70,11 @@ void radeon_sync_fence(struct radeon_sync *sync,
other = sync->sync_to[fence->ring]; sync->sync_to[fence->ring] = radeon_fence_later(fence, other); + + if (fence->is_vm_update) { + other = sync->last_vm_update; + sync->last_vm_update = radeon_fence_later(fence, other); + } }
/** diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c index 4ca2779..6ff5741 100644 --- a/drivers/gpu/drm/radeon/radeon_vm.c +++ b/drivers/gpu/drm/radeon/radeon_vm.c @@ -190,7 +190,7 @@ struct radeon_fence *radeon_vm_grab_id(struct radeon_device *rdev, return NULL;
/* we definately need to flush */ - radeon_fence_unref(&vm->last_flush); + vm->pd_gpu_addr = ~0ll;
/* skip over VMID 0, since it is the system VM */ for (i = 1; i < rdev->vm_manager.nvm; ++i) { @@ -228,6 +228,7 @@ struct radeon_fence *radeon_vm_grab_id(struct radeon_device *rdev, * @rdev: radeon_device pointer * @vm: vm we want to flush * @ring: ring to use for flush + * @updates: last vm update that is waited for * * Flush the vm (cayman+). * @@ -235,13 +236,16 @@ struct radeon_fence *radeon_vm_grab_id(struct radeon_device *rdev, */ void radeon_vm_flush(struct radeon_device *rdev, struct radeon_vm *vm, - int ring) + int ring, struct radeon_fence *updates) { uint64_t pd_addr = radeon_bo_gpu_offset(vm->page_directory);
- /* if we can't remember our last VM flush then flush now! */ - if (!vm->last_flush || pd_addr != vm->pd_gpu_addr) { + if (pd_addr != vm->pd_gpu_addr || !vm->flushed_updates || + radeon_fence_is_earlier(vm->flushed_updates, updates)) { + trace_radeon_vm_flush(pd_addr, ring, vm->id); + radeon_fence_unref(&vm->flushed_updates); + vm->flushed_updates = radeon_fence_ref(updates); vm->pd_gpu_addr = pd_addr; radeon_ring_vm_flush(rdev, &rdev->ring[ring], vm->id, vm->pd_gpu_addr); @@ -272,10 +276,6 @@ void radeon_vm_fence(struct radeon_device *rdev,
radeon_fence_unref(&vm->last_id_use); vm->last_id_use = radeon_fence_ref(fence); - - /* we just flushed the VM, remember that */ - if (!vm->last_flush) - vm->last_flush = radeon_fence_ref(fence); }
/** @@ -418,6 +418,7 @@ static int radeon_vm_clear_bo(struct radeon_device *rdev, if (r) goto error_free;
+ ib.fence->is_vm_update = true; radeon_bo_fence(bo, ib.fence, false);
error_free: @@ -697,10 +698,10 @@ int radeon_vm_update_page_directory(struct radeon_device *rdev, radeon_ib_free(rdev, &ib); return r; } + ib.fence->is_vm_update = true; radeon_bo_fence(pd, ib.fence, false); radeon_fence_unref(&vm->fence); vm->fence = radeon_fence_ref(ib.fence); - radeon_fence_unref(&vm->last_flush); } radeon_ib_free(rdev, &ib);
@@ -989,11 +990,11 @@ int radeon_vm_bo_update(struct radeon_device *rdev, radeon_ib_free(rdev, &ib); return r; } + ib.fence->is_vm_update = true; radeon_vm_fence_pts(vm, bo_va->it.start, bo_va->it.last + 1, ib.fence); radeon_fence_unref(&vm->fence); vm->fence = radeon_fence_ref(ib.fence); radeon_ib_free(rdev, &ib); - radeon_fence_unref(&vm->last_flush);
return 0; } @@ -1124,7 +1125,7 @@ int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm) vm->id = 0; vm->ib_bo_va = NULL; vm->fence = NULL; - vm->last_flush = NULL; + vm->flushed_updates = NULL; vm->last_id_use = NULL; mutex_init(&vm->mutex); vm->va = RB_ROOT; @@ -1196,7 +1197,7 @@ void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm) radeon_bo_unref(&vm->page_directory);
radeon_fence_unref(&vm->fence); - radeon_fence_unref(&vm->last_flush); + radeon_fence_unref(&vm->flushed_updates); radeon_fence_unref(&vm->last_id_use);
mutex_destroy(&vm->mutex);
From: Christian König christian.koenig@amd.com
Use multiple VMIDs for each VM, one for each ring. That allows us to execute flushes separately on each ring, still not ideal cause in a lot of cases rings can share IDs.
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/radeon/cik.c | 4 +-- drivers/gpu/drm/radeon/cik_sdma.c | 2 +- drivers/gpu/drm/radeon/ni.c | 6 ++-- drivers/gpu/drm/radeon/ni_dma.c | 3 +- drivers/gpu/drm/radeon/radeon.h | 36 +++++++++++++---------- drivers/gpu/drm/radeon/radeon_vm.c | 59 +++++++++++++++++++++++--------------- drivers/gpu/drm/radeon/si.c | 6 ++-- 7 files changed, 68 insertions(+), 48 deletions(-)
diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index 6bb8b84..510aeef 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -4051,6 +4051,7 @@ struct radeon_fence *cik_copy_cpdma(struct radeon_device *rdev, void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) { struct radeon_ring *ring = &rdev->ring[ib->ring]; + unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0; u32 header, control = INDIRECT_BUFFER_VALID;
if (ib->is_const_ib) { @@ -4079,8 +4080,7 @@ void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); }
- control |= ib->length_dw | - (ib->vm ? (ib->vm->id << 24) : 0); + control |= ib->length_dw | (vm_id << 24);
radeon_ring_write(ring, header); radeon_ring_write(ring, diff --git a/drivers/gpu/drm/radeon/cik_sdma.c b/drivers/gpu/drm/radeon/cik_sdma.c index 604e2e7..54b9837 100644 --- a/drivers/gpu/drm/radeon/cik_sdma.c +++ b/drivers/gpu/drm/radeon/cik_sdma.c @@ -134,7 +134,7 @@ void cik_sdma_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) { struct radeon_ring *ring = &rdev->ring[ib->ring]; - u32 extra_bits = (ib->vm ? ib->vm->id : 0) & 0xf; + u32 extra_bits = (ib->vm ? ib->vm->ids[ib->ring].id : 0) & 0xf;
if (rdev->wb.enabled) { u32 next_rptr = ring->wptr + 5; diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c index bee432d..360de9f 100644 --- a/drivers/gpu/drm/radeon/ni.c +++ b/drivers/gpu/drm/radeon/ni.c @@ -1373,6 +1373,7 @@ void cayman_fence_ring_emit(struct radeon_device *rdev, void cayman_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) { struct radeon_ring *ring = &rdev->ring[ib->ring]; + unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0; u32 cp_coher_cntl = PACKET3_FULL_CACHE_ENA | PACKET3_TC_ACTION_ENA | PACKET3_SH_ACTION_ENA;
@@ -1395,15 +1396,14 @@ void cayman_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) #endif (ib->gpu_addr & 0xFFFFFFFC)); radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFF); - radeon_ring_write(ring, ib->length_dw | - (ib->vm ? (ib->vm->id << 24) : 0)); + radeon_ring_write(ring, ib->length_dw | (vm_id << 24));
/* flush read cache over gart for this vmid */ radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3)); radeon_ring_write(ring, PACKET3_ENGINE_ME | cp_coher_cntl); radeon_ring_write(ring, 0xFFFFFFFF); radeon_ring_write(ring, 0); - radeon_ring_write(ring, ((ib->vm ? ib->vm->id : 0) << 24) | 10); /* poll interval */ + radeon_ring_write(ring, (vm_id << 24) | 10); /* poll interval */ }
static void cayman_cp_enable(struct radeon_device *rdev, bool enable) diff --git a/drivers/gpu/drm/radeon/ni_dma.c b/drivers/gpu/drm/radeon/ni_dma.c index 5a72404..50f8861 100644 --- a/drivers/gpu/drm/radeon/ni_dma.c +++ b/drivers/gpu/drm/radeon/ni_dma.c @@ -123,6 +123,7 @@ void cayman_dma_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) { struct radeon_ring *ring = &rdev->ring[ib->ring]; + unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
if (rdev->wb.enabled) { u32 next_rptr = ring->wptr + 4; @@ -140,7 +141,7 @@ void cayman_dma_ring_ib_execute(struct radeon_device *rdev, */ while ((ring->wptr & 7) != 5) radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0)); - radeon_ring_write(ring, DMA_IB_PACKET(DMA_PACKET_INDIRECT_BUFFER, ib->vm ? ib->vm->id : 0, 0)); + radeon_ring_write(ring, DMA_IB_PACKET(DMA_PACKET_INDIRECT_BUFFER, vm_id, 0)); radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0)); radeon_ring_write(ring, (ib->length_dw << 12) | (upper_32_bits(ib->gpu_addr) & 0xFF));
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index ac4660a..e3e77f7 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -901,33 +901,39 @@ struct radeon_vm_pt { uint64_t addr; };
+struct radeon_vm_id { + unsigned id; + uint64_t pd_gpu_addr; + /* last flushed PD/PT update */ + struct radeon_fence *flushed_updates; + /* last use of vmid */ + struct radeon_fence *last_id_use; +}; + struct radeon_vm { - struct rb_root va; - unsigned id; + struct rb_root va;
/* BOs moved, but not yet updated in the PT */ - struct list_head invalidated; + struct list_head invalidated;
/* BOs freed, but not yet updated in the PT */ - struct list_head freed; + struct list_head freed;
/* contains the page directory */ - struct radeon_bo *page_directory; - uint64_t pd_gpu_addr; - unsigned max_pde_used; + struct radeon_bo *page_directory; + unsigned max_pde_used;
/* array of page tables, one for each page directory entry */ - struct radeon_vm_pt *page_tables; + struct radeon_vm_pt *page_tables;
- struct radeon_bo_va *ib_bo_va; + struct radeon_bo_va *ib_bo_va;
- struct mutex mutex; + struct mutex mutex; /* last fence for cs using this vm */ - struct radeon_fence *fence; - /* last flushed PD/PT update */ - struct radeon_fence *flushed_updates; - /* last use of vmid */ - struct radeon_fence *last_id_use; + struct radeon_fence *fence; + + /* for id and flush management per ring */ + struct radeon_vm_id ids[RADEON_NUM_RINGS]; };
struct radeon_vm_manager { diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c index 6ff5741..e38efe4 100644 --- a/drivers/gpu/drm/radeon/radeon_vm.c +++ b/drivers/gpu/drm/radeon/radeon_vm.c @@ -182,15 +182,18 @@ struct radeon_fence *radeon_vm_grab_id(struct radeon_device *rdev, struct radeon_vm *vm, int ring) { struct radeon_fence *best[RADEON_NUM_RINGS] = {}; + struct radeon_vm_id *vm_id = &vm->ids[ring]; + unsigned choices[2] = {}; unsigned i;
/* check if the id is still valid */ - if (vm->last_id_use && vm->last_id_use == rdev->vm_manager.active[vm->id]) + if (vm_id->id && vm_id->last_id_use && + vm_id->last_id_use == rdev->vm_manager.active[vm_id->id]) return NULL;
/* we definately need to flush */ - vm->pd_gpu_addr = ~0ll; + vm_id->pd_gpu_addr = ~0ll;
/* skip over VMID 0, since it is the system VM */ for (i = 1; i < rdev->vm_manager.nvm; ++i) { @@ -198,8 +201,8 @@ struct radeon_fence *radeon_vm_grab_id(struct radeon_device *rdev,
if (fence == NULL) { /* found a free one */ - vm->id = i; - trace_radeon_vm_grab_id(vm->id, ring); + vm_id->id = i; + trace_radeon_vm_grab_id(i, ring); return NULL; }
@@ -211,8 +214,8 @@ struct radeon_fence *radeon_vm_grab_id(struct radeon_device *rdev,
for (i = 0; i < 2; ++i) { if (choices[i]) { - vm->id = choices[i]; - trace_radeon_vm_grab_id(vm->id, ring); + vm_id->id = choices[i]; + trace_radeon_vm_grab_id(choices[i], ring); return rdev->vm_manager.active[choices[i]]; } } @@ -239,16 +242,18 @@ void radeon_vm_flush(struct radeon_device *rdev, int ring, struct radeon_fence *updates) { uint64_t pd_addr = radeon_bo_gpu_offset(vm->page_directory); + struct radeon_vm_id *vm_id = &vm->ids[ring];
- if (pd_addr != vm->pd_gpu_addr || !vm->flushed_updates || - radeon_fence_is_earlier(vm->flushed_updates, updates)) { + if (pd_addr != vm_id->pd_gpu_addr || !vm_id->flushed_updates || + radeon_fence_is_earlier(vm_id->flushed_updates, updates)) {
- trace_radeon_vm_flush(pd_addr, ring, vm->id); - radeon_fence_unref(&vm->flushed_updates); - vm->flushed_updates = radeon_fence_ref(updates); - vm->pd_gpu_addr = pd_addr; + trace_radeon_vm_flush(pd_addr, ring, vm->ids[ring].id); + radeon_fence_unref(&vm_id->flushed_updates); + vm_id->flushed_updates = radeon_fence_ref(updates); + vm_id->pd_gpu_addr = pd_addr; radeon_ring_vm_flush(rdev, &rdev->ring[ring], - vm->id, vm->pd_gpu_addr); + vm_id->id, vm_id->pd_gpu_addr); + } }
@@ -268,14 +273,16 @@ void radeon_vm_fence(struct radeon_device *rdev, struct radeon_vm *vm, struct radeon_fence *fence) { + unsigned vm_id = vm->ids[fence->ring].id; + radeon_fence_unref(&vm->fence); vm->fence = radeon_fence_ref(fence);
- radeon_fence_unref(&rdev->vm_manager.active[vm->id]); - rdev->vm_manager.active[vm->id] = radeon_fence_ref(fence); + radeon_fence_unref(&rdev->vm_manager.active[vm_id]); + rdev->vm_manager.active[vm_id] = radeon_fence_ref(fence);
- radeon_fence_unref(&vm->last_id_use); - vm->last_id_use = radeon_fence_ref(fence); + radeon_fence_unref(&vm->ids[fence->ring].last_id_use); + vm->ids[fence->ring].last_id_use = radeon_fence_ref(fence); }
/** @@ -1120,13 +1127,16 @@ int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm) const unsigned align = min(RADEON_VM_PTB_ALIGN_SIZE, RADEON_VM_PTE_COUNT * 8); unsigned pd_size, pd_entries, pts_size; - int r; + int i, r;
- vm->id = 0; vm->ib_bo_va = NULL; vm->fence = NULL; - vm->flushed_updates = NULL; - vm->last_id_use = NULL; + + for (i = 0; i < RADEON_NUM_RINGS; ++i) { + vm->ids[i].id = 0; + vm->ids[i].flushed_updates = NULL; + vm->ids[i].last_id_use = NULL; + } mutex_init(&vm->mutex); vm->va = RB_ROOT; INIT_LIST_HEAD(&vm->invalidated); @@ -1197,8 +1207,11 @@ void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm) radeon_bo_unref(&vm->page_directory);
radeon_fence_unref(&vm->fence); - radeon_fence_unref(&vm->flushed_updates); - radeon_fence_unref(&vm->last_id_use); + + for (i = 0; i < RADEON_NUM_RINGS; ++i) { + radeon_fence_unref(&vm->ids[i].flushed_updates); + radeon_fence_unref(&vm->ids[i].last_id_use); + }
mutex_destroy(&vm->mutex); } diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c index e91968b..14896ce 100644 --- a/drivers/gpu/drm/radeon/si.c +++ b/drivers/gpu/drm/radeon/si.c @@ -3362,6 +3362,7 @@ void si_fence_ring_emit(struct radeon_device *rdev, void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) { struct radeon_ring *ring = &rdev->ring[ib->ring]; + unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0; u32 header;
if (ib->is_const_ib) { @@ -3397,14 +3398,13 @@ void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) #endif (ib->gpu_addr & 0xFFFFFFFC)); radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF); - radeon_ring_write(ring, ib->length_dw | - (ib->vm ? (ib->vm->id << 24) : 0)); + radeon_ring_write(ring, ib->length_dw | (vm_id << 24));
if (!ib->is_const_ib) { /* flush read cache over gart for this vmid */ radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2); - radeon_ring_write(ring, ib->vm ? ib->vm->id : 0); + radeon_ring_write(ring, vm_id); radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3)); radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA | PACKET3_TC_ACTION_ENA |
From: Christian König christian.koenig@amd.com
This allows us to finally remove the VM fence and so allow concurrent use of it from different engines.
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/radeon/radeon.h | 7 +++---- drivers/gpu/drm/radeon/radeon_cs.c | 6 +++++- drivers/gpu/drm/radeon/radeon_vm.c | 17 ++++++----------- 3 files changed, 14 insertions(+), 16 deletions(-)
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index e3e77f7..1805067 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -456,6 +456,7 @@ struct radeon_bo_va { struct list_head bo_list; uint32_t flags; uint64_t addr; + struct radeon_fence *last_pt_update; unsigned ref_count;
/* protected by vm mutex */ @@ -911,6 +912,8 @@ struct radeon_vm_id { };
struct radeon_vm { + struct mutex mutex; + struct rb_root va;
/* BOs moved, but not yet updated in the PT */ @@ -928,10 +931,6 @@ struct radeon_vm {
struct radeon_bo_va *ib_bo_va;
- struct mutex mutex; - /* last fence for cs using this vm */ - struct radeon_fence *fence; - /* for id and flush management per ring */ struct radeon_vm_id ids[RADEON_NUM_RINGS]; }; diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c index 30437aa..75f22e5 100644 --- a/drivers/gpu/drm/radeon/radeon_cs.c +++ b/drivers/gpu/drm/radeon/radeon_cs.c @@ -505,6 +505,9 @@ static int radeon_bo_vm_update_pte(struct radeon_cs_parser *p, if (r) return r;
+ radeon_sync_resv(p->rdev, &p->ib.sync, vm->page_directory->tbo.resv, + true); + r = radeon_vm_clear_freed(rdev, vm); if (r) return r; @@ -536,6 +539,8 @@ static int radeon_bo_vm_update_pte(struct radeon_cs_parser *p, r = radeon_vm_bo_update(rdev, bo_va, &bo->tbo.mem); if (r) return r; + + radeon_sync_fence(&p->ib.sync, bo_va->last_pt_update); }
return radeon_vm_clear_invalids(rdev, vm); @@ -580,7 +585,6 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev, DRM_ERROR("Failed to sync rings: %i\n", r); goto out; } - radeon_sync_fence(&parser->ib.sync, vm->fence);
if ((rdev->family >= CHIP_TAHITI) && (parser->chunk_const_ib_idx != -1)) { diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c index e38efe4..f457614 100644 --- a/drivers/gpu/drm/radeon/radeon_vm.c +++ b/drivers/gpu/drm/radeon/radeon_vm.c @@ -275,9 +275,6 @@ void radeon_vm_fence(struct radeon_device *rdev, { unsigned vm_id = vm->ids[fence->ring].id;
- radeon_fence_unref(&vm->fence); - vm->fence = radeon_fence_ref(fence); - radeon_fence_unref(&rdev->vm_manager.active[vm_id]); rdev->vm_manager.active[vm_id] = radeon_fence_ref(fence);
@@ -707,8 +704,6 @@ int radeon_vm_update_page_directory(struct radeon_device *rdev, } ib.fence->is_vm_update = true; radeon_bo_fence(pd, ib.fence, false); - radeon_fence_unref(&vm->fence); - vm->fence = radeon_fence_ref(ib.fence); } radeon_ib_free(rdev, &ib);
@@ -999,8 +994,8 @@ int radeon_vm_bo_update(struct radeon_device *rdev, } ib.fence->is_vm_update = true; radeon_vm_fence_pts(vm, bo_va->it.start, bo_va->it.last + 1, ib.fence); - radeon_fence_unref(&vm->fence); - vm->fence = radeon_fence_ref(ib.fence); + radeon_fence_unref(&bo_va->last_pt_update); + bo_va->last_pt_update = radeon_fence_ref(ib.fence); radeon_ib_free(rdev, &ib);
return 0; @@ -1026,6 +1021,7 @@ int radeon_vm_clear_freed(struct radeon_device *rdev, list_for_each_entry_safe(bo_va, tmp, &vm->freed, vm_status) { r = radeon_vm_bo_update(rdev, bo_va, NULL); radeon_bo_unref(&bo_va->bo); + radeon_fence_unref(&bo_va->last_pt_update); kfree(bo_va); if (r) return r; @@ -1084,6 +1080,7 @@ void radeon_vm_bo_rmv(struct radeon_device *rdev, bo_va->bo = radeon_bo_ref(bo_va->bo); list_add(&bo_va->vm_status, &vm->freed); } else { + radeon_fence_unref(&bo_va->last_pt_update); kfree(bo_va); }
@@ -1130,8 +1127,6 @@ int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm) int i, r;
vm->ib_bo_va = NULL; - vm->fence = NULL; - for (i = 0; i < RADEON_NUM_RINGS; ++i) { vm->ids[i].id = 0; vm->ids[i].flushed_updates = NULL; @@ -1192,11 +1187,13 @@ void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm) if (!r) { list_del_init(&bo_va->bo_list); radeon_bo_unreserve(bo_va->bo); + radeon_fence_unref(&bo_va->last_pt_update); kfree(bo_va); } } list_for_each_entry_safe(bo_va, tmp, &vm->freed, vm_status) { radeon_bo_unref(&bo_va->bo); + radeon_fence_unref(&bo_va->last_pt_update); kfree(bo_va); }
@@ -1206,8 +1203,6 @@ void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm)
radeon_bo_unref(&vm->page_directory);
- radeon_fence_unref(&vm->fence); - for (i = 0; i < RADEON_NUM_RINGS; ++i) { radeon_fence_unref(&vm->ids[i].flushed_updates); radeon_fence_unref(&vm->ids[i].last_id_use);
From: Christian König christian.koenig@amd.com
We never invalidate PD entries and making them valid can run with other users in parallel.
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/radeon/radeon_vm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c index f457614..470451c 100644 --- a/drivers/gpu/drm/radeon/radeon_vm.c +++ b/drivers/gpu/drm/radeon/radeon_vm.c @@ -695,7 +695,7 @@ int radeon_vm_update_page_directory(struct radeon_device *rdev, if (ib.length_dw != 0) { radeon_asic_vm_pad_ib(rdev, &ib);
- radeon_sync_resv(rdev, &ib.sync, pd->tbo.resv, false); + radeon_sync_resv(rdev, &ib.sync, pd->tbo.resv, true); WARN_ON(ib.length_dw > ndw); r = radeon_ib_schedule(rdev, &ib, NULL, false); if (r) {
From: Christian König christian.koenig@amd.com
Only invalidating PTEs needs to be executed synchronized to using the PT.
v2: fix sync to uses
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/radeon/radeon_vm.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c index 470451c..0b10f3a 100644 --- a/drivers/gpu/drm/radeon/radeon_vm.c +++ b/drivers/gpu/drm/radeon/radeon_vm.c @@ -820,7 +820,7 @@ static void radeon_vm_update_ptes(struct radeon_device *rdev, unsigned nptes; uint64_t pte;
- radeon_sync_resv(rdev, &ib->sync, pt->tbo.resv, false); + radeon_sync_resv(rdev, &ib->sync, pt->tbo.resv, true);
if ((addr & ~mask) == (end & ~mask)) nptes = end - addr; @@ -980,6 +980,13 @@ int radeon_vm_bo_update(struct radeon_device *rdev, return r; ib.length_dw = 0;
+ if (!(bo_va->flags & RADEON_VM_PAGE_VALID)) { + unsigned i; + + for (i = 0; i < RADEON_NUM_RINGS; ++i) + radeon_sync_fence(&ib.sync, vm->ids[i].last_id_use); + } + radeon_vm_update_ptes(rdev, vm, &ib, bo_va->it.start, bo_va->it.last + 1, addr, radeon_vm_page_flags(bo_va->flags));
From: Christian König christian.koenig@amd.com
This way the necessary VM update is kicked off immediately if all BOs involved are in GPU accessible memory.
v2: fix vm lock v3: immediately update unmaps as well
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/radeon/radeon_gem.c | 64 +++++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+)
diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c index f752c7f..2e0e370 100644 --- a/drivers/gpu/drm/radeon/radeon_gem.c +++ b/drivers/gpu/drm/radeon/radeon_gem.c @@ -518,6 +518,68 @@ out: return r; }
+/** + * radeon_gem_va_update_vm -update the bo_va in its VM + * + * @rdev: radeon_device pointer + * @bo_va: bo_va to update + * + * Update the bo_va directly after setting it's address. Errors are not + * vital here, so they are not reported back to userspace. + */ +static void radeon_gem_va_update_vm(struct radeon_device *rdev, + struct radeon_bo_va *bo_va) +{ + struct ttm_validate_buffer tv, *entry; + struct radeon_cs_reloc *vm_bos; + struct ww_acquire_ctx ticket; + struct list_head list; + unsigned domain; + int r; + + INIT_LIST_HEAD(&list); + + tv.bo = &bo_va->bo->tbo; + tv.shared = true; + list_add(&tv.head, &list); + + vm_bos = radeon_vm_get_bos(rdev, bo_va->vm, &list); + if (!vm_bos) + return; + + r = ttm_eu_reserve_buffers(&ticket, &list, true); + if (r) + goto error_free; + + list_for_each_entry(entry, &list, head) { + domain = radeon_mem_type_to_domain(entry->bo->mem.mem_type); + /* if anything is swapped out don't swap it in here, + just abort and wait for the next CS */ + if (domain == RADEON_GEM_DOMAIN_CPU) + goto error_unreserve; + } + + mutex_lock(&bo_va->vm->mutex); + r = radeon_vm_clear_freed(rdev, bo_va->vm); + if (r) + goto error_unlock; + + if (bo_va->it.start) + r = radeon_vm_bo_update(rdev, bo_va, &bo_va->bo->tbo.mem); + +error_unlock: + mutex_unlock(&bo_va->vm->mutex); + +error_unreserve: + ttm_eu_backoff_reservation(&ticket, &list); + +error_free: + kfree(vm_bos); + + if (r) + DRM_ERROR("Couldn't update BO_VA (%d)\n", r); +} + int radeon_gem_va_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) { @@ -612,6 +674,8 @@ int radeon_gem_va_ioctl(struct drm_device *dev, void *data, default: break; } + if (!r) + radeon_gem_va_update_vm(rdev, bo_va); args->operation = RADEON_VA_RESULT_OK; if (r) { args->operation = RADEON_VA_RESULT_ERROR;
Dear Christian, Christian König wrote on 19.11.2014 14:01:
From: Christian König christian.koenig@amd.com
This way the necessary VM update is kicked off immediately if all BOs involved are in GPU accessible memory.
v2: fix vm lock v3: immediately update unmaps as well
Signed-off-by: Christian König christian.koenig@amd.com
drivers/gpu/drm/radeon/radeon_gem.c | 64 +++++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+)
Is this a fix for http://thread.gmane.org/gmane.comp.video.dri.devel/118415 which I bisected to the v2 of this patch (http://thread.gmane.org/gmane.comp.video.dri.devel/118415/focus=118425) as present on the drm-next-3.19-wip branch?
Cheers, Kai
Am 19.11.2014 um 14:16 schrieb Kai Wasserbäch:
Dear Christian, Christian König wrote on 19.11.2014 14:01:
From: Christian König christian.koenig@amd.com
This way the necessary VM update is kicked off immediately if all BOs involved are in GPU accessible memory.
v2: fix vm lock v3: immediately update unmaps as well
Signed-off-by: Christian König christian.koenig@amd.com
drivers/gpu/drm/radeon/radeon_gem.c | 64 +++++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+)
Is this a fix for http://thread.gmane.org/gmane.comp.video.dri.devel/118415 which I bisected to the v2 of this patch (http://thread.gmane.org/gmane.comp.video.dri.devel/118415/focus=118425) as present on the drm-next-3.19-wip branch?
Yes and no, it was actually the patch before this one which triggered the problem. The last one just made it much more likely to appear.
Please test the whole patchset on top of Dave's drm-next tree if your problem still exists.
Thanks, Christian.
Cheers, Kai
Dear Christian, Christian König wrote on 19.11.2014 14:35:
Am 19.11.2014 um 14:16 schrieb Kai Wasserbäch:
Dear Christian, Christian König wrote on 19.11.2014 14:01:
From: Christian König christian.koenig@amd.com
This way the necessary VM update is kicked off immediately if all BOs involved are in GPU accessible memory.
v2: fix vm lock v3: immediately update unmaps as well
Signed-off-by: Christian König christian.koenig@amd.com
drivers/gpu/drm/radeon/radeon_gem.c | 64 +++++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+)
Is this a fix for http://thread.gmane.org/gmane.comp.video.dri.devel/118415 which I bisected to the v2 of this patch (http://thread.gmane.org/gmane.comp.video.dri.devel/118415/focus=118425) as present on the drm-next-3.19-wip branch?
Yes and no, it was actually the patch before this one which triggered the problem. The last one just made it much more likely to appear.
Please test the whole patchset on top of Dave's drm-next tree if your problem still exists.
this is still bad:
[ 117.818981] BUG: unable to handle kernel paging request at ffffeae3801564d8 [ 117.819019] IP: [<ffffffff8111e6b1>] virt_to_head_page+0x33/0x4a [ 117.819049] PGD 0 [ 117.819059] Oops: 0000 [#1] SMP [ 117.819077] Modules linked in: serpent_avx_x86_64 serpent_sse2_x86_64 serpent_generic blowfish_x86_64 blowfish_common ecb cmac sha512_ssse3 sha512_generic sha256_ssse3 sha256_generic nfsd auth_rpcgss oid_registry nfs_acl nfs lockd grace fscache sunrpc nls_utf8 nls_cp437 vfat fat snd_hda_codec_realtek snd_hda_codec_generic snd_hda_codec_hdmi iTCO_wdt iTCO_vendor_support radeon snd_hda_intel x86_pkg_temp_thermal snd_hda_controller drm_kms_helper ttm snd_hda_codec snd_hwdep snd_pcm_oss mei_me video snd_mixer_oss i2c_i801 coretemp snd_pcm mei lpc_ich mfd_core evdev joydev processor snd_timer snd soundcore button serio_raw kvm_intel kvm pcspkr efivars fuse parport_pc ppdev lp parport ext4 crc16 mbcache jbd2 btrfs xor raid6_pq twofish_generic twofish_avx_x86_64 twofish_x86_64_3way twofish_x86_64 twofish_common [ 117.819454] xts af_alg hid_generic usbhid dm_crypt dm_mod microcode hid_lg_g710_plus(O) hid sg sr_mod sd_mod cdrom crct10dif_pclmul crc32c_intel ghash_clmulni_intel aesni_intel aes_x86_64 lrw gf128mul glue_helper ablk_helper cryptd ahci libahci libata atl1c thermal fan thermal_sys [ 117.819587] CPU: 1 PID: 1959 Comm: Dreamfall Chapt Tainted: G O 3.18.0-rc4-citadel-airlied-drm-next-with-ck-patches.0.1 #1 [ 117.819634] Hardware name: Gigabyte Technology Co., Ltd. To be filled by O.E.M./Z77-DS3H, BIOS F11a 11/13/2013 [ 117.819673] task: ffff8800d4417650 ti: ffff8800d21a0000 task.ti: ffff8800d21a0000 [ 117.819702] RIP: 0010:[<ffffffff8111e6b1>] [<ffffffff8111e6b1>] virt_to_head_page+0x33/0x4a [ 117.819737] RSP: 0018:ffff8800d21a3cf0 EFLAGS: 00010086 [ 117.819758] RAX: ffffeae3801564d8 RBX: 0000000000000286 RCX: 000077ff80000000 [ 117.819787] RDX: ffffea0000000000 RSI: ffff8800d21a3d30 RDI: ffffc900061cd000 [ 117.819815] RBP: ffffc900061cd000 R08: 0000000000000000 R09: ffff880407859008 [ 117.819843] R10: ffff880407858fe0 R11: 000000000007ffff R12: ffffffffa0602de8 [ 117.819870] R13: ffff880407858000 R14: ffff88039f261ac0 R15: ffff880403d876c0 [ 117.819898] FS: 00007f10c309b780(0000) GS:ffff88041ec40000(0000) knlGS:0000000000000000 [ 117.819930] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 117.819952] CR2: ffffeae3801564d8 CR3: 00000000dcad1000 CR4: 00000000001407e0 [ 117.819979] Stack: [ 117.819988] ffffffff8111eda1 ffff8800d21a3de0 0000000000000000 ffff8800d21a3d30 [ 117.820022] ffffffffa0602de8 ffffc900061cd000 00000000a060245a 0000000000000000 [ 117.820055] ffffc900061ce0a8 ffff8800d21a3d58 ffff8800d4417650 0000000000001e7d [ 117.820089] Call Trace: [ 117.820101] [<ffffffff8111eda1>] ? kfree+0x2e/0x6d [ 117.820137] [<ffffffffa0602de8>] ? radeon_gem_va_ioctl+0x28c/0x2d3 [radeon] [ 117.820176] [<ffffffffa0602601>] ? radeon_gem_create_ioctl+0xa4/0xc3 [radeon] [ 117.820207] [<ffffffff812be431>] ? drm_ioctl+0x35b/0x3e1 [ 117.820238] [<ffffffffa0602b5c>] ? radeon_gem_get_tiling_ioctl+0x8e/0x8e [radeon] [ 117.820270] [<ffffffff81440f2c>] ? _raw_spin_unlock_irqrestore+0xc/0xd [ 117.820303] [<ffffffffa05de04b>] ? radeon_drm_ioctl+0x4b/0x7a [radeon] [ 117.820331] [<ffffffff8113e795>] ? do_vfs_ioctl+0x34e/0x404 [ 117.820355] [<ffffffff811312a4>] ? vfs_read+0xbc/0xea [ 117.820377] [<ffffffff8113e89c>] ? SyS_ioctl+0x51/0x77 [ 117.820398] [<ffffffff814414e9>] ? system_call_fastpath+0x12/0x17 [ 117.820423] Code: 00 00 80 ff 77 00 00 48 01 fa 48 0f 42 0d 78 99 6f 00 48 8d 04 11 48 ba 00 00 00 00 00 ea ff ff 48 c1 e8 0c 48 6b c0 38 48 01 d0 <48> 8b 10 80 e6 80 74 0e 48 8b 50 30 48 8b 08 80 e5 80 48 0f 45 [ 117.820582] RIP [<ffffffff8111e6b1>] virt_to_head_page+0x33/0x4a [ 117.820608] RSP <ffff8800d21a3cf0> [ 117.820622] CR2: ffffeae3801564d8 [ 117.838461] ---[ end trace a6e2a6aa1df3196f ]---
I've used Dave Airlie's drm-next as a base (commit d0d6c524bf1d72e6d64134c3a315b77deecc9252) and "git am"-applied your series (no issues, applied cleanly) on top. Steam games are still entering the defunct state as soon as the 3D engines are fired up on a kernel built from that source tree.
This is with (Debian testing as a base): GPU: Hawaii PRO [Radeon R9 290] (ChipID = 0x67b1) Mesa: Git:master/b69c7c5dac libdrm: Git:master/00847fa48b LLVM: SVN:trunk/r222254 (3.6 devel) X.Org: 2:1.16.1-1 Firmware: http://people.freedesktop.org/~agd5f/radeon_ucode/ # 9e05820da42549ce9c89d147cf1f8e19 hawaii_ce.bin # c8bab593090fc54f239c8d7596c8d846 hawaii_mc.bin # 3618dbb955d8a84970e262bb2e6d2a16 hawaii_me.bin # c000b0fc9ff6582145f66504b0ec9597 hawaii_mec.bin # 0643ad24b3beff2214cce533e094c1b7 hawaii_pfp.bin # ba6054b7d78184a74602fd81607e1386 hawaii_rlc.bin # 11288f635737331b69de9ee82fe04898 hawaii_sdma.bin # 284429675a5560e0fad42aa982965fc2 hawaii_smc.bin libclc: Git:master/7f6f5bff1f DDX: 1:7.5.0-1
Let me know, if you need something else; see also the original thread http://thread.gmane.org/gmane.comp.video.dri.devel/118415 for further information.
Cheers, Kai
Ah! Yes of course, we have changed we way memory is allocated for the BO list in the meantime.
Does it work if you replace the last patch in the list with the attached one?
Thanks for pointing this out, Christian.
Am 19.11.2014 um 16:43 schrieb Kai Wasserbäch:
Dear Christian, Christian König wrote on 19.11.2014 14:35:
Am 19.11.2014 um 14:16 schrieb Kai Wasserbäch:
Dear Christian, Christian König wrote on 19.11.2014 14:01:
From: Christian König christian.koenig@amd.com
This way the necessary VM update is kicked off immediately if all BOs involved are in GPU accessible memory.
v2: fix vm lock v3: immediately update unmaps as well
Signed-off-by: Christian König christian.koenig@amd.com
drivers/gpu/drm/radeon/radeon_gem.c | 64 +++++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+)
Is this a fix for http://thread.gmane.org/gmane.comp.video.dri.devel/118415 which I bisected to the v2 of this patch (http://thread.gmane.org/gmane.comp.video.dri.devel/118415/focus=118425) as present on the drm-next-3.19-wip branch?
Yes and no, it was actually the patch before this one which triggered the problem. The last one just made it much more likely to appear.
Please test the whole patchset on top of Dave's drm-next tree if your problem still exists.
this is still bad:
[ 117.818981] BUG: unable to handle kernel paging request at ffffeae3801564d8 [ 117.819019] IP: [<ffffffff8111e6b1>] virt_to_head_page+0x33/0x4a [ 117.819049] PGD 0 [ 117.819059] Oops: 0000 [#1] SMP [ 117.819077] Modules linked in: serpent_avx_x86_64 serpent_sse2_x86_64 serpent_generic blowfish_x86_64 blowfish_common ecb cmac sha512_ssse3 sha512_generic sha256_ssse3 sha256_generic nfsd auth_rpcgss oid_registry nfs_acl nfs lockd grace fscache sunrpc nls_utf8 nls_cp437 vfat fat snd_hda_codec_realtek snd_hda_codec_generic snd_hda_codec_hdmi iTCO_wdt iTCO_vendor_support radeon snd_hda_intel x86_pkg_temp_thermal snd_hda_controller drm_kms_helper ttm snd_hda_codec snd_hwdep snd_pcm_oss mei_me video snd_mixer_oss i2c_i801 coretemp snd_pcm mei lpc_ich mfd_core evdev joydev processor snd_timer snd soundcore button serio_raw kvm_intel kvm pcspkr efivars fuse parport_pc ppdev lp parport ext4 crc16 mbcache jbd2 btrfs xor raid6_pq twofish_generic twofish_avx_x86_64 twofish_x86_64_3way twofish_x86_64 twofish_common [ 117.819454] xts af_alg hid_generic usbhid dm_crypt dm_mod microcode hid_lg_g710_plus(O) hid sg sr_mod sd_mod cdrom crct10dif_pclmul crc32c_intel ghash_clmulni_intel aesni_intel aes_x86_64 lrw gf128mul glue_helper ablk_helper cryptd ahci libahci libata atl1c thermal fan thermal_sys [ 117.819587] CPU: 1 PID: 1959 Comm: Dreamfall Chapt Tainted: G O 3.18.0-rc4-citadel-airlied-drm-next-with-ck-patches.0.1 #1 [ 117.819634] Hardware name: Gigabyte Technology Co., Ltd. To be filled by O.E.M./Z77-DS3H, BIOS F11a 11/13/2013 [ 117.819673] task: ffff8800d4417650 ti: ffff8800d21a0000 task.ti: ffff8800d21a0000 [ 117.819702] RIP: 0010:[<ffffffff8111e6b1>] [<ffffffff8111e6b1>] virt_to_head_page+0x33/0x4a [ 117.819737] RSP: 0018:ffff8800d21a3cf0 EFLAGS: 00010086 [ 117.819758] RAX: ffffeae3801564d8 RBX: 0000000000000286 RCX: 000077ff80000000 [ 117.819787] RDX: ffffea0000000000 RSI: ffff8800d21a3d30 RDI: ffffc900061cd000 [ 117.819815] RBP: ffffc900061cd000 R08: 0000000000000000 R09: ffff880407859008 [ 117.819843] R10: ffff880407858fe0 R11: 000000000007ffff R12: ffffffffa0602de8 [ 117.819870] R13: ffff880407858000 R14: ffff88039f261ac0 R15: ffff880403d876c0 [ 117.819898] FS: 00007f10c309b780(0000) GS:ffff88041ec40000(0000) knlGS:0000000000000000 [ 117.819930] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 117.819952] CR2: ffffeae3801564d8 CR3: 00000000dcad1000 CR4: 00000000001407e0 [ 117.819979] Stack: [ 117.819988] ffffffff8111eda1 ffff8800d21a3de0 0000000000000000 ffff8800d21a3d30 [ 117.820022] ffffffffa0602de8 ffffc900061cd000 00000000a060245a 0000000000000000 [ 117.820055] ffffc900061ce0a8 ffff8800d21a3d58 ffff8800d4417650 0000000000001e7d [ 117.820089] Call Trace: [ 117.820101] [<ffffffff8111eda1>] ? kfree+0x2e/0x6d [ 117.820137] [<ffffffffa0602de8>] ? radeon_gem_va_ioctl+0x28c/0x2d3 [radeon] [ 117.820176] [<ffffffffa0602601>] ? radeon_gem_create_ioctl+0xa4/0xc3 [radeon] [ 117.820207] [<ffffffff812be431>] ? drm_ioctl+0x35b/0x3e1 [ 117.820238] [<ffffffffa0602b5c>] ? radeon_gem_get_tiling_ioctl+0x8e/0x8e [radeon] [ 117.820270] [<ffffffff81440f2c>] ? _raw_spin_unlock_irqrestore+0xc/0xd [ 117.820303] [<ffffffffa05de04b>] ? radeon_drm_ioctl+0x4b/0x7a [radeon] [ 117.820331] [<ffffffff8113e795>] ? do_vfs_ioctl+0x34e/0x404 [ 117.820355] [<ffffffff811312a4>] ? vfs_read+0xbc/0xea [ 117.820377] [<ffffffff8113e89c>] ? SyS_ioctl+0x51/0x77 [ 117.820398] [<ffffffff814414e9>] ? system_call_fastpath+0x12/0x17 [ 117.820423] Code: 00 00 80 ff 77 00 00 48 01 fa 48 0f 42 0d 78 99 6f 00 48 8d 04 11 48 ba 00 00 00 00 00 ea ff ff 48 c1 e8 0c 48 6b c0 38 48 01 d0 <48> 8b 10 80 e6 80 74 0e 48 8b 50 30 48 8b 08 80 e5 80 48 0f 45 [ 117.820582] RIP [<ffffffff8111e6b1>] virt_to_head_page+0x33/0x4a [ 117.820608] RSP <ffff8800d21a3cf0> [ 117.820622] CR2: ffffeae3801564d8 [ 117.838461] ---[ end trace a6e2a6aa1df3196f ]---
I've used Dave Airlie's drm-next as a base (commit d0d6c524bf1d72e6d64134c3a315b77deecc9252) and "git am"-applied your series (no issues, applied cleanly) on top. Steam games are still entering the defunct state as soon as the 3D engines are fired up on a kernel built from that source tree.
This is with (Debian testing as a base): GPU: Hawaii PRO [Radeon R9 290] (ChipID = 0x67b1) Mesa: Git:master/b69c7c5dac libdrm: Git:master/00847fa48b LLVM: SVN:trunk/r222254 (3.6 devel) X.Org: 2:1.16.1-1 Firmware: http://people.freedesktop.org/~agd5f/radeon_ucode/ # 9e05820da42549ce9c89d147cf1f8e19 hawaii_ce.bin # c8bab593090fc54f239c8d7596c8d846 hawaii_mc.bin # 3618dbb955d8a84970e262bb2e6d2a16 hawaii_me.bin # c000b0fc9ff6582145f66504b0ec9597 hawaii_mec.bin # 0643ad24b3beff2214cce533e094c1b7 hawaii_pfp.bin # ba6054b7d78184a74602fd81607e1386 hawaii_rlc.bin # 11288f635737331b69de9ee82fe04898 hawaii_sdma.bin # 284429675a5560e0fad42aa982965fc2 hawaii_smc.bin libclc: Git:master/7f6f5bff1f DDX: 1:7.5.0-1
Let me know, if you need something else; see also the original thread http://thread.gmane.org/gmane.comp.video.dri.devel/118415 for further information.
Cheers, Kai
Yes, that seems to fix the problem!
You can have my Tested-by: Kai Wasserbäch kai@dev.carbon-project.org
Thanks, Kai
Christian König wrote on 19.11.2014 17:18:
Ah! Yes of course, we have changed we way memory is allocated for the BO list in the meantime.
Does it work if you replace the last patch in the list with the attached one?
Thanks for pointing this out, Christian.
Am 19.11.2014 um 16:43 schrieb Kai Wasserbäch:
Dear Christian, Christian König wrote on 19.11.2014 14:35:
Am 19.11.2014 um 14:16 schrieb Kai Wasserbäch:
Dear Christian, Christian König wrote on 19.11.2014 14:01:
From: Christian König christian.koenig@amd.com
This way the necessary VM update is kicked off immediately if all BOs involved are in GPU accessible memory.
v2: fix vm lock v3: immediately update unmaps as well
Signed-off-by: Christian König christian.koenig@amd.com
drivers/gpu/drm/radeon/radeon_gem.c | 64 +++++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+)
Is this a fix for http://thread.gmane.org/gmane.comp.video.dri.devel/118415 which I bisected to the v2 of this patch (http://thread.gmane.org/gmane.comp.video.dri.devel/118415/focus=118425) as present on the drm-next-3.19-wip branch?
Yes and no, it was actually the patch before this one which triggered the problem. The last one just made it much more likely to appear.
Please test the whole patchset on top of Dave's drm-next tree if your problem still exists.
this is still bad:
[ 117.818981] BUG: unable to handle kernel paging request at ffffeae3801564d8 [ 117.819019] IP: [<ffffffff8111e6b1>] virt_to_head_page+0x33/0x4a [ 117.819049] PGD 0 [ 117.819059] Oops: 0000 [#1] SMP [ 117.819077] Modules linked in: serpent_avx_x86_64 serpent_sse2_x86_64 serpent_generic blowfish_x86_64 blowfish_common ecb cmac sha512_ssse3 sha512_generic sha256_ssse3 sha256_generic nfsd auth_rpcgss oid_registry nfs_acl nfs lockd grace fscache sunrpc nls_utf8 nls_cp437 vfat fat snd_hda_codec_realtek snd_hda_codec_generic snd_hda_codec_hdmi iTCO_wdt iTCO_vendor_support radeon snd_hda_intel x86_pkg_temp_thermal snd_hda_controller drm_kms_helper ttm snd_hda_codec snd_hwdep snd_pcm_oss mei_me video snd_mixer_oss i2c_i801 coretemp snd_pcm mei lpc_ich mfd_core evdev joydev processor snd_timer snd soundcore button serio_raw kvm_intel kvm pcspkr efivars fuse parport_pc ppdev lp parport ext4 crc16 mbcache jbd2 btrfs xor raid6_pq twofish_generic twofish_avx_x86_64 twofish_x86_64_3way twofish_x86_64 twofish_common [ 117.819454] xts af_alg hid_generic usbhid dm_crypt dm_mod microcode hid_lg_g710_plus(O) hid sg sr_mod sd_mod cdrom crct10dif_pclmul crc32c_intel ghash_clmulni_intel aesni_intel aes_x86_64 lrw gf128mul glue_helper ablk_helper cryptd ahci libahci libata atl1c thermal fan thermal_sys [ 117.819587] CPU: 1 PID: 1959 Comm: Dreamfall Chapt Tainted: G O 3.18.0-rc4-citadel-airlied-drm-next-with-ck-patches.0.1 #1 [ 117.819634] Hardware name: Gigabyte Technology Co., Ltd. To be filled by O.E.M./Z77-DS3H, BIOS F11a 11/13/2013 [ 117.819673] task: ffff8800d4417650 ti: ffff8800d21a0000 task.ti: ffff8800d21a0000 [ 117.819702] RIP: 0010:[<ffffffff8111e6b1>] [<ffffffff8111e6b1>] virt_to_head_page+0x33/0x4a [ 117.819737] RSP: 0018:ffff8800d21a3cf0 EFLAGS: 00010086 [ 117.819758] RAX: ffffeae3801564d8 RBX: 0000000000000286 RCX: 000077ff80000000 [ 117.819787] RDX: ffffea0000000000 RSI: ffff8800d21a3d30 RDI: ffffc900061cd000 [ 117.819815] RBP: ffffc900061cd000 R08: 0000000000000000 R09: ffff880407859008 [ 117.819843] R10: ffff880407858fe0 R11: 000000000007ffff R12: ffffffffa0602de8 [ 117.819870] R13: ffff880407858000 R14: ffff88039f261ac0 R15: ffff880403d876c0 [ 117.819898] FS: 00007f10c309b780(0000) GS:ffff88041ec40000(0000) knlGS:0000000000000000 [ 117.819930] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 117.819952] CR2: ffffeae3801564d8 CR3: 00000000dcad1000 CR4: 00000000001407e0 [ 117.819979] Stack: [ 117.819988] ffffffff8111eda1 ffff8800d21a3de0 0000000000000000 ffff8800d21a3d30 [ 117.820022] ffffffffa0602de8 ffffc900061cd000 00000000a060245a 0000000000000000 [ 117.820055] ffffc900061ce0a8 ffff8800d21a3d58 ffff8800d4417650 0000000000001e7d [ 117.820089] Call Trace: [ 117.820101] [<ffffffff8111eda1>] ? kfree+0x2e/0x6d [ 117.820137] [<ffffffffa0602de8>] ? radeon_gem_va_ioctl+0x28c/0x2d3 [radeon] [ 117.820176] [<ffffffffa0602601>] ? radeon_gem_create_ioctl+0xa4/0xc3 [radeon] [ 117.820207] [<ffffffff812be431>] ? drm_ioctl+0x35b/0x3e1 [ 117.820238] [<ffffffffa0602b5c>] ? radeon_gem_get_tiling_ioctl+0x8e/0x8e [radeon] [ 117.820270] [<ffffffff81440f2c>] ? _raw_spin_unlock_irqrestore+0xc/0xd [ 117.820303] [<ffffffffa05de04b>] ? radeon_drm_ioctl+0x4b/0x7a [radeon] [ 117.820331] [<ffffffff8113e795>] ? do_vfs_ioctl+0x34e/0x404 [ 117.820355] [<ffffffff811312a4>] ? vfs_read+0xbc/0xea [ 117.820377] [<ffffffff8113e89c>] ? SyS_ioctl+0x51/0x77 [ 117.820398] [<ffffffff814414e9>] ? system_call_fastpath+0x12/0x17 [ 117.820423] Code: 00 00 80 ff 77 00 00 48 01 fa 48 0f 42 0d 78 99 6f 00 48 8d 04 11 48 ba 00 00 00 00 00 ea ff ff 48 c1 e8 0c 48 6b c0 38 48 01 d0 <48> 8b 10 80 e6 80 74 0e 48 8b 50 30 48 8b 08 80 e5 80 48 0f 45 [ 117.820582] RIP [<ffffffff8111e6b1>] virt_to_head_page+0x33/0x4a [ 117.820608] RSP <ffff8800d21a3cf0> [ 117.820622] CR2: ffffeae3801564d8 [ 117.838461] ---[ end trace a6e2a6aa1df3196f ]---
I've used Dave Airlie's drm-next as a base (commit d0d6c524bf1d72e6d64134c3a315b77deecc9252) and "git am"-applied your series (no issues, applied cleanly) on top. Steam games are still entering the defunct state as soon as the 3D engines are fired up on a kernel built from that source tree.
This is with (Debian testing as a base): GPU: Hawaii PRO [Radeon R9 290] (ChipID = 0x67b1) Mesa: Git:master/b69c7c5dac libdrm: Git:master/00847fa48b LLVM: SVN:trunk/r222254 (3.6 devel) X.Org: 2:1.16.1-1 Firmware: http://people.freedesktop.org/~agd5f/radeon_ucode/ # 9e05820da42549ce9c89d147cf1f8e19 hawaii_ce.bin # c8bab593090fc54f239c8d7596c8d846 hawaii_mc.bin # 3618dbb955d8a84970e262bb2e6d2a16 hawaii_me.bin # c000b0fc9ff6582145f66504b0ec9597 hawaii_mec.bin # 0643ad24b3beff2214cce533e094c1b7 hawaii_pfp.bin # ba6054b7d78184a74602fd81607e1386 hawaii_rlc.bin # 11288f635737331b69de9ee82fe04898 hawaii_sdma.bin # 284429675a5560e0fad42aa982965fc2 hawaii_smc.bin libclc: Git:master/7f6f5bff1f DDX: 1:7.5.0-1
Let me know, if you need something else; see also the original thread http://thread.gmane.org/gmane.comp.video.dri.devel/118415 for further information.
Cheers, Kai
On Wed, Nov 19, 2014 at 8:01 AM, Christian König deathsimple@vodafone.de wrote:
From: Christian König christian.koenig@amd.com
Use ring structure instead of index and provide vm_id and pd_addr separately.
Signed-off-by: Christian König christian.koenig@amd.com
Pushed to my 3.19-wip tree. I'll send out a new radeon drm-next pull request later today or tomorrow.
Alex
drivers/gpu/drm/radeon/cik.c | 23 ++++++++++------------- drivers/gpu/drm/radeon/cik_sdma.c | 22 +++++++++------------- drivers/gpu/drm/radeon/ni.c | 14 +++++--------- drivers/gpu/drm/radeon/ni_dma.c | 14 +++++--------- drivers/gpu/drm/radeon/radeon.h | 5 +++-- drivers/gpu/drm/radeon/radeon_asic.h | 18 ++++++++++++------ drivers/gpu/drm/radeon/radeon_vm.c | 3 ++- drivers/gpu/drm/radeon/si.c | 18 +++++++----------- drivers/gpu/drm/radeon/si_dma.c | 19 ++++++++----------- 9 files changed, 61 insertions(+), 75 deletions(-)
diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index 57a359d..d52ead9 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -5967,26 +5967,23 @@ static void cik_vm_decode_fault(struct radeon_device *rdev,
- Update the page table base and flush the VM TLB
- using the CP (CIK).
*/ -void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm) +void cik_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
unsigned vm_id, uint64_t pd_addr)
{
struct radeon_ring *ring = &rdev->ring[ridx];
int usepfp = (ridx == RADEON_RING_TYPE_GFX_INDEX);
if (vm == NULL)
return;
int usepfp = (ring->idx == RADEON_RING_TYPE_GFX_INDEX); radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) | WRITE_DATA_DST_SEL(0)));
if (vm->id < 8) {
if (vm_id < 8) { radeon_ring_write(ring,
(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2); } else { radeon_ring_write(ring,
(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2); } radeon_ring_write(ring, 0);
radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
radeon_ring_write(ring, pd_addr >> 12); /* update SH_MEM_* regs */ radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
@@ -5994,7 +5991,7 @@ void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm) WRITE_DATA_DST_SEL(0))); radeon_ring_write(ring, SRBM_GFX_CNTL >> 2); radeon_ring_write(ring, 0);
radeon_ring_write(ring, VMID(vm->id));
radeon_ring_write(ring, VMID(vm_id)); radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6)); radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
@@ -6015,7 +6012,7 @@ void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm) radeon_ring_write(ring, VMID(0));
/* HDP flush */
cik_hdp_flush_cp_ring_emit(rdev, ridx);
cik_hdp_flush_cp_ring_emit(rdev, ring->idx); /* bits 0-15 are the VM contexts0-15 */ radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
@@ -6023,7 +6020,7 @@ void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm) WRITE_DATA_DST_SEL(0))); radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2); radeon_ring_write(ring, 0);
radeon_ring_write(ring, 1 << vm->id);
radeon_ring_write(ring, 1 << vm_id); /* compute doesn't have PFP */ if (usepfp) {
diff --git a/drivers/gpu/drm/radeon/cik_sdma.c b/drivers/gpu/drm/radeon/cik_sdma.c index 4e8432d..7470a2e 100644 --- a/drivers/gpu/drm/radeon/cik_sdma.c +++ b/drivers/gpu/drm/radeon/cik_sdma.c @@ -901,25 +901,21 @@ void cik_sdma_vm_pad_ib(struct radeon_ib *ib)
- Update the page table base and flush the VM TLB
- using sDMA (CIK).
*/ -void cik_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm) +void cik_dma_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
unsigned vm_id, uint64_t pd_addr)
{
struct radeon_ring *ring = &rdev->ring[ridx];
if (vm == NULL)
return;
radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
if (vm->id < 8) {
radeon_ring_write(ring, (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
if (vm_id < 8) {
radeon_ring_write(ring, (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2); } else {
radeon_ring_write(ring, (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
radeon_ring_write(ring, (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2); }
radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
radeon_ring_write(ring, pd_addr >> 12); /* update SH_MEM_* regs */ radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000)); radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
radeon_ring_write(ring, VMID(vm->id));
radeon_ring_write(ring, VMID(vm_id)); radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000)); radeon_ring_write(ring, SH_MEM_BASES >> 2);
@@ -942,11 +938,11 @@ void cik_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm radeon_ring_write(ring, VMID(0));
/* flush HDP */
cik_sdma_hdp_flush_ring_emit(rdev, ridx);
cik_sdma_hdp_flush_ring_emit(rdev, ring->idx); /* flush TLB */ radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000)); radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
radeon_ring_write(ring, 1 << vm->id);
radeon_ring_write(ring, 1 << vm_id);
}
diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c index 3faee58..bee432d 100644 --- a/drivers/gpu/drm/radeon/ni.c +++ b/drivers/gpu/drm/radeon/ni.c @@ -2502,15 +2502,11 @@ void cayman_vm_decode_fault(struct radeon_device *rdev,
- Update the page table base and flush the VM TLB
- using the CP (cayman-si).
*/ -void cayman_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm) +void cayman_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
unsigned vm_id, uint64_t pd_addr)
{
struct radeon_ring *ring = &rdev->ring[ridx];
if (vm == NULL)
return;
radeon_ring_write(ring, PACKET0(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2), 0));
radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
radeon_ring_write(ring, PACKET0(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2), 0));
radeon_ring_write(ring, pd_addr >> 12); /* flush hdp cache */ radeon_ring_write(ring, PACKET0(HDP_MEM_COHERENCY_FLUSH_CNTL, 0));
@@ -2518,7 +2514,7 @@ void cayman_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
/* bits 0-7 are the VM contexts0-7 */ radeon_ring_write(ring, PACKET0(VM_INVALIDATE_REQUEST, 0));
radeon_ring_write(ring, 1 << vm->id);
radeon_ring_write(ring, 1 << vm_id); /* sync PFP to ME, otherwise we might get invalid PFP reads */ radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
diff --git a/drivers/gpu/drm/radeon/ni_dma.c b/drivers/gpu/drm/radeon/ni_dma.c index f26f0a9..5a72404 100644 --- a/drivers/gpu/drm/radeon/ni_dma.c +++ b/drivers/gpu/drm/radeon/ni_dma.c @@ -446,16 +446,12 @@ void cayman_dma_vm_pad_ib(struct radeon_ib *ib) ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0); }
-void cayman_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm) +void cayman_dma_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
unsigned vm_id, uint64_t pd_addr)
{
struct radeon_ring *ring = &rdev->ring[ridx];
if (vm == NULL)
return;
radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2));
radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2));
radeon_ring_write(ring, pd_addr >> 12); /* flush hdp cache */ radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
@@ -465,6 +461,6 @@ void cayman_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm /* bits 0-7 are the VM contexts0-7 */ radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0)); radeon_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST >> 2));
radeon_ring_write(ring, 1 << vm->id);
radeon_ring_write(ring, 1 << vm_id);
}
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 908f349..5d913ef 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -1790,7 +1790,8 @@ struct radeon_asic_ring { void (*hdp_flush)(struct radeon_device *rdev, struct radeon_ring *ring); bool (*emit_semaphore)(struct radeon_device *rdev, struct radeon_ring *cp, struct radeon_semaphore *semaphore, bool emit_wait);
void (*vm_flush)(struct radeon_device *rdev, int ridx, struct radeon_vm *vm);
void (*vm_flush)(struct radeon_device *rdev, struct radeon_ring *ring,
unsigned vm_id, uint64_t pd_addr); /* testing functions */ int (*ring_test)(struct radeon_device *rdev, struct radeon_ring *cp);
@@ -2836,7 +2837,7 @@ static inline void radeon_ring_write(struct radeon_ring *ring, uint32_t v) #define radeon_ring_ib_execute(rdev, r, ib) (rdev)->asic->ring[(r)]->ib_execute((rdev), (ib)) #define radeon_ring_ib_parse(rdev, r, ib) (rdev)->asic->ring[(r)]->ib_parse((rdev), (ib)) #define radeon_ring_is_lockup(rdev, r, cp) (rdev)->asic->ring[(r)]->is_lockup((rdev), (cp)) -#define radeon_ring_vm_flush(rdev, r, vm) (rdev)->asic->ring[(r)]->vm_flush((rdev), (r), (vm)) +#define radeon_ring_vm_flush(rdev, r, vm_id, pd_addr) (rdev)->asic->ring[(r)->idx]->vm_flush((rdev), (r), (vm_id), (pd_addr)) #define radeon_ring_get_rptr(rdev, r) (rdev)->asic->ring[(r)->idx]->get_rptr((rdev), (r)) #define radeon_ring_get_wptr(rdev, r) (rdev)->asic->ring[(r)->idx]->get_wptr((rdev), (r)) #define radeon_ring_set_wptr(rdev, r) (rdev)->asic->ring[(r)->idx]->set_wptr((rdev), (r)) diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h index d8ace5b..2a45d54 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.h +++ b/drivers/gpu/drm/radeon/radeon_asic.h @@ -599,7 +599,8 @@ int cayman_asic_reset(struct radeon_device *rdev); void cayman_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib); int cayman_vm_init(struct radeon_device *rdev); void cayman_vm_fini(struct radeon_device *rdev); -void cayman_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm); +void cayman_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
unsigned vm_id, uint64_t pd_addr);
uint32_t cayman_vm_page_flags(struct radeon_device *rdev, uint32_t flags); int evergreen_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib); int evergreen_dma_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib); @@ -624,7 +625,8 @@ void cayman_dma_vm_set_pages(struct radeon_device *rdev, uint32_t incr, uint32_t flags); void cayman_dma_vm_pad_ib(struct radeon_ib *ib);
-void cayman_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm); +void cayman_dma_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
unsigned vm_id, uint64_t pd_addr);
u32 cayman_gfx_get_rptr(struct radeon_device *rdev, struct radeon_ring *ring); @@ -699,7 +701,8 @@ int si_irq_set(struct radeon_device *rdev); int si_irq_process(struct radeon_device *rdev); int si_vm_init(struct radeon_device *rdev); void si_vm_fini(struct radeon_device *rdev); -void si_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm); +void si_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
unsigned vm_id, uint64_t pd_addr);
int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib); struct radeon_fence *si_copy_dma(struct radeon_device *rdev, uint64_t src_offset, uint64_t dst_offset, @@ -721,7 +724,8 @@ void si_dma_vm_set_pages(struct radeon_device *rdev, uint64_t addr, unsigned count, uint32_t incr, uint32_t flags);
-void si_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm); +void si_dma_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
unsigned vm_id, uint64_t pd_addr);
u32 si_get_xclk(struct radeon_device *rdev); uint64_t si_get_gpu_clock_counter(struct radeon_device *rdev); int si_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk); @@ -793,7 +797,8 @@ int cik_irq_set(struct radeon_device *rdev); int cik_irq_process(struct radeon_device *rdev); int cik_vm_init(struct radeon_device *rdev); void cik_vm_fini(struct radeon_device *rdev); -void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm); +void cik_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
unsigned vm_id, uint64_t pd_addr);
void cik_sdma_vm_copy_pages(struct radeon_device *rdev, struct radeon_ib *ib, @@ -811,7 +816,8 @@ void cik_sdma_vm_set_pages(struct radeon_device *rdev, uint32_t incr, uint32_t flags); void cik_sdma_vm_pad_ib(struct radeon_ib *ib);
-void cik_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm); +void cik_dma_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
unsigned vm_id, uint64_t pd_addr);
int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib); u32 cik_gfx_get_rptr(struct radeon_device *rdev, struct radeon_ring *ring); diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c index dfde266..9d0f87b 100644 --- a/drivers/gpu/drm/radeon/radeon_vm.c +++ b/drivers/gpu/drm/radeon/radeon_vm.c @@ -243,7 +243,8 @@ void radeon_vm_flush(struct radeon_device *rdev, if (!vm->last_flush || pd_addr != vm->pd_gpu_addr) { trace_radeon_vm_flush(pd_addr, ring, vm->id); vm->pd_gpu_addr = pd_addr;
radeon_ring_vm_flush(rdev, ring, vm);
radeon_ring_vm_flush(rdev, &rdev->ring[ring],
vm->id, vm->pd_gpu_addr); }
}
diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c index eeea5b6..e91968b 100644 --- a/drivers/gpu/drm/radeon/si.c +++ b/drivers/gpu/drm/radeon/si.c @@ -5020,27 +5020,23 @@ static void si_vm_decode_fault(struct radeon_device *rdev, block, mc_id); }
-void si_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm) +void si_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
unsigned vm_id, uint64_t pd_addr)
{
struct radeon_ring *ring = &rdev->ring[ridx];
if (vm == NULL)
return;
/* write new base address */ radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) | WRITE_DATA_DST_SEL(0)));
if (vm->id < 8) {
if (vm_id < 8) { radeon_ring_write(ring,
(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2); } else { radeon_ring_write(ring,
(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2); } radeon_ring_write(ring, 0);
radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
radeon_ring_write(ring, pd_addr >> 12); /* flush hdp cache */ radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
@@ -5056,7 +5052,7 @@ void si_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm) WRITE_DATA_DST_SEL(0))); radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2); radeon_ring_write(ring, 0);
radeon_ring_write(ring, 1 << vm->id);
radeon_ring_write(ring, 1 << vm_id); /* sync PFP to ME, otherwise we might get invalid PFP reads */ radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
diff --git a/drivers/gpu/drm/radeon/si_dma.c b/drivers/gpu/drm/radeon/si_dma.c index b58f12b..e8bc0a5 100644 --- a/drivers/gpu/drm/radeon/si_dma.c +++ b/drivers/gpu/drm/radeon/si_dma.c @@ -185,20 +185,17 @@ void si_dma_vm_set_pages(struct radeon_device *rdev, } }
-void si_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm) -{
struct radeon_ring *ring = &rdev->ring[ridx];
if (vm == NULL)
return;
+void si_dma_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
unsigned vm_id, uint64_t pd_addr)
+{ radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
if (vm->id < 8) {
radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2));
if (vm_id < 8) {
radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2)); } else {
radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2));
radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2)); }
radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
radeon_ring_write(ring, pd_addr >> 12); /* flush hdp cache */ radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
@@ -208,7 +205,7 @@ void si_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm) /* bits 0-7 are the VM contexts0-7 */ radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0)); radeon_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST >> 2));
radeon_ring_write(ring, 1 << vm->id);
radeon_ring_write(ring, 1 << vm_id);
}
/**
1.9.1
Am 19.11.2014 um 19:28 schrieb Alex Deucher:
On Wed, Nov 19, 2014 at 8:01 AM, Christian König deathsimple@vodafone.de wrote:
From: Christian König christian.koenig@amd.com
Use ring structure instead of index and provide vm_id and pd_addr separately.
Signed-off-by: Christian König christian.koenig@amd.com
Pushed to my 3.19-wip tree. I'll send out a new radeon drm-next pull request later today or tomorrow.
Please note that I had to fix another bug in the last patch pointed out by Kai Wasserbäch.
The updated patch should already be in your inbox.
Christian.
Alex
drivers/gpu/drm/radeon/cik.c | 23 ++++++++++------------- drivers/gpu/drm/radeon/cik_sdma.c | 22 +++++++++------------- drivers/gpu/drm/radeon/ni.c | 14 +++++--------- drivers/gpu/drm/radeon/ni_dma.c | 14 +++++--------- drivers/gpu/drm/radeon/radeon.h | 5 +++-- drivers/gpu/drm/radeon/radeon_asic.h | 18 ++++++++++++------ drivers/gpu/drm/radeon/radeon_vm.c | 3 ++- drivers/gpu/drm/radeon/si.c | 18 +++++++----------- drivers/gpu/drm/radeon/si_dma.c | 19 ++++++++----------- 9 files changed, 61 insertions(+), 75 deletions(-)
diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index 57a359d..d52ead9 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -5967,26 +5967,23 @@ static void cik_vm_decode_fault(struct radeon_device *rdev,
- Update the page table base and flush the VM TLB
- using the CP (CIK).
*/ -void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm) +void cik_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
{unsigned vm_id, uint64_t pd_addr)
struct radeon_ring *ring = &rdev->ring[ridx];
int usepfp = (ridx == RADEON_RING_TYPE_GFX_INDEX);
if (vm == NULL)
return;
int usepfp = (ring->idx == RADEON_RING_TYPE_GFX_INDEX); radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) | WRITE_DATA_DST_SEL(0)));
if (vm->id < 8) {
if (vm_id < 8) { radeon_ring_write(ring,
(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2); } else { radeon_ring_write(ring,
(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2); } radeon_ring_write(ring, 0);
radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
radeon_ring_write(ring, pd_addr >> 12); /* update SH_MEM_* regs */ radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
@@ -5994,7 +5991,7 @@ void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm) WRITE_DATA_DST_SEL(0))); radeon_ring_write(ring, SRBM_GFX_CNTL >> 2); radeon_ring_write(ring, 0);
radeon_ring_write(ring, VMID(vm->id));
radeon_ring_write(ring, VMID(vm_id)); radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6)); radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
@@ -6015,7 +6012,7 @@ void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm) radeon_ring_write(ring, VMID(0));
/* HDP flush */
cik_hdp_flush_cp_ring_emit(rdev, ridx);
cik_hdp_flush_cp_ring_emit(rdev, ring->idx); /* bits 0-15 are the VM contexts0-15 */ radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
@@ -6023,7 +6020,7 @@ void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm) WRITE_DATA_DST_SEL(0))); radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2); radeon_ring_write(ring, 0);
radeon_ring_write(ring, 1 << vm->id);
radeon_ring_write(ring, 1 << vm_id); /* compute doesn't have PFP */ if (usepfp) {
diff --git a/drivers/gpu/drm/radeon/cik_sdma.c b/drivers/gpu/drm/radeon/cik_sdma.c index 4e8432d..7470a2e 100644 --- a/drivers/gpu/drm/radeon/cik_sdma.c +++ b/drivers/gpu/drm/radeon/cik_sdma.c @@ -901,25 +901,21 @@ void cik_sdma_vm_pad_ib(struct radeon_ib *ib)
- Update the page table base and flush the VM TLB
- using sDMA (CIK).
*/ -void cik_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm) +void cik_dma_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
{unsigned vm_id, uint64_t pd_addr)
struct radeon_ring *ring = &rdev->ring[ridx];
if (vm == NULL)
return;
radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
if (vm->id < 8) {
radeon_ring_write(ring, (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
if (vm_id < 8) {
radeon_ring_write(ring, (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2); } else {
radeon_ring_write(ring, (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
radeon_ring_write(ring, (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2); }
radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
radeon_ring_write(ring, pd_addr >> 12); /* update SH_MEM_* regs */ radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000)); radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
radeon_ring_write(ring, VMID(vm->id));
radeon_ring_write(ring, VMID(vm_id)); radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000)); radeon_ring_write(ring, SH_MEM_BASES >> 2);
@@ -942,11 +938,11 @@ void cik_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm radeon_ring_write(ring, VMID(0));
/* flush HDP */
cik_sdma_hdp_flush_ring_emit(rdev, ridx);
cik_sdma_hdp_flush_ring_emit(rdev, ring->idx); /* flush TLB */ radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000)); radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
radeon_ring_write(ring, 1 << vm->id);
}radeon_ring_write(ring, 1 << vm_id);
diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c index 3faee58..bee432d 100644 --- a/drivers/gpu/drm/radeon/ni.c +++ b/drivers/gpu/drm/radeon/ni.c @@ -2502,15 +2502,11 @@ void cayman_vm_decode_fault(struct radeon_device *rdev,
- Update the page table base and flush the VM TLB
- using the CP (cayman-si).
*/ -void cayman_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm) +void cayman_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
{unsigned vm_id, uint64_t pd_addr)
struct radeon_ring *ring = &rdev->ring[ridx];
if (vm == NULL)
return;
radeon_ring_write(ring, PACKET0(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2), 0));
radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
radeon_ring_write(ring, PACKET0(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2), 0));
radeon_ring_write(ring, pd_addr >> 12); /* flush hdp cache */ radeon_ring_write(ring, PACKET0(HDP_MEM_COHERENCY_FLUSH_CNTL, 0));
@@ -2518,7 +2514,7 @@ void cayman_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
/* bits 0-7 are the VM contexts0-7 */ radeon_ring_write(ring, PACKET0(VM_INVALIDATE_REQUEST, 0));
radeon_ring_write(ring, 1 << vm->id);
radeon_ring_write(ring, 1 << vm_id); /* sync PFP to ME, otherwise we might get invalid PFP reads */ radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
diff --git a/drivers/gpu/drm/radeon/ni_dma.c b/drivers/gpu/drm/radeon/ni_dma.c index f26f0a9..5a72404 100644 --- a/drivers/gpu/drm/radeon/ni_dma.c +++ b/drivers/gpu/drm/radeon/ni_dma.c @@ -446,16 +446,12 @@ void cayman_dma_vm_pad_ib(struct radeon_ib *ib) ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0); }
-void cayman_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm) +void cayman_dma_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
{unsigned vm_id, uint64_t pd_addr)
struct radeon_ring *ring = &rdev->ring[ridx];
if (vm == NULL)
return;
radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2));
radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2));
radeon_ring_write(ring, pd_addr >> 12); /* flush hdp cache */ radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
@@ -465,6 +461,6 @@ void cayman_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm /* bits 0-7 are the VM contexts0-7 */ radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0)); radeon_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST >> 2));
radeon_ring_write(ring, 1 << vm->id);
}radeon_ring_write(ring, 1 << vm_id);
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 908f349..5d913ef 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -1790,7 +1790,8 @@ struct radeon_asic_ring { void (*hdp_flush)(struct radeon_device *rdev, struct radeon_ring *ring); bool (*emit_semaphore)(struct radeon_device *rdev, struct radeon_ring *cp, struct radeon_semaphore *semaphore, bool emit_wait);
void (*vm_flush)(struct radeon_device *rdev, int ridx, struct radeon_vm *vm);
void (*vm_flush)(struct radeon_device *rdev, struct radeon_ring *ring,
unsigned vm_id, uint64_t pd_addr); /* testing functions */ int (*ring_test)(struct radeon_device *rdev, struct radeon_ring *cp);
@@ -2836,7 +2837,7 @@ static inline void radeon_ring_write(struct radeon_ring *ring, uint32_t v) #define radeon_ring_ib_execute(rdev, r, ib) (rdev)->asic->ring[(r)]->ib_execute((rdev), (ib)) #define radeon_ring_ib_parse(rdev, r, ib) (rdev)->asic->ring[(r)]->ib_parse((rdev), (ib)) #define radeon_ring_is_lockup(rdev, r, cp) (rdev)->asic->ring[(r)]->is_lockup((rdev), (cp)) -#define radeon_ring_vm_flush(rdev, r, vm) (rdev)->asic->ring[(r)]->vm_flush((rdev), (r), (vm)) +#define radeon_ring_vm_flush(rdev, r, vm_id, pd_addr) (rdev)->asic->ring[(r)->idx]->vm_flush((rdev), (r), (vm_id), (pd_addr)) #define radeon_ring_get_rptr(rdev, r) (rdev)->asic->ring[(r)->idx]->get_rptr((rdev), (r)) #define radeon_ring_get_wptr(rdev, r) (rdev)->asic->ring[(r)->idx]->get_wptr((rdev), (r)) #define radeon_ring_set_wptr(rdev, r) (rdev)->asic->ring[(r)->idx]->set_wptr((rdev), (r)) diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h index d8ace5b..2a45d54 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.h +++ b/drivers/gpu/drm/radeon/radeon_asic.h @@ -599,7 +599,8 @@ int cayman_asic_reset(struct radeon_device *rdev); void cayman_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib); int cayman_vm_init(struct radeon_device *rdev); void cayman_vm_fini(struct radeon_device *rdev); -void cayman_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm); +void cayman_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
uint32_t cayman_vm_page_flags(struct radeon_device *rdev, uint32_t flags); int evergreen_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib); int evergreen_dma_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib);unsigned vm_id, uint64_t pd_addr);
@@ -624,7 +625,8 @@ void cayman_dma_vm_set_pages(struct radeon_device *rdev, uint32_t incr, uint32_t flags); void cayman_dma_vm_pad_ib(struct radeon_ib *ib);
-void cayman_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm); +void cayman_dma_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
unsigned vm_id, uint64_t pd_addr);
u32 cayman_gfx_get_rptr(struct radeon_device *rdev, struct radeon_ring *ring);
@@ -699,7 +701,8 @@ int si_irq_set(struct radeon_device *rdev); int si_irq_process(struct radeon_device *rdev); int si_vm_init(struct radeon_device *rdev); void si_vm_fini(struct radeon_device *rdev); -void si_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm); +void si_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib); struct radeon_fence *si_copy_dma(struct radeon_device *rdev, uint64_t src_offset, uint64_t dst_offset,unsigned vm_id, uint64_t pd_addr);
@@ -721,7 +724,8 @@ void si_dma_vm_set_pages(struct radeon_device *rdev, uint64_t addr, unsigned count, uint32_t incr, uint32_t flags);
-void si_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm); +void si_dma_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
u32 si_get_xclk(struct radeon_device *rdev); uint64_t si_get_gpu_clock_counter(struct radeon_device *rdev); int si_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk);unsigned vm_id, uint64_t pd_addr);
@@ -793,7 +797,8 @@ int cik_irq_set(struct radeon_device *rdev); int cik_irq_process(struct radeon_device *rdev); int cik_vm_init(struct radeon_device *rdev); void cik_vm_fini(struct radeon_device *rdev); -void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm); +void cik_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
unsigned vm_id, uint64_t pd_addr);
void cik_sdma_vm_copy_pages(struct radeon_device *rdev, struct radeon_ib *ib,
@@ -811,7 +816,8 @@ void cik_sdma_vm_set_pages(struct radeon_device *rdev, uint32_t incr, uint32_t flags); void cik_sdma_vm_pad_ib(struct radeon_ib *ib);
-void cik_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm); +void cik_dma_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib); u32 cik_gfx_get_rptr(struct radeon_device *rdev, struct radeon_ring *ring);unsigned vm_id, uint64_t pd_addr);
diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c index dfde266..9d0f87b 100644 --- a/drivers/gpu/drm/radeon/radeon_vm.c +++ b/drivers/gpu/drm/radeon/radeon_vm.c @@ -243,7 +243,8 @@ void radeon_vm_flush(struct radeon_device *rdev, if (!vm->last_flush || pd_addr != vm->pd_gpu_addr) { trace_radeon_vm_flush(pd_addr, ring, vm->id); vm->pd_gpu_addr = pd_addr;
radeon_ring_vm_flush(rdev, ring, vm);
radeon_ring_vm_flush(rdev, &rdev->ring[ring],
}vm->id, vm->pd_gpu_addr); }
diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c index eeea5b6..e91968b 100644 --- a/drivers/gpu/drm/radeon/si.c +++ b/drivers/gpu/drm/radeon/si.c @@ -5020,27 +5020,23 @@ static void si_vm_decode_fault(struct radeon_device *rdev, block, mc_id); }
-void si_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm) +void si_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
{unsigned vm_id, uint64_t pd_addr)
struct radeon_ring *ring = &rdev->ring[ridx];
if (vm == NULL)
return;
/* write new base address */ radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) | WRITE_DATA_DST_SEL(0)));
if (vm->id < 8) {
if (vm_id < 8) { radeon_ring_write(ring,
(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2); } else { radeon_ring_write(ring,
(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2); } radeon_ring_write(ring, 0);
radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
radeon_ring_write(ring, pd_addr >> 12); /* flush hdp cache */ radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
@@ -5056,7 +5052,7 @@ void si_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm) WRITE_DATA_DST_SEL(0))); radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2); radeon_ring_write(ring, 0);
radeon_ring_write(ring, 1 << vm->id);
radeon_ring_write(ring, 1 << vm_id); /* sync PFP to ME, otherwise we might get invalid PFP reads */ radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
diff --git a/drivers/gpu/drm/radeon/si_dma.c b/drivers/gpu/drm/radeon/si_dma.c index b58f12b..e8bc0a5 100644 --- a/drivers/gpu/drm/radeon/si_dma.c +++ b/drivers/gpu/drm/radeon/si_dma.c @@ -185,20 +185,17 @@ void si_dma_vm_set_pages(struct radeon_device *rdev, } }
-void si_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm) -{
struct radeon_ring *ring = &rdev->ring[ridx];
if (vm == NULL)
return;
+void si_dma_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
unsigned vm_id, uint64_t pd_addr)
+{ radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
if (vm->id < 8) {
radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2));
if (vm_id < 8) {
radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2)); } else {
radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2));
radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2)); }
radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
radeon_ring_write(ring, pd_addr >> 12); /* flush hdp cache */ radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
@@ -208,7 +205,7 @@ void si_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm) /* bits 0-7 are the VM contexts0-7 */ radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0)); radeon_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST >> 2));
radeon_ring_write(ring, 1 << vm->id);
radeon_ring_write(ring, 1 << vm_id);
}
/**
-- 1.9.1
Christian König wrote on 19.11.2014 20:40:
Am 19.11.2014 um 19:28 schrieb Alex Deucher:
On Wed, Nov 19, 2014 at 8:01 AM, Christian König deathsimple@vodafone.de wrote:
From: Christian König christian.koenig@amd.com
Use ring structure instead of index and provide vm_id and pd_addr separately.
Signed-off-by: Christian König christian.koenig@amd.com
Pushed to my 3.19-wip tree. I'll send out a new radeon drm-next pull request later today or tomorrow.
Please note that I had to fix another bug in the last patch pointed out by Kai Wasserbäch.
The updated patch should already be in your inbox.
Everything is fine, I'm seeing v4 on Alex's tree (http://cgit.freedesktop.org/~agd5f/linux/commit/?h=drm-next-3.19-wip&id=219af8ca826344a136f0be43ff2bcc5d33e5ddc6).
On Wed, Nov 19, 2014 at 2:48 PM, Kai Wasserbäch kai@dev.carbon-project.org wrote:
Christian König wrote on 19.11.2014 20:40:
Am 19.11.2014 um 19:28 schrieb Alex Deucher:
On Wed, Nov 19, 2014 at 8:01 AM, Christian König deathsimple@vodafone.de wrote:
From: Christian König christian.koenig@amd.com
Use ring structure instead of index and provide vm_id and pd_addr separately.
Signed-off-by: Christian König christian.koenig@amd.com
Pushed to my 3.19-wip tree. I'll send out a new radeon drm-next pull request later today or tomorrow.
Please note that I had to fix another bug in the last patch pointed out by Kai Wasserbäch.
The updated patch should already be in your inbox.
Everything is fine, I'm seeing v4 on Alex's tree (http://cgit.freedesktop.org/~agd5f/linux/commit/?h=drm-next-3.19-wip&id=219af8ca826344a136f0be43ff2bcc5d33e5ddc6).
Yup, I grabbed the latest version.
Alex
dri-devel@lists.freedesktop.org