Similar to the already pushed patches that allow concurrent buffer use from different engines, this set of patches allows concurrent VM use from different engines at the same time.
Please review and comment,
Christian.
From: Christian König <christian.koenig@amd.com>
Use ring structure instead of index and provide vm_id and pd_addr separately.
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/radeon/cik.c | 23 ++++++++++------------- drivers/gpu/drm/radeon/cik_sdma.c | 22 +++++++++------------- drivers/gpu/drm/radeon/ni.c | 14 +++++--------- drivers/gpu/drm/radeon/ni_dma.c | 14 +++++--------- drivers/gpu/drm/radeon/radeon.h | 5 +++-- drivers/gpu/drm/radeon/radeon_asic.h | 18 ++++++++++++------ drivers/gpu/drm/radeon/radeon_vm.c | 3 ++- drivers/gpu/drm/radeon/si.c | 18 +++++++----------- drivers/gpu/drm/radeon/si_dma.c | 19 ++++++++----------- 9 files changed, 61 insertions(+), 75 deletions(-)
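For orientation, the interface change boils down to the following prototypes; this is only a condensed view copied from the hunks below, with the caller line taken from the radeon_vm.c hunk:

/* before: the callback took a ring index and the whole VM object */
void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm);

/* after: the callback takes the ring structure plus vm_id and pd_addr directly */
void cik_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
                  unsigned vm_id, uint64_t pd_addr);

/* caller side in radeon_vm_flush(), matching the macro change in radeon.h */
radeon_ring_vm_flush(rdev, &rdev->ring[ring], vm->id, vm->pd_gpu_addr);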
diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index 0b5a230..16a861d 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -5953,26 +5953,23 @@ static void cik_vm_decode_fault(struct radeon_device *rdev, * Update the page table base and flush the VM TLB * using the CP (CIK). */ -void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm) +void cik_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring, + unsigned vm_id, uint64_t pd_addr) { - struct radeon_ring *ring = &rdev->ring[ridx]; - int usepfp = (ridx == RADEON_RING_TYPE_GFX_INDEX); - - if (vm == NULL) - return; + int usepfp = (ring->idx == RADEON_RING_TYPE_GFX_INDEX);
radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) | WRITE_DATA_DST_SEL(0))); - if (vm->id < 8) { + if (vm_id < 8) { radeon_ring_write(ring, - (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2); + (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2); } else { radeon_ring_write(ring, - (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2); + (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2); } radeon_ring_write(ring, 0); - radeon_ring_write(ring, vm->pd_gpu_addr >> 12); + radeon_ring_write(ring, pd_addr >> 12);
/* update SH_MEM_* regs */ radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); @@ -5980,7 +5977,7 @@ void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm) WRITE_DATA_DST_SEL(0))); radeon_ring_write(ring, SRBM_GFX_CNTL >> 2); radeon_ring_write(ring, 0); - radeon_ring_write(ring, VMID(vm->id)); + radeon_ring_write(ring, VMID(vm_id));
radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6)); radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) | @@ -6001,7 +5998,7 @@ void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm) radeon_ring_write(ring, VMID(0));
/* HDP flush */ - cik_hdp_flush_cp_ring_emit(rdev, ridx); + cik_hdp_flush_cp_ring_emit(rdev, ring->idx);
/* bits 0-15 are the VM contexts0-15 */ radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); @@ -6009,7 +6006,7 @@ void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm) WRITE_DATA_DST_SEL(0))); radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2); radeon_ring_write(ring, 0); - radeon_ring_write(ring, 1 << vm->id); + radeon_ring_write(ring, 1 << vm_id);
/* compute doesn't have PFP */ if (usepfp) { diff --git a/drivers/gpu/drm/radeon/cik_sdma.c b/drivers/gpu/drm/radeon/cik_sdma.c index c01a610..dd73246 100644 --- a/drivers/gpu/drm/radeon/cik_sdma.c +++ b/drivers/gpu/drm/radeon/cik_sdma.c @@ -905,25 +905,21 @@ void cik_sdma_vm_pad_ib(struct radeon_ib *ib) * Update the page table base and flush the VM TLB * using sDMA (CIK). */ -void cik_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm) +void cik_dma_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring, + unsigned vm_id, uint64_t pd_addr) { - struct radeon_ring *ring = &rdev->ring[ridx]; - - if (vm == NULL) - return; - radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000)); - if (vm->id < 8) { - radeon_ring_write(ring, (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2); + if (vm_id < 8) { + radeon_ring_write(ring, (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2); } else { - radeon_ring_write(ring, (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2); + radeon_ring_write(ring, (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2); } - radeon_ring_write(ring, vm->pd_gpu_addr >> 12); + radeon_ring_write(ring, pd_addr >> 12);
/* update SH_MEM_* regs */ radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000)); radeon_ring_write(ring, SRBM_GFX_CNTL >> 2); - radeon_ring_write(ring, VMID(vm->id)); + radeon_ring_write(ring, VMID(vm_id));
radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000)); radeon_ring_write(ring, SH_MEM_BASES >> 2); @@ -946,11 +942,11 @@ void cik_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm radeon_ring_write(ring, VMID(0));
/* flush HDP */ - cik_sdma_hdp_flush_ring_emit(rdev, ridx); + cik_sdma_hdp_flush_ring_emit(rdev, ring->idx);
/* flush TLB */ radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000)); radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2); - radeon_ring_write(ring, 1 << vm->id); + radeon_ring_write(ring, 1 << vm_id); }
diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c index ba89375..7f451aa 100644 --- a/drivers/gpu/drm/radeon/ni.c +++ b/drivers/gpu/drm/radeon/ni.c @@ -2495,15 +2495,11 @@ void cayman_vm_decode_fault(struct radeon_device *rdev, * Update the page table base and flush the VM TLB * using the CP (cayman-si). */ -void cayman_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm) +void cayman_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring, + unsigned vm_id, uint64_t pd_addr) { - struct radeon_ring *ring = &rdev->ring[ridx]; - - if (vm == NULL) - return; - - radeon_ring_write(ring, PACKET0(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2), 0)); - radeon_ring_write(ring, vm->pd_gpu_addr >> 12); + radeon_ring_write(ring, PACKET0(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2), 0)); + radeon_ring_write(ring, pd_addr >> 12);
/* flush hdp cache */ radeon_ring_write(ring, PACKET0(HDP_MEM_COHERENCY_FLUSH_CNTL, 0)); @@ -2511,7 +2507,7 @@ void cayman_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
/* bits 0-7 are the VM contexts0-7 */ radeon_ring_write(ring, PACKET0(VM_INVALIDATE_REQUEST, 0)); - radeon_ring_write(ring, 1 << vm->id); + radeon_ring_write(ring, 1 << vm_id);
/* sync PFP to ME, otherwise we might get invalid PFP reads */ radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0)); diff --git a/drivers/gpu/drm/radeon/ni_dma.c b/drivers/gpu/drm/radeon/ni_dma.c index 8a3e622..1d15f6b 100644 --- a/drivers/gpu/drm/radeon/ni_dma.c +++ b/drivers/gpu/drm/radeon/ni_dma.c @@ -452,16 +452,12 @@ void cayman_dma_vm_pad_ib(struct radeon_ib *ib) ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0); }
-void cayman_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm) +void cayman_dma_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring, + unsigned vm_id, uint64_t pd_addr) { - struct radeon_ring *ring = &rdev->ring[ridx]; - - if (vm == NULL) - return; - radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0)); - radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2)); - radeon_ring_write(ring, vm->pd_gpu_addr >> 12); + radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2)); + radeon_ring_write(ring, pd_addr >> 12);
/* flush hdp cache */ radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0)); @@ -471,6 +467,6 @@ void cayman_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm /* bits 0-7 are the VM contexts0-7 */ radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0)); radeon_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST >> 2)); - radeon_ring_write(ring, 1 << vm->id); + radeon_ring_write(ring, 1 << vm_id); }
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 2e41dc1..a93c9d9 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -1777,7 +1777,8 @@ struct radeon_asic_ring { void (*hdp_flush)(struct radeon_device *rdev, struct radeon_ring *ring); bool (*emit_semaphore)(struct radeon_device *rdev, struct radeon_ring *cp, struct radeon_semaphore *semaphore, bool emit_wait); - void (*vm_flush)(struct radeon_device *rdev, int ridx, struct radeon_vm *vm); + void (*vm_flush)(struct radeon_device *rdev, struct radeon_ring *ring, + unsigned vm_id, uint64_t pd_addr);
/* testing functions */ int (*ring_test)(struct radeon_device *rdev, struct radeon_ring *cp); @@ -2823,7 +2824,7 @@ static inline void radeon_ring_write(struct radeon_ring *ring, uint32_t v) #define radeon_ring_ib_execute(rdev, r, ib) (rdev)->asic->ring[(r)]->ib_execute((rdev), (ib)) #define radeon_ring_ib_parse(rdev, r, ib) (rdev)->asic->ring[(r)]->ib_parse((rdev), (ib)) #define radeon_ring_is_lockup(rdev, r, cp) (rdev)->asic->ring[(r)]->is_lockup((rdev), (cp)) -#define radeon_ring_vm_flush(rdev, r, vm) (rdev)->asic->ring[(r)]->vm_flush((rdev), (r), (vm)) +#define radeon_ring_vm_flush(rdev, r, vm_id, pd_addr) (rdev)->asic->ring[(r)->idx]->vm_flush((rdev), (r), (vm_id), (pd_addr)) #define radeon_ring_get_rptr(rdev, r) (rdev)->asic->ring[(r)->idx]->get_rptr((rdev), (r)) #define radeon_ring_get_wptr(rdev, r) (rdev)->asic->ring[(r)->idx]->get_wptr((rdev), (r)) #define radeon_ring_set_wptr(rdev, r) (rdev)->asic->ring[(r)->idx]->set_wptr((rdev), (r)) diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h index ca01bb8..73621e8 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.h +++ b/drivers/gpu/drm/radeon/radeon_asic.h @@ -601,7 +601,8 @@ int cayman_asic_reset(struct radeon_device *rdev); void cayman_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib); int cayman_vm_init(struct radeon_device *rdev); void cayman_vm_fini(struct radeon_device *rdev); -void cayman_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm); +void cayman_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring, + unsigned vm_id, uint64_t pd_addr); uint32_t cayman_vm_page_flags(struct radeon_device *rdev, uint32_t flags); int evergreen_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib); int evergreen_dma_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib); @@ -626,7 +627,8 @@ void cayman_dma_vm_set_pages(struct radeon_device *rdev, uint32_t incr, uint32_t flags); void cayman_dma_vm_pad_ib(struct radeon_ib *ib);
-void cayman_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm); +void cayman_dma_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring, + unsigned vm_id, uint64_t pd_addr);
u32 cayman_gfx_get_rptr(struct radeon_device *rdev, struct radeon_ring *ring); @@ -701,7 +703,8 @@ int si_irq_set(struct radeon_device *rdev); int si_irq_process(struct radeon_device *rdev); int si_vm_init(struct radeon_device *rdev); void si_vm_fini(struct radeon_device *rdev); -void si_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm); +void si_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring, + unsigned vm_id, uint64_t pd_addr); int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib); struct radeon_fence *si_copy_dma(struct radeon_device *rdev, uint64_t src_offset, uint64_t dst_offset, @@ -723,7 +726,8 @@ void si_dma_vm_set_pages(struct radeon_device *rdev, uint64_t addr, unsigned count, uint32_t incr, uint32_t flags);
-void si_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm); +void si_dma_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring, + unsigned vm_id, uint64_t pd_addr); u32 si_get_xclk(struct radeon_device *rdev); uint64_t si_get_gpu_clock_counter(struct radeon_device *rdev); int si_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk); @@ -795,7 +799,8 @@ int cik_irq_set(struct radeon_device *rdev); int cik_irq_process(struct radeon_device *rdev); int cik_vm_init(struct radeon_device *rdev); void cik_vm_fini(struct radeon_device *rdev); -void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm); +void cik_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring, + unsigned vm_id, uint64_t pd_addr);
void cik_sdma_vm_copy_pages(struct radeon_device *rdev, struct radeon_ib *ib, @@ -813,7 +818,8 @@ void cik_sdma_vm_set_pages(struct radeon_device *rdev, uint32_t incr, uint32_t flags); void cik_sdma_vm_pad_ib(struct radeon_ib *ib);
-void cik_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm); +void cik_dma_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring, + unsigned vm_id, uint64_t pd_addr); int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib); u32 cik_gfx_get_rptr(struct radeon_device *rdev, struct radeon_ring *ring); diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c index ce87095..29f2f5e 100644 --- a/drivers/gpu/drm/radeon/radeon_vm.c +++ b/drivers/gpu/drm/radeon/radeon_vm.c @@ -243,7 +243,8 @@ void radeon_vm_flush(struct radeon_device *rdev, if (!vm->last_flush || pd_addr != vm->pd_gpu_addr) { trace_radeon_vm_flush(pd_addr, ring, vm->id); vm->pd_gpu_addr = pd_addr; - radeon_ring_vm_flush(rdev, ring, vm); + radeon_ring_vm_flush(rdev, &rdev->ring[ring], + vm->id, vm->pd_gpu_addr); } }
diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c index a1274a3..ecfe149 100644 --- a/drivers/gpu/drm/radeon/si.c +++ b/drivers/gpu/drm/radeon/si.c @@ -5003,27 +5003,23 @@ static void si_vm_decode_fault(struct radeon_device *rdev, block, mc_id); }
-void si_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm) +void si_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring, + unsigned vm_id, uint64_t pd_addr) { - struct radeon_ring *ring = &rdev->ring[ridx]; - - if (vm == NULL) - return; - /* write new base address */ radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) | WRITE_DATA_DST_SEL(0)));
- if (vm->id < 8) { + if (vm_id < 8) { radeon_ring_write(ring, - (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2); + (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2); } else { radeon_ring_write(ring, - (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2); + (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2); } radeon_ring_write(ring, 0); - radeon_ring_write(ring, vm->pd_gpu_addr >> 12); + radeon_ring_write(ring, pd_addr >> 12);
/* flush hdp cache */ radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); @@ -5039,7 +5035,7 @@ void si_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm) WRITE_DATA_DST_SEL(0))); radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2); radeon_ring_write(ring, 0); - radeon_ring_write(ring, 1 << vm->id); + radeon_ring_write(ring, 1 << vm_id);
/* sync PFP to ME, otherwise we might get invalid PFP reads */ radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0)); diff --git a/drivers/gpu/drm/radeon/si_dma.c b/drivers/gpu/drm/radeon/si_dma.c index 9b0dfbc..6573843 100644 --- a/drivers/gpu/drm/radeon/si_dma.c +++ b/drivers/gpu/drm/radeon/si_dma.c @@ -185,20 +185,17 @@ void si_dma_vm_set_pages(struct radeon_device *rdev, } }
-void si_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm) -{ - struct radeon_ring *ring = &rdev->ring[ridx]; - - if (vm == NULL) - return; +void si_dma_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring, + unsigned vm_id, uint64_t pd_addr)
+{ radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0)); - if (vm->id < 8) { - radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2)); + if (vm_id < 8) { + radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2)); } else { - radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2)); + radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2)); } - radeon_ring_write(ring, vm->pd_gpu_addr >> 12); + radeon_ring_write(ring, pd_addr >> 12);
/* flush hdp cache */ radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0)); @@ -208,7 +205,7 @@ void si_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm) /* bits 0-7 are the VM contexts0-7 */ radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0)); radeon_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST >> 2)); - radeon_ring_write(ring, 1 << vm->id); + radeon_ring_write(ring, 1 << vm_id); }
/**
From: Christian König <christian.koenig@amd.com>
Re-reserving the BO in radeon_vm_bo_set_addr() is useless when all callers drop the reservation immediately after calling the function.
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/radeon/radeon_gem.c | 2 +- drivers/gpu/drm/radeon/radeon_kms.c | 2 -- drivers/gpu/drm/radeon/radeon_vm.c | 4 ++-- 3 files changed, 3 insertions(+), 5 deletions(-)
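The resulting calling convention, sketched from the radeon_gem.c hunk below (the initial reserve call is assumed from the surrounding function and is not part of the diff; error handling trimmed):

r = radeon_bo_reserve(rbo, false);
if (r)
        return r;
/* radeon_vm_bo_set_addr() now drops the reservation itself, so the
 * caller must not unreserve the BO afterwards */
r = radeon_vm_bo_set_addr(rdev, bo_va, args->offset, args->flags);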
diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c index 4b7c8ec..cbd7244 100644 --- a/drivers/gpu/drm/radeon/radeon_gem.c +++ b/drivers/gpu/drm/radeon/radeon_gem.c @@ -601,6 +601,7 @@ int radeon_gem_va_ioctl(struct drm_device *dev, void *data, if (bo_va->it.start) { args->operation = RADEON_VA_RESULT_VA_EXIST; args->offset = bo_va->it.start * RADEON_GPU_PAGE_SIZE; + radeon_bo_unreserve(rbo); goto out; } r = radeon_vm_bo_set_addr(rdev, bo_va, args->offset, args->flags); @@ -616,7 +617,6 @@ int radeon_gem_va_ioctl(struct drm_device *dev, void *data, args->operation = RADEON_VA_RESULT_ERROR; } out: - radeon_bo_unreserve(rbo); drm_gem_object_unreference_unlocked(gobj); return r; } diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c index 8309b11..85ee6f7 100644 --- a/drivers/gpu/drm/radeon/radeon_kms.c +++ b/drivers/gpu/drm/radeon/radeon_kms.c @@ -621,8 +621,6 @@ int radeon_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) RADEON_VA_IB_OFFSET, RADEON_VM_PAGE_READABLE | RADEON_VM_PAGE_SNOOPED); - - radeon_bo_unreserve(rdev->ring_tmp_bo.bo); if (r) { radeon_vm_fini(rdev, vm); kfree(fpriv); diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c index 29f2f5e..b53576e 100644 --- a/drivers/gpu/drm/radeon/radeon_vm.c +++ b/drivers/gpu/drm/radeon/radeon_vm.c @@ -450,7 +450,7 @@ error: * Validate and set the offset requested within the vm address space. * Returns 0 for success, error for failure. * - * Object has to be reserved! + * Object has to be reserved and gets unreserved by this function! */ int radeon_vm_bo_set_addr(struct radeon_device *rdev, struct radeon_bo_va *bo_va, @@ -575,7 +575,7 @@ int radeon_vm_bo_set_addr(struct radeon_device *rdev, }
mutex_unlock(&vm->mutex); - return radeon_bo_reserve(bo_va->bo, false); + return 0; }
/**
From: Christian König <christian.koenig@amd.com>
The PD/PTs reservation object now contains everything needed.
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/radeon/radeon_vm.c | 2 -- 1 file changed, 2 deletions(-)
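After the removal, the page directory update path syncs only to the PD's reservation object; a condensed view of the code remaining in radeon_vm_update_page_directory(), taken from the hunk below:

/* everything we need to wait for is tracked in the PD's reservation object */
radeon_semaphore_sync_resv(ib.semaphore, pd->tbo.resv, false);
/* the explicit sync to vm->last_id_use is gone */
r = radeon_ib_schedule(rdev, &ib, NULL, false);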
diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c index b53576e..f684c18 100644 --- a/drivers/gpu/drm/radeon/radeon_vm.c +++ b/drivers/gpu/drm/radeon/radeon_vm.c @@ -700,7 +700,6 @@ int radeon_vm_update_page_directory(struct radeon_device *rdev, radeon_asic_vm_pad_ib(rdev, &ib);
radeon_semaphore_sync_resv(ib.semaphore, pd->tbo.resv, false); - radeon_semaphore_sync_fence(ib.semaphore, vm->last_id_use); WARN_ON(ib.length_dw > ndw); r = radeon_ib_schedule(rdev, &ib, NULL, false); if (r) { @@ -968,7 +967,6 @@ int radeon_vm_bo_update(struct radeon_device *rdev, radeon_asic_vm_pad_ib(rdev, &ib); WARN_ON(ib.length_dw > ndw);
- radeon_semaphore_sync_fence(ib.semaphore, vm->fence); r = radeon_ib_schedule(rdev, &ib, NULL, false); if (r) { radeon_ib_free(rdev, &ib);
From: Christian König <christian.koenig@amd.com>
Previously we just allocated space for four hardware semaphores in each software semaphore object. Make software semaphore objects represent only one hardware semaphore address again by splitting the sync code into its own object.
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/radeon/Makefile | 3 +- drivers/gpu/drm/radeon/cik.c | 18 ++- drivers/gpu/drm/radeon/cik_sdma.c | 18 ++- drivers/gpu/drm/radeon/evergreen_dma.c | 18 ++- drivers/gpu/drm/radeon/r600.c | 18 ++- drivers/gpu/drm/radeon/r600_dma.c | 18 ++- drivers/gpu/drm/radeon/radeon.h | 40 +++--- drivers/gpu/drm/radeon/radeon_cs.c | 7 +- drivers/gpu/drm/radeon/radeon_ib.c | 13 +- drivers/gpu/drm/radeon/radeon_semaphore.c | 139 +-------------------- drivers/gpu/drm/radeon/radeon_sync.c | 198 ++++++++++++++++++++++++++++++ drivers/gpu/drm/radeon/radeon_vm.c | 4 +- drivers/gpu/drm/radeon/rv770_dma.c | 18 ++- drivers/gpu/drm/radeon/si_dma.c | 18 ++- 14 files changed, 286 insertions(+), 244 deletions(-) create mode 100644 drivers/gpu/drm/radeon/radeon_sync.c
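The typical life cycle of the new sync object in the copy functions, condensed from the cik_copy_cpdma() hunks below:

struct radeon_sync sync;

radeon_sync_create(&sync);                  /* just zero-initializes, cannot fail */
...
radeon_sync_resv(&sync, resv, false);       /* collect fences from the reservation object */
radeon_sync_rings(rdev, &sync, ring->idx);  /* allocate one hw semaphore per ring that
                                             * actually needs to be waited on */
...
radeon_sync_free(rdev, &sync, fence);       /* release the semaphores once fence signals */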
diff --git a/drivers/gpu/drm/radeon/Makefile b/drivers/gpu/drm/radeon/Makefile index 7d7aed5..8cc9f68 100644 --- a/drivers/gpu/drm/radeon/Makefile +++ b/drivers/gpu/drm/radeon/Makefile @@ -80,7 +80,8 @@ radeon-y += radeon_device.o radeon_asic.o radeon_kms.o \ r600_dpm.o rs780_dpm.o rv6xx_dpm.o rv770_dpm.o rv730_dpm.o rv740_dpm.o \ rv770_smc.o cypress_dpm.o btc_dpm.o sumo_dpm.o sumo_smc.o trinity_dpm.o \ trinity_smc.o ni_dpm.o si_smc.o si_dpm.o kv_smc.o kv_dpm.o ci_smc.o \ - ci_dpm.o dce6_afmt.o radeon_vm.o radeon_ucode.o radeon_ib.o radeon_mn.o + ci_dpm.o dce6_afmt.o radeon_vm.o radeon_ucode.o radeon_ib.o radeon_mn.o \ + radeon_sync.o
# add async DMA block radeon-y += \ diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index 16a861d..6dd55ea 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -3970,31 +3970,27 @@ struct radeon_fence *cik_copy_cpdma(struct radeon_device *rdev, unsigned num_gpu_pages, struct reservation_object *resv) { - struct radeon_semaphore *sem = NULL; struct radeon_fence *fence; + struct radeon_sync sync; int ring_index = rdev->asic->copy.blit_ring_index; struct radeon_ring *ring = &rdev->ring[ring_index]; u32 size_in_bytes, cur_size_in_bytes, control; int i, num_loops; int r = 0;
- r = radeon_semaphore_create(rdev, &sem); - if (r) { - DRM_ERROR("radeon: moving bo (%d).\n", r); - return ERR_PTR(r); - } + radeon_sync_create(&sync);
size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT); num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff); r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18); if (r) { DRM_ERROR("radeon: moving bo (%d).\n", r); - radeon_semaphore_free(rdev, &sem, NULL); + radeon_sync_free(rdev, &sync, NULL); return ERR_PTR(r); }
- radeon_semaphore_sync_resv(sem, resv, false); - radeon_semaphore_sync_rings(rdev, sem, ring->idx); + radeon_sync_resv(&sync, resv, false); + radeon_sync_rings(rdev, &sync, ring->idx);
for (i = 0; i < num_loops; i++) { cur_size_in_bytes = size_in_bytes; @@ -4018,12 +4014,12 @@ struct radeon_fence *cik_copy_cpdma(struct radeon_device *rdev, r = radeon_fence_emit(rdev, &fence, ring->idx); if (r) { radeon_ring_unlock_undo(rdev, ring); - radeon_semaphore_free(rdev, &sem, NULL); + radeon_sync_free(rdev, &sync, NULL); return ERR_PTR(r); }
radeon_ring_unlock_commit(rdev, ring, false); - radeon_semaphore_free(rdev, &sem, fence); + radeon_sync_free(rdev, &sync, fence);
return fence; } diff --git a/drivers/gpu/drm/radeon/cik_sdma.c b/drivers/gpu/drm/radeon/cik_sdma.c index dd73246..06602e4 100644 --- a/drivers/gpu/drm/radeon/cik_sdma.c +++ b/drivers/gpu/drm/radeon/cik_sdma.c @@ -548,31 +548,27 @@ struct radeon_fence *cik_copy_dma(struct radeon_device *rdev, unsigned num_gpu_pages, struct reservation_object *resv) { - struct radeon_semaphore *sem = NULL; struct radeon_fence *fence; + struct radeon_sync sync; int ring_index = rdev->asic->copy.dma_ring_index; struct radeon_ring *ring = &rdev->ring[ring_index]; u32 size_in_bytes, cur_size_in_bytes; int i, num_loops; int r = 0;
- r = radeon_semaphore_create(rdev, &sem); - if (r) { - DRM_ERROR("radeon: moving bo (%d).\n", r); - return ERR_PTR(r); - } + radeon_sync_create(&sync);
size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT); num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff); r = radeon_ring_lock(rdev, ring, num_loops * 7 + 14); if (r) { DRM_ERROR("radeon: moving bo (%d).\n", r); - radeon_semaphore_free(rdev, &sem, NULL); + radeon_sync_free(rdev, &sync, NULL); return ERR_PTR(r); }
- radeon_semaphore_sync_resv(sem, resv, false); - radeon_semaphore_sync_rings(rdev, sem, ring->idx); + radeon_sync_resv(&sync, resv, false); + radeon_sync_rings(rdev, &sync, ring->idx);
for (i = 0; i < num_loops; i++) { cur_size_in_bytes = size_in_bytes; @@ -593,12 +589,12 @@ struct radeon_fence *cik_copy_dma(struct radeon_device *rdev, r = radeon_fence_emit(rdev, &fence, ring->idx); if (r) { radeon_ring_unlock_undo(rdev, ring); - radeon_semaphore_free(rdev, &sem, NULL); + radeon_sync_free(rdev, &sync, NULL); return ERR_PTR(r); }
radeon_ring_unlock_commit(rdev, ring, false); - radeon_semaphore_free(rdev, &sem, fence); + radeon_sync_free(rdev, &sync, fence);
return fence; } diff --git a/drivers/gpu/drm/radeon/evergreen_dma.c b/drivers/gpu/drm/radeon/evergreen_dma.c index 946f37d..2958fa8 100644 --- a/drivers/gpu/drm/radeon/evergreen_dma.c +++ b/drivers/gpu/drm/radeon/evergreen_dma.c @@ -110,31 +110,27 @@ struct radeon_fence *evergreen_copy_dma(struct radeon_device *rdev, unsigned num_gpu_pages, struct reservation_object *resv) { - struct radeon_semaphore *sem = NULL; struct radeon_fence *fence; + struct radeon_sync sync; int ring_index = rdev->asic->copy.dma_ring_index; struct radeon_ring *ring = &rdev->ring[ring_index]; u32 size_in_dw, cur_size_in_dw; int i, num_loops; int r = 0;
- r = radeon_semaphore_create(rdev, &sem); - if (r) { - DRM_ERROR("radeon: moving bo (%d).\n", r); - return ERR_PTR(r); - } + radeon_sync_create(&sync);
size_in_dw = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT) / 4; num_loops = DIV_ROUND_UP(size_in_dw, 0xfffff); r = radeon_ring_lock(rdev, ring, num_loops * 5 + 11); if (r) { DRM_ERROR("radeon: moving bo (%d).\n", r); - radeon_semaphore_free(rdev, &sem, NULL); + radeon_sync_free(rdev, &sync, NULL); return ERR_PTR(r); }
- radeon_semaphore_sync_resv(sem, resv, false); - radeon_semaphore_sync_rings(rdev, sem, ring->idx); + radeon_sync_resv(&sync, resv, false); + radeon_sync_rings(rdev, &sync, ring->idx);
for (i = 0; i < num_loops; i++) { cur_size_in_dw = size_in_dw; @@ -153,12 +149,12 @@ struct radeon_fence *evergreen_copy_dma(struct radeon_device *rdev, r = radeon_fence_emit(rdev, &fence, ring->idx); if (r) { radeon_ring_unlock_undo(rdev, ring); - radeon_semaphore_free(rdev, &sem, NULL); + radeon_sync_free(rdev, &sync, NULL); return ERR_PTR(r); }
radeon_ring_unlock_commit(rdev, ring, false); - radeon_semaphore_free(rdev, &sem, fence); + radeon_sync_free(rdev, &sync, fence);
return fence; } diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index 5e9146b..5205070 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -2899,31 +2899,27 @@ struct radeon_fence *r600_copy_cpdma(struct radeon_device *rdev, unsigned num_gpu_pages, struct reservation_object *resv) { - struct radeon_semaphore *sem = NULL; struct radeon_fence *fence; + struct radeon_sync sync; int ring_index = rdev->asic->copy.blit_ring_index; struct radeon_ring *ring = &rdev->ring[ring_index]; u32 size_in_bytes, cur_size_in_bytes, tmp; int i, num_loops; int r = 0;
- r = radeon_semaphore_create(rdev, &sem); - if (r) { - DRM_ERROR("radeon: moving bo (%d).\n", r); - return ERR_PTR(r); - } + radeon_sync_create(&sync);
size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT); num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff); r = radeon_ring_lock(rdev, ring, num_loops * 6 + 24); if (r) { DRM_ERROR("radeon: moving bo (%d).\n", r); - radeon_semaphore_free(rdev, &sem, NULL); + radeon_sync_free(rdev, &sync, NULL); return ERR_PTR(r); }
- radeon_semaphore_sync_resv(sem, resv, false); - radeon_semaphore_sync_rings(rdev, sem, ring->idx); + radeon_sync_resv(&sync, resv, false); + radeon_sync_rings(rdev, &sync, ring->idx);
radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); radeon_ring_write(ring, (WAIT_UNTIL - PACKET3_SET_CONFIG_REG_OFFSET) >> 2); @@ -2952,12 +2948,12 @@ struct radeon_fence *r600_copy_cpdma(struct radeon_device *rdev, r = radeon_fence_emit(rdev, &fence, ring->idx); if (r) { radeon_ring_unlock_undo(rdev, ring); - radeon_semaphore_free(rdev, &sem, NULL); + radeon_sync_free(rdev, &sync, NULL); return ERR_PTR(r); }
radeon_ring_unlock_commit(rdev, ring, false); - radeon_semaphore_free(rdev, &sem, fence); + radeon_sync_free(rdev, &sync, fence);
return fence; } diff --git a/drivers/gpu/drm/radeon/r600_dma.c b/drivers/gpu/drm/radeon/r600_dma.c index fc54224..8c39659 100644 --- a/drivers/gpu/drm/radeon/r600_dma.c +++ b/drivers/gpu/drm/radeon/r600_dma.c @@ -447,31 +447,27 @@ struct radeon_fence *r600_copy_dma(struct radeon_device *rdev, unsigned num_gpu_pages, struct reservation_object *resv) { - struct radeon_semaphore *sem = NULL; struct radeon_fence *fence; + struct radeon_sync sync; int ring_index = rdev->asic->copy.dma_ring_index; struct radeon_ring *ring = &rdev->ring[ring_index]; u32 size_in_dw, cur_size_in_dw; int i, num_loops; int r = 0;
- r = radeon_semaphore_create(rdev, &sem); - if (r) { - DRM_ERROR("radeon: moving bo (%d).\n", r); - return ERR_PTR(r); - } + radeon_sync_create(&sync);
size_in_dw = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT) / 4; num_loops = DIV_ROUND_UP(size_in_dw, 0xFFFE); r = radeon_ring_lock(rdev, ring, num_loops * 4 + 8); if (r) { DRM_ERROR("radeon: moving bo (%d).\n", r); - radeon_semaphore_free(rdev, &sem, NULL); + radeon_sync_free(rdev, &sync, NULL); return ERR_PTR(r); }
- radeon_semaphore_sync_resv(sem, resv, false); - radeon_semaphore_sync_rings(rdev, sem, ring->idx); + radeon_sync_resv(&sync, resv, false); + radeon_sync_rings(rdev, &sync, ring->idx);
for (i = 0; i < num_loops; i++) { cur_size_in_dw = size_in_dw; @@ -490,12 +486,12 @@ struct radeon_fence *r600_copy_dma(struct radeon_device *rdev, r = radeon_fence_emit(rdev, &fence, ring->idx); if (r) { radeon_ring_unlock_undo(rdev, ring); - radeon_semaphore_free(rdev, &sem, NULL); + radeon_sync_free(rdev, &sync, NULL); return ERR_PTR(r); }
radeon_ring_unlock_commit(rdev, ring, false); - radeon_semaphore_free(rdev, &sem, fence); + radeon_sync_free(rdev, &sync, fence);
return fence; } diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index a93c9d9..c326fae 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -147,9 +147,6 @@ extern int radeon_bapm; /* number of hw syncs before falling back on blocking */ #define RADEON_NUM_SYNCS 4
-/* number of hw syncs before falling back on blocking */ -#define RADEON_NUM_SYNCS 4 - /* hardcode those limit for now */ #define RADEON_VA_IB_OFFSET (1 << 20) #define RADEON_VA_RESERVED_SIZE (8 << 20) @@ -573,10 +570,9 @@ int radeon_mode_dumb_mmap(struct drm_file *filp, * Semaphores. */ struct radeon_semaphore { - struct radeon_sa_bo *sa_bo; - signed waiters; - uint64_t gpu_addr; - struct radeon_fence *sync_to[RADEON_NUM_RINGS]; + struct radeon_sa_bo *sa_bo; + signed waiters; + uint64_t gpu_addr; };
int radeon_semaphore_create(struct radeon_device *rdev, @@ -585,19 +581,31 @@ bool radeon_semaphore_emit_signal(struct radeon_device *rdev, int ring, struct radeon_semaphore *semaphore); bool radeon_semaphore_emit_wait(struct radeon_device *rdev, int ring, struct radeon_semaphore *semaphore); -void radeon_semaphore_sync_fence(struct radeon_semaphore *semaphore, - struct radeon_fence *fence); -void radeon_semaphore_sync_resv(struct radeon_semaphore *semaphore, - struct reservation_object *resv, - bool shared); -int radeon_semaphore_sync_rings(struct radeon_device *rdev, - struct radeon_semaphore *semaphore, - int waiting_ring); void radeon_semaphore_free(struct radeon_device *rdev, struct radeon_semaphore **semaphore, struct radeon_fence *fence);
/* + * Synchronization + */ +struct radeon_sync { + struct radeon_semaphore *semaphores[RADEON_NUM_SYNCS]; + struct radeon_fence *sync_to[RADEON_NUM_RINGS]; +}; + +void radeon_sync_create(struct radeon_sync *sync); +void radeon_sync_fence(struct radeon_sync *sync, + struct radeon_fence *fence); +void radeon_sync_resv(struct radeon_sync *sync, + struct reservation_object *resv, + bool shared); +int radeon_sync_rings(struct radeon_device *rdev, + struct radeon_sync *sync, + int waiting_ring); +void radeon_sync_free(struct radeon_device *rdev, struct radeon_sync *sync, + struct radeon_fence *fence); + +/* * GART structures, functions & helpers */ struct radeon_mc; @@ -810,7 +818,7 @@ struct radeon_ib { struct radeon_fence *fence; struct radeon_vm *vm; bool is_const_ib; - struct radeon_semaphore *semaphore; + struct radeon_sync sync; };
struct radeon_ring { diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c index f662de4..0619c32 100644 --- a/drivers/gpu/drm/radeon/radeon_cs.c +++ b/drivers/gpu/drm/radeon/radeon_cs.c @@ -260,8 +260,7 @@ static void radeon_cs_sync_rings(struct radeon_cs_parser *p) continue;
resv = p->relocs[i].robj->tbo.resv; - radeon_semaphore_sync_resv(p->ib.semaphore, resv, - p->relocs[i].tv.shared); + radeon_sync_resv(&p->ib.sync, resv, p->relocs[i].tv.shared); } }
@@ -281,9 +280,7 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data) INIT_LIST_HEAD(&p->validated); p->idx = 0; p->ib.sa_bo = NULL; - p->ib.semaphore = NULL; p->const_ib.sa_bo = NULL; - p->const_ib.semaphore = NULL; p->chunk_ib_idx = -1; p->chunk_relocs_idx = -1; p->chunk_flags_idx = -1; @@ -566,7 +563,7 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev, goto out; } radeon_cs_sync_rings(parser); - radeon_semaphore_sync_fence(parser->ib.semaphore, vm->fence); + radeon_sync_fence(&parser->ib.sync, vm->fence);
if ((rdev->family >= CHIP_TAHITI) && (parser->chunk_const_ib_idx != -1)) { diff --git a/drivers/gpu/drm/radeon/radeon_ib.c b/drivers/gpu/drm/radeon/radeon_ib.c index 3f39fcc..56a1704 100644 --- a/drivers/gpu/drm/radeon/radeon_ib.c +++ b/drivers/gpu/drm/radeon/radeon_ib.c @@ -64,10 +64,7 @@ int radeon_ib_get(struct radeon_device *rdev, int ring, return r; }
- r = radeon_semaphore_create(rdev, &ib->semaphore); - if (r) { - return r; - } + radeon_sync_create(&ib->sync);
ib->ring = ring; ib->fence = NULL; @@ -96,7 +93,7 @@ int radeon_ib_get(struct radeon_device *rdev, int ring, */ void radeon_ib_free(struct radeon_device *rdev, struct radeon_ib *ib) { - radeon_semaphore_free(rdev, &ib->semaphore, ib->fence); + radeon_sync_free(rdev, &ib->sync, ib->fence); radeon_sa_bo_free(rdev, &ib->sa_bo, ib->fence); radeon_fence_unref(&ib->fence); } @@ -145,11 +142,11 @@ int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib, if (ib->vm) { struct radeon_fence *vm_id_fence; vm_id_fence = radeon_vm_grab_id(rdev, ib->vm, ib->ring); - radeon_semaphore_sync_fence(ib->semaphore, vm_id_fence); + radeon_sync_fence(&ib->sync, vm_id_fence); }
/* sync with other rings */ - r = radeon_semaphore_sync_rings(rdev, ib->semaphore, ib->ring); + r = radeon_sync_rings(rdev, &ib->sync, ib->ring); if (r) { dev_err(rdev->dev, "failed to sync rings (%d)\n", r); radeon_ring_unlock_undo(rdev, ring); @@ -161,7 +158,7 @@ int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib,
if (const_ib) { radeon_ring_ib_execute(rdev, const_ib->ring, const_ib); - radeon_semaphore_free(rdev, &const_ib->semaphore, NULL); + radeon_sync_free(rdev, &const_ib->sync, NULL); } radeon_ring_ib_execute(rdev, ib->ring, ib); r = radeon_fence_emit(rdev, &ib->fence, ib->ring); diff --git a/drivers/gpu/drm/radeon/radeon_semaphore.c b/drivers/gpu/drm/radeon/radeon_semaphore.c index 5bfbd83..e6ad54c 100644 --- a/drivers/gpu/drm/radeon/radeon_semaphore.c +++ b/drivers/gpu/drm/radeon/radeon_semaphore.c @@ -34,15 +34,14 @@ int radeon_semaphore_create(struct radeon_device *rdev, struct radeon_semaphore **semaphore) { - uint32_t *cpu_addr; - int i, r; + int r;
*semaphore = kmalloc(sizeof(struct radeon_semaphore), GFP_KERNEL); if (*semaphore == NULL) { return -ENOMEM; } - r = radeon_sa_bo_new(rdev, &rdev->ring_tmp_bo, &(*semaphore)->sa_bo, - 8 * RADEON_NUM_SYNCS, 8); + r = radeon_sa_bo_new(rdev, &rdev->ring_tmp_bo, + &(*semaphore)->sa_bo, 8, 8); if (r) { kfree(*semaphore); *semaphore = NULL; @@ -51,12 +50,7 @@ int radeon_semaphore_create(struct radeon_device *rdev, (*semaphore)->waiters = 0; (*semaphore)->gpu_addr = radeon_sa_bo_gpu_addr((*semaphore)->sa_bo);
- cpu_addr = radeon_sa_bo_cpu_addr((*semaphore)->sa_bo); - for (i = 0; i < RADEON_NUM_SYNCS; ++i) - cpu_addr[i] = 0; - - for (i = 0; i < RADEON_NUM_RINGS; ++i) - (*semaphore)->sync_to[i] = NULL; + *((uint64_t *)radeon_sa_bo_cpu_addr((*semaphore)->sa_bo)) = 0;
return 0; } @@ -95,131 +89,6 @@ bool radeon_semaphore_emit_wait(struct radeon_device *rdev, int ridx, return false; }
-/** - * radeon_semaphore_sync_fence - use the semaphore to sync to a fence - * - * @semaphore: semaphore object to add fence to - * @fence: fence to sync to - * - * Sync to the fence using this semaphore object - */ -void radeon_semaphore_sync_fence(struct radeon_semaphore *semaphore, - struct radeon_fence *fence) -{ - struct radeon_fence *other; - - if (!fence) - return; - - other = semaphore->sync_to[fence->ring]; - semaphore->sync_to[fence->ring] = radeon_fence_later(fence, other); -} - -/** - * radeon_semaphore_sync_to - use the semaphore to sync to a reservation object - * - * @sema: semaphore object to add fence from reservation object to - * @resv: reservation object with embedded fence - * @shared: true if we should onyl sync to the exclusive fence - * - * Sync to the fence using this semaphore object - */ -void radeon_semaphore_sync_resv(struct radeon_semaphore *sema, - struct reservation_object *resv, - bool shared) -{ - struct reservation_object_list *flist; - struct fence *f; - unsigned i; - - /* always sync to the exclusive fence */ - f = reservation_object_get_excl(resv); - radeon_semaphore_sync_fence(sema, (struct radeon_fence*)f); - - flist = reservation_object_get_list(resv); - if (shared || !flist) - return; - - for (i = 0; i < flist->shared_count; ++i) { - f = rcu_dereference_protected(flist->shared[i], - reservation_object_held(resv)); - radeon_semaphore_sync_fence(sema, (struct radeon_fence*)f); - } -} - -/** - * radeon_semaphore_sync_rings - sync ring to all registered fences - * - * @rdev: radeon_device pointer - * @semaphore: semaphore object to use for sync - * @ring: ring that needs sync - * - * Ensure that all registered fences are signaled before letting - * the ring continue. The caller must hold the ring lock. - */ -int radeon_semaphore_sync_rings(struct radeon_device *rdev, - struct radeon_semaphore *semaphore, - int ring) -{ - unsigned count = 0; - int i, r; - - for (i = 0; i < RADEON_NUM_RINGS; ++i) { - struct radeon_fence *fence = semaphore->sync_to[i]; - - /* check if we really need to sync */ - if (!radeon_fence_need_sync(fence, ring)) - continue; - - /* prevent GPU deadlocks */ - if (!rdev->ring[i].ready) { - dev_err(rdev->dev, "Syncing to a disabled ring!"); - return -EINVAL; - } - - if (++count > RADEON_NUM_SYNCS) { - /* not enough room, wait manually */ - r = radeon_fence_wait(fence, false); - if (r) - return r; - continue; - } - - /* allocate enough space for sync command */ - r = radeon_ring_alloc(rdev, &rdev->ring[i], 16); - if (r) { - return r; - } - - /* emit the signal semaphore */ - if (!radeon_semaphore_emit_signal(rdev, i, semaphore)) { - /* signaling wasn't successful wait manually */ - radeon_ring_undo(&rdev->ring[i]); - r = radeon_fence_wait(fence, false); - if (r) - return r; - continue; - } - - /* we assume caller has already allocated space on waiters ring */ - if (!radeon_semaphore_emit_wait(rdev, ring, semaphore)) { - /* waiting wasn't successful wait manually */ - radeon_ring_undo(&rdev->ring[i]); - r = radeon_fence_wait(fence, false); - if (r) - return r; - continue; - } - - radeon_ring_commit(rdev, &rdev->ring[i], false); - radeon_fence_note_sync(fence, ring); - - semaphore->gpu_addr += 8; - } - - return 0; -} - void radeon_semaphore_free(struct radeon_device *rdev, struct radeon_semaphore **semaphore, struct radeon_fence *fence) diff --git a/drivers/gpu/drm/radeon/radeon_sync.c b/drivers/gpu/drm/radeon/radeon_sync.c new file mode 100644 index 0000000..77b7401 --- /dev/null +++ b/drivers/gpu/drm/radeon/radeon_sync.c @@ -0,0 
+1,198 @@ +/* + * Copyright 2014 Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + */ +/* + * Authors: + * Christian König christian.koenig@amd.com + */ + +#include <drm/drmP.h> +#include "radeon.h" +#include "radeon_trace.h" + +/** + * radeon_sync_create - zero init sync object + * + * @sync: sync object to initialize + * + * Just clear the sync object for now. + */ +void radeon_sync_create(struct radeon_sync *sync) +{ + unsigned i; + + for (i = 0; i < RADEON_NUM_SYNCS; ++i) + sync->semaphores[i] = NULL; + + for (i = 0; i < RADEON_NUM_RINGS; ++i) + sync->sync_to[i] = NULL; +} + +/** + * radeon_sync_fence - use the semaphore to sync to a fence + * + * @sync: sync object to add fence to + * @fence: fence to sync to + * + * Sync to the fence using the semaphore objects + */ +void radeon_sync_fence(struct radeon_sync *sync, + struct radeon_fence *fence) +{ + struct radeon_fence *other; + + if (!fence) + return; + + other = sync->sync_to[fence->ring]; + sync->sync_to[fence->ring] = radeon_fence_later(fence, other); +} + +/** + * radeon_sync_resv - use the semaphores to sync to a reservation object + * + * @sync: sync object to add fences from reservation object to + * @resv: reservation object with embedded fence + * @shared: true if we should onyl sync to the exclusive fence + * + * Sync to the fence using the semaphore objects + */ +void radeon_sync_resv(struct radeon_sync *sync, + struct reservation_object *resv, + bool shared) +{ + struct reservation_object_list *flist; + struct fence *f; + unsigned i; + + /* always sync to the exclusive fence */ + f = reservation_object_get_excl(resv); + radeon_sync_fence(sync, (struct radeon_fence*)f); + + flist = reservation_object_get_list(resv); + if (shared || !flist) + return; + + for (i = 0; i < flist->shared_count; ++i) { + f = rcu_dereference_protected(flist->shared[i], + reservation_object_held(resv)); + radeon_sync_fence(sync, (struct radeon_fence*)f); + } +} + +/** + * radeon_sync_rings - sync ring to all registered fences + * + * @rdev: radeon_device pointer + * @sync: sync object to use + * @ring: ring that needs sync + * + * Ensure that all registered fences are signaled before letting + * the ring continue. The caller must hold the ring lock. 
+ */ +int radeon_sync_rings(struct radeon_device *rdev, + struct radeon_sync *sync, + int ring) +{ + unsigned count = 0; + int i, r; + + for (i = 0; i < RADEON_NUM_RINGS; ++i) { + struct radeon_fence *fence = sync->sync_to[i]; + struct radeon_semaphore *semaphore; + + /* check if we really need to sync */ + if (!radeon_fence_need_sync(fence, ring)) + continue; + + /* prevent GPU deadlocks */ + if (!rdev->ring[i].ready) { + dev_err(rdev->dev, "Syncing to a disabled ring!"); + return -EINVAL; + } + + if (count >= RADEON_NUM_SYNCS) { + /* not enough room, wait manually */ + r = radeon_fence_wait(fence, false); + if (r) + return r; + continue; + } + r = radeon_semaphore_create(rdev, &semaphore); + if (r) + return r; + + sync->semaphores[count++] = semaphore; + + /* allocate enough space for sync command */ + r = radeon_ring_alloc(rdev, &rdev->ring[i], 16); + if (r) + return r; + + /* emit the signal semaphore */ + if (!radeon_semaphore_emit_signal(rdev, i, semaphore)) { + /* signaling wasn't successful wait manually */ + radeon_ring_undo(&rdev->ring[i]); + r = radeon_fence_wait(fence, false); + if (r) + return r; + continue; + } + + /* we assume caller has already allocated space on waiters ring */ + if (!radeon_semaphore_emit_wait(rdev, ring, semaphore)) { + /* waiting wasn't successful wait manually */ + radeon_ring_undo(&rdev->ring[i]); + r = radeon_fence_wait(fence, false); + if (r) + return r; + continue; + } + + radeon_ring_commit(rdev, &rdev->ring[i], false); + radeon_fence_note_sync(fence, ring); + } + + return 0; +} + +/** + * radeon_sync_free - free the sync object + * + * @rdev: radeon_device pointer + * @sync: sync object to use + * @fence: fence to use for the free + * + * Free the sync object by freeing all semaphores in it. + */ +void radeon_sync_free(struct radeon_device *rdev, + struct radeon_sync *sync, + struct radeon_fence *fence) +{ + unsigned i; + + for (i = 0; i < RADEON_NUM_SYNCS; ++i) + radeon_semaphore_free(rdev, &sync->semaphores[i], fence); +} diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c index f684c18..411146a 100644 --- a/drivers/gpu/drm/radeon/radeon_vm.c +++ b/drivers/gpu/drm/radeon/radeon_vm.c @@ -699,7 +699,7 @@ int radeon_vm_update_page_directory(struct radeon_device *rdev, if (ib.length_dw != 0) { radeon_asic_vm_pad_ib(rdev, &ib);
- radeon_semaphore_sync_resv(ib.semaphore, pd->tbo.resv, false); + radeon_sync_resv(&ib.sync, pd->tbo.resv, false); WARN_ON(ib.length_dw > ndw); r = radeon_ib_schedule(rdev, &ib, NULL, false); if (r) { @@ -825,7 +825,7 @@ static void radeon_vm_update_ptes(struct radeon_device *rdev, unsigned nptes; uint64_t pte;
- radeon_semaphore_sync_resv(ib->semaphore, pt->tbo.resv, false); + radeon_sync_resv(&ib->sync, pt->tbo.resv, false);
if ((addr & ~mask) == (end & ~mask)) nptes = end - addr; diff --git a/drivers/gpu/drm/radeon/rv770_dma.c b/drivers/gpu/drm/radeon/rv770_dma.c index c112764..564822c 100644 --- a/drivers/gpu/drm/radeon/rv770_dma.c +++ b/drivers/gpu/drm/radeon/rv770_dma.c @@ -44,31 +44,27 @@ struct radeon_fence *rv770_copy_dma(struct radeon_device *rdev, unsigned num_gpu_pages, struct reservation_object *resv) { - struct radeon_semaphore *sem = NULL; struct radeon_fence *fence; + struct radeon_sync sync; int ring_index = rdev->asic->copy.dma_ring_index; struct radeon_ring *ring = &rdev->ring[ring_index]; u32 size_in_dw, cur_size_in_dw; int i, num_loops; int r = 0;
- r = radeon_semaphore_create(rdev, &sem); - if (r) { - DRM_ERROR("radeon: moving bo (%d).\n", r); - return ERR_PTR(r); - } + radeon_sync_create(&sync);
size_in_dw = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT) / 4; num_loops = DIV_ROUND_UP(size_in_dw, 0xFFFF); r = radeon_ring_lock(rdev, ring, num_loops * 5 + 8); if (r) { DRM_ERROR("radeon: moving bo (%d).\n", r); - radeon_semaphore_free(rdev, &sem, NULL); + radeon_sync_free(rdev, &sync, NULL); return ERR_PTR(r); }
- radeon_semaphore_sync_resv(sem, resv, false); - radeon_semaphore_sync_rings(rdev, sem, ring->idx); + radeon_sync_resv(&sync, resv, false); + radeon_sync_rings(rdev, &sync, ring->idx);
for (i = 0; i < num_loops; i++) { cur_size_in_dw = size_in_dw; @@ -87,12 +83,12 @@ struct radeon_fence *rv770_copy_dma(struct radeon_device *rdev, r = radeon_fence_emit(rdev, &fence, ring->idx); if (r) { radeon_ring_unlock_undo(rdev, ring); - radeon_semaphore_free(rdev, &sem, NULL); + radeon_sync_free(rdev, &sync, NULL); return ERR_PTR(r); }
radeon_ring_unlock_commit(rdev, ring, false); - radeon_semaphore_free(rdev, &sem, fence); + radeon_sync_free(rdev, &sync, fence);
return fence; } diff --git a/drivers/gpu/drm/radeon/si_dma.c b/drivers/gpu/drm/radeon/si_dma.c index 6573843..0b7d3c5 100644 --- a/drivers/gpu/drm/radeon/si_dma.c +++ b/drivers/gpu/drm/radeon/si_dma.c @@ -226,31 +226,27 @@ struct radeon_fence *si_copy_dma(struct radeon_device *rdev, unsigned num_gpu_pages, struct reservation_object *resv) { - struct radeon_semaphore *sem = NULL; struct radeon_fence *fence; + struct radeon_sync sync; int ring_index = rdev->asic->copy.dma_ring_index; struct radeon_ring *ring = &rdev->ring[ring_index]; u32 size_in_bytes, cur_size_in_bytes; int i, num_loops; int r = 0;
- r = radeon_semaphore_create(rdev, &sem); - if (r) { - DRM_ERROR("radeon: moving bo (%d).\n", r); - return ERR_PTR(r); - } + radeon_sync_create(&sync);
size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT); num_loops = DIV_ROUND_UP(size_in_bytes, 0xfffff); r = radeon_ring_lock(rdev, ring, num_loops * 5 + 11); if (r) { DRM_ERROR("radeon: moving bo (%d).\n", r); - radeon_semaphore_free(rdev, &sem, NULL); + radeon_sync_free(rdev, &sync, NULL); return ERR_PTR(r); }
- radeon_semaphore_sync_resv(sem, resv, false); - radeon_semaphore_sync_rings(rdev, sem, ring->idx); + radeon_sync_resv(&sync, resv, false); + radeon_sync_rings(rdev, &sync, ring->idx);
for (i = 0; i < num_loops; i++) { cur_size_in_bytes = size_in_bytes; @@ -269,12 +265,12 @@ struct radeon_fence *si_copy_dma(struct radeon_device *rdev, r = radeon_fence_emit(rdev, &fence, ring->idx); if (r) { radeon_ring_unlock_undo(rdev, ring); - radeon_semaphore_free(rdev, &sem, NULL); + radeon_sync_free(rdev, &sync, NULL); return ERR_PTR(r); }
radeon_ring_unlock_commit(rdev, ring, false); - radeon_semaphore_free(rdev, &sem, fence); + radeon_sync_free(rdev, &sync, fence);
return fence; }
On Fri, Sep 12, 2014 at 7:52 AM, Christian König <deathsimple@vodafone.de> wrote:
From: Christian König <christian.koenig@amd.com>
Previously we just allocated space for four hardware semaphores in each software semaphore object. Make software semaphore objects represent only one hardware semaphore address again by splitting the sync code into its own object.
Signed-off-by: Christian König <christian.koenig@amd.com>
Minor typo noted below.
Alex
radeon_sync_resv(&sync, resv, false);
radeon_sync_rings(rdev, &sync, ring->idx); for (i = 0; i < num_loops; i++) { cur_size_in_dw = size_in_dw;
@@ -490,12 +486,12 @@ struct radeon_fence *r600_copy_dma(struct radeon_device *rdev, r = radeon_fence_emit(rdev, &fence, ring->idx); if (r) { radeon_ring_unlock_undo(rdev, ring);
radeon_semaphore_free(rdev, &sem, NULL);
radeon_sync_free(rdev, &sync, NULL); return ERR_PTR(r); } radeon_ring_unlock_commit(rdev, ring, false);
radeon_semaphore_free(rdev, &sem, fence);
radeon_sync_free(rdev, &sync, fence); return fence;
} diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index a93c9d9..c326fae 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -147,9 +147,6 @@ extern int radeon_bapm; /* number of hw syncs before falling back on blocking */ #define RADEON_NUM_SYNCS 4
-/* number of hw syncs before falling back on blocking */ -#define RADEON_NUM_SYNCS 4
/* hardcode those limit for now */ #define RADEON_VA_IB_OFFSET (1 << 20) #define RADEON_VA_RESERVED_SIZE (8 << 20) @@ -573,10 +570,9 @@ int radeon_mode_dumb_mmap(struct drm_file *filp,
- Semaphores.
*/ struct radeon_semaphore {
struct radeon_sa_bo *sa_bo;
signed waiters;
uint64_t gpu_addr;
struct radeon_fence *sync_to[RADEON_NUM_RINGS];
struct radeon_sa_bo *sa_bo;
signed waiters;
uint64_t gpu_addr;
};
int radeon_semaphore_create(struct radeon_device *rdev, @@ -585,19 +581,31 @@ bool radeon_semaphore_emit_signal(struct radeon_device *rdev, int ring, struct radeon_semaphore *semaphore); bool radeon_semaphore_emit_wait(struct radeon_device *rdev, int ring, struct radeon_semaphore *semaphore); -void radeon_semaphore_sync_fence(struct radeon_semaphore *semaphore,
struct radeon_fence *fence);
-void radeon_semaphore_sync_resv(struct radeon_semaphore *semaphore,
struct reservation_object *resv,
bool shared);
-int radeon_semaphore_sync_rings(struct radeon_device *rdev,
struct radeon_semaphore *semaphore,
int waiting_ring);
void radeon_semaphore_free(struct radeon_device *rdev, struct radeon_semaphore **semaphore, struct radeon_fence *fence);
/*
- Synchronization
- */
+struct radeon_sync {
struct radeon_semaphore *semaphores[RADEON_NUM_SYNCS];
struct radeon_fence *sync_to[RADEON_NUM_RINGS];
+};
+void radeon_sync_create(struct radeon_sync *sync); +void radeon_sync_fence(struct radeon_sync *sync,
struct radeon_fence *fence);
+void radeon_sync_resv(struct radeon_sync *sync,
struct reservation_object *resv,
bool shared);
+int radeon_sync_rings(struct radeon_device *rdev,
struct radeon_sync *sync,
int waiting_ring);
+void radeon_sync_free(struct radeon_device *rdev, struct radeon_sync *sync,
struct radeon_fence *fence);
+/*
- GART structures, functions & helpers
*/ struct radeon_mc; @@ -810,7 +818,7 @@ struct radeon_ib { struct radeon_fence *fence; struct radeon_vm *vm; bool is_const_ib;
struct radeon_semaphore *semaphore;
struct radeon_sync sync;
};
struct radeon_ring { diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c index f662de4..0619c32 100644 --- a/drivers/gpu/drm/radeon/radeon_cs.c +++ b/drivers/gpu/drm/radeon/radeon_cs.c @@ -260,8 +260,7 @@ static void radeon_cs_sync_rings(struct radeon_cs_parser *p) continue;
resv = p->relocs[i].robj->tbo.resv;
radeon_semaphore_sync_resv(p->ib.semaphore, resv,
p->relocs[i].tv.shared);
radeon_sync_resv(&p->ib.sync, resv, p->relocs[i].tv.shared); }
}
@@ -281,9 +280,7 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data) INIT_LIST_HEAD(&p->validated); p->idx = 0; p->ib.sa_bo = NULL;
p->ib.semaphore = NULL; p->const_ib.sa_bo = NULL;
p->const_ib.semaphore = NULL; p->chunk_ib_idx = -1; p->chunk_relocs_idx = -1; p->chunk_flags_idx = -1;
@@ -566,7 +563,7 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev, goto out; } radeon_cs_sync_rings(parser);
radeon_semaphore_sync_fence(parser->ib.semaphore, vm->fence);
radeon_sync_fence(&parser->ib.sync, vm->fence); if ((rdev->family >= CHIP_TAHITI) && (parser->chunk_const_ib_idx != -1)) {
diff --git a/drivers/gpu/drm/radeon/radeon_ib.c b/drivers/gpu/drm/radeon/radeon_ib.c index 3f39fcc..56a1704 100644 --- a/drivers/gpu/drm/radeon/radeon_ib.c +++ b/drivers/gpu/drm/radeon/radeon_ib.c @@ -64,10 +64,7 @@ int radeon_ib_get(struct radeon_device *rdev, int ring, return r; }
r = radeon_semaphore_create(rdev, &ib->semaphore);
if (r) {
return r;
}
radeon_sync_create(&ib->sync); ib->ring = ring; ib->fence = NULL;
@@ -96,7 +93,7 @@ int radeon_ib_get(struct radeon_device *rdev, int ring, */ void radeon_ib_free(struct radeon_device *rdev, struct radeon_ib *ib) {
radeon_semaphore_free(rdev, &ib->semaphore, ib->fence);
radeon_sync_free(rdev, &ib->sync, ib->fence); radeon_sa_bo_free(rdev, &ib->sa_bo, ib->fence); radeon_fence_unref(&ib->fence);
} @@ -145,11 +142,11 @@ int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib, if (ib->vm) { struct radeon_fence *vm_id_fence; vm_id_fence = radeon_vm_grab_id(rdev, ib->vm, ib->ring);
radeon_semaphore_sync_fence(ib->semaphore, vm_id_fence);
radeon_sync_fence(&ib->sync, vm_id_fence); } /* sync with other rings */
r = radeon_semaphore_sync_rings(rdev, ib->semaphore, ib->ring);
r = radeon_sync_rings(rdev, &ib->sync, ib->ring); if (r) { dev_err(rdev->dev, "failed to sync rings (%d)\n", r); radeon_ring_unlock_undo(rdev, ring);
@@ -161,7 +158,7 @@ int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib,
if (const_ib) { radeon_ring_ib_execute(rdev, const_ib->ring, const_ib);
radeon_semaphore_free(rdev, &const_ib->semaphore, NULL);
radeon_sync_free(rdev, &const_ib->sync, NULL); } radeon_ring_ib_execute(rdev, ib->ring, ib); r = radeon_fence_emit(rdev, &ib->fence, ib->ring);
diff --git a/drivers/gpu/drm/radeon/radeon_semaphore.c b/drivers/gpu/drm/radeon/radeon_semaphore.c index 5bfbd83..e6ad54c 100644 --- a/drivers/gpu/drm/radeon/radeon_semaphore.c +++ b/drivers/gpu/drm/radeon/radeon_semaphore.c @@ -34,15 +34,14 @@ int radeon_semaphore_create(struct radeon_device *rdev, struct radeon_semaphore **semaphore) {
uint32_t *cpu_addr;
int i, r;
int r; *semaphore = kmalloc(sizeof(struct radeon_semaphore), GFP_KERNEL); if (*semaphore == NULL) { return -ENOMEM; }
r = radeon_sa_bo_new(rdev, &rdev->ring_tmp_bo, &(*semaphore)->sa_bo,
8 * RADEON_NUM_SYNCS, 8);
r = radeon_sa_bo_new(rdev, &rdev->ring_tmp_bo,
&(*semaphore)->sa_bo, 8, 8); if (r) { kfree(*semaphore); *semaphore = NULL;
@@ -51,12 +50,7 @@ int radeon_semaphore_create(struct radeon_device *rdev, (*semaphore)->waiters = 0; (*semaphore)->gpu_addr = radeon_sa_bo_gpu_addr((*semaphore)->sa_bo);
cpu_addr = radeon_sa_bo_cpu_addr((*semaphore)->sa_bo);
for (i = 0; i < RADEON_NUM_SYNCS; ++i)
cpu_addr[i] = 0;
for (i = 0; i < RADEON_NUM_RINGS; ++i)
(*semaphore)->sync_to[i] = NULL;
*((uint64_t *)radeon_sa_bo_cpu_addr((*semaphore)->sa_bo)) = 0; return 0;
} @@ -95,131 +89,6 @@ bool radeon_semaphore_emit_wait(struct radeon_device *rdev, int ridx, return false; }
-/**
- radeon_semaphore_sync_fence - use the semaphore to sync to a fence
- @semaphore: semaphore object to add fence to
- @fence: fence to sync to
- Sync to the fence using this semaphore object
- */
-void radeon_semaphore_sync_fence(struct radeon_semaphore *semaphore,
struct radeon_fence *fence)
-{
struct radeon_fence *other;
if (!fence)
return;
other = semaphore->sync_to[fence->ring];
semaphore->sync_to[fence->ring] = radeon_fence_later(fence, other);
-}
-/**
- radeon_semaphore_sync_to - use the semaphore to sync to a reservation object
- @sema: semaphore object to add fence from reservation object to
- @resv: reservation object with embedded fence
- @shared: true if we should onyl sync to the exclusive fence
- Sync to the fence using this semaphore object
- */
-void radeon_semaphore_sync_resv(struct radeon_semaphore *sema,
struct reservation_object *resv,
bool shared)
-{
struct reservation_object_list *flist;
struct fence *f;
unsigned i;
/* always sync to the exclusive fence */
f = reservation_object_get_excl(resv);
radeon_semaphore_sync_fence(sema, (struct radeon_fence*)f);
flist = reservation_object_get_list(resv);
if (shared || !flist)
return;
for (i = 0; i < flist->shared_count; ++i) {
f = rcu_dereference_protected(flist->shared[i],
reservation_object_held(resv));
radeon_semaphore_sync_fence(sema, (struct radeon_fence*)f);
}
-}
-/**
- radeon_semaphore_sync_rings - sync ring to all registered fences
- @rdev: radeon_device pointer
- @semaphore: semaphore object to use for sync
- @ring: ring that needs sync
- Ensure that all registered fences are signaled before letting
- the ring continue. The caller must hold the ring lock.
- */
-int radeon_semaphore_sync_rings(struct radeon_device *rdev,
struct radeon_semaphore *semaphore,
int ring)
-{
unsigned count = 0;
int i, r;
for (i = 0; i < RADEON_NUM_RINGS; ++i) {
struct radeon_fence *fence = semaphore->sync_to[i];
/* check if we really need to sync */
if (!radeon_fence_need_sync(fence, ring))
continue;
/* prevent GPU deadlocks */
if (!rdev->ring[i].ready) {
dev_err(rdev->dev, "Syncing to a disabled ring!");
return -EINVAL;
}
if (++count > RADEON_NUM_SYNCS) {
/* not enough room, wait manually */
r = radeon_fence_wait(fence, false);
if (r)
return r;
continue;
}
/* allocate enough space for sync command */
r = radeon_ring_alloc(rdev, &rdev->ring[i], 16);
if (r) {
return r;
}
/* emit the signal semaphore */
if (!radeon_semaphore_emit_signal(rdev, i, semaphore)) {
/* signaling wasn't successful wait manually */
radeon_ring_undo(&rdev->ring[i]);
r = radeon_fence_wait(fence, false);
if (r)
return r;
continue;
}
/* we assume caller has already allocated space on waiters ring */
if (!radeon_semaphore_emit_wait(rdev, ring, semaphore)) {
/* waiting wasn't successful wait manually */
radeon_ring_undo(&rdev->ring[i]);
r = radeon_fence_wait(fence, false);
if (r)
return r;
continue;
}
radeon_ring_commit(rdev, &rdev->ring[i], false);
radeon_fence_note_sync(fence, ring);
semaphore->gpu_addr += 8;
}
return 0;
-}
void radeon_semaphore_free(struct radeon_device *rdev, struct radeon_semaphore **semaphore, struct radeon_fence *fence) diff --git a/drivers/gpu/drm/radeon/radeon_sync.c b/drivers/gpu/drm/radeon/radeon_sync.c new file mode 100644 index 0000000..77b7401 --- /dev/null +++ b/drivers/gpu/drm/radeon/radeon_sync.c @@ -0,0 +1,198 @@ +/*
- Copyright 2014 Advanced Micro Devices, Inc.
- All Rights Reserved.
- Permission is hereby granted, free of charge, to any person obtaining a
- copy of this software and associated documentation files (the
- "Software"), to deal in the Software without restriction, including
- without limitation the rights to use, copy, modify, merge, publish,
- distribute, sub license, and/or sell copies of the Software, and to
- permit persons to whom the Software is furnished to do so, subject to
- the following conditions:
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
- DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- USE OR OTHER DEALINGS IN THE SOFTWARE.
- The above copyright notice and this permission notice (including the
- next paragraph) shall be included in all copies or substantial portions
- of the Software.
- */
+/*
- Authors:
- Christian König christian.koenig@amd.com
- */
+#include <drm/drmP.h> +#include "radeon.h" +#include "radeon_trace.h"
+/**
- radeon_sync_create - zero init sync object
- @sync: sync object to initialize
- Just clear the sync object for now.
- */
+void radeon_sync_create(struct radeon_sync *sync) +{
unsigned i;
for (i = 0; i < RADEON_NUM_SYNCS; ++i)
sync->semaphores[i] = NULL;
for (i = 0; i < RADEON_NUM_RINGS; ++i)
sync->sync_to[i] = NULL;
+}
+/**
- radeon_sync_fence - use the semaphore to sync to a fence
- @sync: sync object to add fence to
- @fence: fence to sync to
- Sync to the fence using the semaphore objects
- */
+void radeon_sync_fence(struct radeon_sync *sync,
struct radeon_fence *fence)
+{
struct radeon_fence *other;
if (!fence)
return;
other = sync->sync_to[fence->ring];
sync->sync_to[fence->ring] = radeon_fence_later(fence, other);
+}
+/**
- radeon_sync_resv - use the semaphores to sync to a reservation object
- @sync: sync object to add fences from reservation object to
- @resv: reservation object with embedded fence
- @shared: true if we should onyl sync to the exclusive fence
typo: only
- Sync to the fence using the semaphore objects
- */
+void radeon_sync_resv(struct radeon_sync *sync,
struct reservation_object *resv,
bool shared)
+{
struct reservation_object_list *flist;
struct fence *f;
unsigned i;
/* always sync to the exclusive fence */
f = reservation_object_get_excl(resv);
radeon_sync_fence(sync, (struct radeon_fence*)f);
flist = reservation_object_get_list(resv);
if (shared || !flist)
return;
for (i = 0; i < flist->shared_count; ++i) {
f = rcu_dereference_protected(flist->shared[i],
reservation_object_held(resv));
radeon_sync_fence(sync, (struct radeon_fence*)f);
}
+}
+/**
- radeon_sync_rings - sync ring to all registered fences
- @rdev: radeon_device pointer
- @sync: sync object to use
- @ring: ring that needs sync
- Ensure that all registered fences are signaled before letting
- the ring continue. The caller must hold the ring lock.
- */
+int radeon_sync_rings(struct radeon_device *rdev,
struct radeon_sync *sync,
int ring)
+{
unsigned count = 0;
int i, r;
for (i = 0; i < RADEON_NUM_RINGS; ++i) {
struct radeon_fence *fence = sync->sync_to[i];
struct radeon_semaphore *semaphore;
/* check if we really need to sync */
if (!radeon_fence_need_sync(fence, ring))
continue;
/* prevent GPU deadlocks */
if (!rdev->ring[i].ready) {
dev_err(rdev->dev, "Syncing to a disabled ring!");
return -EINVAL;
}
if (count >= RADEON_NUM_SYNCS) {
/* not enough room, wait manually */
r = radeon_fence_wait(fence, false);
if (r)
return r;
continue;
}
r = radeon_semaphore_create(rdev, &semaphore);
if (r)
return r;
sync->semaphores[count++] = semaphore;
/* allocate enough space for sync command */
r = radeon_ring_alloc(rdev, &rdev->ring[i], 16);
if (r)
return r;
/* emit the signal semaphore */
if (!radeon_semaphore_emit_signal(rdev, i, semaphore)) {
/* signaling wasn't successful wait manually */
radeon_ring_undo(&rdev->ring[i]);
r = radeon_fence_wait(fence, false);
if (r)
return r;
continue;
}
/* we assume caller has already allocated space on waiters ring */
if (!radeon_semaphore_emit_wait(rdev, ring, semaphore)) {
/* waiting wasn't successful wait manually */
radeon_ring_undo(&rdev->ring[i]);
r = radeon_fence_wait(fence, false);
if (r)
return r;
continue;
}
radeon_ring_commit(rdev, &rdev->ring[i], false);
radeon_fence_note_sync(fence, ring);
}
return 0;
+}
+/**
- radeon_sync_free - free the sync object
- @rdev: radeon_device pointer
- @sync: sync object to use
- @fence: fence to use for the free
- Free the sync object by freeing all semaphores in it.
- */
+void radeon_sync_free(struct radeon_device *rdev,
struct radeon_sync *sync,
struct radeon_fence *fence)
+{
unsigned i;
for (i = 0; i < RADEON_NUM_SYNCS; ++i)
radeon_semaphore_free(rdev, &sync->semaphores[i], fence);
+} diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c index f684c18..411146a 100644 --- a/drivers/gpu/drm/radeon/radeon_vm.c +++ b/drivers/gpu/drm/radeon/radeon_vm.c @@ -699,7 +699,7 @@ int radeon_vm_update_page_directory(struct radeon_device *rdev, if (ib.length_dw != 0) { radeon_asic_vm_pad_ib(rdev, &ib);
radeon_semaphore_sync_resv(ib.semaphore, pd->tbo.resv, false);
radeon_sync_resv(&ib.sync, pd->tbo.resv, false); WARN_ON(ib.length_dw > ndw); r = radeon_ib_schedule(rdev, &ib, NULL, false); if (r) {
@@ -825,7 +825,7 @@ static void radeon_vm_update_ptes(struct radeon_device *rdev, unsigned nptes; uint64_t pte;
radeon_semaphore_sync_resv(ib->semaphore, pt->tbo.resv, false);
radeon_sync_resv(&ib->sync, pt->tbo.resv, false); if ((addr & ~mask) == (end & ~mask)) nptes = end - addr;
diff --git a/drivers/gpu/drm/radeon/rv770_dma.c b/drivers/gpu/drm/radeon/rv770_dma.c index c112764..564822c 100644 --- a/drivers/gpu/drm/radeon/rv770_dma.c +++ b/drivers/gpu/drm/radeon/rv770_dma.c @@ -44,31 +44,27 @@ struct radeon_fence *rv770_copy_dma(struct radeon_device *rdev, unsigned num_gpu_pages, struct reservation_object *resv) {
struct radeon_semaphore *sem = NULL; struct radeon_fence *fence;
struct radeon_sync sync; int ring_index = rdev->asic->copy.dma_ring_index; struct radeon_ring *ring = &rdev->ring[ring_index]; u32 size_in_dw, cur_size_in_dw; int i, num_loops; int r = 0;
r = radeon_semaphore_create(rdev, &sem);
if (r) {
DRM_ERROR("radeon: moving bo (%d).\n", r);
return ERR_PTR(r);
}
radeon_sync_create(&sync); size_in_dw = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT) / 4; num_loops = DIV_ROUND_UP(size_in_dw, 0xFFFF); r = radeon_ring_lock(rdev, ring, num_loops * 5 + 8); if (r) { DRM_ERROR("radeon: moving bo (%d).\n", r);
radeon_semaphore_free(rdev, &sem, NULL);
radeon_sync_free(rdev, &sync, NULL); return ERR_PTR(r); }
radeon_semaphore_sync_resv(sem, resv, false);
radeon_semaphore_sync_rings(rdev, sem, ring->idx);
radeon_sync_resv(&sync, resv, false);
radeon_sync_rings(rdev, &sync, ring->idx); for (i = 0; i < num_loops; i++) { cur_size_in_dw = size_in_dw;
@@ -87,12 +83,12 @@ struct radeon_fence *rv770_copy_dma(struct radeon_device *rdev, r = radeon_fence_emit(rdev, &fence, ring->idx); if (r) { radeon_ring_unlock_undo(rdev, ring);
radeon_semaphore_free(rdev, &sem, NULL);
radeon_sync_free(rdev, &sync, NULL); return ERR_PTR(r); } radeon_ring_unlock_commit(rdev, ring, false);
radeon_semaphore_free(rdev, &sem, fence);
radeon_sync_free(rdev, &sync, fence); return fence;
} diff --git a/drivers/gpu/drm/radeon/si_dma.c b/drivers/gpu/drm/radeon/si_dma.c index 6573843..0b7d3c5 100644 --- a/drivers/gpu/drm/radeon/si_dma.c +++ b/drivers/gpu/drm/radeon/si_dma.c @@ -226,31 +226,27 @@ struct radeon_fence *si_copy_dma(struct radeon_device *rdev, unsigned num_gpu_pages, struct reservation_object *resv) {
struct radeon_semaphore *sem = NULL; struct radeon_fence *fence;
struct radeon_sync sync; int ring_index = rdev->asic->copy.dma_ring_index; struct radeon_ring *ring = &rdev->ring[ring_index]; u32 size_in_bytes, cur_size_in_bytes; int i, num_loops; int r = 0;
r = radeon_semaphore_create(rdev, &sem);
if (r) {
DRM_ERROR("radeon: moving bo (%d).\n", r);
return ERR_PTR(r);
}
radeon_sync_create(&sync); size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT); num_loops = DIV_ROUND_UP(size_in_bytes, 0xfffff); r = radeon_ring_lock(rdev, ring, num_loops * 5 + 11); if (r) { DRM_ERROR("radeon: moving bo (%d).\n", r);
radeon_semaphore_free(rdev, &sem, NULL);
radeon_sync_free(rdev, &sync, NULL); return ERR_PTR(r); }
radeon_semaphore_sync_resv(sem, resv, false);
radeon_semaphore_sync_rings(rdev, sem, ring->idx);
radeon_sync_resv(&sync, resv, false);
radeon_sync_rings(rdev, &sync, ring->idx); for (i = 0; i < num_loops; i++) { cur_size_in_bytes = size_in_bytes;
@@ -269,12 +265,12 @@ struct radeon_fence *si_copy_dma(struct radeon_device *rdev, r = radeon_fence_emit(rdev, &fence, ring->idx); if (r) { radeon_ring_unlock_undo(rdev, ring);
radeon_semaphore_free(rdev, &sem, NULL);
radeon_sync_free(rdev, &sync, NULL); return ERR_PTR(r); } radeon_ring_unlock_commit(rdev, ring, false);
radeon_semaphore_free(rdev, &sem, fence);
radeon_sync_free(rdev, &sync, fence); return fence;
}
1.9.1
From: Christian König christian.koenig@amd.com
This allows us to add the real execution fence as shared.
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/radeon/radeon_object.c | 19 ++++++++++ drivers/gpu/drm/radeon/radeon_object.h | 2 ++ drivers/gpu/drm/radeon/radeon_vm.c | 65 +++++++++++++++++++++------------- 3 files changed, 62 insertions(+), 24 deletions(-)
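The effect of the new radeon_bo_fence() helper, in a reduced stand-alone form (illustrative model only; struct fence and struct reservation_object here are toy stand-ins for the real kernel types, and MAX_SHARED is picked just for this example):

#include <stdbool.h>
#include <stdio.h>

#define MAX_SHARED 16

/* toy stand-ins for the kernel's struct fence / reservation_object */
struct fence { unsigned seq; };

struct reservation_object {
        struct fence *excl;                 /* exclusive (writer) fence */
        struct fence *shared[MAX_SHARED];   /* shared (reader) fences   */
        unsigned shared_count;
};

/* model of radeon_bo_fence(): add a fence either exclusive or shared */
static void bo_fence(struct reservation_object *resv,
                     struct fence *fence, bool shared)
{
        if (shared && resv->shared_count < MAX_SHARED)
                resv->shared[resv->shared_count++] = fence;
        else if (!shared)
                resv->excl = fence;
}

int main(void)
{
        struct reservation_object resv = { 0 };
        struct fence pt_update = { .seq = 42 };

        /* page tables are reserved as shared by CS now, and the real
         * execution fence of the update is attached explicitly here */
        bo_fence(&resv, &pt_update, false);

        printf("excl seq %u, %u shared fences\n",
               resv.excl ? resv.excl->seq : 0, resv.shared_count);
        return 0;
}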
diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c index 8abee5f..8714baf 100644 --- a/drivers/gpu/drm/radeon/radeon_object.c +++ b/drivers/gpu/drm/radeon/radeon_object.c @@ -790,3 +790,22 @@ int radeon_bo_wait(struct radeon_bo *bo, u32 *mem_type, bool no_wait) ttm_bo_unreserve(&bo->tbo); return r; } + +/** + * radeon_bo_fence - add fence to buffer object + * + * @bo: buffer object in question + * @fence: fence to add + * @shared: true if fence should be added shared + * + */ +void radeon_bo_fence(struct radeon_bo *bo, struct radeon_fence *fence, + bool shared) +{ + struct reservation_object *resv = bo->tbo.resv; + + if (shared) + reservation_object_add_shared_fence(resv, &fence->base); + else + reservation_object_add_excl_fence(resv, &fence->base); +} diff --git a/drivers/gpu/drm/radeon/radeon_object.h b/drivers/gpu/drm/radeon/radeon_object.h index 98a47fd..ec80b28 100644 --- a/drivers/gpu/drm/radeon/radeon_object.h +++ b/drivers/gpu/drm/radeon/radeon_object.h @@ -154,6 +154,8 @@ extern void radeon_bo_move_notify(struct ttm_buffer_object *bo, struct ttm_mem_reg *new_mem); extern int radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo); extern int radeon_bo_get_surface_reg(struct radeon_bo *bo); +extern void radeon_bo_fence(struct radeon_bo *bo, struct radeon_fence *fence, + bool shared);
/* * sub allocation diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c index 411146a..84b2735 100644 --- a/drivers/gpu/drm/radeon/radeon_vm.c +++ b/drivers/gpu/drm/radeon/radeon_vm.c @@ -143,7 +143,7 @@ struct radeon_cs_reloc *radeon_vm_get_bos(struct radeon_device *rdev, list[0].prefered_domains = RADEON_GEM_DOMAIN_VRAM; list[0].allowed_domains = RADEON_GEM_DOMAIN_VRAM; list[0].tv.bo = &vm->page_directory->tbo; - list[0].tv.shared = false; + list[0].tv.shared = true; list[0].tiling_flags = 0; list[0].handle = 0; list_add(&list[0].tv.head, head); @@ -157,7 +157,7 @@ struct radeon_cs_reloc *radeon_vm_get_bos(struct radeon_device *rdev, list[idx].prefered_domains = RADEON_GEM_DOMAIN_VRAM; list[idx].allowed_domains = RADEON_GEM_DOMAIN_VRAM; list[idx].tv.bo = &list[idx].robj->tbo; - list[idx].tv.shared = false; + list[idx].tv.shared = true; list[idx].tiling_flags = 0; list[idx].handle = 0; list_add(&list[idx++].tv.head, head); @@ -388,35 +388,25 @@ static void radeon_vm_set_pages(struct radeon_device *rdev, static int radeon_vm_clear_bo(struct radeon_device *rdev, struct radeon_bo *bo) { - struct ttm_validate_buffer tv; - struct ww_acquire_ctx ticket; - struct list_head head; struct radeon_ib ib; unsigned entries; uint64_t addr; int r;
- memset(&tv, 0, sizeof(tv)); - tv.bo = &bo->tbo; - tv.shared = false; - - INIT_LIST_HEAD(&head); - list_add(&tv.head, &head); - - r = ttm_eu_reserve_buffers(&ticket, &head, true); - if (r) + r = radeon_bo_reserve(bo, false); + if (r) return r;
- r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false); - if (r) - goto error; + r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false); + if (r) + goto error_unreserve;
addr = radeon_bo_gpu_offset(bo); entries = radeon_bo_size(bo) / 8;
r = radeon_ib_get(rdev, R600_RING_TYPE_DMA_INDEX, &ib, NULL, 256); if (r) - goto error; + goto error_unreserve;
ib.length_dw = 0;
@@ -426,15 +416,15 @@ static int radeon_vm_clear_bo(struct radeon_device *rdev,
r = radeon_ib_schedule(rdev, &ib, NULL, false); if (r) - goto error; + goto error_free;
- ttm_eu_fence_buffer_objects(&ticket, &head, &ib.fence->base); - radeon_ib_free(rdev, &ib); + radeon_bo_fence(bo, ib.fence, false);
- return 0; +error_free: + radeon_ib_free(rdev, &ib);
-error: - ttm_eu_backoff_reservation(&ticket, &head); +error_unreserve: + radeon_bo_unreserve(bo); return r; }
@@ -706,6 +696,7 @@ int radeon_vm_update_page_directory(struct radeon_device *rdev, radeon_ib_free(rdev, &ib); return r; } + radeon_bo_fence(pd, ib.fence, false); radeon_fence_unref(&vm->fence); vm->fence = radeon_fence_ref(ib.fence); radeon_fence_unref(&vm->last_flush); @@ -862,6 +853,31 @@ static void radeon_vm_update_ptes(struct radeon_device *rdev, }
/** + * radeon_vm_fence_pts - fence page tables after an update + * + * @vm: requested vm + * @start: start of GPU address range + * @end: end of GPU address range + * @fence: fence to use + * + * Fence the page tables in the range @start - @end (cayman+). + * + * Global and local mutex must be locked! + */ +static void radeon_vm_fence_pts(struct radeon_vm *vm, + int64_t start, uint64_t end, + struct radeon_fence *fence) +{ + unsigned i; + + start >>= radeon_vm_block_size; + end >>= radeon_vm_block_size; + + for (i = start; i <= end; ++i) + radeon_bo_fence(vm->page_tables[i].bo, fence, false); +} + +/** * radeon_vm_bo_update - map a bo into the vm page table * * @rdev: radeon_device pointer @@ -972,6 +988,7 @@ int radeon_vm_bo_update(struct radeon_device *rdev, radeon_ib_free(rdev, &ib); return r; } + radeon_vm_fence_pts(vm, bo_va->it.start, bo_va->it.last + 1, ib.fence); radeon_fence_unref(&vm->fence); vm->fence = radeon_fence_ref(ib.fence); radeon_ib_free(rdev, &ib);
On Fri, Sep 12, 2014 at 7:52 AM, Christian König deathsimple@vodafone.de wrote:
From: Christian König christian.koenig@amd.com
This allows us to add the real execution fence as shared.
Signed-off-by: Christian König christian.koenig@amd.com
Minor typo noted below.
drivers/gpu/drm/radeon/radeon_object.c | 19 ++++++++++ drivers/gpu/drm/radeon/radeon_object.h | 2 ++ drivers/gpu/drm/radeon/radeon_vm.c | 65 +++++++++++++++++++++------------- 3 files changed, 62 insertions(+), 24 deletions(-)
diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c index 8abee5f..8714baf 100644 --- a/drivers/gpu/drm/radeon/radeon_object.c +++ b/drivers/gpu/drm/radeon/radeon_object.c @@ -790,3 +790,22 @@ int radeon_bo_wait(struct radeon_bo *bo, u32 *mem_type, bool no_wait) ttm_bo_unreserve(&bo->tbo); return r; }
+/**
- radeon_bo_fence - add fence to buffer object
- @bo: buffer object in question
- @fence: fence to add
- @shared: true if fence should be added shared
- */
+void radeon_bo_fence(struct radeon_bo *bo, struct radeon_fence *fence,
bool shared)
+{
struct reservation_object *resv = bo->tbo.resv;
if (shared)
reservation_object_add_shared_fence(resv, &fence->base);
else
reservation_object_add_excl_fence(resv, &fence->base);
+} diff --git a/drivers/gpu/drm/radeon/radeon_object.h b/drivers/gpu/drm/radeon/radeon_object.h index 98a47fd..ec80b28 100644 --- a/drivers/gpu/drm/radeon/radeon_object.h +++ b/drivers/gpu/drm/radeon/radeon_object.h @@ -154,6 +154,8 @@ extern void radeon_bo_move_notify(struct ttm_buffer_object *bo, struct ttm_mem_reg *new_mem); extern int radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo); extern int radeon_bo_get_surface_reg(struct radeon_bo *bo); +extern void radeon_bo_fence(struct radeon_bo *bo, struct radeon_fence *fence,
bool shared);
/*
- sub allocation
diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c index 411146a..84b2735 100644 --- a/drivers/gpu/drm/radeon/radeon_vm.c +++ b/drivers/gpu/drm/radeon/radeon_vm.c @@ -143,7 +143,7 @@ struct radeon_cs_reloc *radeon_vm_get_bos(struct radeon_device *rdev, list[0].prefered_domains = RADEON_GEM_DOMAIN_VRAM; list[0].allowed_domains = RADEON_GEM_DOMAIN_VRAM; list[0].tv.bo = &vm->page_directory->tbo;
list[0].tv.shared = false;
list[0].tv.shared = true; list[0].tiling_flags = 0; list[0].handle = 0; list_add(&list[0].tv.head, head);
@@ -157,7 +157,7 @@ struct radeon_cs_reloc *radeon_vm_get_bos(struct radeon_device *rdev, list[idx].prefered_domains = RADEON_GEM_DOMAIN_VRAM; list[idx].allowed_domains = RADEON_GEM_DOMAIN_VRAM; list[idx].tv.bo = &list[idx].robj->tbo;
list[idx].tv.shared = false;
list[idx].tv.shared = true; list[idx].tiling_flags = 0; list[idx].handle = 0; list_add(&list[idx++].tv.head, head);
@@ -388,35 +388,25 @@ static void radeon_vm_set_pages(struct radeon_device *rdev, static int radeon_vm_clear_bo(struct radeon_device *rdev, struct radeon_bo *bo) {
struct ttm_validate_buffer tv;
struct ww_acquire_ctx ticket;
struct list_head head; struct radeon_ib ib; unsigned entries; uint64_t addr; int r;
memset(&tv, 0, sizeof(tv));
tv.bo = &bo->tbo;
tv.shared = false;
INIT_LIST_HEAD(&head);
list_add(&tv.head, &head);
r = ttm_eu_reserve_buffers(&ticket, &head, true);
if (r)
r = radeon_bo_reserve(bo, false);
if (r) return r;
r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
if (r)
goto error;
r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
if (r)
goto error_unreserve; addr = radeon_bo_gpu_offset(bo); entries = radeon_bo_size(bo) / 8; r = radeon_ib_get(rdev, R600_RING_TYPE_DMA_INDEX, &ib, NULL, 256); if (r)
goto error;
goto error_unreserve; ib.length_dw = 0;
@@ -426,15 +416,15 @@ static int radeon_vm_clear_bo(struct radeon_device *rdev,
r = radeon_ib_schedule(rdev, &ib, NULL, false); if (r)
goto error;
goto error_free;
ttm_eu_fence_buffer_objects(&ticket, &head, &ib.fence->base);
radeon_ib_free(rdev, &ib);
radeon_bo_fence(bo, ib.fence, false);
return 0;
+error_free:
radeon_ib_free(rdev, &ib);
-error:
ttm_eu_backoff_reservation(&ticket, &head);
+error_unreserve:
radeon_bo_unreserve(bo); return r;
}
@@ -706,6 +696,7 @@ int radeon_vm_update_page_directory(struct radeon_device *rdev, radeon_ib_free(rdev, &ib); return r; }
radeon_bo_fence(pd, ib.fence, false); radeon_fence_unref(&vm->fence); vm->fence = radeon_fence_ref(ib.fence); radeon_fence_unref(&vm->last_flush);
@@ -862,6 +853,31 @@ static void radeon_vm_update_ptes(struct radeon_device *rdev, }
/**
- radeon_vm_fence_pts - fence page tables after an update
- @vm: requested vm
- @start: start of GPU address range
- @end: end of GPU address range
- @fence: fence to use
- Fence the page tables in the range @start - @end (cayman+).
- Global and local mutex must be locked!
- */
+static void radeon_vm_fence_pts(struct radeon_vm *vm,
int64_t start, uint64_t end,
typo: uint64_t start
struct radeon_fence *fence)
+{
unsigned i;
start >>= radeon_vm_block_size;
end >>= radeon_vm_block_size;
for (i = start; i <= end; ++i)
radeon_bo_fence(vm->page_tables[i].bo, fence, false);
+}
+/**
- radeon_vm_bo_update - map a bo into the vm page table
- @rdev: radeon_device pointer
@@ -972,6 +988,7 @@ int radeon_vm_bo_update(struct radeon_device *rdev, radeon_ib_free(rdev, &ib); return r; }
radeon_vm_fence_pts(vm, bo_va->it.start, bo_va->it.last + 1, ib.fence); radeon_fence_unref(&vm->fence); vm->fence = radeon_fence_ref(ib.fence); radeon_ib_free(rdev, &ib);
-- 1.9.1
From: Christian König christian.koenig@amd.com
Note for each fence whether it is a VM page table update or not. This allows us to determine the last VM update in a sync object and thus to figure out whether we need to flush the TLB or not.
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/radeon/radeon.h | 18 ++++++++++-------- drivers/gpu/drm/radeon/radeon_fence.c | 1 + drivers/gpu/drm/radeon/radeon_ib.c | 3 ++- drivers/gpu/drm/radeon/radeon_sync.c | 7 +++++++ drivers/gpu/drm/radeon/radeon_vm.c | 25 +++++++++++++------------ 5 files changed, 33 insertions(+), 21 deletions(-)
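To make the new bookkeeping easier to follow, here is a reduced stand-alone model of it (illustrative only: struct fence and struct sync below are toy stand-ins carrying the field names of the patch, radeon_fence_later() is approximated by a plain sequence compare, and the page-directory address check of the real flush path is left out):

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

#define NUM_RINGS 8

struct fence {
        unsigned ring;
        unsigned long long seq;
        bool is_vm_update;              /* the new flag from this patch */
};

struct sync {
        struct fence *sync_to[NUM_RINGS];
        struct fence *last_vm_update;   /* newest PD/PT update seen     */
};

/* stand-in for radeon_fence_later() */
static struct fence *fence_later(struct fence *a, struct fence *b)
{
        if (!a)
                return b;
        if (!b)
                return a;
        return a->seq > b->seq ? a : b;
}

/* model of radeon_sync_fence(): remember the newest fence per ring and,
 * separately, the newest page table update */
static void sync_fence(struct sync *s, struct fence *f)
{
        if (!f)
                return;
        s->sync_to[f->ring] = fence_later(f, s->sync_to[f->ring]);
        if (f->is_vm_update)
                s->last_vm_update = fence_later(f, s->last_vm_update);
}

/* model of the flush decision: flush when we never flushed or when the
 * last flushed update is older than the update we just synced to */
static bool need_flush(struct fence *flushed_updates, struct fence *updates)
{
        return !flushed_updates ||
               (updates && updates->seq > flushed_updates->seq);
}

int main(void)
{
        struct sync s = { { NULL }, NULL };
        struct fence pt = { .ring = 3, .seq = 10, .is_vm_update = true };
        struct fence cs = { .ring = 0, .seq = 11, .is_vm_update = false };

        sync_fence(&s, &pt);
        sync_fence(&s, &cs);
        printf("TLB flush needed: %d\n", need_flush(NULL, s.last_vm_update));
        return 0;
}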
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index c326fae..fddb19e 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -357,14 +357,15 @@ struct radeon_fence_driver { };
struct radeon_fence { - struct fence base; + struct fence base;
- struct radeon_device *rdev; - uint64_t seq; + struct radeon_device *rdev; + uint64_t seq; /* RB, DMA, etc. */ - unsigned ring; + unsigned ring; + bool is_vm_update;
- wait_queue_t fence_wake; + wait_queue_t fence_wake; };
int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring); @@ -591,6 +592,7 @@ void radeon_semaphore_free(struct radeon_device *rdev, struct radeon_sync { struct radeon_semaphore *semaphores[RADEON_NUM_SYNCS]; struct radeon_fence *sync_to[RADEON_NUM_RINGS]; + struct radeon_fence *last_vm_update; };
void radeon_sync_create(struct radeon_sync *sync); @@ -918,8 +920,8 @@ struct radeon_vm { struct mutex mutex; /* last fence for cs using this vm */ struct radeon_fence *fence; - /* last flush or NULL if we still need to flush */ - struct radeon_fence *last_flush; + /* last flushed PD/PT update */ + struct radeon_fence *flushed_updates; /* last use of vmid */ struct radeon_fence *last_id_use; }; @@ -2948,7 +2950,7 @@ struct radeon_fence *radeon_vm_grab_id(struct radeon_device *rdev, struct radeon_vm *vm, int ring); void radeon_vm_flush(struct radeon_device *rdev, struct radeon_vm *vm, - int ring); + int ring, struct radeon_fence *fence); void radeon_vm_fence(struct radeon_device *rdev, struct radeon_vm *vm, struct radeon_fence *fence); diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c index af9f2d6..71aa198 100644 --- a/drivers/gpu/drm/radeon/radeon_fence.c +++ b/drivers/gpu/drm/radeon/radeon_fence.c @@ -140,6 +140,7 @@ int radeon_fence_emit(struct radeon_device *rdev, (*fence)->rdev = rdev; (*fence)->seq = seq; (*fence)->ring = ring; + (*fence)->is_vm_update = false; fence_init(&(*fence)->base, &radeon_fence_ops, &rdev->fence_queue.lock, rdev->fence_context + ring, seq); radeon_fence_ring_emit(rdev, ring, *fence); diff --git a/drivers/gpu/drm/radeon/radeon_ib.c b/drivers/gpu/drm/radeon/radeon_ib.c index 56a1704..c39ce1f 100644 --- a/drivers/gpu/drm/radeon/radeon_ib.c +++ b/drivers/gpu/drm/radeon/radeon_ib.c @@ -154,7 +154,8 @@ int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib, }
if (ib->vm) - radeon_vm_flush(rdev, ib->vm, ib->ring); + radeon_vm_flush(rdev, ib->vm, ib->ring, + ib->sync.last_vm_update);
if (const_ib) { radeon_ring_ib_execute(rdev, const_ib->ring, const_ib); diff --git a/drivers/gpu/drm/radeon/radeon_sync.c b/drivers/gpu/drm/radeon/radeon_sync.c index 77b7401..4e7a7f7 100644 --- a/drivers/gpu/drm/radeon/radeon_sync.c +++ b/drivers/gpu/drm/radeon/radeon_sync.c @@ -48,6 +48,8 @@ void radeon_sync_create(struct radeon_sync *sync)
for (i = 0; i < RADEON_NUM_RINGS; ++i) sync->sync_to[i] = NULL; + + sync->last_vm_update = NULL; }
/** @@ -68,6 +70,11 @@ void radeon_sync_fence(struct radeon_sync *sync,
other = sync->sync_to[fence->ring]; sync->sync_to[fence->ring] = radeon_fence_later(fence, other); + + if (fence->is_vm_update) { + other = sync->last_vm_update; + sync->last_vm_update = radeon_fence_later(fence, other); + } }
/** diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c index 84b2735..ff104a5 100644 --- a/drivers/gpu/drm/radeon/radeon_vm.c +++ b/drivers/gpu/drm/radeon/radeon_vm.c @@ -190,7 +190,7 @@ struct radeon_fence *radeon_vm_grab_id(struct radeon_device *rdev, return NULL;
/* we definately need to flush */ - radeon_fence_unref(&vm->last_flush); + vm->pd_gpu_addr = ~0ll;
/* skip over VMID 0, since it is the system VM */ for (i = 1; i < rdev->vm_manager.nvm; ++i) { @@ -228,6 +228,7 @@ struct radeon_fence *radeon_vm_grab_id(struct radeon_device *rdev, * @rdev: radeon_device pointer * @vm: vm we want to flush * @ring: ring to use for flush + * @updates: last vm update that is waited for * * Flush the vm (cayman+). * @@ -235,13 +236,16 @@ struct radeon_fence *radeon_vm_grab_id(struct radeon_device *rdev, */ void radeon_vm_flush(struct radeon_device *rdev, struct radeon_vm *vm, - int ring) + int ring, struct radeon_fence *updates) { uint64_t pd_addr = radeon_bo_gpu_offset(vm->page_directory);
- /* if we can't remember our last VM flush then flush now! */ - if (!vm->last_flush || pd_addr != vm->pd_gpu_addr) { + if (pd_addr != vm->pd_gpu_addr || !vm->flushed_updates || + radeon_fence_is_earlier(vm->flushed_updates, updates)) { + trace_radeon_vm_flush(pd_addr, ring, vm->id); + radeon_fence_unref(&vm->flushed_updates); + vm->flushed_updates = radeon_fence_ref(updates); vm->pd_gpu_addr = pd_addr; radeon_ring_vm_flush(rdev, &rdev->ring[ring], vm->id, vm->pd_gpu_addr); @@ -272,10 +276,6 @@ void radeon_vm_fence(struct radeon_device *rdev,
radeon_fence_unref(&vm->last_id_use); vm->last_id_use = radeon_fence_ref(fence); - - /* we just flushed the VM, remember that */ - if (!vm->last_flush) - vm->last_flush = radeon_fence_ref(fence); }
/** @@ -418,6 +418,7 @@ static int radeon_vm_clear_bo(struct radeon_device *rdev, if (r) goto error_free;
+ ib.fence->is_vm_update = true; radeon_bo_fence(bo, ib.fence, false);
error_free: @@ -696,10 +697,10 @@ int radeon_vm_update_page_directory(struct radeon_device *rdev, radeon_ib_free(rdev, &ib); return r; } + ib.fence->is_vm_update = true; radeon_bo_fence(pd, ib.fence, false); radeon_fence_unref(&vm->fence); vm->fence = radeon_fence_ref(ib.fence); - radeon_fence_unref(&vm->last_flush); } radeon_ib_free(rdev, &ib);
@@ -988,11 +989,11 @@ int radeon_vm_bo_update(struct radeon_device *rdev, radeon_ib_free(rdev, &ib); return r; } + ib.fence->is_vm_update = true; radeon_vm_fence_pts(vm, bo_va->it.start, bo_va->it.last + 1, ib.fence); radeon_fence_unref(&vm->fence); vm->fence = radeon_fence_ref(ib.fence); radeon_ib_free(rdev, &ib); - radeon_fence_unref(&vm->last_flush);
return 0; } @@ -1123,7 +1124,7 @@ int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm) vm->id = 0; vm->ib_bo_va = NULL; vm->fence = NULL; - vm->last_flush = NULL; + vm->flushed_updates = NULL; vm->last_id_use = NULL; mutex_init(&vm->mutex); vm->va = RB_ROOT; @@ -1195,7 +1196,7 @@ void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm) radeon_bo_unref(&vm->page_directory);
radeon_fence_unref(&vm->fence); - radeon_fence_unref(&vm->last_flush); + radeon_fence_unref(&vm->flushed_updates); radeon_fence_unref(&vm->last_id_use);
mutex_destroy(&vm->mutex);
From: Christian König christian.koenig@amd.com
Use multiple VMIDs for each VM, one for each ring. That allows us to execute flushes separately on each ring; it is still not ideal because in a lot of cases rings can share IDs.
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/radeon/cik.c | 4 +-- drivers/gpu/drm/radeon/cik_sdma.c | 2 +- drivers/gpu/drm/radeon/ni.c | 6 ++-- drivers/gpu/drm/radeon/ni_dma.c | 3 +- drivers/gpu/drm/radeon/radeon.h | 36 +++++++++++++---------- drivers/gpu/drm/radeon/radeon_vm.c | 59 +++++++++++++++++++++++--------------- drivers/gpu/drm/radeon/si.c | 6 ++-- 7 files changed, 68 insertions(+), 48 deletions(-)
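A reduced sketch of the per-ring state this introduces (illustrative only; the field names mirror the patch, the ring indices in the example are arbitrary):

#include <stdio.h>

#define NUM_RINGS 8

struct fence;

/* per-ring VMID state; each VM now carries one of these per ring */
struct vm_id {
        unsigned id;                    /* hardware VMID, 0 = none      */
        unsigned long long pd_gpu_addr; /* PD address last flushed      */
        struct fence *flushed_updates;  /* last PD/PT update flushed    */
        struct fence *last_id_use;      /* last fence that used this id */
};

struct vm {
        struct vm_id ids[NUM_RINGS];    /* replaces the single vm->id   */
        /* ... page directory, page tables, mutex, ... */
};

/* an IB submitted on a given ring uses the VMID grabbed for that ring */
static unsigned ib_vm_id(const struct vm *vm, int ring)
{
        return vm ? vm->ids[ring].id : 0;
}

int main(void)
{
        static struct vm vm;

        vm.ids[0].id = 3;       /* GFX ring grabbed VMID 3 ...       */
        vm.ids[4].id = 5;       /* ... while an SDMA ring got VMID 5 */

        printf("gfx vmid %u, sdma vmid %u\n",
               ib_vm_id(&vm, 0), ib_vm_id(&vm, 4));
        return 0;
}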
diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index 6dd55ea..fae5a8c 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -4042,6 +4042,7 @@ struct radeon_fence *cik_copy_cpdma(struct radeon_device *rdev, void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) { struct radeon_ring *ring = &rdev->ring[ib->ring]; + unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0; u32 header, control = INDIRECT_BUFFER_VALID;
if (ib->is_const_ib) { @@ -4070,8 +4071,7 @@ void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); }
- control |= ib->length_dw | - (ib->vm ? (ib->vm->id << 24) : 0); + control |= ib->length_dw | (vm_id << 24);
radeon_ring_write(ring, header); radeon_ring_write(ring, diff --git a/drivers/gpu/drm/radeon/cik_sdma.c b/drivers/gpu/drm/radeon/cik_sdma.c index 06602e4..1374ecf 100644 --- a/drivers/gpu/drm/radeon/cik_sdma.c +++ b/drivers/gpu/drm/radeon/cik_sdma.c @@ -134,7 +134,7 @@ void cik_sdma_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) { struct radeon_ring *ring = &rdev->ring[ib->ring]; - u32 extra_bits = (ib->vm ? ib->vm->id : 0) & 0xf; + u32 extra_bits = (ib->vm ? ib->vm->ids[ib->ring].id : 0) & 0xf;
if (rdev->wb.enabled) { u32 next_rptr = ring->wptr + 5; diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c index 7f451aa..aca3e91 100644 --- a/drivers/gpu/drm/radeon/ni.c +++ b/drivers/gpu/drm/radeon/ni.c @@ -1366,6 +1366,7 @@ void cayman_fence_ring_emit(struct radeon_device *rdev, void cayman_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) { struct radeon_ring *ring = &rdev->ring[ib->ring]; + unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0; u32 cp_coher_cntl = PACKET3_FULL_CACHE_ENA | PACKET3_TC_ACTION_ENA | PACKET3_SH_ACTION_ENA;
@@ -1388,15 +1389,14 @@ void cayman_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) #endif (ib->gpu_addr & 0xFFFFFFFC)); radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFF); - radeon_ring_write(ring, ib->length_dw | - (ib->vm ? (ib->vm->id << 24) : 0)); + radeon_ring_write(ring, ib->length_dw | (vm_id << 24));
/* flush read cache over gart for this vmid */ radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3)); radeon_ring_write(ring, PACKET3_ENGINE_ME | cp_coher_cntl); radeon_ring_write(ring, 0xFFFFFFFF); radeon_ring_write(ring, 0); - radeon_ring_write(ring, ((ib->vm ? ib->vm->id : 0) << 24) | 10); /* poll interval */ + radeon_ring_write(ring, (vm_id << 24) | 10); /* poll interval */ }
static void cayman_cp_enable(struct radeon_device *rdev, bool enable) diff --git a/drivers/gpu/drm/radeon/ni_dma.c b/drivers/gpu/drm/radeon/ni_dma.c index 1d15f6b..efe98b1 100644 --- a/drivers/gpu/drm/radeon/ni_dma.c +++ b/drivers/gpu/drm/radeon/ni_dma.c @@ -123,6 +123,7 @@ void cayman_dma_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) { struct radeon_ring *ring = &rdev->ring[ib->ring]; + unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
if (rdev->wb.enabled) { u32 next_rptr = ring->wptr + 4; @@ -140,7 +141,7 @@ void cayman_dma_ring_ib_execute(struct radeon_device *rdev, */ while ((ring->wptr & 7) != 5) radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0)); - radeon_ring_write(ring, DMA_IB_PACKET(DMA_PACKET_INDIRECT_BUFFER, ib->vm ? ib->vm->id : 0, 0)); + radeon_ring_write(ring, DMA_IB_PACKET(DMA_PACKET_INDIRECT_BUFFER, vm_id, 0)); radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0)); radeon_ring_write(ring, (ib->length_dw << 12) | (upper_32_bits(ib->gpu_addr) & 0xFF));
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index fddb19e..bee8934 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -897,33 +897,39 @@ struct radeon_vm_pt { uint64_t addr; };
+struct radeon_vm_id { + unsigned id; + uint64_t pd_gpu_addr; + /* last flushed PD/PT update */ + struct radeon_fence *flushed_updates; + /* last use of vmid */ + struct radeon_fence *last_id_use; +}; + struct radeon_vm { - struct rb_root va; - unsigned id; + struct rb_root va;
/* BOs moved, but not yet updated in the PT */ - struct list_head invalidated; + struct list_head invalidated;
/* BOs freed, but not yet updated in the PT */ - struct list_head freed; + struct list_head freed;
/* contains the page directory */ - struct radeon_bo *page_directory; - uint64_t pd_gpu_addr; - unsigned max_pde_used; + struct radeon_bo *page_directory; + unsigned max_pde_used;
/* array of page tables, one for each page directory entry */ - struct radeon_vm_pt *page_tables; + struct radeon_vm_pt *page_tables;
- struct radeon_bo_va *ib_bo_va; + struct radeon_bo_va *ib_bo_va;
- struct mutex mutex; + struct mutex mutex; /* last fence for cs using this vm */ - struct radeon_fence *fence; - /* last flushed PD/PT update */ - struct radeon_fence *flushed_updates; - /* last use of vmid */ - struct radeon_fence *last_id_use; + struct radeon_fence *fence; + + /* for id and flush management per ring */ + struct radeon_vm_id ids[RADEON_NUM_RINGS]; };
struct radeon_vm_manager { diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c index ff104a5..2688d49 100644 --- a/drivers/gpu/drm/radeon/radeon_vm.c +++ b/drivers/gpu/drm/radeon/radeon_vm.c @@ -182,15 +182,18 @@ struct radeon_fence *radeon_vm_grab_id(struct radeon_device *rdev, struct radeon_vm *vm, int ring) { struct radeon_fence *best[RADEON_NUM_RINGS] = {}; + struct radeon_vm_id *vm_id = &vm->ids[ring]; + unsigned choices[2] = {}; unsigned i;
/* check if the id is still valid */ - if (vm->last_id_use && vm->last_id_use == rdev->vm_manager.active[vm->id]) + if (vm_id->id && vm_id->last_id_use && + vm_id->last_id_use == rdev->vm_manager.active[vm_id->id]) return NULL;
/* we definately need to flush */ - vm->pd_gpu_addr = ~0ll; + vm_id->pd_gpu_addr = ~0ll;
/* skip over VMID 0, since it is the system VM */ for (i = 1; i < rdev->vm_manager.nvm; ++i) { @@ -198,8 +201,8 @@ struct radeon_fence *radeon_vm_grab_id(struct radeon_device *rdev,
if (fence == NULL) { /* found a free one */ - vm->id = i; - trace_radeon_vm_grab_id(vm->id, ring); + vm_id->id = i; + trace_radeon_vm_grab_id(i, ring); return NULL; }
@@ -211,8 +214,8 @@ struct radeon_fence *radeon_vm_grab_id(struct radeon_device *rdev,
for (i = 0; i < 2; ++i) { if (choices[i]) { - vm->id = choices[i]; - trace_radeon_vm_grab_id(vm->id, ring); + vm_id->id = choices[i]; + trace_radeon_vm_grab_id(choices[i], ring); return rdev->vm_manager.active[choices[i]]; } } @@ -239,16 +242,18 @@ void radeon_vm_flush(struct radeon_device *rdev, int ring, struct radeon_fence *updates) { uint64_t pd_addr = radeon_bo_gpu_offset(vm->page_directory); + struct radeon_vm_id *vm_id = &vm->ids[ring];
- if (pd_addr != vm->pd_gpu_addr || !vm->flushed_updates || - radeon_fence_is_earlier(vm->flushed_updates, updates)) { + if (pd_addr != vm_id->pd_gpu_addr || !vm_id->flushed_updates || + radeon_fence_is_earlier(vm_id->flushed_updates, updates)) {
- trace_radeon_vm_flush(pd_addr, ring, vm->id); - radeon_fence_unref(&vm->flushed_updates); - vm->flushed_updates = radeon_fence_ref(updates); - vm->pd_gpu_addr = pd_addr; + trace_radeon_vm_flush(pd_addr, ring, vm->ids[ring].id); + radeon_fence_unref(&vm_id->flushed_updates); + vm_id->flushed_updates = radeon_fence_ref(updates); + vm_id->pd_gpu_addr = pd_addr; radeon_ring_vm_flush(rdev, &rdev->ring[ring], - vm->id, vm->pd_gpu_addr); + vm_id->id, vm_id->pd_gpu_addr); + } }
@@ -268,14 +273,16 @@ void radeon_vm_fence(struct radeon_device *rdev, struct radeon_vm *vm, struct radeon_fence *fence) { + unsigned vm_id = vm->ids[fence->ring].id; + radeon_fence_unref(&vm->fence); vm->fence = radeon_fence_ref(fence);
- radeon_fence_unref(&rdev->vm_manager.active[vm->id]); - rdev->vm_manager.active[vm->id] = radeon_fence_ref(fence); + radeon_fence_unref(&rdev->vm_manager.active[vm_id]); + rdev->vm_manager.active[vm_id] = radeon_fence_ref(fence);
- radeon_fence_unref(&vm->last_id_use); - vm->last_id_use = radeon_fence_ref(fence); + radeon_fence_unref(&vm->ids[fence->ring].last_id_use); + vm->ids[fence->ring].last_id_use = radeon_fence_ref(fence); }
/** @@ -1119,13 +1126,16 @@ int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm) const unsigned align = min(RADEON_VM_PTB_ALIGN_SIZE, RADEON_VM_PTE_COUNT * 8); unsigned pd_size, pd_entries, pts_size; - int r; + int i, r;
- vm->id = 0; vm->ib_bo_va = NULL; vm->fence = NULL; - vm->flushed_updates = NULL; - vm->last_id_use = NULL; + + for (i = 0; i < RADEON_NUM_RINGS; ++i) { + vm->ids[i].id = 0; + vm->ids[i].flushed_updates = NULL; + vm->ids[i].last_id_use = NULL; + } mutex_init(&vm->mutex); vm->va = RB_ROOT; INIT_LIST_HEAD(&vm->invalidated); @@ -1196,8 +1206,11 @@ void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm) radeon_bo_unref(&vm->page_directory);
radeon_fence_unref(&vm->fence); - radeon_fence_unref(&vm->flushed_updates); - radeon_fence_unref(&vm->last_id_use); + + for (i = 0; i < RADEON_NUM_RINGS; ++i) { + radeon_fence_unref(&vm->ids[i].flushed_updates); + radeon_fence_unref(&vm->ids[i].last_id_use); + }
mutex_destroy(&vm->mutex); } diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c index ecfe149..63529eb 100644 --- a/drivers/gpu/drm/radeon/si.c +++ b/drivers/gpu/drm/radeon/si.c @@ -3362,6 +3362,7 @@ void si_fence_ring_emit(struct radeon_device *rdev, void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) { struct radeon_ring *ring = &rdev->ring[ib->ring]; + unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0; u32 header;
if (ib->is_const_ib) { @@ -3397,14 +3398,13 @@ void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) #endif (ib->gpu_addr & 0xFFFFFFFC)); radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF); - radeon_ring_write(ring, ib->length_dw | - (ib->vm ? (ib->vm->id << 24) : 0)); + radeon_ring_write(ring, ib->length_dw | (vm_id << 24));
if (!ib->is_const_ib) { /* flush read cache over gart for this vmid */ radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2); - radeon_ring_write(ring, ib->vm ? ib->vm->id : 0); + radeon_ring_write(ring, vm_id); radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3)); radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA | PACKET3_TC_ACTION_ENA |
From: Christian König christian.koenig@amd.com
This allows us to finally remove the VM fence and so allows concurrent use of the VM from different engines.
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/radeon/radeon.h | 7 +++---- drivers/gpu/drm/radeon/radeon_cs.c | 5 ++++- drivers/gpu/drm/radeon/radeon_vm.c | 17 ++++++----------- 3 files changed, 13 insertions(+), 16 deletions(-)
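A stand-alone model of what the per-mapping last_pt_update buys compared to the old single vm->fence (illustrative only; struct fence is a toy stand-in and fence_later() approximates radeon_fence_later() with a sequence compare):

#include <stddef.h>
#include <stdio.h>

struct fence { unsigned long long seq; };

/* each mapping now remembers the fence of its own last PT update */
struct bo_va {
        struct fence *last_pt_update;
};

/* stand-in for radeon_fence_later() */
static struct fence *fence_later(struct fence *a, struct fence *b)
{
        if (!a)
                return b;
        if (!b)
                return a;
        return a->seq > b->seq ? a : b;
}

int main(void)
{
        /* two mappings updated by different page table IBs */
        struct fence upd1 = { .seq = 7 }, upd2 = { .seq = 9 };
        struct bo_va va[2] = { { &upd1 }, { &upd2 } };
        struct fence *wait_for = NULL;
        unsigned i;

        /* a CS only waits for the PT updates of the mappings it actually
         * uses, instead of one vm->fence covering every engine */
        for (i = 0; i < 2; i++)
                wait_for = fence_later(wait_for, va[i].last_pt_update);

        printf("CS waits for PT update seq %llu\n", wait_for->seq);
        return 0;
}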
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index bee8934..ab71050 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -453,6 +453,7 @@ struct radeon_bo_va { struct list_head bo_list; uint32_t flags; uint64_t addr; + struct radeon_fence *last_pt_update; unsigned ref_count;
/* protected by vm mutex */ @@ -907,6 +908,8 @@ struct radeon_vm_id { };
struct radeon_vm { + struct mutex mutex; + struct rb_root va;
/* BOs moved, but not yet updated in the PT */ @@ -924,10 +927,6 @@ struct radeon_vm {
struct radeon_bo_va *ib_bo_va;
- struct mutex mutex; - /* last fence for cs using this vm */ - struct radeon_fence *fence; - /* for id and flush management per ring */ struct radeon_vm_id ids[RADEON_NUM_RINGS]; }; diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c index 0619c32..9e6714b 100644 --- a/drivers/gpu/drm/radeon/radeon_cs.c +++ b/drivers/gpu/drm/radeon/radeon_cs.c @@ -494,6 +494,8 @@ static int radeon_bo_vm_update_pte(struct radeon_cs_parser *p, if (r) return r;
+ radeon_sync_resv(&p->ib.sync, vm->page_directory->tbo.resv, true); + r = radeon_vm_clear_freed(rdev, vm); if (r) return r; @@ -525,6 +527,8 @@ static int radeon_bo_vm_update_pte(struct radeon_cs_parser *p, r = radeon_vm_bo_update(rdev, bo_va, &bo->tbo.mem); if (r) return r; + + radeon_sync_fence(&p->ib.sync, bo_va->last_pt_update); }
return radeon_vm_clear_invalids(rdev, vm); @@ -563,7 +567,6 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev, goto out; } radeon_cs_sync_rings(parser); - radeon_sync_fence(&parser->ib.sync, vm->fence);
if ((rdev->family >= CHIP_TAHITI) && (parser->chunk_const_ib_idx != -1)) { diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c index 2688d49..61e697f 100644 --- a/drivers/gpu/drm/radeon/radeon_vm.c +++ b/drivers/gpu/drm/radeon/radeon_vm.c @@ -275,9 +275,6 @@ void radeon_vm_fence(struct radeon_device *rdev, { unsigned vm_id = vm->ids[fence->ring].id;
- radeon_fence_unref(&vm->fence); - vm->fence = radeon_fence_ref(fence); - radeon_fence_unref(&rdev->vm_manager.active[vm_id]); rdev->vm_manager.active[vm_id] = radeon_fence_ref(fence);
@@ -706,8 +703,6 @@ int radeon_vm_update_page_directory(struct radeon_device *rdev, } ib.fence->is_vm_update = true; radeon_bo_fence(pd, ib.fence, false); - radeon_fence_unref(&vm->fence); - vm->fence = radeon_fence_ref(ib.fence); } radeon_ib_free(rdev, &ib);
@@ -998,8 +993,8 @@ int radeon_vm_bo_update(struct radeon_device *rdev, } ib.fence->is_vm_update = true; radeon_vm_fence_pts(vm, bo_va->it.start, bo_va->it.last + 1, ib.fence); - radeon_fence_unref(&vm->fence); - vm->fence = radeon_fence_ref(ib.fence); + radeon_fence_unref(&bo_va->last_pt_update); + bo_va->last_pt_update = radeon_fence_ref(ib.fence); radeon_ib_free(rdev, &ib);
return 0; @@ -1025,6 +1020,7 @@ int radeon_vm_clear_freed(struct radeon_device *rdev, list_for_each_entry_safe(bo_va, tmp, &vm->freed, vm_status) { r = radeon_vm_bo_update(rdev, bo_va, NULL); radeon_bo_unref(&bo_va->bo); + radeon_fence_unref(&bo_va->last_pt_update); kfree(bo_va); if (r) return r; @@ -1083,6 +1079,7 @@ void radeon_vm_bo_rmv(struct radeon_device *rdev, bo_va->bo = radeon_bo_ref(bo_va->bo); list_add(&bo_va->vm_status, &vm->freed); } else { + radeon_fence_unref(&bo_va->last_pt_update); kfree(bo_va); }
@@ -1129,8 +1126,6 @@ int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm) int i, r;
vm->ib_bo_va = NULL; - vm->fence = NULL; - for (i = 0; i < RADEON_NUM_RINGS; ++i) { vm->ids[i].id = 0; vm->ids[i].flushed_updates = NULL; @@ -1191,11 +1186,13 @@ void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm) if (!r) { list_del_init(&bo_va->bo_list); radeon_bo_unreserve(bo_va->bo); + radeon_fence_unref(&bo_va->last_pt_update); kfree(bo_va); } } list_for_each_entry_safe(bo_va, tmp, &vm->freed, vm_status) { radeon_bo_unref(&bo_va->bo); + radeon_fence_unref(&bo_va->last_pt_update); kfree(bo_va); }
@@ -1205,8 +1202,6 @@ void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm)
radeon_bo_unref(&vm->page_directory);
- radeon_fence_unref(&vm->fence);
-
for (i = 0; i < RADEON_NUM_RINGS; ++i) {
radeon_fence_unref(&vm->ids[i].flushed_updates);
radeon_fence_unref(&vm->ids[i].last_id_use);
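To make the new bookkeeping in the hunks above easier to follow, here is a rough sketch of the data structures involved. The field names are the ones used in the diff (ids[], flushed_updates, last_id_use, last_pt_update); the surrounding declarations are reconstructed and may not match radeon.h exactly, so treat this as an illustration rather than the authoritative header.

/* Illustration only, reconstructed from the hunks above; see radeon.h for
 * the real definitions. The single vm->fence is gone: page table
 * synchronization is now tracked per ring and per mapping. */
struct radeon_vm_id {
    unsigned            id;                /* hardware VM id in use on this ring */
    struct radeon_fence *flushed_updates;  /* last PT update flushed on this ring */
    struct radeon_fence *last_id_use;      /* last use of this id on the ring */
};

struct radeon_bo_va {
    /* ... existing members ... */
    struct radeon_fence *last_pt_update;   /* last PT update touching this mapping */
};

struct radeon_vm {
    /* ... existing members ... */
    /* for id and flush management per ring */
    struct radeon_vm_id ids[RADEON_NUM_RINGS];
};

A command submission then waits on exactly the fences it needs: the CS path above syncs to the page directory's reservation object and to each mapping's last_pt_update instead of one per-VM fence.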
From: Christian König christian.koenig@amd.com
We never invalidate PD entries, and making them valid can run in parallel with other users of the page directory.
Signed-off-by: Christian König christian.koenig@amd.com
---
drivers/gpu/drm/radeon/radeon_vm.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c
index 61e697f..80262c6 100644
--- a/drivers/gpu/drm/radeon/radeon_vm.c
+++ b/drivers/gpu/drm/radeon/radeon_vm.c
@@ -694,7 +694,7 @@ int radeon_vm_update_page_directory(struct radeon_device *rdev,
if (ib.length_dw != 0) {
radeon_asic_vm_pad_ib(rdev, &ib);
- radeon_sync_resv(&ib.sync, pd->tbo.resv, false);
+ radeon_sync_resv(&ib.sync, pd->tbo.resv, true);
WARN_ON(ib.length_dw > ndw);
r = radeon_ib_schedule(rdev, &ib, NULL, false);
if (r) {
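A note on why flipping the last argument of radeon_sync_resv() is enough here. The assumption (worth double-checking in radeon_sync.c, the patch itself does not spell it out) is that the flag marks the access as a shared one: with true the IB only waits for the reservation object's exclusive fence, with false it additionally waits for every shared fence. Since page directory entries are never invalidated, making new ones valid is safe to overlap with engines that are merely reading the directory:

/* Assumed semantics of the last parameter (not stated in the patch):
 *   false - exclusive access, wait for the exclusive and all shared fences
 *   true  - shared access, wait for the exclusive fence only
 * PD updates only ever add valid entries, so they count as shared. */
radeon_sync_resv(&ib.sync, pd->tbo.resv, true);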
From: Christian König christian.koenig@amd.com
Only invalidating PTEs needs to be synchronized with the users of the page table; making PTEs valid can run in parallel.
Signed-off-by: Christian König christian.koenig@amd.com
---
drivers/gpu/drm/radeon/radeon_vm.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c
index 80262c6..55afb84 100644
--- a/drivers/gpu/drm/radeon/radeon_vm.c
+++ b/drivers/gpu/drm/radeon/radeon_vm.c
@@ -819,7 +819,8 @@ static void radeon_vm_update_ptes(struct radeon_device *rdev,
unsigned nptes;
uint64_t pte;
- radeon_sync_resv(&ib->sync, pt->tbo.resv, false);
+ radeon_sync_resv(&ib->sync, pt->tbo.resv,
+ !!(flags & R600_PTE_VALID));
if ((addr & ~mask) == (end & ~mask))
nptes = end - addr;
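Combined with the previous patch the rule becomes: page table writes that only make entries valid may overlap with engines using the table, while clearing entries has to wait for everyone. A minimal sketch of that decision, using a hypothetical helper name (the driver simply passes the expression to radeon_sync_resv() as shown in the hunk above):

/* Hypothetical helper, not part of the driver: may this PT update overlap
 * with engines currently using the page table? */
static bool pt_update_may_overlap(uint32_t flags)
{
    /* making entries valid is safe to overlap; invalidating them is not */
    return (flags & R600_PTE_VALID) != 0;
}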
From: Christian König christian.koenig@amd.com
This way the necessary VM update is kicked off immediately if all BOs involved are in GPU-accessible memory.
Signed-off-by: Christian König christian.koenig@amd.com
---
drivers/gpu/drm/radeon/radeon_gem.c | 60 +++++++++++++++++++++++++++++++++++++
1 file changed, 60 insertions(+)
diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c
index cbd7244..c100aa6 100644
--- a/drivers/gpu/drm/radeon/radeon_gem.c
+++ b/drivers/gpu/drm/radeon/radeon_gem.c
@@ -518,6 +518,63 @@ out:
return r;
}
+/**
+ * radeon_gem_va_update_vm - update the bo_va in its VM
+ *
+ * @rdev: radeon_device pointer
+ * @bo_va: bo_va to update
+ *
+ * Update the bo_va directly after setting its address. Errors are not
+ * vital here, so they are not reported back to userspace.
+ */
+static void radeon_gem_va_update_vm(struct radeon_device *rdev,
+ struct radeon_bo_va *bo_va)
+{
+ struct ttm_validate_buffer tv, *entry;
+ struct radeon_cs_reloc *vm_bos;
+ struct ww_acquire_ctx ticket;
+ struct list_head list;
+ unsigned domain;
+ int r;
+
+ INIT_LIST_HEAD(&list);
+
+ tv.bo = &bo_va->bo->tbo;
+ tv.shared = true;
+ list_add(&tv.head, &list);
+
+ vm_bos = radeon_vm_get_bos(rdev, bo_va->vm, &list);
+ if (!vm_bos)
+ return;
+
+ r = ttm_eu_reserve_buffers(&ticket, &list, true);
+ if (r)
+ goto error_free;
+
+ list_for_each_entry(entry, &list, head) {
+ domain = radeon_mem_type_to_domain(entry->bo->mem.mem_type);
+ /* if anything is swapped out don't swap it in here,
+ just abort and wait for the next CS */
+ if (domain == RADEON_GEM_DOMAIN_CPU)
+ goto error_unreserve;
+ }
+
+ r = radeon_vm_clear_freed(rdev, bo_va->vm);
+ if (r)
+ goto error_unreserve;
+
+ r = radeon_vm_bo_update(rdev, bo_va, &bo_va->bo->tbo.mem);
+
+error_unreserve:
+ ttm_eu_backoff_reservation(&ticket, &list);
+
+error_free:
+ kfree(vm_bos);
+
+ if (r)
+ DRM_ERROR("Couldn't update BO_VA (%d)\n", r);
+}
+
int radeon_gem_va_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp)
{
@@ -605,6 +662,9 @@ int radeon_gem_va_ioctl(struct drm_device *dev, void *data,
goto out;
}
r = radeon_vm_bo_set_addr(rdev, bo_va, args->offset, args->flags);
+ if (!r)
+ radeon_gem_va_update_vm(rdev, bo_va);
+
break;
case RADEON_VA_UNMAP:
r = radeon_vm_bo_set_addr(rdev, bo_va, 0, 0);
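Two design choices in the new helper are worth calling out: errors are only logged with DRM_ERROR and never returned to userspace, because the page tables will be brought up to date by the next command submission anyway; and the update is skipped entirely as soon as one of the involved BOs sits in CPU (swapped-out) memory, presumably so the ioctl path never has to trigger expensive buffer moves.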
On Fri, Sep 12, 2014 at 7:52 AM, Christian König deathsimple@vodafone.de wrote:
Similar to the already pushed patches for allowing concurrent buffers use from different engines this set of patches allows concurrent VM use from different engines at the same time.
Other than the typos I pointed out, this series is:

Reviewed-by: Alex Deucher alexander.deucher@amd.com