Otherwise the next IB might start reading commands with the page table still invalid.
Signed-off-by: Christian König deathsimple@vodafone.de --- drivers/gpu/drm/radeon/ni.c | 4 ++++ drivers/gpu/drm/radeon/nid.h | 1 + drivers/gpu/drm/radeon/si.c | 4 ++++ 3 files changed, 9 insertions(+)
diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c index 8c74c72..19b7fe1 100644 --- a/drivers/gpu/drm/radeon/ni.c +++ b/drivers/gpu/drm/radeon/ni.c @@ -1586,4 +1586,8 @@ void cayman_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm) /* bits 0-7 are the VM contexts0-7 */ radeon_ring_write(ring, PACKET0(VM_INVALIDATE_REQUEST, 0)); radeon_ring_write(ring, 1 << vm->id); + + /* sync PFP to ME, otherwise we might get invalid PFP reads */ + radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0)); + radeon_ring_write(ring, 0x0); } diff --git a/drivers/gpu/drm/radeon/nid.h b/drivers/gpu/drm/radeon/nid.h index 2423d1b..cbef681 100644 --- a/drivers/gpu/drm/radeon/nid.h +++ b/drivers/gpu/drm/radeon/nid.h @@ -502,6 +502,7 @@ #define PACKET3_MPEG_INDEX 0x3A #define PACKET3_WAIT_REG_MEM 0x3C #define PACKET3_MEM_WRITE 0x3D +#define PACKET3_PFP_SYNC_ME 0x42 #define PACKET3_SURFACE_SYNC 0x43 # define PACKET3_CB0_DEST_BASE_ENA (1 << 6) # define PACKET3_CB1_DEST_BASE_ENA (1 << 7) diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c index df8dd77..da184de 100644 --- a/drivers/gpu/drm/radeon/si.c +++ b/drivers/gpu/drm/radeon/si.c @@ -2868,6 +2868,10 @@ void si_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm) radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2); radeon_ring_write(ring, 0); radeon_ring_write(ring, 1 << vm->id); + + /* sync PFP to ME, otherwise we might get invalid PFP reads */ + radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0)); + radeon_ring_write(ring, 0x0); }
/*
Handle requests that won't fit into a single packet.
Signed-off-by: Christian König deathsimple@vodafone.de --- drivers/gpu/drm/radeon/ni.c | 41 +++++++++++++++++++++++------------------ 1 file changed, 23 insertions(+), 18 deletions(-)
diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c index 19b7fe1..ae6d087 100644 --- a/drivers/gpu/drm/radeon/ni.c +++ b/drivers/gpu/drm/radeon/ni.c @@ -1538,26 +1538,31 @@ void cayman_vm_set_page(struct radeon_device *rdev, uint64_t pe, { struct radeon_ring *ring = &rdev->ring[rdev->asic->vm.pt_ring_index]; uint32_t r600_flags = cayman_vm_page_flags(rdev, flags); - int i;
- radeon_ring_write(ring, PACKET3(PACKET3_ME_WRITE, 1 + count * 2)); - radeon_ring_write(ring, pe); - radeon_ring_write(ring, upper_32_bits(pe) & 0xff); - for (i = 0; i < count; ++i) { - uint64_t value = 0; - if (flags & RADEON_VM_PAGE_SYSTEM) { - value = radeon_vm_map_gart(rdev, addr); - value &= 0xFFFFFFFFFFFFF000ULL; - addr += incr; - - } else if (flags & RADEON_VM_PAGE_VALID) { - value = addr; - addr += incr; - } + while (count) { + unsigned ndw = 1 + count * 2; + if (ndw > 0x3FFF) + ndw = 0x3FFF; + + radeon_ring_write(ring, PACKET3(PACKET3_ME_WRITE, ndw)); + radeon_ring_write(ring, pe); + radeon_ring_write(ring, upper_32_bits(pe) & 0xff); + for (; ndw > 1; ndw -= 2, --count) { + uint64_t value = 0; + if (flags & RADEON_VM_PAGE_SYSTEM) { + value = radeon_vm_map_gart(rdev, addr); + value &= 0xFFFFFFFFFFFFF000ULL; + addr += incr; + + } else if (flags & RADEON_VM_PAGE_VALID) { + value = addr; + addr += incr; + }
- value |= r600_flags; - radeon_ring_write(ring, value); - radeon_ring_write(ring, upper_32_bits(value)); + value |= r600_flags; + radeon_ring_write(ring, value); + radeon_ring_write(ring, upper_32_bits(value)); + } } }
On Mon, Oct 22, 2012 at 4:55 AM, Christian König deathsimple@vodafone.de wrote:
Handle requests that won't fit into a single packet.
Signed-off-by: Christian König deathsimple@vodafone.de
drivers/gpu/drm/radeon/ni.c | 41 +++++++++++++++++++++++------------------ 1 file changed, 23 insertions(+), 18 deletions(-)
diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c index 19b7fe1..ae6d087 100644 --- a/drivers/gpu/drm/radeon/ni.c +++ b/drivers/gpu/drm/radeon/ni.c @@ -1538,26 +1538,31 @@ void cayman_vm_set_page(struct radeon_device *rdev, uint64_t pe, { struct radeon_ring *ring = &rdev->ring[rdev->asic->vm.pt_ring_index]; uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
int i;
radeon_ring_write(ring, PACKET3(PACKET3_ME_WRITE, 1 + count * 2));
radeon_ring_write(ring, pe);
radeon_ring_write(ring, upper_32_bits(pe) & 0xff);
for (i = 0; i < count; ++i) {
uint64_t value = 0;
if (flags & RADEON_VM_PAGE_SYSTEM) {
value = radeon_vm_map_gart(rdev, addr);
value &= 0xFFFFFFFFFFFFF000ULL;
addr += incr;
} else if (flags & RADEON_VM_PAGE_VALID) {
value = addr;
addr += incr;
}
while (count) {
unsigned ndw = 1 + count * 2;
if (ndw > 0x3FFF)
ndw = 0x3FFF;
radeon_ring_write(ring, PACKET3(PACKET3_ME_WRITE, ndw));
radeon_ring_write(ring, pe);
radeon_ring_write(ring, upper_32_bits(pe) & 0xff);
We need to adjust the the value of pe here if we are doing multiple loops.
Alex
for (; ndw > 1; ndw -= 2, --count) {
uint64_t value = 0;
if (flags & RADEON_VM_PAGE_SYSTEM) {
value = radeon_vm_map_gart(rdev, addr);
value &= 0xFFFFFFFFFFFFF000ULL;
addr += incr;
} else if (flags & RADEON_VM_PAGE_VALID) {
value = addr;
addr += incr;
}
value |= r600_flags;
radeon_ring_write(ring, value);
radeon_ring_write(ring, upper_32_bits(value));
value |= r600_flags;
radeon_ring_write(ring, value);
radeon_ring_write(ring, upper_32_bits(value));
} }
}
-- 1.7.9.5
On 22.10.2012 17:12, Alex Deucher wrote:
On Mon, Oct 22, 2012 at 4:55 AM, Christian König deathsimple@vodafone.de wrote:
Handle requests that won't fit into a single packet.
Signed-off-by: Christian König deathsimple@vodafone.de
drivers/gpu/drm/radeon/ni.c | 41 +++++++++++++++++++++++------------------ 1 file changed, 23 insertions(+), 18 deletions(-)
diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c index 19b7fe1..ae6d087 100644 --- a/drivers/gpu/drm/radeon/ni.c +++ b/drivers/gpu/drm/radeon/ni.c @@ -1538,26 +1538,31 @@ void cayman_vm_set_page(struct radeon_device *rdev, uint64_t pe, { struct radeon_ring *ring = &rdev->ring[rdev->asic->vm.pt_ring_index]; uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
int i;
radeon_ring_write(ring, PACKET3(PACKET3_ME_WRITE, 1 + count * 2));
radeon_ring_write(ring, pe);
radeon_ring_write(ring, upper_32_bits(pe) & 0xff);
for (i = 0; i < count; ++i) {
uint64_t value = 0;
if (flags & RADEON_VM_PAGE_SYSTEM) {
value = radeon_vm_map_gart(rdev, addr);
value &= 0xFFFFFFFFFFFFF000ULL;
addr += incr;
} else if (flags & RADEON_VM_PAGE_VALID) {
value = addr;
addr += incr;
}
while (count) {
unsigned ndw = 1 + count * 2;
if (ndw > 0x3FFF)
ndw = 0x3FFF;
radeon_ring_write(ring, PACKET3(PACKET3_ME_WRITE, ndw));
radeon_ring_write(ring, pe);
radeon_ring_write(ring, upper_32_bits(pe) & 0xff);
We need to adjust the the value of pe here if we are doing multiple loops.
Oh yes indeed, missed that cause my test code didn't actually accessed the pages.
Going to send out a v2 soon.
Christian.
Alex
for (; ndw > 1; ndw -= 2, --count) {
uint64_t value = 0;
if (flags & RADEON_VM_PAGE_SYSTEM) {
value = radeon_vm_map_gart(rdev, addr);
value &= 0xFFFFFFFFFFFFF000ULL;
addr += incr;
} else if (flags & RADEON_VM_PAGE_VALID) {
value = addr;
addr += incr;
}
value |= r600_flags;
radeon_ring_write(ring, value);
radeon_ring_write(ring, upper_32_bits(value));
value |= r600_flags;
radeon_ring_write(ring, value);
radeon_ring_write(ring, upper_32_bits(value));
}} }
-- 1.7.9.5
Handle requests that won't fit into a single packet.
Signed-off-by: Christian König deathsimple@vodafone.de --- drivers/gpu/drm/radeon/si.c | 43 ++++++++++++++++++++++++------------------- 1 file changed, 24 insertions(+), 19 deletions(-)
diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c index da184de..1b8d2d9 100644 --- a/drivers/gpu/drm/radeon/si.c +++ b/drivers/gpu/drm/radeon/si.c @@ -2808,26 +2808,31 @@ void si_vm_set_page(struct radeon_device *rdev, uint64_t pe, { struct radeon_ring *ring = &rdev->ring[rdev->asic->vm.pt_ring_index]; uint32_t r600_flags = cayman_vm_page_flags(rdev, flags); - int i; - uint64_t value;
- radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 2 + count * 2)); - radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | - WRITE_DATA_DST_SEL(1))); - radeon_ring_write(ring, pe); - radeon_ring_write(ring, upper_32_bits(pe)); - for (i = 0; i < count; ++i) { - if (flags & RADEON_VM_PAGE_SYSTEM) { - value = radeon_vm_map_gart(rdev, addr); - value &= 0xFFFFFFFFFFFFF000ULL; - } else if (flags & RADEON_VM_PAGE_VALID) - value = addr; - else - value = 0; - addr += incr; - value |= r600_flags; - radeon_ring_write(ring, value); - radeon_ring_write(ring, upper_32_bits(value)); + while (count) { + unsigned ndw = 2 + count * 2; + if (ndw > 0x3FFE) + ndw = 0x3FFE; + + radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, ndw)); + radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | + WRITE_DATA_DST_SEL(1))); + radeon_ring_write(ring, pe); + radeon_ring_write(ring, upper_32_bits(pe)); + for (; ndw > 2; ndw -= 2, --count) { + uint64_t value; + if (flags & RADEON_VM_PAGE_SYSTEM) { + value = radeon_vm_map_gart(rdev, addr); + value &= 0xFFFFFFFFFFFFF000ULL; + } else if (flags & RADEON_VM_PAGE_VALID) + value = addr; + else + value = 0; + addr += incr; + value |= r600_flags; + radeon_ring_write(ring, value); + radeon_ring_write(ring, upper_32_bits(value)); + } } }
It's better to handle this in the chipset specific code.
Signed-off-by: Christian König deathsimple@vodafone.de --- drivers/gpu/drm/radeon/radeon_gart.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c index a7677dd..d84405d 100644 --- a/drivers/gpu/drm/radeon/radeon_gart.c +++ b/drivers/gpu/drm/radeon/radeon_gart.c @@ -1036,8 +1036,7 @@ static void radeon_vm_update_ptes(struct radeon_device *rdev, pte = radeon_sa_bo_gpu_addr(vm->page_tables[pt_idx]); pte += (addr & mask) * 8;
- if (((last_pte + 8 * count) != pte) || - ((count + nptes) > 1 << 11)) { + if ((last_pte + 8 * count) != pte) {
if (count) { radeon_asic_vm_set_page(rdev, last_pte,
Only NI uses 3dw headers, SI uses 4dw headers.
Signed-off-by: Christian König deathsimple@vodafone.de --- drivers/gpu/drm/radeon/radeon_gart.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c index d84405d..9a64f8c 100644 --- a/drivers/gpu/drm/radeon/radeon_gart.c +++ b/drivers/gpu/drm/radeon/radeon_gart.c @@ -1147,17 +1147,17 @@ int radeon_vm_bo_update_pte(struct radeon_device *rdev,
if (RADEON_VM_BLOCK_SIZE > 11) /* reserve space for one header for every 2k dwords */ - ndw += (nptes >> 11) * 3; + ndw += (nptes >> 11) * 4; else /* reserve space for one header for every (1 << BLOCK_SIZE) entries */ - ndw += (nptes >> RADEON_VM_BLOCK_SIZE) * 3; + ndw += (nptes >> RADEON_VM_BLOCK_SIZE) * 4;
/* reserve space for pte addresses */ ndw += nptes * 2;
/* reserve space for one header for every 2k dwords */ - ndw += (npdes >> 11) * 3; + ndw += (npdes >> 11) * 4;
/* reserve space for pde addresses */ ndw += npdes * 2;
On Mon, Oct 22, 2012 at 4:55 AM, Christian König deathsimple@vodafone.de wrote:
Otherwise the next IB might start reading commands with the page table still invalid.
Signed-off-by: Christian König deathsimple@vodafone.de
For the series:
Reviewed-by: Alex Deucher alexander.deucher@amd.com
drivers/gpu/drm/radeon/ni.c | 4 ++++ drivers/gpu/drm/radeon/nid.h | 1 + drivers/gpu/drm/radeon/si.c | 4 ++++ 3 files changed, 9 insertions(+)
diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c index 8c74c72..19b7fe1 100644 --- a/drivers/gpu/drm/radeon/ni.c +++ b/drivers/gpu/drm/radeon/ni.c @@ -1586,4 +1586,8 @@ void cayman_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm) /* bits 0-7 are the VM contexts0-7 */ radeon_ring_write(ring, PACKET0(VM_INVALIDATE_REQUEST, 0)); radeon_ring_write(ring, 1 << vm->id);
/* sync PFP to ME, otherwise we might get invalid PFP reads */
radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
radeon_ring_write(ring, 0x0);
} diff --git a/drivers/gpu/drm/radeon/nid.h b/drivers/gpu/drm/radeon/nid.h index 2423d1b..cbef681 100644 --- a/drivers/gpu/drm/radeon/nid.h +++ b/drivers/gpu/drm/radeon/nid.h @@ -502,6 +502,7 @@ #define PACKET3_MPEG_INDEX 0x3A #define PACKET3_WAIT_REG_MEM 0x3C #define PACKET3_MEM_WRITE 0x3D +#define PACKET3_PFP_SYNC_ME 0x42 #define PACKET3_SURFACE_SYNC 0x43 # define PACKET3_CB0_DEST_BASE_ENA (1 << 6) # define PACKET3_CB1_DEST_BASE_ENA (1 << 7) diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c index df8dd77..da184de 100644 --- a/drivers/gpu/drm/radeon/si.c +++ b/drivers/gpu/drm/radeon/si.c @@ -2868,6 +2868,10 @@ void si_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm) radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2); radeon_ring_write(ring, 0); radeon_ring_write(ring, 1 << vm->id);
/* sync PFP to ME, otherwise we might get invalid PFP reads */
radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
radeon_ring_write(ring, 0x0);
}
/*
1.7.9.5
dri-devel@lists.freedesktop.org