From: Michel Dänzer michel.daenzer@amd.com
get_page_entry calculates the GART page table entry, which is just written to the GART page table by set_page_entry.
This is a prerequisite for the following fix.
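For the simplest backend, the split boils down to the following (taken from the r100 hunk below, comments added; the rv370/rs400/rs600 variants additionally encode the RADEON_GART_PAGE_* flags into the entry in their get_page_entry implementations):

uint64_t r100_pci_gart_get_page_entry(uint64_t addr, uint32_t flags)
{
	/* r100-class PCI GART entries are simply the DMA/bus address */
	return addr;
}

void r100_pci_gart_set_page(struct radeon_device *rdev, unsigned i,
			    uint64_t entry)
{
	u32 *gtt = rdev->gart.ptr;

	/* only writes the precomputed entry, no per-ASIC encoding here */
	gtt[i] = cpu_to_le32(lower_32_bits(entry));
}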
Cc: stable@vger.kernel.org
Signed-off-by: Michel Dänzer michel.daenzer@amd.com
---
 drivers/gpu/drm/radeon/r100.c          | 10 +++++++--
 drivers/gpu/drm/radeon/r300.c          | 16 ++++++++++-----
 drivers/gpu/drm/radeon/radeon.h        |  8 ++++++--
 drivers/gpu/drm/radeon/radeon_asic.c   | 24 ++++++++++++++++++++++
 drivers/gpu/drm/radeon/radeon_asic.h   | 12 +++++++----
 drivers/gpu/drm/radeon/radeon_device.c |  2 ++
 drivers/gpu/drm/radeon/radeon_gart.c   | 37 +++++++++++++++++++++-------------
 drivers/gpu/drm/radeon/rs400.c         | 14 ++++++++-----
 drivers/gpu/drm/radeon/rs600.c         | 14 ++++++++-----
 9 files changed, 100 insertions(+), 37 deletions(-)
diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c index 74f06d5..279801c 100644 --- a/drivers/gpu/drm/radeon/r100.c +++ b/drivers/gpu/drm/radeon/r100.c @@ -644,6 +644,7 @@ int r100_pci_gart_init(struct radeon_device *rdev) return r; rdev->gart.table_size = rdev->gart.num_gpu_pages * 4; rdev->asic->gart.tlb_flush = &r100_pci_gart_tlb_flush; + rdev->asic->gart.get_page_entry = &r100_pci_gart_get_page_entry; rdev->asic->gart.set_page = &r100_pci_gart_set_page; return radeon_gart_table_ram_alloc(rdev); } @@ -681,11 +682,16 @@ void r100_pci_gart_disable(struct radeon_device *rdev) WREG32(RADEON_AIC_HI_ADDR, 0); }
+uint64_t r100_pci_gart_get_page_entry(uint64_t addr, uint32_t flags) +{ + return addr; +} + void r100_pci_gart_set_page(struct radeon_device *rdev, unsigned i, - uint64_t addr, uint32_t flags) + uint64_t entry) { u32 *gtt = rdev->gart.ptr; - gtt[i] = cpu_to_le32(lower_32_bits(addr)); + gtt[i] = cpu_to_le32(lower_32_bits(entry)); }
void r100_pci_gart_fini(struct radeon_device *rdev) diff --git a/drivers/gpu/drm/radeon/r300.c b/drivers/gpu/drm/radeon/r300.c index 064ad55..08d68f3 100644 --- a/drivers/gpu/drm/radeon/r300.c +++ b/drivers/gpu/drm/radeon/r300.c @@ -73,11 +73,8 @@ void rv370_pcie_gart_tlb_flush(struct radeon_device *rdev) #define R300_PTE_WRITEABLE (1 << 2) #define R300_PTE_READABLE (1 << 3)
-void rv370_pcie_gart_set_page(struct radeon_device *rdev, unsigned i, - uint64_t addr, uint32_t flags) +uint64_t rv370_pcie_gart_get_page_entry(uint64_t addr, uint32_t flags) { - void __iomem *ptr = rdev->gart.ptr; - addr = (lower_32_bits(addr) >> 8) | ((upper_32_bits(addr) & 0xff) << 24); if (flags & RADEON_GART_PAGE_READ) @@ -86,10 +83,18 @@ void rv370_pcie_gart_set_page(struct radeon_device *rdev, unsigned i, addr |= R300_PTE_WRITEABLE; if (!(flags & RADEON_GART_PAGE_SNOOP)) addr |= R300_PTE_UNSNOOPED; + return addr; +} + +void rv370_pcie_gart_set_page(struct radeon_device *rdev, unsigned i, + uint64_t entry) +{ + void __iomem *ptr = rdev->gart.ptr; + /* on x86 we want this to be CPU endian, on powerpc * on powerpc without HW swappers, it'll get swapped on way * into VRAM - so no need for cpu_to_le32 on VRAM tables */ - writel(addr, ((void __iomem *)ptr) + (i * 4)); + writel(entry, ((void __iomem *)ptr) + (i * 4)); }
int rv370_pcie_gart_init(struct radeon_device *rdev) @@ -109,6 +114,7 @@ int rv370_pcie_gart_init(struct radeon_device *rdev) DRM_ERROR("Failed to register debugfs file for PCIE gart !\n"); rdev->gart.table_size = rdev->gart.num_gpu_pages * 4; rdev->asic->gart.tlb_flush = &rv370_pcie_gart_tlb_flush; + rdev->asic->gart.get_page_entry = &rv370_pcie_gart_get_page_entry; rdev->asic->gart.set_page = &rv370_pcie_gart_set_page; return radeon_gart_table_vram_alloc(rdev); } diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 54529b8..40c4c7a 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -242,6 +242,7 @@ bool radeon_get_bios(struct radeon_device *rdev); * Dummy page */ struct radeon_dummy_page { + uint64_t entry; struct page *page; dma_addr_t addr; }; @@ -646,6 +647,7 @@ struct radeon_gart { unsigned table_size; struct page **pages; dma_addr_t *pages_addr; + uint64_t *pages_entry; bool ready; };
@@ -1847,8 +1849,9 @@ struct radeon_asic { /* gart */ struct { void (*tlb_flush)(struct radeon_device *rdev); + uint64_t (*get_page_entry)(uint64_t addr, uint32_t flags); void (*set_page)(struct radeon_device *rdev, unsigned i, - uint64_t addr, uint32_t flags); + uint64_t entry); } gart; struct { int (*init)(struct radeon_device *rdev); @@ -2852,7 +2855,8 @@ static inline void radeon_ring_write(struct radeon_ring *ring, uint32_t v) #define radeon_vga_set_state(rdev, state) (rdev)->asic->vga_set_state((rdev), (state)) #define radeon_asic_reset(rdev) (rdev)->asic->asic_reset((rdev)) #define radeon_gart_tlb_flush(rdev) (rdev)->asic->gart.tlb_flush((rdev)) -#define radeon_gart_set_page(rdev, i, p, f) (rdev)->asic->gart.set_page((rdev), (i), (p), (f)) +#define radeon_gart_get_page_entry(a, f) (rdev)->asic->gart.get_page_entry((a), (f)) +#define radeon_gart_set_page(rdev, i, e) (rdev)->asic->gart.set_page((rdev), (i), (e)) #define radeon_asic_vm_init(rdev) (rdev)->asic->vm.init((rdev)) #define radeon_asic_vm_fini(rdev) (rdev)->asic->vm.fini((rdev)) #define radeon_asic_vm_copy_pages(rdev, ib, pe, src, count) ((rdev)->asic->vm.copy_pages((rdev), (ib), (pe), (src), (count))) diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c index 850de57..2c35b5e 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.c +++ b/drivers/gpu/drm/radeon/radeon_asic.c @@ -159,11 +159,13 @@ void radeon_agp_disable(struct radeon_device *rdev) DRM_INFO("Forcing AGP to PCIE mode\n"); rdev->flags |= RADEON_IS_PCIE; rdev->asic->gart.tlb_flush = &rv370_pcie_gart_tlb_flush; + rdev->asic->gart.get_page_entry = &rv370_pcie_gart_get_page_entry; rdev->asic->gart.set_page = &rv370_pcie_gart_set_page; } else { DRM_INFO("Forcing AGP to PCI mode\n"); rdev->flags |= RADEON_IS_PCI; rdev->asic->gart.tlb_flush = &r100_pci_gart_tlb_flush; + rdev->asic->gart.get_page_entry = &r100_pci_gart_get_page_entry; rdev->asic->gart.set_page = &r100_pci_gart_set_page; } rdev->mc.gtt_size = radeon_gart_size * 1024 * 1024; @@ -199,6 +201,7 @@ static struct radeon_asic r100_asic = { .mc_wait_for_idle = &r100_mc_wait_for_idle, .gart = { .tlb_flush = &r100_pci_gart_tlb_flush, + .get_page_entry = &r100_pci_gart_get_page_entry, .set_page = &r100_pci_gart_set_page, }, .ring = { @@ -265,6 +268,7 @@ static struct radeon_asic r200_asic = { .mc_wait_for_idle = &r100_mc_wait_for_idle, .gart = { .tlb_flush = &r100_pci_gart_tlb_flush, + .get_page_entry = &r100_pci_gart_get_page_entry, .set_page = &r100_pci_gart_set_page, }, .ring = { @@ -345,6 +349,7 @@ static struct radeon_asic r300_asic = { .mc_wait_for_idle = &r300_mc_wait_for_idle, .gart = { .tlb_flush = &r100_pci_gart_tlb_flush, + .get_page_entry = &r100_pci_gart_get_page_entry, .set_page = &r100_pci_gart_set_page, }, .ring = { @@ -411,6 +416,7 @@ static struct radeon_asic r300_asic_pcie = { .mc_wait_for_idle = &r300_mc_wait_for_idle, .gart = { .tlb_flush = &rv370_pcie_gart_tlb_flush, + .get_page_entry = &rv370_pcie_gart_get_page_entry, .set_page = &rv370_pcie_gart_set_page, }, .ring = { @@ -477,6 +483,7 @@ static struct radeon_asic r420_asic = { .mc_wait_for_idle = &r300_mc_wait_for_idle, .gart = { .tlb_flush = &rv370_pcie_gart_tlb_flush, + .get_page_entry = &rv370_pcie_gart_get_page_entry, .set_page = &rv370_pcie_gart_set_page, }, .ring = { @@ -543,6 +550,7 @@ static struct radeon_asic rs400_asic = { .mc_wait_for_idle = &rs400_mc_wait_for_idle, .gart = { .tlb_flush = &rs400_gart_tlb_flush, + .get_page_entry = &rs400_gart_get_page_entry, .set_page = &rs400_gart_set_page, }, .ring = 
{ @@ -609,6 +617,7 @@ static struct radeon_asic rs600_asic = { .mc_wait_for_idle = &rs600_mc_wait_for_idle, .gart = { .tlb_flush = &rs600_gart_tlb_flush, + .get_page_entry = &rs600_gart_get_page_entry, .set_page = &rs600_gart_set_page, }, .ring = { @@ -677,6 +686,7 @@ static struct radeon_asic rs690_asic = { .mc_wait_for_idle = &rs690_mc_wait_for_idle, .gart = { .tlb_flush = &rs400_gart_tlb_flush, + .get_page_entry = &rs400_gart_get_page_entry, .set_page = &rs400_gart_set_page, }, .ring = { @@ -745,6 +755,7 @@ static struct radeon_asic rv515_asic = { .mc_wait_for_idle = &rv515_mc_wait_for_idle, .gart = { .tlb_flush = &rv370_pcie_gart_tlb_flush, + .get_page_entry = &rv370_pcie_gart_get_page_entry, .set_page = &rv370_pcie_gart_set_page, }, .ring = { @@ -811,6 +822,7 @@ static struct radeon_asic r520_asic = { .mc_wait_for_idle = &r520_mc_wait_for_idle, .gart = { .tlb_flush = &rv370_pcie_gart_tlb_flush, + .get_page_entry = &rv370_pcie_gart_get_page_entry, .set_page = &rv370_pcie_gart_set_page, }, .ring = { @@ -905,6 +917,7 @@ static struct radeon_asic r600_asic = { .get_gpu_clock_counter = &r600_get_gpu_clock_counter, .gart = { .tlb_flush = &r600_pcie_gart_tlb_flush, + .get_page_entry = &rs600_gart_get_page_entry, .set_page = &rs600_gart_set_page, }, .ring = { @@ -990,6 +1003,7 @@ static struct radeon_asic rv6xx_asic = { .get_gpu_clock_counter = &r600_get_gpu_clock_counter, .gart = { .tlb_flush = &r600_pcie_gart_tlb_flush, + .get_page_entry = &rs600_gart_get_page_entry, .set_page = &rs600_gart_set_page, }, .ring = { @@ -1081,6 +1095,7 @@ static struct radeon_asic rs780_asic = { .get_gpu_clock_counter = &r600_get_gpu_clock_counter, .gart = { .tlb_flush = &r600_pcie_gart_tlb_flush, + .get_page_entry = &rs600_gart_get_page_entry, .set_page = &rs600_gart_set_page, }, .ring = { @@ -1185,6 +1200,7 @@ static struct radeon_asic rv770_asic = { .get_gpu_clock_counter = &r600_get_gpu_clock_counter, .gart = { .tlb_flush = &r600_pcie_gart_tlb_flush, + .get_page_entry = &rs600_gart_get_page_entry, .set_page = &rs600_gart_set_page, }, .ring = { @@ -1303,6 +1319,7 @@ static struct radeon_asic evergreen_asic = { .get_gpu_clock_counter = &r600_get_gpu_clock_counter, .gart = { .tlb_flush = &evergreen_pcie_gart_tlb_flush, + .get_page_entry = &rs600_gart_get_page_entry, .set_page = &rs600_gart_set_page, }, .ring = { @@ -1395,6 +1412,7 @@ static struct radeon_asic sumo_asic = { .get_gpu_clock_counter = &r600_get_gpu_clock_counter, .gart = { .tlb_flush = &evergreen_pcie_gart_tlb_flush, + .get_page_entry = &rs600_gart_get_page_entry, .set_page = &rs600_gart_set_page, }, .ring = { @@ -1486,6 +1504,7 @@ static struct radeon_asic btc_asic = { .get_gpu_clock_counter = &r600_get_gpu_clock_counter, .gart = { .tlb_flush = &evergreen_pcie_gart_tlb_flush, + .get_page_entry = &rs600_gart_get_page_entry, .set_page = &rs600_gart_set_page, }, .ring = { @@ -1621,6 +1640,7 @@ static struct radeon_asic cayman_asic = { .get_gpu_clock_counter = &r600_get_gpu_clock_counter, .gart = { .tlb_flush = &cayman_pcie_gart_tlb_flush, + .get_page_entry = &rs600_gart_get_page_entry, .set_page = &rs600_gart_set_page, }, .vm = { @@ -1724,6 +1744,7 @@ static struct radeon_asic trinity_asic = { .get_gpu_clock_counter = &r600_get_gpu_clock_counter, .gart = { .tlb_flush = &cayman_pcie_gart_tlb_flush, + .get_page_entry = &rs600_gart_get_page_entry, .set_page = &rs600_gart_set_page, }, .vm = { @@ -1857,6 +1878,7 @@ static struct radeon_asic si_asic = { .get_gpu_clock_counter = &si_get_gpu_clock_counter, .gart = { .tlb_flush = &si_pcie_gart_tlb_flush, + 
.get_page_entry = &rs600_gart_get_page_entry, .set_page = &rs600_gart_set_page, }, .vm = { @@ -2018,6 +2040,7 @@ static struct radeon_asic ci_asic = { .get_gpu_clock_counter = &cik_get_gpu_clock_counter, .gart = { .tlb_flush = &cik_pcie_gart_tlb_flush, + .get_page_entry = &rs600_gart_get_page_entry, .set_page = &rs600_gart_set_page, }, .vm = { @@ -2125,6 +2148,7 @@ static struct radeon_asic kv_asic = { .get_gpu_clock_counter = &cik_get_gpu_clock_counter, .gart = { .tlb_flush = &cik_pcie_gart_tlb_flush, + .get_page_entry = &rs600_gart_get_page_entry, .set_page = &rs600_gart_set_page, }, .vm = { diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h index 2a45d54..8d787d1 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.h +++ b/drivers/gpu/drm/radeon/radeon_asic.h @@ -67,8 +67,9 @@ bool r100_gpu_is_lockup(struct radeon_device *rdev, struct radeon_ring *cp); int r100_asic_reset(struct radeon_device *rdev); u32 r100_get_vblank_counter(struct radeon_device *rdev, int crtc); void r100_pci_gart_tlb_flush(struct radeon_device *rdev); +uint64_t r100_pci_gart_get_page_entry(uint64_t addr, uint32_t flags); void r100_pci_gart_set_page(struct radeon_device *rdev, unsigned i, - uint64_t addr, uint32_t flags); + uint64_t entry); void r100_ring_start(struct radeon_device *rdev, struct radeon_ring *ring); int r100_irq_set(struct radeon_device *rdev); int r100_irq_process(struct radeon_device *rdev); @@ -172,8 +173,9 @@ extern void r300_fence_ring_emit(struct radeon_device *rdev, struct radeon_fence *fence); extern int r300_cs_parse(struct radeon_cs_parser *p); extern void rv370_pcie_gart_tlb_flush(struct radeon_device *rdev); +extern uint64_t rv370_pcie_gart_get_page_entry(uint64_t addr, uint32_t flags); extern void rv370_pcie_gart_set_page(struct radeon_device *rdev, unsigned i, - uint64_t addr, uint32_t flags); + uint64_t entry); extern void rv370_set_pcie_lanes(struct radeon_device *rdev, int lanes); extern int rv370_get_pcie_lanes(struct radeon_device *rdev); extern void r300_set_reg_safe(struct radeon_device *rdev); @@ -208,8 +210,9 @@ extern void rs400_fini(struct radeon_device *rdev); extern int rs400_suspend(struct radeon_device *rdev); extern int rs400_resume(struct radeon_device *rdev); void rs400_gart_tlb_flush(struct radeon_device *rdev); +uint64_t rs400_gart_get_page_entry(uint64_t addr, uint32_t flags); void rs400_gart_set_page(struct radeon_device *rdev, unsigned i, - uint64_t addr, uint32_t flags); + uint64_t entry); uint32_t rs400_mc_rreg(struct radeon_device *rdev, uint32_t reg); void rs400_mc_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v); int rs400_gart_init(struct radeon_device *rdev); @@ -232,8 +235,9 @@ int rs600_irq_process(struct radeon_device *rdev); void rs600_irq_disable(struct radeon_device *rdev); u32 rs600_get_vblank_counter(struct radeon_device *rdev, int crtc); void rs600_gart_tlb_flush(struct radeon_device *rdev); +uint64_t rs600_gart_get_page_entry(uint64_t addr, uint32_t flags); void rs600_gart_set_page(struct radeon_device *rdev, unsigned i, - uint64_t addr, uint32_t flags); + uint64_t entry); uint32_t rs600_mc_rreg(struct radeon_device *rdev, uint32_t reg); void rs600_mc_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v); void rs600_bandwidth_update(struct radeon_device *rdev); diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index 0ec6516..bd7519f 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -774,6 +774,8 @@ int 
radeon_dummy_page_init(struct radeon_device *rdev) rdev->dummy_page.page = NULL; return -ENOMEM; } + rdev->dummy_page.entry = radeon_gart_get_page_entry(rdev->dummy_page.addr, + RADEON_GART_PAGE_DUMMY); return 0; }
diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c index 84146d5..a530932 100644 --- a/drivers/gpu/drm/radeon/radeon_gart.c +++ b/drivers/gpu/drm/radeon/radeon_gart.c @@ -228,7 +228,6 @@ void radeon_gart_unbind(struct radeon_device *rdev, unsigned offset, unsigned t; unsigned p; int i, j; - u64 page_base;
if (!rdev->gart.ready) { WARN(1, "trying to unbind memory from uninitialized GART !\n"); @@ -240,13 +239,12 @@ void radeon_gart_unbind(struct radeon_device *rdev, unsigned offset, if (rdev->gart.pages[p]) { rdev->gart.pages[p] = NULL; rdev->gart.pages_addr[p] = rdev->dummy_page.addr; - page_base = rdev->gart.pages_addr[p]; for (j = 0; j < (PAGE_SIZE / RADEON_GPU_PAGE_SIZE); j++, t++) { + rdev->gart.pages_entry[t] = rdev->dummy_page.entry; if (rdev->gart.ptr) { - radeon_gart_set_page(rdev, t, page_base, - RADEON_GART_PAGE_DUMMY); + radeon_gart_set_page(rdev, t, + rdev->dummy_page.entry); } - page_base += RADEON_GPU_PAGE_SIZE; } } } @@ -274,7 +272,7 @@ int radeon_gart_bind(struct radeon_device *rdev, unsigned offset, { unsigned t; unsigned p; - uint64_t page_base; + uint64_t page_base, page_entry; int i, j;
if (!rdev->gart.ready) { @@ -287,12 +285,14 @@ int radeon_gart_bind(struct radeon_device *rdev, unsigned offset, for (i = 0; i < pages; i++, p++) { rdev->gart.pages_addr[p] = dma_addr[i]; rdev->gart.pages[p] = pagelist[i]; - if (rdev->gart.ptr) { - page_base = rdev->gart.pages_addr[p]; - for (j = 0; j < (PAGE_SIZE / RADEON_GPU_PAGE_SIZE); j++, t++) { - radeon_gart_set_page(rdev, t, page_base, flags); - page_base += RADEON_GPU_PAGE_SIZE; + page_base = dma_addr[i]; + for (j = 0; j < (PAGE_SIZE / RADEON_GPU_PAGE_SIZE); j++, t++) { + page_entry = radeon_gart_get_page_entry(page_base, flags); + rdev->gart.pages_entry[t] = page_entry; + if (rdev->gart.ptr) { + radeon_gart_set_page(rdev, t, page_entry); } + page_base += RADEON_GPU_PAGE_SIZE; } } mb(); @@ -340,10 +340,17 @@ int radeon_gart_init(struct radeon_device *rdev) radeon_gart_fini(rdev); return -ENOMEM; } + rdev->gart.pages_entry = vmalloc(sizeof(uint64_t) * + rdev->gart.num_gpu_pages); + if (rdev->gart.pages_entry == NULL) { + radeon_gart_fini(rdev); + return -ENOMEM; + } /* set GART entry to point to the dummy page by default */ - for (i = 0; i < rdev->gart.num_cpu_pages; i++) { + for (i = 0; i < rdev->gart.num_cpu_pages; i++) rdev->gart.pages_addr[i] = rdev->dummy_page.addr; - } + for (i = 0; i < rdev->gart.num_gpu_pages; i++) + rdev->gart.pages_entry[i] = rdev->dummy_page.entry; return 0; }
@@ -356,15 +363,17 @@ int radeon_gart_init(struct radeon_device *rdev) */ void radeon_gart_fini(struct radeon_device *rdev) { - if (rdev->gart.pages && rdev->gart.pages_addr && rdev->gart.ready) { + if (rdev->gart.ready) { /* unbind pages */ radeon_gart_unbind(rdev, 0, rdev->gart.num_cpu_pages); } rdev->gart.ready = false; vfree(rdev->gart.pages); vfree(rdev->gart.pages_addr); + vfree(rdev->gart.pages_entry); rdev->gart.pages = NULL; rdev->gart.pages_addr = NULL; + rdev->gart.pages_entry = NULL;
radeon_dummy_page_fini(rdev); } diff --git a/drivers/gpu/drm/radeon/rs400.c b/drivers/gpu/drm/radeon/rs400.c index c5799f16..34e3235 100644 --- a/drivers/gpu/drm/radeon/rs400.c +++ b/drivers/gpu/drm/radeon/rs400.c @@ -212,11 +212,9 @@ void rs400_gart_fini(struct radeon_device *rdev) #define RS400_PTE_WRITEABLE (1 << 2) #define RS400_PTE_READABLE (1 << 3)
-void rs400_gart_set_page(struct radeon_device *rdev, unsigned i, - uint64_t addr, uint32_t flags) +uint64_t rs400_gart_get_page_entry(uint64_t addr, uint32_t flags) { uint32_t entry; - u32 *gtt = rdev->gart.ptr;
entry = (lower_32_bits(addr) & PAGE_MASK) | ((upper_32_bits(addr) & 0xff) << 4); @@ -226,8 +224,14 @@ void rs400_gart_set_page(struct radeon_device *rdev, unsigned i, entry |= RS400_PTE_WRITEABLE; if (!(flags & RADEON_GART_PAGE_SNOOP)) entry |= RS400_PTE_UNSNOOPED; - entry = cpu_to_le32(entry); - gtt[i] = entry; + return entry; +} + +void rs400_gart_set_page(struct radeon_device *rdev, unsigned i, + uint64_t entry) +{ + u32 *gtt = rdev->gart.ptr; + gtt[i] = cpu_to_le32(lower_32_bits(entry)); }
int rs400_mc_wait_for_idle(struct radeon_device *rdev) diff --git a/drivers/gpu/drm/radeon/rs600.c b/drivers/gpu/drm/radeon/rs600.c index 9acb1c3..74bce91 100644 --- a/drivers/gpu/drm/radeon/rs600.c +++ b/drivers/gpu/drm/radeon/rs600.c @@ -625,11 +625,8 @@ static void rs600_gart_fini(struct radeon_device *rdev) radeon_gart_table_vram_free(rdev); }
-void rs600_gart_set_page(struct radeon_device *rdev, unsigned i, - uint64_t addr, uint32_t flags) +uint64_t rs600_gart_get_page_entry(uint64_t addr, uint32_t flags) { - void __iomem *ptr = (void *)rdev->gart.ptr; - addr = addr & 0xFFFFFFFFFFFFF000ULL; addr |= R600_PTE_SYSTEM; if (flags & RADEON_GART_PAGE_VALID) @@ -640,7 +637,14 @@ void rs600_gart_set_page(struct radeon_device *rdev, unsigned i, addr |= R600_PTE_WRITEABLE; if (flags & RADEON_GART_PAGE_SNOOP) addr |= R600_PTE_SNOOPED; - writeq(addr, ptr + (i * 8)); + return addr; +} + +void rs600_gart_set_page(struct radeon_device *rdev, unsigned i, + uint64_t entry) +{ + void __iomem *ptr = (void *)rdev->gart.ptr; + writeq(entry, ptr + (i * 8)); }
int rs600_irq_set(struct radeon_device *rdev)
From: Michel Dänzer michel.daenzer@amd.com
The GART table BO has to be moved out of VRAM for suspend/resume. Any updates to the GART table during that time were silently dropped without this change. This caused GPU lockups on resume in some cases, see the bug reports referenced below.
This might also make GPU reset more robust in some cases, as we no longer rely on the GART table in VRAM being preserved across the GPU lockup/reset.
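The hunks below are flattened and hard to follow, so here is the helper this patch reinstates in radeon_gart.c, which each ASIC's gart_enable path then calls right after pinning the table BO back into VRAM. It rewrites the hardware table from the pages_entry shadow introduced in the previous patch and then flushes the TLB:

/**
 * radeon_gart_restore - bind all pages in the gart page table
 *
 * @rdev: radeon_device pointer
 *
 * Binds all pages in the gart page table (all asics).
 * Used to rebuild the gart table on device startup or resume.
 */
void radeon_gart_restore(struct radeon_device *rdev)
{
	int i;

	if (!rdev->gart.ptr) {
		return;
	}
	for (i = 0; i < rdev->gart.num_gpu_pages; i++) {
		radeon_gart_set_page(rdev, i, rdev->gart.pages_entry[i]);
	}
	mb();
	radeon_gart_tlb_flush(rdev);
}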
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=85204
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=86267
Cc: stable@vger.kernel.org
Signed-off-by: Michel Dänzer michel.daenzer@amd.com
---
 drivers/gpu/drm/radeon/cik.c         |  1 +
 drivers/gpu/drm/radeon/evergreen.c   |  1 +
 drivers/gpu/drm/radeon/ni.c          |  1 +
 drivers/gpu/drm/radeon/r300.c        |  1 +
 drivers/gpu/drm/radeon/r600.c        |  1 +
 drivers/gpu/drm/radeon/radeon.h      |  1 +
 drivers/gpu/drm/radeon/radeon_gart.c | 22 ++++++++++++++++++++++
 drivers/gpu/drm/radeon/rs600.c       |  1 +
 drivers/gpu/drm/radeon/rv770.c       |  1 +
 drivers/gpu/drm/radeon/si.c          |  1 +
 10 files changed, 31 insertions(+)
diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index 64fdae5..8870d51 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -5729,6 +5729,7 @@ static int cik_pcie_gart_enable(struct radeon_device *rdev) r = radeon_gart_table_vram_pin(rdev); if (r) return r; + radeon_gart_restore(rdev); /* Setup TLB control */ WREG32(MC_VM_MX_L1_TLB_CNTL, (0xA << 7) | diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c index 85995b4..ffa4d6c3 100644 --- a/drivers/gpu/drm/radeon/evergreen.c +++ b/drivers/gpu/drm/radeon/evergreen.c @@ -2426,6 +2426,7 @@ static int evergreen_pcie_gart_enable(struct radeon_device *rdev) r = radeon_gart_table_vram_pin(rdev); if (r) return r; + radeon_gart_restore(rdev); /* Setup L2 cache */ WREG32(VM_L2_CNTL, ENABLE_L2_CACHE | ENABLE_L2_FRAGMENT_PROCESSING | ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE | diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c index 360de9f..92f488d 100644 --- a/drivers/gpu/drm/radeon/ni.c +++ b/drivers/gpu/drm/radeon/ni.c @@ -1229,6 +1229,7 @@ static int cayman_pcie_gart_enable(struct radeon_device *rdev) r = radeon_gart_table_vram_pin(rdev); if (r) return r; + radeon_gart_restore(rdev); /* Setup TLB control */ WREG32(MC_VM_MX_L1_TLB_CNTL, (0xA << 7) | diff --git a/drivers/gpu/drm/radeon/r300.c b/drivers/gpu/drm/radeon/r300.c index 08d68f3..ef05569 100644 --- a/drivers/gpu/drm/radeon/r300.c +++ b/drivers/gpu/drm/radeon/r300.c @@ -132,6 +132,7 @@ int rv370_pcie_gart_enable(struct radeon_device *rdev) r = radeon_gart_table_vram_pin(rdev); if (r) return r; + radeon_gart_restore(rdev); /* discard memory request outside of configured range */ tmp = RADEON_PCIE_TX_GART_UNMAPPED_ACCESS_DISCARD; WREG32_PCIE(RADEON_PCIE_TX_GART_CNTL, tmp); diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index ef5d606..049a571 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -1056,6 +1056,7 @@ static int r600_pcie_gart_enable(struct radeon_device *rdev) r = radeon_gart_table_vram_pin(rdev); if (r) return r; + radeon_gart_restore(rdev);
/* Setup L2 cache */ WREG32(VM_L2_CNTL, ENABLE_L2_CACHE | ENABLE_L2_FRAGMENT_PROCESSING | diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 40c4c7a..6e9e5ef 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -664,6 +664,7 @@ void radeon_gart_unbind(struct radeon_device *rdev, unsigned offset, int radeon_gart_bind(struct radeon_device *rdev, unsigned offset, int pages, struct page **pagelist, dma_addr_t *dma_addr, uint32_t flags); +void radeon_gart_restore(struct radeon_device *rdev);
/* diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c index a530932..556270d 100644 --- a/drivers/gpu/drm/radeon/radeon_gart.c +++ b/drivers/gpu/drm/radeon/radeon_gart.c @@ -301,6 +301,28 @@ int radeon_gart_bind(struct radeon_device *rdev, unsigned offset, }
/** + * radeon_gart_restore - bind all pages in the gart page table + * + * @rdev: radeon_device pointer + * + * Binds all pages in the gart page table (all asics). + * Used to rebuild the gart table on device startup or resume. + */ +void radeon_gart_restore(struct radeon_device *rdev) +{ + int i; + + if (!rdev->gart.ptr) { + return; + } + for (i = 0; i < rdev->gart.num_gpu_pages; i++) { + radeon_gart_set_page(rdev, i, rdev->gart.pages_entry[i]); + } + mb(); + radeon_gart_tlb_flush(rdev); +} + +/** * radeon_gart_init - init the driver info for managing the gart * * @rdev: radeon_device pointer diff --git a/drivers/gpu/drm/radeon/rs600.c b/drivers/gpu/drm/radeon/rs600.c index 74bce91..17b7868 100644 --- a/drivers/gpu/drm/radeon/rs600.c +++ b/drivers/gpu/drm/radeon/rs600.c @@ -555,6 +555,7 @@ static int rs600_gart_enable(struct radeon_device *rdev) r = radeon_gart_table_vram_pin(rdev); if (r) return r; + radeon_gart_restore(rdev); /* Enable bus master */ tmp = RREG32(RADEON_BUS_CNTL) & ~RS600_BUS_MASTER_DIS; WREG32(RADEON_BUS_CNTL, tmp); diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c index 372016e..6aa6fea 100644 --- a/drivers/gpu/drm/radeon/rv770.c +++ b/drivers/gpu/drm/radeon/rv770.c @@ -899,6 +899,7 @@ static int rv770_pcie_gart_enable(struct radeon_device *rdev) r = radeon_gart_table_vram_pin(rdev); if (r) return r; + radeon_gart_restore(rdev); /* Setup L2 cache */ WREG32(VM_L2_CNTL, ENABLE_L2_CACHE | ENABLE_L2_FRAGMENT_PROCESSING | ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE | diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c index 5d89b87..7d936e8 100644 --- a/drivers/gpu/drm/radeon/si.c +++ b/drivers/gpu/drm/radeon/si.c @@ -4249,6 +4249,7 @@ static int si_pcie_gart_enable(struct radeon_device *rdev) r = radeon_gart_table_vram_pin(rdev); if (r) return r; + radeon_gart_restore(rdev); /* Setup TLB control */ WREG32(MC_VM_MX_L1_TLB_CNTL, (0xA << 7) |
Am 21.01.2015 um 09:36 schrieb Michel Dänzer:
From: Michel Dänzer michel.daenzer@amd.com
The GART table BO has to be moved out of VRAM for suspend/resume. Any updates to the GART table during that time were silently dropped without this change. This caused GPU lockups on resume in some cases, see the bug reports referenced below.
This might also make GPU reset more robust in some cases, as we no longer rely on the GART table in VRAM being preserved across the GPU lockup/reset.
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=85204 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=86267 Cc: stable@vger.kernel.org Signed-off-by: Michel Dänzer michel.daenzer@amd.com
Can we make that directly part of radeon_gart_table_vram_pin? Doesn't seem to make sense to always need to call both functions.
In addition to that, couldn't we just stop mapping/unmapping the BO in radeon_gart_table_vram_pin? As far as I know, CPU-mapped BOs can still move around.
Christian.
From: Michel Dänzer michel.daenzer@amd.com
The GART table BO has to be moved out of VRAM for suspend/resume. Any updates to the GART table during that time were silently dropped without this change. This caused GPU lockups on resume in some cases, see the bug reports referenced below.
This might also make GPU reset more robust in some cases, as we no longer rely on the GART table in VRAM being preserved across the GPU lockup/reset.
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=85204
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=86267
Cc: stable@vger.kernel.org
Signed-off-by: Michel Dänzer michel.daenzer@amd.com
---
v2: Add logic to radeon_gart_table_vram_pin directly instead of reinstating radeon_gart_restore function
 drivers/gpu/drm/radeon/radeon_gart.c | 8 ++++++++
 1 file changed, 8 insertions(+)
diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c
index a530932..0c8c739 100644
--- a/drivers/gpu/drm/radeon/radeon_gart.c
+++ b/drivers/gpu/drm/radeon/radeon_gart.c
@@ -163,6 +163,14 @@ int radeon_gart_table_vram_pin(struct radeon_device *rdev)
 	r = radeon_bo_kmap(rdev->gart.robj, &rdev->gart.ptr);
 	if (r)
 		radeon_bo_unpin(rdev->gart.robj);
+	else {
+		int i;
+
+		for (i = 0; i < rdev->gart.num_gpu_pages; i++)
+			radeon_gart_set_page(rdev, i, rdev->gart.pages_entry[i]);
+		mb();
+		radeon_gart_tlb_flush(rdev);
+	}
 	radeon_bo_unreserve(rdev->gart.robj);
 	rdev->gart.table_addr = gpu_addr;
 	return r;
Am 22.01.2015 um 08:30 schrieb Michel Dänzer:
From: Michel Dänzer michel.daenzer@amd.com
The GART table BO has to be moved out of VRAM for suspend/resume. Any updates to the GART table during that time were silently dropped without this change. This caused GPU lockups on resume in some cases, see the bug reports referenced below.
This might also make GPU reset more robust in some cases, as we no longer rely on the GART table in VRAM being preserved across the GPU lockup/reset.
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=85204 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=86267 Cc: stable@vger.kernel.org Signed-off-by: Michel Dänzer michel.daenzer@amd.com
v2: Add logic to radeon_gart_table_vram_pin directly instead of reinstating radeon_gart_restore function
drivers/gpu/drm/radeon/radeon_gart.c | 8 ++++++++ 1 file changed, 8 insertions(+)
diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c index a530932..0c8c739 100644 --- a/drivers/gpu/drm/radeon/radeon_gart.c +++ b/drivers/gpu/drm/radeon/radeon_gart.c @@ -163,6 +163,14 @@ int radeon_gart_table_vram_pin(struct radeon_device *rdev) r = radeon_bo_kmap(rdev->gart.robj, &rdev->gart.ptr); if (r) radeon_bo_unpin(rdev->gart.robj);
+	else {
I would add a comment explaining why we do this here.
+		int i;
+
+		for (i = 0; i < rdev->gart.num_gpu_pages; i++)
+			radeon_gart_set_page(rdev, i, rdev->gart.pages_entry[i]);
+		mb();
+		radeon_gart_tlb_flush(rdev);
That TLB flush won't work correctly because the table_addr isn't up to date yet.
+	}
 	radeon_bo_unreserve(rdev->gart.robj);
 	rdev->gart.table_addr = gpu_addr;
It's updated here instead. Maybe completely drop the local gpu_addr variable and update the table_addr directly instead.
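For illustration only, a rough sketch of what that suggestion might look like (hypothetical, not part of the posted series; the v3 below instead keeps the local gpu_addr and just moves the restore after the table_addr assignment):

int radeon_gart_table_vram_pin(struct radeon_device *rdev)
{
	int r, i;

	r = radeon_bo_reserve(rdev->gart.robj, false);
	if (unlikely(r != 0))
		return r;
	/* sketch: let radeon_bo_pin() write the GPU address straight into
	 * rdev->gart.table_addr, so it is already valid when the entries
	 * are restored and the TLB is flushed below
	 */
	r = radeon_bo_pin(rdev->gart.robj, RADEON_GEM_DOMAIN_VRAM,
			  &rdev->gart.table_addr);
	if (r) {
		radeon_bo_unreserve(rdev->gart.robj);
		return r;
	}
	r = radeon_bo_kmap(rdev->gart.robj, &rdev->gart.ptr);
	if (r)
		radeon_bo_unpin(rdev->gart.robj);
	radeon_bo_unreserve(rdev->gart.robj);

	if (!r) {
		/* restore entries that may have been dropped while the
		 * table was unmapped
		 */
		for (i = 0; i < rdev->gart.num_gpu_pages; i++)
			radeon_gart_set_page(rdev, i, rdev->gart.pages_entry[i]);
		mb();
		radeon_gart_tlb_flush(rdev);
	}
	return r;
}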
Christian.
From: Michel Dänzer michel.daenzer@amd.com
The GART table BO has to be moved out of VRAM for suspend/resume. Any updates to the GART table during that time were silently dropped without this change. This caused GPU lockups on resume in some cases, see the bug reports referenced below.
This might also make GPU reset more robust in some cases, as we no longer rely on the GART table in VRAM being preserved across the GPU lockup/reset.
v2: Add logic to radeon_gart_table_vram_pin directly instead of reinstating
    radeon_gart_restore
v3: Move code after assignment of rdev->gart.table_addr so that the GART TLB
    flush can work as intended, add code comment explaining why we're doing this
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=85204
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=86267
Cc: stable@vger.kernel.org
Signed-off-by: Michel Dänzer michel.daenzer@amd.com
---
 drivers/gpu/drm/radeon/radeon_gart.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)
diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c
index a530932..c7be612 100644
--- a/drivers/gpu/drm/radeon/radeon_gart.c
+++ b/drivers/gpu/drm/radeon/radeon_gart.c
@@ -165,6 +165,19 @@ int radeon_gart_table_vram_pin(struct radeon_device *rdev)
 		radeon_bo_unpin(rdev->gart.robj);
 	radeon_bo_unreserve(rdev->gart.robj);
 	rdev->gart.table_addr = gpu_addr;
+
+	if (!r) {
+		int i;
+
+		/* We might have dropped some GART table updates while it wasn't
+		 * mapped, restore all entries
+		 */
+		for (i = 0; i < rdev->gart.num_gpu_pages; i++)
+			radeon_gart_set_page(rdev, i, rdev->gart.pages_entry[i]);
+		mb();
+		radeon_gart_tlb_flush(rdev);
+	}
+
 	return r;
 }
Am 22.01.2015 um 10:58 schrieb Michel Dänzer:
From: Michel Dänzer michel.daenzer@amd.com
The GART table BO has to be moved out of VRAM for suspend/resume. Any updates to the GART table during that time were silently dropped without this change. This caused GPU lockups on resume in some cases, see the bug reports referenced below.
This might also make GPU reset more robust in some cases, as we no longer rely on the GART table in VRAM being preserved across the GPU lockup/reset.
v2: Add logic to radeon_gart_table_vram_pin directly instead of reinstating
    radeon_gart_restore
v3: Move code after assignment of rdev->gart.table_addr so that the GART TLB
    flush can work as intended, add code comment explaining why we're doing this
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=85204 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=86267 Cc: stable@vger.kernel.org Signed-off-by: Michel Dänzer michel.daenzer@amd.com
This one and the two others in the series are Reviewed-by: Christian König christian.koenig@amd.com
On 22.01.2015 18:06, Christian König wrote:
Am 22.01.2015 um 08:30 schrieb Michel Dänzer:
From: Michel Dänzer michel.daenzer@amd.com
The GART table BO has to be moved out of VRAM for suspend/resume. Any updates to the GART table during that time were silently dropped without this change. This caused GPU lockups on resume in some cases, see the bug reports referenced below.
This might also make GPU reset more robust in some cases, as we no longer rely on the GART table in VRAM being preserved across the GPU lockup/reset.
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=85204 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=86267 Cc: stable@vger.kernel.org Signed-off-by: Michel Dänzer michel.daenzer@amd.com
v2: Add logic to radeon_gart_table_vram_pin directly instead of reinstating radeon_gart_restore function
drivers/gpu/drm/radeon/radeon_gart.c | 8 ++++++++ 1 file changed, 8 insertions(+)
diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c index a530932..0c8c739 100644 --- a/drivers/gpu/drm/radeon/radeon_gart.c +++ b/drivers/gpu/drm/radeon/radeon_gart.c @@ -163,6 +163,14 @@ int radeon_gart_table_vram_pin(struct radeon_device *rdev) r = radeon_bo_kmap(rdev->gart.robj, &rdev->gart.ptr); if (r) radeon_bo_unpin(rdev->gart.robj);
+	else {
I would add a comment why we do this here.
Added in v3.
+		int i;
+
+		for (i = 0; i < rdev->gart.num_gpu_pages; i++)
+			radeon_gart_set_page(rdev, i, rdev->gart.pages_entry[i]);
+		mb();
+		radeon_gart_tlb_flush(rdev);
That TLB flush won't work correctly because the table_addr isn't up to date yet.
Ugh, thanks for the catch.
+	}
 	radeon_bo_unreserve(rdev->gart.robj);
 	rdev->gart.table_addr = gpu_addr;
It's updated here instead. Maybe completely drop the local gpu_addr variable and update the table_addr directly instead.
I chose the less invasive fix in v3.
On 21.01.2015 18:22, Christian König wrote:
Am 21.01.2015 um 09:36 schrieb Michel Dänzer:
From: Michel Dänzer michel.daenzer@amd.com
The GART table BO has to be moved out of VRAM for suspend/resume. Any updates to the GART table during that time were silently dropped without this change. This caused GPU lockups on resume in some cases, see the bug reports referenced below.
This might also make GPU reset more robust in some cases, as we no longer rely on the GART table in VRAM being preserved across the GPU lockup/reset.
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=85204 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=86267 Cc: stable@vger.kernel.org Signed-off-by: Michel Dänzer michel.daenzer@amd.com
Can we make that directly part of radeon_gart_table_vram_pin? Doesn't seem to make sense to always need to call both functions.
Good point, fixed in v2.
In addition to that, couldn't we just stop mapping/unmapping the BO in radeon_gart_table_vram_pin? As far as I know, CPU-mapped BOs can still move around.
You're probably thinking of userspace mappings. I think the kernel mapping would continue pointing to the same area of VRAM even while the BO is evicted from VRAM or pinned back to another area of VRAM.
Am 22.01.2015 um 08:31 schrieb Michel Dänzer:
On 21.01.2015 18:22, Christian König wrote:
Am 21.01.2015 um 09:36 schrieb Michel Dänzer:
From: Michel Dänzer michel.daenzer@amd.com
The GART table BO has to be moved out of VRAM for suspend/resume. Any updates to the GART table during that time were silently dropped without this change. This caused GPU lockups on resume in some cases, see the bug reports referenced below.
This might also make GPU reset more robust in some cases, as we no longer rely on the GART table in VRAM being preserved across the GPU lockup/reset.
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=85204 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=86267 Cc: stable@vger.kernel.org Signed-off-by: Michel Dänzer michel.daenzer@amd.com
Can we make that directly part of radeon_gart_table_vram_pin? Doesn't seem to make sense to always need to call both functions.
Good point, fixed in v2.
In addition to that, couldn't we just stop mapping/unmapping the BO in radeon_gart_table_vram_pin? As far as I know, CPU-mapped BOs can still move around.
You're probably thinking of userspace mappings. I think the kernel mapping would continue pointing to the same area of VRAM even while the BO is evicted from VRAM or pinned back to another area of VRAM.
Oh really? I was always under the impression that we only need to wait for moves to complete and the kernel page tables would point to the new location after that automatically.
If that's not the case we might have a problem in the UVD code as well.
Regards, Christian.
On 22.01.2015 18:08, Christian König wrote:
Am 22.01.2015 um 08:31 schrieb Michel Dänzer:
On 21.01.2015 18:22, Christian König wrote:
Am 21.01.2015 um 09:36 schrieb Michel Dänzer:
From: Michel Dänzer michel.daenzer@amd.com
The GART table BO has to be moved out of VRAM for suspend/resume. Any updates to the GART table during that time were silently dropped without this change. This caused GPU lockups on resume in some cases, see the bug reports referenced below.
This might also make GPU reset more robust in some cases, as we no longer rely on the GART table in VRAM being preserved across the GPU lockup/reset.
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=85204 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=86267 Cc: stable@vger.kernel.org Signed-off-by: Michel Dänzer michel.daenzer@amd.com
Can we make that directly part of radeon_gart_table_vram_pin? Doesn't seem to make sense to always need to call both functions.
Good point, fixed in v2.
In addition to that, couldn't we just stop mapping/unmapping the BO in radeon_gart_table_vram_pin? As far as I know, CPU-mapped BOs can still move around.
You're probably thinking of userspace mappings. I think the kernel mapping would continue pointing to the same area of VRAM even while the BO is evicted from VRAM or pinned back to another area of VRAM.
Oh really? I was always under the impression that we only need to wait for moves to complete and the kernel page tables would point to the new location after that automatically.
If that's not the case we might have a problem in the UVD code as well.
AFAIK it's not the case. If you can't confirm it either way from looking at the TTM code, maybe you can hack up a test to verify it?
On 22.01.2015 18:28, Michel Dänzer wrote:
On 22.01.2015 18:08, Christian König wrote:
Am 22.01.2015 um 08:31 schrieb Michel Dänzer:
On 21.01.2015 18:22, Christian König wrote:
Am 21.01.2015 um 09:36 schrieb Michel Dänzer:
From: Michel Dänzer michel.daenzer@amd.com
The GART table BO has to be moved out of VRAM for suspend/resume. Any updates to the GART table during that time were silently dropped without this change. This caused GPU lockups on resume in some cases, see the bug reports referenced below.
This might also make GPU reset more robust in some cases, as we no longer rely on the GART table in VRAM being preserved across the GPU lockup/reset.
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=85204 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=86267 Cc: stable@vger.kernel.org Signed-off-by: Michel Dänzer michel.daenzer@amd.com
Can we make that directly part of radeon_gart_table_vram_pin? Doesn't seem to make sense to always need to call both functions.
Good point, fixed in v2.
In addition to that, couldn't we just stop mapping/unmapping the BO in radeon_gart_table_vram_pin? As far as I know, CPU-mapped BOs can still move around.
You're probably thinking of userspace mappings. I think the kernel mapping would continue pointing to the same area of VRAM even while the BO is evicted from VRAM or pinned back to another area of VRAM.
Oh really? I was always under the impression that we only need to wait for moves to complete and the kernel page tables would point to the new location after that automatically.
If that's not the case we might have a problem in the UVD code as well.
AFAIK it's not the case. If you can't confirm it either way from looking at the TTM code, maybe you can hack up a test to verify it?
Actually, I think I already tested this a couple of years ago, when I tried un-pinning the fbdev BO while it wasn't being scanned out. I could see fbcon scribbling over other BOs in VRAM whenever there was console output. :)
From: Michel Dänzer michel.daenzer@amd.com
radeon_vm_map_gart can use rdev->gart.pages_entry instead.
Also move the masking of the page address to radeon_vm_map_gart from its callers.
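Concretely, the resulting lookup in radeon_vm_map_gart (shown flattened in the hunk below, comments added) becomes:

uint64_t radeon_vm_map_gart(struct radeon_device *rdev, uint64_t addr)
{
	uint64_t result;

	/* page table offset: fetch the precomputed PTE and strip the hardware
	 * flag bits from the low GPU-page bits, so callers no longer have to
	 * mask the address themselves
	 */
	result = rdev->gart.pages_entry[addr >> RADEON_GPU_PAGE_SHIFT];
	result &= ~RADEON_GPU_PAGE_MASK;

	return result;
}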
Signed-off-by: Michel Dänzer michel.daenzer@amd.com
---
 drivers/gpu/drm/radeon/cik_sdma.c    |  1 -
 drivers/gpu/drm/radeon/ni_dma.c      |  1 -
 drivers/gpu/drm/radeon/radeon.h      |  1 -
 drivers/gpu/drm/radeon/radeon_gart.c | 12 ------------
 drivers/gpu/drm/radeon/radeon_vm.c   |  6 ++----
 drivers/gpu/drm/radeon/si_dma.c      |  1 -
 6 files changed, 2 insertions(+), 20 deletions(-)
diff --git a/drivers/gpu/drm/radeon/cik_sdma.c b/drivers/gpu/drm/radeon/cik_sdma.c index a0133c7..42cd0cf 100644 --- a/drivers/gpu/drm/radeon/cik_sdma.c +++ b/drivers/gpu/drm/radeon/cik_sdma.c @@ -816,7 +816,6 @@ void cik_sdma_vm_write_pages(struct radeon_device *rdev, for (; ndw > 0; ndw -= 2, --count, pe += 8) { if (flags & R600_PTE_SYSTEM) { value = radeon_vm_map_gart(rdev, addr); - value &= 0xFFFFFFFFFFFFF000ULL; } else if (flags & R600_PTE_VALID) { value = addr; } else { diff --git a/drivers/gpu/drm/radeon/ni_dma.c b/drivers/gpu/drm/radeon/ni_dma.c index 50f8861..4cb56af 100644 --- a/drivers/gpu/drm/radeon/ni_dma.c +++ b/drivers/gpu/drm/radeon/ni_dma.c @@ -372,7 +372,6 @@ void cayman_dma_vm_write_pages(struct radeon_device *rdev, for (; ndw > 0; ndw -= 2, --count, pe += 8) { if (flags & R600_PTE_SYSTEM) { value = radeon_vm_map_gart(rdev, addr); - value &= 0xFFFFFFFFFFFFF000ULL; } else if (flags & R600_PTE_VALID) { value = addr; } else { diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 6e9e5ef..b766be7 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -646,7 +646,6 @@ struct radeon_gart { unsigned num_cpu_pages; unsigned table_size; struct page **pages; - dma_addr_t *pages_addr; uint64_t *pages_entry; bool ready; }; diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c index 556270d..c448d1a 100644 --- a/drivers/gpu/drm/radeon/radeon_gart.c +++ b/drivers/gpu/drm/radeon/radeon_gart.c @@ -238,7 +238,6 @@ void radeon_gart_unbind(struct radeon_device *rdev, unsigned offset, for (i = 0; i < pages; i++, p++) { if (rdev->gart.pages[p]) { rdev->gart.pages[p] = NULL; - rdev->gart.pages_addr[p] = rdev->dummy_page.addr; for (j = 0; j < (PAGE_SIZE / RADEON_GPU_PAGE_SIZE); j++, t++) { rdev->gart.pages_entry[t] = rdev->dummy_page.entry; if (rdev->gart.ptr) { @@ -283,7 +282,6 @@ int radeon_gart_bind(struct radeon_device *rdev, unsigned offset, p = t / (PAGE_SIZE / RADEON_GPU_PAGE_SIZE);
for (i = 0; i < pages; i++, p++) { - rdev->gart.pages_addr[p] = dma_addr[i]; rdev->gart.pages[p] = pagelist[i]; page_base = dma_addr[i]; for (j = 0; j < (PAGE_SIZE / RADEON_GPU_PAGE_SIZE); j++, t++) { @@ -356,12 +354,6 @@ int radeon_gart_init(struct radeon_device *rdev) radeon_gart_fini(rdev); return -ENOMEM; } - rdev->gart.pages_addr = vzalloc(sizeof(dma_addr_t) * - rdev->gart.num_cpu_pages); - if (rdev->gart.pages_addr == NULL) { - radeon_gart_fini(rdev); - return -ENOMEM; - } rdev->gart.pages_entry = vmalloc(sizeof(uint64_t) * rdev->gart.num_gpu_pages); if (rdev->gart.pages_entry == NULL) { @@ -369,8 +361,6 @@ int radeon_gart_init(struct radeon_device *rdev) return -ENOMEM; } /* set GART entry to point to the dummy page by default */ - for (i = 0; i < rdev->gart.num_cpu_pages; i++) - rdev->gart.pages_addr[i] = rdev->dummy_page.addr; for (i = 0; i < rdev->gart.num_gpu_pages; i++) rdev->gart.pages_entry[i] = rdev->dummy_page.entry; return 0; @@ -391,10 +381,8 @@ void radeon_gart_fini(struct radeon_device *rdev) } rdev->gart.ready = false; vfree(rdev->gart.pages); - vfree(rdev->gart.pages_addr); vfree(rdev->gart.pages_entry); rdev->gart.pages = NULL; - rdev->gart.pages_addr = NULL; rdev->gart.pages_entry = NULL;
radeon_dummy_page_fini(rdev); diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c index cde48c4..06d2246 100644 --- a/drivers/gpu/drm/radeon/radeon_vm.c +++ b/drivers/gpu/drm/radeon/radeon_vm.c @@ -587,10 +587,8 @@ uint64_t radeon_vm_map_gart(struct radeon_device *rdev, uint64_t addr) uint64_t result;
/* page table offset */ - result = rdev->gart.pages_addr[addr >> PAGE_SHIFT]; - - /* in case cpu page size != gpu page size*/ - result |= addr & (~PAGE_MASK); + result = rdev->gart.pages_entry[addr >> RADEON_GPU_PAGE_SHIFT]; + result &= ~RADEON_GPU_PAGE_MASK;
return result; } diff --git a/drivers/gpu/drm/radeon/si_dma.c b/drivers/gpu/drm/radeon/si_dma.c index aa7b872..8320792 100644 --- a/drivers/gpu/drm/radeon/si_dma.c +++ b/drivers/gpu/drm/radeon/si_dma.c @@ -123,7 +123,6 @@ void si_dma_vm_write_pages(struct radeon_device *rdev, for (; ndw > 0; ndw -= 2, --count, pe += 8) { if (flags & R600_PTE_SYSTEM) { value = radeon_vm_map_gart(rdev, addr); - value &= 0xFFFFFFFFFFFFF000ULL; } else if (flags & R600_PTE_VALID) { value = addr; } else {
On Wed, Jan 21, 2015 at 3:36 AM, Michel Dänzer michel@daenzer.net wrote:
From: Michel Dänzer michel.daenzer@amd.com
get_page_entry calculates the GART page table entry, which is just written to the GART page table by set_page_entry.
This is a prerequisite for the following fix.
Cc: stable@vger.kernel.org Signed-off-by: Michel Dänzer michel.daenzer@amd.com
Applied the updated series to my -fixes tree. Thanks!
Alex
drivers/gpu/drm/radeon/r100.c | 10 +++++++-- drivers/gpu/drm/radeon/r300.c | 16 ++++++++++----- drivers/gpu/drm/radeon/radeon.h | 8 ++++++-- drivers/gpu/drm/radeon/radeon_asic.c | 24 ++++++++++++++++++++++ drivers/gpu/drm/radeon/radeon_asic.h | 12 +++++++---- drivers/gpu/drm/radeon/radeon_device.c | 2 ++ drivers/gpu/drm/radeon/radeon_gart.c | 37 +++++++++++++++++++++------------- drivers/gpu/drm/radeon/rs400.c | 14 ++++++++----- drivers/gpu/drm/radeon/rs600.c | 14 ++++++++----- 9 files changed, 100 insertions(+), 37 deletions(-)
diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c index 74f06d5..279801c 100644 --- a/drivers/gpu/drm/radeon/r100.c +++ b/drivers/gpu/drm/radeon/r100.c @@ -644,6 +644,7 @@ int r100_pci_gart_init(struct radeon_device *rdev) return r; rdev->gart.table_size = rdev->gart.num_gpu_pages * 4; rdev->asic->gart.tlb_flush = &r100_pci_gart_tlb_flush;
rdev->asic->gart.get_page_entry = &r100_pci_gart_get_page_entry; rdev->asic->gart.set_page = &r100_pci_gart_set_page; return radeon_gart_table_ram_alloc(rdev);
} @@ -681,11 +682,16 @@ void r100_pci_gart_disable(struct radeon_device *rdev) WREG32(RADEON_AIC_HI_ADDR, 0); }
+uint64_t r100_pci_gart_get_page_entry(uint64_t addr, uint32_t flags) +{
return addr;
+}
void r100_pci_gart_set_page(struct radeon_device *rdev, unsigned i,
uint64_t addr, uint32_t flags)
uint64_t entry)
{ u32 *gtt = rdev->gart.ptr;
gtt[i] = cpu_to_le32(lower_32_bits(addr));
gtt[i] = cpu_to_le32(lower_32_bits(entry));
}
void r100_pci_gart_fini(struct radeon_device *rdev) diff --git a/drivers/gpu/drm/radeon/r300.c b/drivers/gpu/drm/radeon/r300.c index 064ad55..08d68f3 100644 --- a/drivers/gpu/drm/radeon/r300.c +++ b/drivers/gpu/drm/radeon/r300.c @@ -73,11 +73,8 @@ void rv370_pcie_gart_tlb_flush(struct radeon_device *rdev) #define R300_PTE_WRITEABLE (1 << 2) #define R300_PTE_READABLE (1 << 3)
-void rv370_pcie_gart_set_page(struct radeon_device *rdev, unsigned i,
uint64_t addr, uint32_t flags)
+uint64_t rv370_pcie_gart_get_page_entry(uint64_t addr, uint32_t flags) {
void __iomem *ptr = rdev->gart.ptr;
addr = (lower_32_bits(addr) >> 8) | ((upper_32_bits(addr) & 0xff) << 24); if (flags & RADEON_GART_PAGE_READ)
@@ -86,10 +83,18 @@ void rv370_pcie_gart_set_page(struct radeon_device *rdev, unsigned i, addr |= R300_PTE_WRITEABLE; if (!(flags & RADEON_GART_PAGE_SNOOP)) addr |= R300_PTE_UNSNOOPED;
return addr;
+}
+void rv370_pcie_gart_set_page(struct radeon_device *rdev, unsigned i,
uint64_t entry)
+{
void __iomem *ptr = rdev->gart.ptr;
/* on x86 we want this to be CPU endian, on powerpc * on powerpc without HW swappers, it'll get swapped on way * into VRAM - so no need for cpu_to_le32 on VRAM tables */
writel(addr, ((void __iomem *)ptr) + (i * 4));
writel(entry, ((void __iomem *)ptr) + (i * 4));
}
int rv370_pcie_gart_init(struct radeon_device *rdev) @@ -109,6 +114,7 @@ int rv370_pcie_gart_init(struct radeon_device *rdev) DRM_ERROR("Failed to register debugfs file for PCIE gart !\n"); rdev->gart.table_size = rdev->gart.num_gpu_pages * 4; rdev->asic->gart.tlb_flush = &rv370_pcie_gart_tlb_flush;
rdev->asic->gart.get_page_entry = &rv370_pcie_gart_get_page_entry; rdev->asic->gart.set_page = &rv370_pcie_gart_set_page; return radeon_gart_table_vram_alloc(rdev);
} diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 54529b8..40c4c7a 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -242,6 +242,7 @@ bool radeon_get_bios(struct radeon_device *rdev);
- Dummy page
*/ struct radeon_dummy_page {
uint64_t entry; struct page *page; dma_addr_t addr;
}; @@ -646,6 +647,7 @@ struct radeon_gart { unsigned table_size; struct page **pages; dma_addr_t *pages_addr;
uint64_t *pages_entry; bool ready;
};
@@ -1847,8 +1849,9 @@ struct radeon_asic { /* gart */ struct { void (*tlb_flush)(struct radeon_device *rdev);
uint64_t (*get_page_entry)(uint64_t addr, uint32_t flags); void (*set_page)(struct radeon_device *rdev, unsigned i,
uint64_t addr, uint32_t flags);
uint64_t entry); } gart; struct { int (*init)(struct radeon_device *rdev);
@@ -2852,7 +2855,8 @@ static inline void radeon_ring_write(struct radeon_ring *ring, uint32_t v)
 #define radeon_vga_set_state(rdev, state) (rdev)->asic->vga_set_state((rdev), (state))
 #define radeon_asic_reset(rdev) (rdev)->asic->asic_reset((rdev))
 #define radeon_gart_tlb_flush(rdev) (rdev)->asic->gart.tlb_flush((rdev))
-#define radeon_gart_set_page(rdev, i, p, f) (rdev)->asic->gart.set_page((rdev), (i), (p), (f))
+#define radeon_gart_get_page_entry(a, f) (rdev)->asic->gart.get_page_entry((a), (f))
+#define radeon_gart_set_page(rdev, i, e) (rdev)->asic->gart.set_page((rdev), (i), (e))
 #define radeon_asic_vm_init(rdev) (rdev)->asic->vm.init((rdev))
 #define radeon_asic_vm_fini(rdev) (rdev)->asic->vm.fini((rdev))
 #define radeon_asic_vm_copy_pages(rdev, ib, pe, src, count) ((rdev)->asic->vm.copy_pages((rdev), (ib), (pe), (src), (count)))
diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c
index 850de57..2c35b5e 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.c
+++ b/drivers/gpu/drm/radeon/radeon_asic.c
@@ -159,11 +159,13 @@ void radeon_agp_disable(struct radeon_device *rdev)
 		DRM_INFO("Forcing AGP to PCIE mode\n");
 		rdev->flags |= RADEON_IS_PCIE;
 		rdev->asic->gart.tlb_flush = &rv370_pcie_gart_tlb_flush;
+		rdev->asic->gart.get_page_entry = &rv370_pcie_gart_get_page_entry;
 		rdev->asic->gart.set_page = &rv370_pcie_gart_set_page;
 	} else {
 		DRM_INFO("Forcing AGP to PCI mode\n");
 		rdev->flags |= RADEON_IS_PCI;
 		rdev->asic->gart.tlb_flush = &r100_pci_gart_tlb_flush;
+		rdev->asic->gart.get_page_entry = &r100_pci_gart_get_page_entry;
 		rdev->asic->gart.set_page = &r100_pci_gart_set_page;
 	}
 	rdev->mc.gtt_size = radeon_gart_size * 1024 * 1024;
@@ -199,6 +201,7 @@ static struct radeon_asic r100_asic = {
 	.mc_wait_for_idle = &r100_mc_wait_for_idle,
 	.gart = {
 		.tlb_flush = &r100_pci_gart_tlb_flush,
+		.get_page_entry = &r100_pci_gart_get_page_entry,
 		.set_page = &r100_pci_gart_set_page,
 	},
 	.ring = {
@@ -265,6 +268,7 @@ static struct radeon_asic r200_asic = {
 	.mc_wait_for_idle = &r100_mc_wait_for_idle,
 	.gart = {
 		.tlb_flush = &r100_pci_gart_tlb_flush,
+		.get_page_entry = &r100_pci_gart_get_page_entry,
 		.set_page = &r100_pci_gart_set_page,
 	},
 	.ring = {
@@ -345,6 +349,7 @@ static struct radeon_asic r300_asic = {
 	.mc_wait_for_idle = &r300_mc_wait_for_idle,
 	.gart = {
 		.tlb_flush = &r100_pci_gart_tlb_flush,
+		.get_page_entry = &r100_pci_gart_get_page_entry,
 		.set_page = &r100_pci_gart_set_page,
 	},
 	.ring = {
@@ -411,6 +416,7 @@ static struct radeon_asic r300_asic_pcie = {
 	.mc_wait_for_idle = &r300_mc_wait_for_idle,
 	.gart = {
 		.tlb_flush = &rv370_pcie_gart_tlb_flush,
+		.get_page_entry = &rv370_pcie_gart_get_page_entry,
 		.set_page = &rv370_pcie_gart_set_page,
 	},
 	.ring = {
@@ -477,6 +483,7 @@ static struct radeon_asic r420_asic = {
 	.mc_wait_for_idle = &r300_mc_wait_for_idle,
 	.gart = {
 		.tlb_flush = &rv370_pcie_gart_tlb_flush,
+		.get_page_entry = &rv370_pcie_gart_get_page_entry,
 		.set_page = &rv370_pcie_gart_set_page,
 	},
 	.ring = {
@@ -543,6 +550,7 @@ static struct radeon_asic rs400_asic = {
 	.mc_wait_for_idle = &rs400_mc_wait_for_idle,
 	.gart = {
 		.tlb_flush = &rs400_gart_tlb_flush,
+		.get_page_entry = &rs400_gart_get_page_entry,
 		.set_page = &rs400_gart_set_page,
 	},
 	.ring = {
@@ -609,6 +617,7 @@ static struct radeon_asic rs600_asic = {
 	.mc_wait_for_idle = &rs600_mc_wait_for_idle,
 	.gart = {
 		.tlb_flush = &rs600_gart_tlb_flush,
+		.get_page_entry = &rs600_gart_get_page_entry,
 		.set_page = &rs600_gart_set_page,
 	},
 	.ring = {
@@ -677,6 +686,7 @@ static struct radeon_asic rs690_asic = {
 	.mc_wait_for_idle = &rs690_mc_wait_for_idle,
 	.gart = {
 		.tlb_flush = &rs400_gart_tlb_flush,
+		.get_page_entry = &rs400_gart_get_page_entry,
 		.set_page = &rs400_gart_set_page,
 	},
 	.ring = {
@@ -745,6 +755,7 @@ static struct radeon_asic rv515_asic = {
 	.mc_wait_for_idle = &rv515_mc_wait_for_idle,
 	.gart = {
 		.tlb_flush = &rv370_pcie_gart_tlb_flush,
+		.get_page_entry = &rv370_pcie_gart_get_page_entry,
 		.set_page = &rv370_pcie_gart_set_page,
 	},
 	.ring = {
@@ -811,6 +822,7 @@ static struct radeon_asic r520_asic = {
 	.mc_wait_for_idle = &r520_mc_wait_for_idle,
 	.gart = {
 		.tlb_flush = &rv370_pcie_gart_tlb_flush,
+		.get_page_entry = &rv370_pcie_gart_get_page_entry,
 		.set_page = &rv370_pcie_gart_set_page,
 	},
 	.ring = {
@@ -905,6 +917,7 @@ static struct radeon_asic r600_asic = {
 	.get_gpu_clock_counter = &r600_get_gpu_clock_counter,
 	.gart = {
 		.tlb_flush = &r600_pcie_gart_tlb_flush,
+		.get_page_entry = &rs600_gart_get_page_entry,
 		.set_page = &rs600_gart_set_page,
 	},
 	.ring = {
@@ -990,6 +1003,7 @@ static struct radeon_asic rv6xx_asic = {
 	.get_gpu_clock_counter = &r600_get_gpu_clock_counter,
 	.gart = {
 		.tlb_flush = &r600_pcie_gart_tlb_flush,
+		.get_page_entry = &rs600_gart_get_page_entry,
 		.set_page = &rs600_gart_set_page,
 	},
 	.ring = {
@@ -1081,6 +1095,7 @@ static struct radeon_asic rs780_asic = {
 	.get_gpu_clock_counter = &r600_get_gpu_clock_counter,
 	.gart = {
 		.tlb_flush = &r600_pcie_gart_tlb_flush,
+		.get_page_entry = &rs600_gart_get_page_entry,
 		.set_page = &rs600_gart_set_page,
 	},
 	.ring = {
@@ -1185,6 +1200,7 @@ static struct radeon_asic rv770_asic = {
 	.get_gpu_clock_counter = &r600_get_gpu_clock_counter,
 	.gart = {
 		.tlb_flush = &r600_pcie_gart_tlb_flush,
+		.get_page_entry = &rs600_gart_get_page_entry,
 		.set_page = &rs600_gart_set_page,
 	},
 	.ring = {
@@ -1303,6 +1319,7 @@ static struct radeon_asic evergreen_asic = {
 	.get_gpu_clock_counter = &r600_get_gpu_clock_counter,
 	.gart = {
 		.tlb_flush = &evergreen_pcie_gart_tlb_flush,
+		.get_page_entry = &rs600_gart_get_page_entry,
 		.set_page = &rs600_gart_set_page,
 	},
 	.ring = {
@@ -1395,6 +1412,7 @@ static struct radeon_asic sumo_asic = {
 	.get_gpu_clock_counter = &r600_get_gpu_clock_counter,
 	.gart = {
 		.tlb_flush = &evergreen_pcie_gart_tlb_flush,
+		.get_page_entry = &rs600_gart_get_page_entry,
 		.set_page = &rs600_gart_set_page,
 	},
 	.ring = {
@@ -1486,6 +1504,7 @@ static struct radeon_asic btc_asic = {
 	.get_gpu_clock_counter = &r600_get_gpu_clock_counter,
 	.gart = {
 		.tlb_flush = &evergreen_pcie_gart_tlb_flush,
+		.get_page_entry = &rs600_gart_get_page_entry,
 		.set_page = &rs600_gart_set_page,
 	},
 	.ring = {
@@ -1621,6 +1640,7 @@ static struct radeon_asic cayman_asic = {
 	.get_gpu_clock_counter = &r600_get_gpu_clock_counter,
 	.gart = {
 		.tlb_flush = &cayman_pcie_gart_tlb_flush,
+		.get_page_entry = &rs600_gart_get_page_entry,
 		.set_page = &rs600_gart_set_page,
 	},
 	.vm = {
@@ -1724,6 +1744,7 @@ static struct radeon_asic trinity_asic = {
 	.get_gpu_clock_counter = &r600_get_gpu_clock_counter,
 	.gart = {
 		.tlb_flush = &cayman_pcie_gart_tlb_flush,
+		.get_page_entry = &rs600_gart_get_page_entry,
 		.set_page = &rs600_gart_set_page,
 	},
 	.vm = {
@@ -1857,6 +1878,7 @@ static struct radeon_asic si_asic = {
 	.get_gpu_clock_counter = &si_get_gpu_clock_counter,
 	.gart = {
 		.tlb_flush = &si_pcie_gart_tlb_flush,
+		.get_page_entry = &rs600_gart_get_page_entry,
 		.set_page = &rs600_gart_set_page,
 	},
 	.vm = {
@@ -2018,6 +2040,7 @@ static struct radeon_asic ci_asic = {
 	.get_gpu_clock_counter = &cik_get_gpu_clock_counter,
 	.gart = {
 		.tlb_flush = &cik_pcie_gart_tlb_flush,
+		.get_page_entry = &rs600_gart_get_page_entry,
 		.set_page = &rs600_gart_set_page,
 	},
 	.vm = {
@@ -2125,6 +2148,7 @@ static struct radeon_asic kv_asic = {
 	.get_gpu_clock_counter = &cik_get_gpu_clock_counter,
 	.gart = {
 		.tlb_flush = &cik_pcie_gart_tlb_flush,
+		.get_page_entry = &rs600_gart_get_page_entry,
 		.set_page = &rs600_gart_set_page,
 	},
 	.vm = {
diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h
index 2a45d54..8d787d1 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.h
+++ b/drivers/gpu/drm/radeon/radeon_asic.h
@@ -67,8 +67,9 @@ bool r100_gpu_is_lockup(struct radeon_device *rdev, struct radeon_ring *cp);
 int r100_asic_reset(struct radeon_device *rdev);
 u32 r100_get_vblank_counter(struct radeon_device *rdev, int crtc);
 void r100_pci_gart_tlb_flush(struct radeon_device *rdev);
+uint64_t r100_pci_gart_get_page_entry(uint64_t addr, uint32_t flags);
 void r100_pci_gart_set_page(struct radeon_device *rdev, unsigned i,
-			    uint64_t addr, uint32_t flags);
+			    uint64_t entry);
 void r100_ring_start(struct radeon_device *rdev, struct radeon_ring *ring);
 int r100_irq_set(struct radeon_device *rdev);
 int r100_irq_process(struct radeon_device *rdev);
@@ -172,8 +173,9 @@ extern void r300_fence_ring_emit(struct radeon_device *rdev,
 				struct radeon_fence *fence);
 extern int r300_cs_parse(struct radeon_cs_parser *p);
 extern void rv370_pcie_gart_tlb_flush(struct radeon_device *rdev);
+extern uint64_t rv370_pcie_gart_get_page_entry(uint64_t addr, uint32_t flags);
 extern void rv370_pcie_gart_set_page(struct radeon_device *rdev, unsigned i,
-				     uint64_t addr, uint32_t flags);
+				     uint64_t entry);
 extern void rv370_set_pcie_lanes(struct radeon_device *rdev, int lanes);
 extern int rv370_get_pcie_lanes(struct radeon_device *rdev);
 extern void r300_set_reg_safe(struct radeon_device *rdev);
@@ -208,8 +210,9 @@ extern void rs400_fini(struct radeon_device *rdev);
 extern int rs400_suspend(struct radeon_device *rdev);
 extern int rs400_resume(struct radeon_device *rdev);
 void rs400_gart_tlb_flush(struct radeon_device *rdev);
+uint64_t rs400_gart_get_page_entry(uint64_t addr, uint32_t flags);
 void rs400_gart_set_page(struct radeon_device *rdev, unsigned i,
-			 uint64_t addr, uint32_t flags);
+			 uint64_t entry);
 uint32_t rs400_mc_rreg(struct radeon_device *rdev, uint32_t reg);
 void rs400_mc_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v);
 int rs400_gart_init(struct radeon_device *rdev);
@@ -232,8 +235,9 @@ int rs600_irq_process(struct radeon_device *rdev);
 void rs600_irq_disable(struct radeon_device *rdev);
 u32 rs600_get_vblank_counter(struct radeon_device *rdev, int crtc);
 void rs600_gart_tlb_flush(struct radeon_device *rdev);
+uint64_t rs600_gart_get_page_entry(uint64_t addr, uint32_t flags);
 void rs600_gart_set_page(struct radeon_device *rdev, unsigned i,
-			 uint64_t addr, uint32_t flags);
+			 uint64_t entry);
 uint32_t rs600_mc_rreg(struct radeon_device *rdev, uint32_t reg);
 void rs600_mc_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v);
 void rs600_bandwidth_update(struct radeon_device *rdev);
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
index 0ec6516..bd7519f 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -774,6 +774,8 @@ int radeon_dummy_page_init(struct radeon_device *rdev)
 		rdev->dummy_page.page = NULL;
 		return -ENOMEM;
 	}
+	rdev->dummy_page.entry = radeon_gart_get_page_entry(rdev->dummy_page.addr,
+							    RADEON_GART_PAGE_DUMMY);
 	return 0;
 }
 
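With the radeon_gart_get_page_entry() macro added in radeon.h above, the new assignment is essentially a direct call into the per-ASIC hook, so the dummy page's PTE is encoded exactly once at init time. Roughly (an illustration, assuming only the macro definition from the radeon.h hunk):

	/* Rough expansion of the new line above via the radeon.h macro;
	 * rdev is the device being initialized. */
	rdev->dummy_page.entry =
		rdev->asic->gart.get_page_entry(rdev->dummy_page.addr,
						RADEON_GART_PAGE_DUMMY);
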
diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c
index 84146d5..a530932 100644
--- a/drivers/gpu/drm/radeon/radeon_gart.c
+++ b/drivers/gpu/drm/radeon/radeon_gart.c
@@ -228,7 +228,6 @@ void radeon_gart_unbind(struct radeon_device *rdev, unsigned offset,
 	unsigned t;
 	unsigned p;
 	int i, j;
-	u64 page_base;
 
 	if (!rdev->gart.ready) {
 		WARN(1, "trying to unbind memory from uninitialized GART !\n");
@@ -240,13 +239,12 @@ void radeon_gart_unbind(struct radeon_device *rdev, unsigned offset,
 		if (rdev->gart.pages[p]) {
 			rdev->gart.pages[p] = NULL;
 			rdev->gart.pages_addr[p] = rdev->dummy_page.addr;
-			page_base = rdev->gart.pages_addr[p];
 			for (j = 0; j < (PAGE_SIZE / RADEON_GPU_PAGE_SIZE); j++, t++) {
+				rdev->gart.pages_entry[t] = rdev->dummy_page.entry;
 				if (rdev->gart.ptr) {
-					radeon_gart_set_page(rdev, t, page_base,
-							     RADEON_GART_PAGE_DUMMY);
+					radeon_gart_set_page(rdev, t,
+							     rdev->dummy_page.entry);
 				}
-				page_base += RADEON_GPU_PAGE_SIZE;
 			}
 		}
 	}
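
For readability, the unbind path after this hunk boils down to the following (a sketch reconstructed from the hunk, not a verbatim copy of radeon_gart.c): every GART slot of the unbound range is pointed back at the dummy page, and the cached pages_entry[] slot is refreshed even when the table is not currently mapped.

	/* Sketch of radeon_gart_unbind()'s inner loop after this patch. */
	for (j = 0; j < (PAGE_SIZE / RADEON_GPU_PAGE_SIZE); j++, t++) {
		rdev->gart.pages_entry[t] = rdev->dummy_page.entry;
		if (rdev->gart.ptr)
			radeon_gart_set_page(rdev, t, rdev->dummy_page.entry);
	}
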
@@ -274,7 +272,7 @@ int radeon_gart_bind(struct radeon_device *rdev, unsigned offset,
 {
 	unsigned t;
 	unsigned p;
-	uint64_t page_base;
+	uint64_t page_base, page_entry;
 	int i, j;
 
 	if (!rdev->gart.ready) {
@@ -287,12 +285,14 @@ int radeon_gart_bind(struct radeon_device *rdev, unsigned offset,
 	for (i = 0; i < pages; i++, p++) {
 		rdev->gart.pages_addr[p] = dma_addr[i];
 		rdev->gart.pages[p] = pagelist[i];
-		if (rdev->gart.ptr) {
-			page_base = rdev->gart.pages_addr[p];
-			for (j = 0; j < (PAGE_SIZE / RADEON_GPU_PAGE_SIZE); j++, t++) {
-				radeon_gart_set_page(rdev, t, page_base, flags);
-				page_base += RADEON_GPU_PAGE_SIZE;
+		page_base = dma_addr[i];
+		for (j = 0; j < (PAGE_SIZE / RADEON_GPU_PAGE_SIZE); j++, t++) {
+			page_entry = radeon_gart_get_page_entry(page_base, flags);
+			rdev->gart.pages_entry[t] = page_entry;
+			if (rdev->gart.ptr) {
+				radeon_gart_set_page(rdev, t, page_entry);
 			}
+			page_base += RADEON_GPU_PAGE_SIZE;
 		}
 	}
 	mb();
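
The bind side is the core of the change; after this hunk the per-page loop reads roughly as follows (a sketch derived from the hunk above, declarations omitted): the entry is computed once per GPU page, always cached in pages_entry[], and only pushed to the hardware table when gart.ptr is mapped.

	/* Sketch of radeon_gart_bind()'s per-page loop after this patch. */
	page_base = dma_addr[i];
	for (j = 0; j < (PAGE_SIZE / RADEON_GPU_PAGE_SIZE); j++, t++) {
		page_entry = radeon_gart_get_page_entry(page_base, flags);
		rdev->gart.pages_entry[t] = page_entry;
		if (rdev->gart.ptr)
			radeon_gart_set_page(rdev, t, page_entry);
		page_base += RADEON_GPU_PAGE_SIZE;
	}
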
@@ -340,10 +340,17 @@ int radeon_gart_init(struct radeon_device *rdev)
 		radeon_gart_fini(rdev);
 		return -ENOMEM;
 	}
+	rdev->gart.pages_entry = vmalloc(sizeof(uint64_t) *
+					 rdev->gart.num_gpu_pages);
+	if (rdev->gart.pages_entry == NULL) {
+		radeon_gart_fini(rdev);
+		return -ENOMEM;
+	}
 	/* set GART entry to point to the dummy page by default */
-	for (i = 0; i < rdev->gart.num_cpu_pages; i++) {
+	for (i = 0; i < rdev->gart.num_cpu_pages; i++)
 		rdev->gart.pages_addr[i] = rdev->dummy_page.addr;
-	}
+	for (i = 0; i < rdev->gart.num_gpu_pages; i++)
+		rdev->gart.pages_entry[i] = rdev->dummy_page.entry;
 	return 0;
 }
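
Note the sizing difference here: pages and pages_addr are indexed per CPU page, while the new pages_entry array is indexed per GPU page. RADEON_GPU_PAGE_SIZE is 4096 in this driver, so on a 4 KiB-page kernel the two lengths coincide, whereas e.g. a 64 KiB-page kernel gets 16 GART entries per CPU page. A tiny illustration of the index relation the loops above rely on (not from the patch):

	/* Illustration only: CPU page p covers GART slots t .. t + n - 1. */
	unsigned n = PAGE_SIZE / RADEON_GPU_PAGE_SIZE;	/* GART entries per CPU page */
	unsigned t = p * n;				/* first GART slot of CPU page p */
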
@@ -356,15 +363,17 @@ int radeon_gart_init(struct radeon_device *rdev)
  */
 void radeon_gart_fini(struct radeon_device *rdev)
 {
-	if (rdev->gart.pages && rdev->gart.pages_addr && rdev->gart.ready) {
+	if (rdev->gart.ready) {
 		/* unbind pages */
 		radeon_gart_unbind(rdev, 0, rdev->gart.num_cpu_pages);
 	}
 	rdev->gart.ready = false;
 	vfree(rdev->gart.pages);
 	vfree(rdev->gart.pages_addr);
+	vfree(rdev->gart.pages_entry);
 	rdev->gart.pages = NULL;
 	rdev->gart.pages_addr = NULL;
+	rdev->gart.pages_entry = NULL;
 
 	radeon_dummy_page_fini(rdev);
 }
diff --git a/drivers/gpu/drm/radeon/rs400.c b/drivers/gpu/drm/radeon/rs400.c
index c5799f16..34e3235 100644
--- a/drivers/gpu/drm/radeon/rs400.c
+++ b/drivers/gpu/drm/radeon/rs400.c
@@ -212,11 +212,9 @@ void rs400_gart_fini(struct radeon_device *rdev)
 #define RS400_PTE_WRITEABLE (1 << 2)
 #define RS400_PTE_READABLE  (1 << 3)
 
-void rs400_gart_set_page(struct radeon_device *rdev, unsigned i,
-			 uint64_t addr, uint32_t flags)
+uint64_t rs400_gart_get_page_entry(uint64_t addr, uint32_t flags)
 {
 	uint32_t entry;
-	u32 *gtt = rdev->gart.ptr;
 
 	entry = (lower_32_bits(addr) & PAGE_MASK) |
 		((upper_32_bits(addr) & 0xff) << 4);
@@ -226,8 +224,14 @@ void rs400_gart_set_page(struct radeon_device *rdev, unsigned i,
 		entry |= RS400_PTE_WRITEABLE;
 	if (!(flags & RADEON_GART_PAGE_SNOOP))
 		entry |= RS400_PTE_UNSNOOPED;
-	entry = cpu_to_le32(entry);
-	gtt[i] = entry;
+	return entry;
+}
+
+void rs400_gart_set_page(struct radeon_device *rdev, unsigned i,
+			 uint64_t entry)
+{
+	u32 *gtt = rdev->gart.ptr;
+	gtt[i] = cpu_to_le32(lower_32_bits(entry));
 }
 
 int rs400_mc_wait_for_idle(struct radeon_device *rdev)
diff --git a/drivers/gpu/drm/radeon/rs600.c b/drivers/gpu/drm/radeon/rs600.c
index 9acb1c3..74bce91 100644
--- a/drivers/gpu/drm/radeon/rs600.c
+++ b/drivers/gpu/drm/radeon/rs600.c
@@ -625,11 +625,8 @@ static void rs600_gart_fini(struct radeon_device *rdev)
 	radeon_gart_table_vram_free(rdev);
 }
 
-void rs600_gart_set_page(struct radeon_device *rdev, unsigned i,
-			 uint64_t addr, uint32_t flags)
+uint64_t rs600_gart_get_page_entry(uint64_t addr, uint32_t flags)
 {
-	void __iomem *ptr = (void *)rdev->gart.ptr;
-
 	addr = addr & 0xFFFFFFFFFFFFF000ULL;
 	addr |= R600_PTE_SYSTEM;
 	if (flags & RADEON_GART_PAGE_VALID)
@@ -640,7 +637,14 @@ void rs600_gart_set_page(struct radeon_device *rdev, unsigned i,
 		addr |= R600_PTE_WRITEABLE;
 	if (flags & RADEON_GART_PAGE_SNOOP)
 		addr |= R600_PTE_SNOOPED;
-	writeq(addr, ptr + (i * 8));
+	return addr;
+}
+
+void rs600_gart_set_page(struct radeon_device *rdev, unsigned i,
+			 uint64_t entry)
+{
+	void __iomem *ptr = (void *)rdev->gart.ptr;
+	writeq(entry, ptr + (i * 8));
 }
 
 int rs600_irq_set(struct radeon_device *rdev)
--
2.1.4

dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel