Include request for reset-rework branch v3

List overview All Threads
Download

newer

older

[Bug 30383] New: r600c: GPU lockup...

[PATCH] drm/radeon: add connector...

j.glisse＠gmail.com

2 May 2012 2 May '12

4 a.m.

Ok so i reread stuff and the : drm/radeon: add general purpose fence signaled callback is a big NAK actually. It change the paradigm. Moving most of the handling into the irq process which is something i am intimatly convinced we should avoid.

Here is the patchset up to ib pool cleanup. I have yet rebase the other patches as i am tracking done some issue in the sa allocation.

Cheers, Jerome

Show replies by date

j.glisse＠gmail.com

2 May 2 May

4 a.m.

New subject: [PATCH 01/13] drm/radeon: make radeon_gpu_is_lockup a per ring function

From: Christian König deathsimple@vodafone.de

Different rings have different criteria to test if they are stuck.

v2: rebased on current drm-next

Signed-off-by: Christian König deathsimple@vodafone.de Reviewed-by: Alex Deucher alexander.deucher@amd.com Reviewed-by: Jerome Glisse jglisse@redhat.com --- drivers/gpu/drm/radeon/radeon.h | 4 +- drivers/gpu/drm/radeon/radeon_asic.c | 44 ++++++++++++++++++-------------- drivers/gpu/drm/radeon/radeon_fence.c | 2 +- 3 files changed, 28 insertions(+), 22 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 138b952..bea99e3 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -1144,7 +1144,6 @@ struct radeon_asic { int (*resume)(struct radeon_device *rdev); int (*suspend)(struct radeon_device *rdev); void (*vga_set_state)(struct radeon_device *rdev, bool state); - bool (*gpu_is_lockup)(struct radeon_device *rdev, struct radeon_ring *cp); int (*asic_reset)(struct radeon_device *rdev); /* ioctl hw specific callback. Some hw might want to perform special * operation on specific ioctl. For instance on wait idle some hw @@ -1173,6 +1172,7 @@ struct radeon_asic { void (*ring_start)(struct radeon_device *rdev, struct radeon_ring *cp); int (*ring_test)(struct radeon_device *rdev, struct radeon_ring *cp); int (*ib_test)(struct radeon_device *rdev, struct radeon_ring *cp); + bool (*is_lockup)(struct radeon_device *rdev, struct radeon_ring *cp); } ring[RADEON_NUM_RINGS]; /* irqs */ struct { @@ -1730,7 +1730,6 @@ void radeon_ring_write(struct radeon_ring *ring, uint32_t v); #define radeon_suspend(rdev) (rdev)->asic->suspend((rdev)) #define radeon_cs_parse(rdev, r, p) (rdev)->asic->ring[(r)].cs_parse((p)) #define radeon_vga_set_state(rdev, state) (rdev)->asic->vga_set_state((rdev), (state)) -#define radeon_gpu_is_lockup(rdev, cp) (rdev)->asic->gpu_is_lockup((rdev), (cp)) #define radeon_asic_reset(rdev) (rdev)->asic->asic_reset((rdev)) #define radeon_gart_tlb_flush(rdev) (rdev)->asic->gart.tlb_flush((rdev)) #define radeon_gart_set_page(rdev, i, p) (rdev)->asic->gart.set_page((rdev), (i), (p)) @@ -1739,6 +1738,7 @@ void radeon_ring_write(struct radeon_ring *ring, uint32_t v); #define radeon_ib_test(rdev, r, cp) (rdev)->asic->ring[(r)].ib_test((rdev), (cp)) #define radeon_ring_ib_execute(rdev, r, ib) (rdev)->asic->ring[(r)].ib_execute((rdev), (ib)) #define radeon_ring_ib_parse(rdev, r, ib) (rdev)->asic->ring[(r)].ib_parse((rdev), (ib)) +#define radeon_ring_is_lockup(rdev, r, cp) (rdev)->asic->ring[(r)].is_lockup((rdev), (cp)) #define radeon_irq_set(rdev) (rdev)->asic->irq.set((rdev)) #define radeon_irq_process(rdev) (rdev)->asic->irq.process((rdev)) #define radeon_get_vblank_counter(rdev, crtc) (rdev)->asic->display.get_vblank_counter((rdev), (crtc)) diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c index be4dc2f..958b9ea 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.c +++ b/drivers/gpu/drm/radeon/radeon_asic.c @@ -134,7 +134,6 @@ static struct radeon_asic r100_asic = { .suspend = &r100_suspend, .resume = &r100_resume, .vga_set_state = &r100_vga_set_state, - .gpu_is_lockup = &r100_gpu_is_lockup, .asic_reset = &r100_asic_reset, .ioctl_wait_idle = NULL, .gui_idle = &r100_gui_idle, @@ -152,6 +151,7 @@ static struct radeon_asic r100_asic = { .ring_start = &r100_ring_start, .ring_test = &r100_ring_test, .ib_test = &r100_ib_test, + .is_lockup = &r100_gpu_is_lockup, } }, .irq = { @@ -208,7 +208,6 @@ static struct radeon_asic r200_asic = { .suspend = &r100_suspend, .resume = &r100_resume, .vga_set_state = &r100_vga_set_state, - .gpu_is_lockup = &r100_gpu_is_lockup, .asic_reset = &r100_asic_reset, .ioctl_wait_idle = NULL, .gui_idle = &r100_gui_idle, @@ -226,6 +225,7 @@ static struct radeon_asic r200_asic = { .ring_start = &r100_ring_start, .ring_test = &r100_ring_test, .ib_test = &r100_ib_test, + .is_lockup = &r100_gpu_is_lockup, } }, .irq = { @@ -282,7 +282,6 @@ static struct radeon_asic r300_asic = { .suspend = &r300_suspend, .resume = &r300_resume, .vga_set_state = &r100_vga_set_state, - .gpu_is_lockup = &r300_gpu_is_lockup, .asic_reset = &r300_asic_reset, .ioctl_wait_idle = NULL, .gui_idle = &r100_gui_idle, @@ -300,6 +299,7 @@ static struct radeon_asic r300_asic = { .ring_start = &r300_ring_start, .ring_test = &r100_ring_test, .ib_test = &r100_ib_test, + .is_lockup = &r300_gpu_is_lockup, } }, .irq = { @@ -356,7 +356,6 @@ static struct radeon_asic r300_asic_pcie = { .suspend = &r300_suspend, .resume = &r300_resume, .vga_set_state = &r100_vga_set_state, - .gpu_is_lockup = &r300_gpu_is_lockup, .asic_reset = &r300_asic_reset, .ioctl_wait_idle = NULL, .gui_idle = &r100_gui_idle, @@ -374,6 +373,7 @@ static struct radeon_asic r300_asic_pcie = { .ring_start = &r300_ring_start, .ring_test = &r100_ring_test, .ib_test = &r100_ib_test, + .is_lockup = &r300_gpu_is_lockup, } }, .irq = { @@ -430,7 +430,6 @@ static struct radeon_asic r420_asic = { .suspend = &r420_suspend, .resume = &r420_resume, .vga_set_state = &r100_vga_set_state, - .gpu_is_lockup = &r300_gpu_is_lockup, .asic_reset = &r300_asic_reset, .ioctl_wait_idle = NULL, .gui_idle = &r100_gui_idle, @@ -448,6 +447,7 @@ static struct radeon_asic r420_asic = { .ring_start = &r300_ring_start, .ring_test = &r100_ring_test, .ib_test = &r100_ib_test, + .is_lockup = &r300_gpu_is_lockup, } }, .irq = { @@ -504,7 +504,6 @@ static struct radeon_asic rs400_asic = { .suspend = &rs400_suspend, .resume = &rs400_resume, .vga_set_state = &r100_vga_set_state, - .gpu_is_lockup = &r300_gpu_is_lockup, .asic_reset = &r300_asic_reset, .ioctl_wait_idle = NULL, .gui_idle = &r100_gui_idle, @@ -522,6 +521,7 @@ static struct radeon_asic rs400_asic = { .ring_start = &r300_ring_start, .ring_test = &r100_ring_test, .ib_test = &r100_ib_test, + .is_lockup = &r300_gpu_is_lockup, } }, .irq = { @@ -578,7 +578,6 @@ static struct radeon_asic rs600_asic = { .suspend = &rs600_suspend, .resume = &rs600_resume, .vga_set_state = &r100_vga_set_state, - .gpu_is_lockup = &r300_gpu_is_lockup, .asic_reset = &rs600_asic_reset, .ioctl_wait_idle = NULL, .gui_idle = &r100_gui_idle, @@ -596,6 +595,7 @@ static struct radeon_asic rs600_asic = { .ring_start = &r300_ring_start, .ring_test = &r100_ring_test, .ib_test = &r100_ib_test, + .is_lockup = &r300_gpu_is_lockup, } }, .irq = { @@ -652,7 +652,6 @@ static struct radeon_asic rs690_asic = { .suspend = &rs690_suspend, .resume = &rs690_resume, .vga_set_state = &r100_vga_set_state, - .gpu_is_lockup = &r300_gpu_is_lockup, .asic_reset = &rs600_asic_reset, .ioctl_wait_idle = NULL, .gui_idle = &r100_gui_idle, @@ -670,6 +669,7 @@ static struct radeon_asic rs690_asic = { .ring_start = &r300_ring_start, .ring_test = &r100_ring_test, .ib_test = &r100_ib_test, + .is_lockup = &r300_gpu_is_lockup, } }, .irq = { @@ -726,7 +726,6 @@ static struct radeon_asic rv515_asic = { .suspend = &rv515_suspend, .resume = &rv515_resume, .vga_set_state = &r100_vga_set_state, - .gpu_is_lockup = &r300_gpu_is_lockup, .asic_reset = &rs600_asic_reset, .ioctl_wait_idle = NULL, .gui_idle = &r100_gui_idle, @@ -744,6 +743,7 @@ static struct radeon_asic rv515_asic = { .ring_start = &rv515_ring_start, .ring_test = &r100_ring_test, .ib_test = &r100_ib_test, + .is_lockup = &r300_gpu_is_lockup, } }, .irq = { @@ -800,7 +800,6 @@ static struct radeon_asic r520_asic = { .suspend = &rv515_suspend, .resume = &r520_resume, .vga_set_state = &r100_vga_set_state, - .gpu_is_lockup = &r300_gpu_is_lockup, .asic_reset = &rs600_asic_reset, .ioctl_wait_idle = NULL, .gui_idle = &r100_gui_idle, @@ -818,6 +817,7 @@ static struct radeon_asic r520_asic = { .ring_start = &rv515_ring_start, .ring_test = &r100_ring_test, .ib_test = &r100_ib_test, + .is_lockup = &r300_gpu_is_lockup, } }, .irq = { @@ -874,7 +874,6 @@ static struct radeon_asic r600_asic = { .suspend = &r600_suspend, .resume = &r600_resume, .vga_set_state = &r600_vga_set_state, - .gpu_is_lockup = &r600_gpu_is_lockup, .asic_reset = &r600_asic_reset, .ioctl_wait_idle = r600_ioctl_wait_idle, .gui_idle = &r600_gui_idle, @@ -891,6 +890,7 @@ static struct radeon_asic r600_asic = { .cs_parse = &r600_cs_parse, .ring_test = &r600_ring_test, .ib_test = &r600_ib_test, + .is_lockup = &r600_gpu_is_lockup, } }, .irq = { @@ -946,7 +946,6 @@ static struct radeon_asic rs780_asic = { .fini = &r600_fini, .suspend = &r600_suspend, .resume = &r600_resume, - .gpu_is_lockup = &r600_gpu_is_lockup, .vga_set_state = &r600_vga_set_state, .asic_reset = &r600_asic_reset, .ioctl_wait_idle = r600_ioctl_wait_idle, @@ -964,6 +963,7 @@ static struct radeon_asic rs780_asic = { .cs_parse = &r600_cs_parse, .ring_test = &r600_ring_test, .ib_test = &r600_ib_test, + .is_lockup = &r600_gpu_is_lockup, } }, .irq = { @@ -1020,7 +1020,6 @@ static struct radeon_asic rv770_asic = { .suspend = &rv770_suspend, .resume = &rv770_resume, .asic_reset = &r600_asic_reset, - .gpu_is_lockup = &r600_gpu_is_lockup, .vga_set_state = &r600_vga_set_state, .ioctl_wait_idle = r600_ioctl_wait_idle, .gui_idle = &r600_gui_idle, @@ -1037,6 +1036,7 @@ static struct radeon_asic rv770_asic = { .cs_parse = &r600_cs_parse, .ring_test = &r600_ring_test, .ib_test = &r600_ib_test, + .is_lockup = &r600_gpu_is_lockup, } }, .irq = { @@ -1092,7 +1092,6 @@ static struct radeon_asic evergreen_asic = { .fini = &evergreen_fini, .suspend = &evergreen_suspend, .resume = &evergreen_resume, - .gpu_is_lockup = &evergreen_gpu_is_lockup, .asic_reset = &evergreen_asic_reset, .vga_set_state = &r600_vga_set_state, .ioctl_wait_idle = r600_ioctl_wait_idle, @@ -1110,6 +1109,7 @@ static struct radeon_asic evergreen_asic = { .cs_parse = &evergreen_cs_parse, .ring_test = &r600_ring_test, .ib_test = &r600_ib_test, + .is_lockup = &evergreen_gpu_is_lockup, } }, .irq = { @@ -1165,7 +1165,6 @@ static struct radeon_asic sumo_asic = { .fini = &evergreen_fini, .suspend = &evergreen_suspend, .resume = &evergreen_resume, - .gpu_is_lockup = &evergreen_gpu_is_lockup, .asic_reset = &evergreen_asic_reset, .vga_set_state = &r600_vga_set_state, .ioctl_wait_idle = r600_ioctl_wait_idle, @@ -1183,6 +1182,7 @@ static struct radeon_asic sumo_asic = { .cs_parse = &evergreen_cs_parse, .ring_test = &r600_ring_test, .ib_test = &r600_ib_test, + .is_lockup = &evergreen_gpu_is_lockup, }, }, .irq = { @@ -1238,7 +1238,6 @@ static struct radeon_asic btc_asic = { .fini = &evergreen_fini, .suspend = &evergreen_suspend, .resume = &evergreen_resume, - .gpu_is_lockup = &evergreen_gpu_is_lockup, .asic_reset = &evergreen_asic_reset, .vga_set_state = &r600_vga_set_state, .ioctl_wait_idle = r600_ioctl_wait_idle, @@ -1256,6 +1255,7 @@ static struct radeon_asic btc_asic = { .cs_parse = &evergreen_cs_parse, .ring_test = &r600_ring_test, .ib_test = &r600_ib_test, + .is_lockup = &evergreen_gpu_is_lockup, } }, .irq = { @@ -1321,7 +1321,6 @@ static struct radeon_asic cayman_asic = { .fini = &cayman_fini, .suspend = &cayman_suspend, .resume = &cayman_resume, - .gpu_is_lockup = &cayman_gpu_is_lockup, .asic_reset = &cayman_asic_reset, .vga_set_state = &r600_vga_set_state, .ioctl_wait_idle = r600_ioctl_wait_idle, @@ -1340,6 +1339,7 @@ static struct radeon_asic cayman_asic = { .cs_parse = &evergreen_cs_parse, .ring_test = &r600_ring_test, .ib_test = &r600_ib_test, + .is_lockup = &cayman_gpu_is_lockup, }, [CAYMAN_RING_TYPE_CP1_INDEX] = { .ib_execute = &cayman_ring_ib_execute, @@ -1349,6 +1349,7 @@ static struct radeon_asic cayman_asic = { .cs_parse = &evergreen_cs_parse, .ring_test = &r600_ring_test, .ib_test = &r600_ib_test, + .is_lockup = &cayman_gpu_is_lockup, }, [CAYMAN_RING_TYPE_CP2_INDEX] = { .ib_execute = &cayman_ring_ib_execute, @@ -1358,6 +1359,7 @@ static struct radeon_asic cayman_asic = { .cs_parse = &evergreen_cs_parse, .ring_test = &r600_ring_test, .ib_test = &r600_ib_test, + .is_lockup = &cayman_gpu_is_lockup, } }, .irq = { @@ -1413,7 +1415,6 @@ static struct radeon_asic trinity_asic = { .fini = &cayman_fini, .suspend = &cayman_suspend, .resume = &cayman_resume, - .gpu_is_lockup = &cayman_gpu_is_lockup, .asic_reset = &cayman_asic_reset, .vga_set_state = &r600_vga_set_state, .ioctl_wait_idle = r600_ioctl_wait_idle, @@ -1432,6 +1433,7 @@ static struct radeon_asic trinity_asic = { .cs_parse = &evergreen_cs_parse, .ring_test = &r600_ring_test, .ib_test = &r600_ib_test, + .is_lockup = &cayman_gpu_is_lockup, }, [CAYMAN_RING_TYPE_CP1_INDEX] = { .ib_execute = &cayman_ring_ib_execute, @@ -1441,6 +1443,7 @@ static struct radeon_asic trinity_asic = { .cs_parse = &evergreen_cs_parse, .ring_test = &r600_ring_test, .ib_test = &r600_ib_test, + .is_lockup = &cayman_gpu_is_lockup, }, [CAYMAN_RING_TYPE_CP2_INDEX] = { .ib_execute = &cayman_ring_ib_execute, @@ -1450,6 +1453,7 @@ static struct radeon_asic trinity_asic = { .cs_parse = &evergreen_cs_parse, .ring_test = &r600_ring_test, .ib_test = &r600_ib_test, + .is_lockup = &cayman_gpu_is_lockup, } }, .irq = { @@ -1515,7 +1519,6 @@ static struct radeon_asic si_asic = { .fini = &si_fini, .suspend = &si_suspend, .resume = &si_resume, - .gpu_is_lockup = &si_gpu_is_lockup, .asic_reset = &si_asic_reset, .vga_set_state = &r600_vga_set_state, .ioctl_wait_idle = r600_ioctl_wait_idle, @@ -1534,6 +1537,7 @@ static struct radeon_asic si_asic = { .cs_parse = NULL, .ring_test = &r600_ring_test, .ib_test = &r600_ib_test, + .is_lockup = &si_gpu_is_lockup, }, [CAYMAN_RING_TYPE_CP1_INDEX] = { .ib_execute = &si_ring_ib_execute, @@ -1543,6 +1547,7 @@ static struct radeon_asic si_asic = { .cs_parse = NULL, .ring_test = &r600_ring_test, .ib_test = &r600_ib_test, + .is_lockup = &si_gpu_is_lockup, }, [CAYMAN_RING_TYPE_CP2_INDEX] = { .ib_execute = &si_ring_ib_execute, @@ -1552,6 +1557,7 @@ static struct radeon_asic si_asic = { .cs_parse = NULL, .ring_test = &r600_ring_test, .ib_test = &r600_ib_test, + .is_lockup = &si_gpu_is_lockup, } }, .irq = { diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c index 4bd36a3..66b2229 100644 --- a/drivers/gpu/drm/radeon/radeon_fence.c +++ b/drivers/gpu/drm/radeon/radeon_fence.c @@ -259,7 +259,7 @@ retry: * if we experiencing a lockup the value doesn't change */ if (seq == rdev->fence_drv[fence->ring].last_seq && - radeon_gpu_is_lockup(rdev, &rdev->ring[fence->ring])) { + radeon_ring_is_lockup(rdev, fence->ring, &rdev->ring[fence->ring])) { /* good news we believe it's a lockup */ printk(KERN_WARNING "GPU lockup (waiting for 0x%08X last fence id 0x%08X)\n", fence->seq, seq);

-- 1.7.7.6

j.glisse＠gmail.com

4 a.m.

New subject: [PATCH 02/13] drm/radeon: replace gpu_lockup with ring->ready flag

From: Christian König deathsimple@vodafone.de

It makes no sense at all to have more than one flag.

diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c index fe33d35..6573e28 100644 --- a/drivers/gpu/drm/radeon/r100.c +++ b/drivers/gpu/drm/radeon/r100.c @@ -2296,7 +2296,6 @@ int r100_asic_reset(struct radeon_device *rdev) if (G_000E40_SE_BUSY(status) || G_000E40_RE_BUSY(status) || G_000E40_TAM_BUSY(status) || G_000E40_PB_BUSY(status)) { dev_err(rdev->dev, "failed to reset GPU\n"); - rdev->gpu_lockup = true; ret = -1; } else dev_info(rdev->dev, "GPU reset succeed\n"); diff --git a/drivers/gpu/drm/radeon/r300.c b/drivers/gpu/drm/radeon/r300.c index fa14383..a63f432 100644 --- a/drivers/gpu/drm/radeon/r300.c +++ b/drivers/gpu/drm/radeon/r300.c @@ -449,7 +449,6 @@ int r300_asic_reset(struct radeon_device *rdev) /* Check if GPU is idle */ if (G_000E40_GA_BUSY(status) || G_000E40_VAP_BUSY(status)) { dev_err(rdev->dev, "failed to reset GPU\n"); - rdev->gpu_lockup = true; ret = -1; } else dev_info(rdev->dev, "GPU reset succeed\n"); diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index bea99e3..365334b 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -1529,7 +1529,6 @@ struct radeon_device { struct radeon_mutex cs_mutex; struct radeon_wb wb; struct radeon_dummy_page dummy_page; - bool gpu_lockup; bool shutdown; bool suspend; bool need_dma32; diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index ea7df16..eb63a06 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -714,7 +714,6 @@ int radeon_device_init(struct radeon_device *rdev, rdev->is_atom_bios = false; rdev->usec_timeout = RADEON_MAX_USEC_TIMEOUT; rdev->mc.gtt_size = radeon_gart_size * 1024 * 1024; - rdev->gpu_lockup = false; rdev->accel_working = false;

DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X).\n", diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c index 66b2229..36c411f 100644 --- a/drivers/gpu/drm/radeon/radeon_fence.c +++ b/drivers/gpu/drm/radeon/radeon_fence.c @@ -71,14 +71,7 @@ int radeon_fence_emit(struct radeon_device *rdev, struct radeon_fence *fence) return 0; } fence->seq = atomic_add_return(1, &rdev->fence_drv[fence->ring].seq); - if (!rdev->ring[fence->ring].ready) - /* FIXME: cp is not running assume everythings is done right - * away - */ - radeon_fence_write(rdev, fence->seq, fence->ring); - else - radeon_fence_ring_emit(rdev, fence->ring, fence); - + radeon_fence_ring_emit(rdev, fence->ring, fence); trace_radeon_fence_emit(rdev->ddev, fence->seq); fence->emitted = true; list_move_tail(&fence->list, &rdev->fence_drv[fence->ring].emitted); @@ -191,9 +184,6 @@ bool radeon_fence_signaled(struct radeon_fence *fence) if (!fence) return true;

- if (fence->rdev->gpu_lockup) - return true; - write_lock_irqsave(&fence->rdev->fence_lock, irq_flags); signaled = fence->signaled; /* if we are shuting down report all fence as signaled */ @@ -260,18 +250,16 @@ retry: */ if (seq == rdev->fence_drv[fence->ring].last_seq && radeon_ring_is_lockup(rdev, fence->ring, &rdev->ring[fence->ring])) { + /* good news we believe it's a lockup */ printk(KERN_WARNING "GPU lockup (waiting for 0x%08X last fence id 0x%08X)\n", fence->seq, seq); - /* FIXME: what should we do ? marking everyone - * as signaled for now - */ - rdev->gpu_lockup = true; + + /* mark the ring as not ready any more */ + rdev->ring[fence->ring].ready = false; r = radeon_gpu_reset(rdev); if (r) return r; - radeon_fence_write(rdev, fence->seq, fence->ring); - rdev->gpu_lockup = false; } timeout = RADEON_FENCE_JIFFIES_TIMEOUT; write_lock_irqsave(&rdev->fence_lock, irq_flags); @@ -289,10 +277,11 @@ int radeon_fence_wait_next(struct radeon_device *rdev, int ring) struct radeon_fence *fence; int r;

- if (rdev->gpu_lockup) { - return 0; - } write_lock_irqsave(&rdev->fence_lock, irq_flags); + if (!rdev->ring[ring].ready) { + write_unlock_irqrestore(&rdev->fence_lock, irq_flags); + return -EBUSY; + } if (list_empty(&rdev->fence_drv[ring].emitted)) { write_unlock_irqrestore(&rdev->fence_lock, irq_flags); return 0; diff --git a/drivers/gpu/drm/radeon/rs600.c b/drivers/gpu/drm/radeon/rs600.c index d25cf86..f709fe8 100644 --- a/drivers/gpu/drm/radeon/rs600.c +++ b/drivers/gpu/drm/radeon/rs600.c @@ -396,7 +396,6 @@ int rs600_asic_reset(struct radeon_device *rdev) /* Check if GPU is idle */ if (G_000E40_GA_BUSY(status) || G_000E40_VAP_BUSY(status)) { dev_err(rdev->dev, "failed to reset GPU\n"); - rdev->gpu_lockup = true; ret = -1; } else dev_info(rdev->dev, "GPU reset succeed\n");

-- 1.7.7.6

j.glisse＠gmail.com

4 a.m.

New subject: [PATCH 03/13] drm/radeon: register ring debugfs handlers on init

From: Christian König deathsimple@vodafone.de

Just register the debugfs files on init instead of checking the chipset type multiple times.

diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c index cc33b3d..b6eb1d2 100644 --- a/drivers/gpu/drm/radeon/radeon_ring.c +++ b/drivers/gpu/drm/radeon/radeon_ring.c @@ -34,7 +34,7 @@ #include "atom.h"

int radeon_debugfs_ib_init(struct radeon_device *rdev); -int radeon_debugfs_ring_init(struct radeon_device *rdev); +int radeon_debugfs_ring_init(struct radeon_device *rdev, struct radeon_ring *ring);

u32 radeon_get_ib_value(struct radeon_cs_parser *p, int idx) { @@ -237,9 +237,6 @@ int radeon_ib_pool_init(struct radeon_device *rdev) if (radeon_debugfs_ib_init(rdev)) { DRM_ERROR("Failed to register debugfs file for IB !\n"); } - if (radeon_debugfs_ring_init(rdev)) { - DRM_ERROR("Failed to register debugfs file for rings !\n"); - } radeon_mutex_unlock(&rdev->ib_pool.mutex); return 0; } @@ -411,6 +408,9 @@ int radeon_ring_init(struct radeon_device *rdev, struct radeon_ring *ring, unsig } ring->ptr_mask = (ring->ring_size / 4) - 1; ring->ring_free_dw = ring->ring_size / 4; + if (radeon_debugfs_ring_init(rdev, ring)) { + DRM_ERROR("Failed to register debugfs file for rings !\n"); + } return 0; }

@@ -501,17 +501,24 @@ static char radeon_debugfs_ib_names[RADEON_IB_POOL_SIZE][32]; static unsigned radeon_debugfs_ib_idx[RADEON_IB_POOL_SIZE]; #endif

-int radeon_debugfs_ring_init(struct radeon_device *rdev) +int radeon_debugfs_ring_init(struct radeon_device *rdev, struct radeon_ring *ring) { #if defined(CONFIG_DEBUG_FS) - if (rdev->family >= CHIP_CAYMAN) - return radeon_debugfs_add_files(rdev, radeon_debugfs_ring_info_list, - ARRAY_SIZE(radeon_debugfs_ring_info_list)); - else - return radeon_debugfs_add_files(rdev, radeon_debugfs_ring_info_list, 1); -#else - return 0; + unsigned i; + for (i = 0; i < ARRAY_SIZE(radeon_debugfs_ring_info_list); ++i) { + struct drm_info_list *info = &radeon_debugfs_ring_info_list[i]; + int ridx = *(int*)radeon_debugfs_ring_info_list[i].data; + unsigned r; + + if (&rdev->ring[ridx] != ring) + continue; + + r = radeon_debugfs_add_files(rdev, info, 1); + if (r) + return r; + } #endif + return 0; }

int radeon_debugfs_ib_init(struct radeon_device *rdev)

-- 1.7.7.6

j.glisse＠gmail.com

4 a.m.

New subject: [PATCH 04/13] drm/radeon: use central function for IB testing

From: Christian König deathsimple@vodafone.de

Removing all the different error messages and having just one standard behaviour over all chipset generations.

Signed-off-by: Christian König deathsimple@vodafone.de Reviewed-by: Alex Deucher alexander.deucher@amd.com Reviewed-by: Jerome Glisse jglisse@redhat.com --- drivers/gpu/drm/radeon/evergreen.c | 7 ++----- drivers/gpu/drm/radeon/ni.c | 7 ++----- drivers/gpu/drm/radeon/r100.c | 7 ++----- drivers/gpu/drm/radeon/r300.c | 7 ++----- drivers/gpu/drm/radeon/r420.c | 7 ++----- drivers/gpu/drm/radeon/r520.c | 8 +++----- drivers/gpu/drm/radeon/r600.c | 7 ++----- drivers/gpu/drm/radeon/radeon.h | 1 + drivers/gpu/drm/radeon/radeon_ring.c | 30 ++++++++++++++++++++++++++++++ drivers/gpu/drm/radeon/rs400.c | 7 ++----- drivers/gpu/drm/radeon/rs600.c | 7 ++----- drivers/gpu/drm/radeon/rs690.c | 7 ++----- drivers/gpu/drm/radeon/rv515.c | 8 +++----- drivers/gpu/drm/radeon/rv770.c | 7 ++----- 14 files changed, 57 insertions(+), 60 deletions(-)

diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c index cfa372c..ca47f52 100644 --- a/drivers/gpu/drm/radeon/evergreen.c +++ b/drivers/gpu/drm/radeon/evergreen.c @@ -3248,12 +3248,9 @@ static int evergreen_startup(struct radeon_device *rdev) if (r) return r;

- r = radeon_ib_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]); - if (r) { - DRM_ERROR("radeon: failed testing IB (%d).\n", r); - rdev->accel_working = false; + r = radeon_ib_ring_tests(rdev); + if (r) return r; - }

r = r600_audio_init(rdev); if (r) { diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c index a48ca53..0146428 100644 --- a/drivers/gpu/drm/radeon/ni.c +++ b/drivers/gpu/drm/radeon/ni.c @@ -1601,12 +1601,9 @@ static int cayman_startup(struct radeon_device *rdev) if (r) return r;

r = radeon_vm_manager_start(rdev); if (r) diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c index 6573e28..ac53bd8 100644 --- a/drivers/gpu/drm/radeon/r100.c +++ b/drivers/gpu/drm/radeon/r100.c @@ -3964,12 +3964,9 @@ static int r100_startup(struct radeon_device *rdev) if (r) return r;

- r = radeon_ib_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]); - if (r) { - dev_err(rdev->dev, "failed testing IB (%d).\n", r); - rdev->accel_working = false; + r = radeon_ib_ring_tests(rdev); + if (r) return r; - }

return 0; } diff --git a/drivers/gpu/drm/radeon/r300.c b/drivers/gpu/drm/radeon/r300.c index a63f432..26e0db8 100644 --- a/drivers/gpu/drm/radeon/r300.c +++ b/drivers/gpu/drm/radeon/r300.c @@ -1417,12 +1417,9 @@ static int r300_startup(struct radeon_device *rdev) if (r) return r;

return 0; } diff --git a/drivers/gpu/drm/radeon/r420.c b/drivers/gpu/drm/radeon/r420.c index f3fcaac..99137be 100644 --- a/drivers/gpu/drm/radeon/r420.c +++ b/drivers/gpu/drm/radeon/r420.c @@ -279,12 +279,9 @@ static int r420_startup(struct radeon_device *rdev) if (r) return r;

return 0; } diff --git a/drivers/gpu/drm/radeon/r520.c b/drivers/gpu/drm/radeon/r520.c index ebcc15b..b5cf837 100644 --- a/drivers/gpu/drm/radeon/r520.c +++ b/drivers/gpu/drm/radeon/r520.c @@ -207,12 +207,10 @@ static int r520_startup(struct radeon_device *rdev) if (r) return r;

diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index c8187c4..06eccd1 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -2494,12 +2494,9 @@ int r600_startup(struct radeon_device *rdev) if (r) return r;

return 0; } diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 365334b..8801657 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -793,6 +793,7 @@ int radeon_ib_pool_init(struct radeon_device *rdev); void radeon_ib_pool_fini(struct radeon_device *rdev); int radeon_ib_pool_start(struct radeon_device *rdev); int radeon_ib_pool_suspend(struct radeon_device *rdev); +int radeon_ib_ring_tests(struct radeon_device *rdev); /* Ring access between begin & end cannot sleep */ int radeon_ring_index(struct radeon_device *rdev, struct radeon_ring *cp); void radeon_ring_free_size(struct radeon_device *rdev, struct radeon_ring *cp); diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c index b6eb1d2..1b020ef 100644 --- a/drivers/gpu/drm/radeon/radeon_ring.c +++ b/drivers/gpu/drm/radeon/radeon_ring.c @@ -267,6 +267,36 @@ int radeon_ib_pool_suspend(struct radeon_device *rdev) return radeon_sa_bo_manager_suspend(rdev, &rdev->ib_pool.sa_manager); }

+int radeon_ib_ring_tests(struct radeon_device *rdev) +{ + unsigned i; + int r; + + for (i = 0; i < RADEON_NUM_RINGS; ++i) { + struct radeon_ring *ring = &rdev->ring[i]; + + if (!ring->ready) + continue; + + r = radeon_ib_test(rdev, i, ring); + if (r) { + ring->ready = false; + + if (i == RADEON_RING_TYPE_GFX_INDEX) { + /* oh, oh, that's really bad */ + DRM_ERROR("radeon: failed testing IB on GFX ring (%d).\n", r); + rdev->accel_working = false; + return r; + + } else { + /* still not good, but we can live with it */ + DRM_ERROR("radeon: failed testing IB on ring %d (%d).\n", i, r); + } + } + } + return 0; +} + /* * Ring. */ diff --git a/drivers/gpu/drm/radeon/rs400.c b/drivers/gpu/drm/radeon/rs400.c index 4cf381b..a464eb5 100644 --- a/drivers/gpu/drm/radeon/rs400.c +++ b/drivers/gpu/drm/radeon/rs400.c @@ -430,12 +430,9 @@ static int rs400_startup(struct radeon_device *rdev) if (r) return r;

return 0; } diff --git a/drivers/gpu/drm/radeon/rs600.c b/drivers/gpu/drm/radeon/rs600.c index f709fe8..a286dea 100644 --- a/drivers/gpu/drm/radeon/rs600.c +++ b/drivers/gpu/drm/radeon/rs600.c @@ -882,12 +882,9 @@ static int rs600_startup(struct radeon_device *rdev) if (r) return r;

return 0; } diff --git a/drivers/gpu/drm/radeon/rs690.c b/drivers/gpu/drm/radeon/rs690.c index f2c3b9d..3277dde 100644 --- a/drivers/gpu/drm/radeon/rs690.c +++ b/drivers/gpu/drm/radeon/rs690.c @@ -647,12 +647,9 @@ static int rs690_startup(struct radeon_device *rdev) if (r) return r;

return 0; } diff --git a/drivers/gpu/drm/radeon/rv515.c b/drivers/gpu/drm/radeon/rv515.c index d8d78fe..7f08ced 100644 --- a/drivers/gpu/drm/radeon/rv515.c +++ b/drivers/gpu/drm/radeon/rv515.c @@ -412,12 +412,10 @@ static int rv515_startup(struct radeon_device *rdev) if (r) return r;

diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c index cdab1ae..a8b0016 100644 --- a/drivers/gpu/drm/radeon/rv770.c +++ b/drivers/gpu/drm/radeon/rv770.c @@ -1114,12 +1114,9 @@ static int rv770_startup(struct radeon_device *rdev) if (r) return r;

- r = radeon_ib_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]); - if (r) { - dev_err(rdev->dev, "IB test failed (%d).\n", r); - rdev->accel_working = false; + r = radeon_ib_ring_tests(rdev); + if (r) return r; - }

return 0; }

-- 1.7.7.6

j.glisse＠gmail.com

4 a.m.

New subject: [PATCH 05/13] drm/radeon: rework gpu lockup detection and processing

From: Christian König deathsimple@vodafone.de

Previusly multiple rings could trigger multiple GPU resets at the same time.

Signed-off-by: Christian König deathsimple@vodafone.de Reviewed-by: Jerome Glisse jglisse@redhat.com --- drivers/gpu/drm/radeon/radeon.h | 3 +- drivers/gpu/drm/radeon/radeon_fence.c | 150 +++++++++++++++++---------------- 2 files changed, 77 insertions(+), 76 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 8801657..85a3aa9 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -255,8 +255,7 @@ struct radeon_fence_driver { volatile uint32_t *cpu_addr; atomic_t seq; uint32_t last_seq; - unsigned long last_jiffies; - unsigned long last_timeout; + unsigned long last_activity; wait_queue_head_t queue; struct list_head created; struct list_head emitted; diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c index 36c411f..1a9765a 100644 --- a/drivers/gpu/drm/radeon/radeon_fence.c +++ b/drivers/gpu/drm/radeon/radeon_fence.c @@ -74,6 +74,10 @@ int radeon_fence_emit(struct radeon_device *rdev, struct radeon_fence *fence) radeon_fence_ring_emit(rdev, fence->ring, fence); trace_radeon_fence_emit(rdev->ddev, fence->seq); fence->emitted = true; + /* are we the first fence on a previusly idle ring? */ + if (list_empty(&rdev->fence_drv[fence->ring].emitted)) { + rdev->fence_drv[fence->ring].last_activity = jiffies; + } list_move_tail(&fence->list, &rdev->fence_drv[fence->ring].emitted); write_unlock_irqrestore(&rdev->fence_lock, irq_flags); return 0; @@ -85,34 +89,14 @@ static bool radeon_fence_poll_locked(struct radeon_device *rdev, int ring) struct list_head *i, *n; uint32_t seq; bool wake = false; - unsigned long cjiffies;

seq = radeon_fence_read(rdev, ring); - if (seq != rdev->fence_drv[ring].last_seq) { - rdev->fence_drv[ring].last_seq = seq; - rdev->fence_drv[ring].last_jiffies = jiffies; - rdev->fence_drv[ring].last_timeout = RADEON_FENCE_JIFFIES_TIMEOUT; - } else { - cjiffies = jiffies; - if (time_after(cjiffies, rdev->fence_drv[ring].last_jiffies)) { - cjiffies -= rdev->fence_drv[ring].last_jiffies; - if (time_after(rdev->fence_drv[ring].last_timeout, cjiffies)) { - /* update the timeout */ - rdev->fence_drv[ring].last_timeout -= cjiffies; - } else { - /* the 500ms timeout is elapsed we should test - * for GPU lockup - */ - rdev->fence_drv[ring].last_timeout = 1; - } - } else { - /* wrap around update last jiffies, we will just wait - * a little longer - */ - rdev->fence_drv[ring].last_jiffies = cjiffies; - } + if (seq == rdev->fence_drv[ring].last_seq) return false; - } + + rdev->fence_drv[ring].last_seq = seq; + rdev->fence_drv[ring].last_activity = jiffies; + n = NULL; list_for_each(i, &rdev->fence_drv[ring].emitted) { fence = list_entry(i, struct radeon_fence, list); @@ -207,66 +191,84 @@ int radeon_fence_wait(struct radeon_fence *fence, bool intr) struct radeon_device *rdev; unsigned long irq_flags, timeout; u32 seq; - int r; + int i, r; + bool signaled;

if (fence == NULL) { WARN(1, "Querying an invalid fence : %p !\n", fence); - return 0; + return -EINVAL; } + rdev = fence->rdev; - if (radeon_fence_signaled(fence)) { - return 0; - } - timeout = rdev->fence_drv[fence->ring].last_timeout; -retry: - /* save current sequence used to check for GPU lockup */ - seq = rdev->fence_drv[fence->ring].last_seq; - trace_radeon_fence_wait_begin(rdev->ddev, seq); - if (intr) { + signaled = radeon_fence_signaled(fence); + while (!signaled) { + read_lock_irqsave(&rdev->fence_lock, irq_flags); + timeout = jiffies - RADEON_FENCE_JIFFIES_TIMEOUT; + if (time_after(rdev->fence_drv[fence->ring].last_activity, timeout)) { + /* the normal case, timeout is somewhere before last_activity */ + timeout = rdev->fence_drv[fence->ring].last_activity - timeout; + } else { + /* either jiffies wrapped around, or no fence was signaled in the last 500ms + * anyway we will just wait for the minimum amount and then check for a lockup */ + timeout = 1; + } + /* save current sequence value used to check for GPU lockups */ + seq = rdev->fence_drv[fence->ring].last_seq; + read_unlock_irqrestore(&rdev->fence_lock, irq_flags); + + trace_radeon_fence_wait_begin(rdev->ddev, seq); radeon_irq_kms_sw_irq_get(rdev, fence->ring); - r = wait_event_interruptible_timeout(rdev->fence_drv[fence->ring].queue, - radeon_fence_signaled(fence), timeout); + if (intr) { + r = wait_event_interruptible_timeout( + rdev->fence_drv[fence->ring].queue, + (signaled = radeon_fence_signaled(fence)), timeout); + } else { + r = wait_event_timeout( + rdev->fence_drv[fence->ring].queue, + (signaled = radeon_fence_signaled(fence)), timeout); + } radeon_irq_kms_sw_irq_put(rdev, fence->ring); if (unlikely(r < 0)) { return r; } - } else { - radeon_irq_kms_sw_irq_get(rdev, fence->ring); - r = wait_event_timeout(rdev->fence_drv[fence->ring].queue, - radeon_fence_signaled(fence), timeout); - radeon_irq_kms_sw_irq_put(rdev, fence->ring); - } - trace_radeon_fence_wait_end(rdev->ddev, seq); - if (unlikely(!radeon_fence_signaled(fence))) { - /* we were interrupted for some reason and fence isn't - * isn't signaled yet, resume wait - */ - if (r) { - timeout = r; - goto retry; - } - /* don't protect read access to rdev->fence_drv[t].last_seq - * if we experiencing a lockup the value doesn't change - */ - if (seq == rdev->fence_drv[fence->ring].last_seq && - radeon_ring_is_lockup(rdev, fence->ring, &rdev->ring[fence->ring])) { - - /* good news we believe it's a lockup */ - printk(KERN_WARNING "GPU lockup (waiting for 0x%08X last fence id 0x%08X)\n", - fence->seq, seq); - - /* mark the ring as not ready any more */ - rdev->ring[fence->ring].ready = false; - r = radeon_gpu_reset(rdev); - if (r) - return r; + trace_radeon_fence_wait_end(rdev->ddev, seq); + + if (unlikely(!signaled)) { + /* we were interrupted for some reason and fence + * isn't signaled yet, resume waiting */ + if (r) { + continue; + } + + write_lock_irqsave(&rdev->fence_lock, irq_flags); + /* check if sequence value has changed since last_activity */ + if (seq != rdev->fence_drv[fence->ring].last_seq) { + write_unlock_irqrestore(&rdev->fence_lock, irq_flags); + continue; + } + + /* change sequence value on all rings, so nobody else things there is a lockup */ + for (i = 0; i < RADEON_NUM_RINGS; ++i) + rdev->fence_drv[i].last_seq -= 0x10000; + write_unlock_irqrestore(&rdev->fence_lock, irq_flags); + + if (radeon_ring_is_lockup(rdev, fence->ring, &rdev->ring[fence->ring])) { + + /* good news we believe it's a lockup */ + printk(KERN_WARNING "GPU lockup (waiting for 0x%08X last fence id 0x%08X)\n", + fence->seq, seq); + + /* mark the ring as not ready any more */ + rdev->ring[fence->ring].ready = false; + r = radeon_gpu_reset(rdev); + if (r) + return r; + + write_lock_irqsave(&rdev->fence_lock, irq_flags); + rdev->fence_drv[fence->ring].last_activity = jiffies; + write_unlock_irqrestore(&rdev->fence_lock, irq_flags); + } } - timeout = RADEON_FENCE_JIFFIES_TIMEOUT; - write_lock_irqsave(&rdev->fence_lock, irq_flags); - rdev->fence_drv[fence->ring].last_timeout = RADEON_FENCE_JIFFIES_TIMEOUT; - rdev->fence_drv[fence->ring].last_jiffies = jiffies; - write_unlock_irqrestore(&rdev->fence_lock, irq_flags); - goto retry; } return 0; }

-- 1.7.7.6

j.glisse＠gmail.com

4 a.m.

New subject: [PATCH 06/13] drm/radeon: fix a bug in the SA code

From: Christian König deathsimple@vodafone.de

Aligning offset can make it bigger than tmp->offset leading to an overrun bug in the following subtraction.

v2: Against initial suspicions this can't happen in mainline, so no need to push it into stable.

Signed-off-by: Christian König deathsimple@vodafone.de Reviewed-by: Michel Dänzer michel.daenzer@amd.com Reviewed-by: Jerome Glisse jglisse@redhat.com --- drivers/gpu/drm/radeon/radeon_sa.c | 2 +- 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon_sa.c b/drivers/gpu/drm/radeon/radeon_sa.c index 4cce47e..8fbfe69 100644 --- a/drivers/gpu/drm/radeon/radeon_sa.c +++ b/drivers/gpu/drm/radeon/radeon_sa.c @@ -150,7 +150,7 @@ int radeon_sa_bo_new(struct radeon_device *rdev, offset = 0; list_for_each_entry(tmp, &sa_manager->sa_bo, list) { /* room before this object ? */ - if ((tmp->offset - offset) >= size) { + if (offset < tmp->offset && (tmp->offset - offset) >= size) { head = tmp->list.prev; goto out; }

-- 1.7.7.6

j.glisse＠gmail.com

4 a.m.

New subject: [PATCH 07/13] drm/radeon: add proper locking to the SA

From: Christian König deathsimple@vodafone.de

Make the suballocator self containing to locking.

v2: split the bugfix into a seperate patch.

Signed-off-by: Christian König deathsimple@vodafone.de Reviewed-by: Jerome Glisse jglisse@redhat.com --- drivers/gpu/drm/radeon/radeon.h | 1 + drivers/gpu/drm/radeon/radeon_sa.c | 17 +++++++++++------ 2 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 85a3aa9..1aefbd9 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -381,6 +381,7 @@ struct radeon_bo_list { * alignment). */ struct radeon_sa_manager { + spinlock_t lock; struct radeon_bo *bo; struct list_head sa_bo; unsigned size; diff --git a/drivers/gpu/drm/radeon/radeon_sa.c b/drivers/gpu/drm/radeon/radeon_sa.c index 8fbfe69..472d346 100644 --- a/drivers/gpu/drm/radeon/radeon_sa.c +++ b/drivers/gpu/drm/radeon/radeon_sa.c @@ -37,6 +37,7 @@ int radeon_sa_bo_manager_init(struct radeon_device *rdev, { int r;

+ spin_lock_init(&sa_manager->lock); sa_manager->bo = NULL; sa_manager->size = size; sa_manager->domain = domain; @@ -139,15 +140,15 @@ int radeon_sa_bo_new(struct radeon_device *rdev,

BUG_ON(align > RADEON_GPU_PAGE_SIZE); BUG_ON(size > sa_manager->size); + spin_lock(&sa_manager->lock);

/* no one ? */ - head = sa_manager->sa_bo.prev; if (list_empty(&sa_manager->sa_bo)) { + head = &sa_manager->sa_bo; goto out; }

/* look for a hole big enough */ - offset = 0; list_for_each_entry(tmp, &sa_manager->sa_bo, list) { /* room before this object ? */ if (offset < tmp->offset && (tmp->offset - offset) >= size) { @@ -157,9 +158,8 @@ int radeon_sa_bo_new(struct radeon_device *rdev, offset = tmp->offset + tmp->size; wasted = offset % align; if (wasted) { - wasted = align - wasted; + offset += align - wasted; } - offset += wasted; } /* room at the end ? */ head = sa_manager->sa_bo.prev; @@ -167,11 +167,11 @@ int radeon_sa_bo_new(struct radeon_device *rdev, offset = tmp->offset + tmp->size; wasted = offset % align; if (wasted) { - wasted = align - wasted; + offset += wasted = align - wasted; } - offset += wasted; if ((sa_manager->size - offset) < size) { /* failed to find somethings big enough */ + spin_unlock(&sa_manager->lock); return -ENOMEM; }

@@ -180,10 +180,15 @@ out: sa_bo->offset = offset; sa_bo->size = size; list_add(&sa_bo->list, head); + spin_unlock(&sa_manager->lock); return 0; }

void radeon_sa_bo_free(struct radeon_device *rdev, struct radeon_sa_bo *sa_bo) { + struct radeon_sa_manager *sa_manager = sa_bo->manager; + + spin_lock(&sa_manager->lock); list_del_init(&sa_bo->list); + spin_unlock(&sa_manager->lock); }

-- 1.7.7.6

j.glisse＠gmail.com

4 a.m.

New subject: [PATCH 08/13] drm/radeon: add sub allocator debugfs file

From: Christian König deathsimple@vodafone.de

Dumping the current allocations.

Signed-off-by: Christian König deathsimple@vodafone.de Reviewed-by: Jerome Glisse jglisse@redhat.com --- drivers/gpu/drm/radeon/radeon_object.h | 5 +++++ drivers/gpu/drm/radeon/radeon_ring.c | 22 ++++++++++++++++++++++ drivers/gpu/drm/radeon/radeon_sa.c | 14 ++++++++++++++ 3 files changed, 41 insertions(+), 0 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon_object.h b/drivers/gpu/drm/radeon/radeon_object.h index f9104be..d9b9333 100644 --- a/drivers/gpu/drm/radeon/radeon_object.h +++ b/drivers/gpu/drm/radeon/radeon_object.h @@ -161,5 +161,10 @@ extern int radeon_sa_bo_new(struct radeon_device *rdev, unsigned size, unsigned align); extern void radeon_sa_bo_free(struct radeon_device *rdev, struct radeon_sa_bo *sa_bo); +#if defined(CONFIG_DEBUG_FS) +extern void radeon_sa_bo_dump_debug_info(struct radeon_sa_manager *sa_manager, + struct seq_file *m); +#endif +

#endif diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c index 1b020ef..1d9bce9 100644 --- a/drivers/gpu/drm/radeon/radeon_ring.c +++ b/drivers/gpu/drm/radeon/radeon_ring.c @@ -529,6 +529,23 @@ static int radeon_debugfs_ib_info(struct seq_file *m, void *data) static struct drm_info_list radeon_debugfs_ib_list[RADEON_IB_POOL_SIZE]; static char radeon_debugfs_ib_names[RADEON_IB_POOL_SIZE][32]; static unsigned radeon_debugfs_ib_idx[RADEON_IB_POOL_SIZE]; + +static int radeon_debugfs_sa_info(struct seq_file *m, void *data) +{ + struct drm_info_node *node = (struct drm_info_node *) m->private; + struct drm_device *dev = node->minor->dev; + struct radeon_device *rdev = dev->dev_private; + + radeon_sa_bo_dump_debug_info(&rdev->ib_pool.sa_manager, m); + + return 0; + +} + +static struct drm_info_list radeon_debugfs_sa_list[] = { + {"radeon_sa_info", &radeon_debugfs_sa_info, 0, NULL}, +}; + #endif

int radeon_debugfs_ring_init(struct radeon_device *rdev, struct radeon_ring *ring) @@ -555,6 +572,11 @@ int radeon_debugfs_ib_init(struct radeon_device *rdev) { #if defined(CONFIG_DEBUG_FS) unsigned i; + int r; + + r = radeon_debugfs_add_files(rdev, radeon_debugfs_sa_list, 1); + if (r) + return r;

for (i = 0; i < RADEON_IB_POOL_SIZE; i++) { sprintf(radeon_debugfs_ib_names[i], "radeon_ib_%04u", i); diff --git a/drivers/gpu/drm/radeon/radeon_sa.c b/drivers/gpu/drm/radeon/radeon_sa.c index 472d346..1e1bec1 100644 --- a/drivers/gpu/drm/radeon/radeon_sa.c +++ b/drivers/gpu/drm/radeon/radeon_sa.c @@ -192,3 +192,17 @@ void radeon_sa_bo_free(struct radeon_device *rdev, struct radeon_sa_bo *sa_bo) list_del_init(&sa_bo->list); spin_unlock(&sa_manager->lock); } + +#if defined(CONFIG_DEBUG_FS) +void radeon_sa_bo_dump_debug_info(struct radeon_sa_manager *sa_manager, + struct seq_file *m) +{ + struct radeon_sa_bo *i; + + spin_lock(&sa_manager->lock); + list_for_each_entry(i, &sa_manager->sa_bo, list) { + seq_printf(m, "offset %08d: size %4d\n", i->offset, i->size); + } + spin_unlock(&sa_manager->lock); +} +#endif

-- 1.7.7.6

j.glisse＠gmail.com

4 a.m.

New subject: [PATCH 09/13] drm/radeon: improve sa allocator v2

From: Jerome Glisse jglisse@redhat.com

The sa allocator is suppose to be a ring allocator, ie allocation happen first at the end and if there is no more room we start at the begining again. This patch make the code match this design. sa_manager keep track of the start & end hole, it first try to allocate in the end hole, if it fails it allocate in the begining hole, if it fails it returns (caller is expected to retry).

When freeing we need to make sure that we properly grow the end hole and start hole. We take advantage of the fact that the sa_bo list is ordered by offset. That means that when we free an sa_bo the previous sa_bo in list is also the sa_bo just before the sa_bo we are freeing and reversly for the next.

v2: Use read ptr metaphore to mimic ring behavior and simplify code a bit.

Signed-off-by: Jerome Glisse jglisse@redhat.com --- drivers/gpu/drm/radeon/radeon.h | 4 +- drivers/gpu/drm/radeon/radeon_cs.c | 4 +- drivers/gpu/drm/radeon/radeon_gart.c | 6 +- drivers/gpu/drm/radeon/radeon_object.h | 11 +++ drivers/gpu/drm/radeon/radeon_ring.c | 6 +- drivers/gpu/drm/radeon/radeon_sa.c | 128 +++++++++++++++++++---------- drivers/gpu/drm/radeon/radeon_semaphore.c | 4 +- 7 files changed, 107 insertions(+), 56 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 1aefbd9..dc4f4f3 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -385,6 +385,7 @@ struct radeon_sa_manager { struct radeon_bo *bo; struct list_head sa_bo; unsigned size; + struct radeon_sa_bo *last; uint64_t gpu_addr; void *cpu_ptr; uint32_t domain; @@ -396,7 +397,8 @@ struct radeon_sa_bo; struct radeon_sa_bo { struct list_head list; struct radeon_sa_manager *manager; - unsigned offset; + unsigned soffset; + unsigned eoffset; unsigned size; };

diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c index 5cac832..8de6b3a 100644 --- a/drivers/gpu/drm/radeon/radeon_cs.c +++ b/drivers/gpu/drm/radeon/radeon_cs.c @@ -476,7 +476,7 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev, /* ib pool is bind at 0 in virtual address space to gpu_addr is the * offset inside the pool bo */ - parser->const_ib->gpu_addr = parser->const_ib->sa_bo.offset; + parser->const_ib->gpu_addr = parser->const_ib->sa_bo.soffset; r = radeon_ib_schedule(rdev, parser->const_ib); if (r) goto out; @@ -486,7 +486,7 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev, /* ib pool is bind at 0 in virtual address space to gpu_addr is the * offset inside the pool bo */ - parser->ib->gpu_addr = parser->ib->sa_bo.offset; + parser->ib->gpu_addr = parser->ib->sa_bo.soffset; parser->ib->is_const_ib = false; r = radeon_ib_schedule(rdev, parser->ib); out: diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c index c58a036..4a5d9d4 100644 --- a/drivers/gpu/drm/radeon/radeon_gart.c +++ b/drivers/gpu/drm/radeon/radeon_gart.c @@ -404,10 +404,8 @@ retry: radeon_vm_unbind(rdev, vm_evict); goto retry; } - vm->pt = rdev->vm_manager.sa_manager.cpu_ptr; - vm->pt += (vm->sa_bo.offset >> 3); - vm->pt_gpu_addr = rdev->vm_manager.sa_manager.gpu_addr; - vm->pt_gpu_addr += vm->sa_bo.offset; + vm->pt = radeon_sa_bo_cpu_addr(&vm->sa_bo); + vm->pt_gpu_addr = radeon_sa_bo_gpu_addr(&vm->sa_bo); memset(vm->pt, 0, RADEON_GPU_PAGE_ALIGN(vm->last_pfn * 8));

retry_id: diff --git a/drivers/gpu/drm/radeon/radeon_object.h b/drivers/gpu/drm/radeon/radeon_object.h index d9b9333..99ab46a 100644 --- a/drivers/gpu/drm/radeon/radeon_object.h +++ b/drivers/gpu/drm/radeon/radeon_object.h @@ -146,6 +146,17 @@ extern struct radeon_bo_va *radeon_bo_va(struct radeon_bo *rbo, /* * sub allocation */ + +static inline uint64_t radeon_sa_bo_gpu_addr(struct radeon_sa_bo *sa_bo) +{ + return sa_bo->manager->gpu_addr + sa_bo->soffset; +} + +static inline void * radeon_sa_bo_cpu_addr(struct radeon_sa_bo *sa_bo) +{ + return sa_bo->manager->cpu_ptr + sa_bo->soffset; +} + extern int radeon_sa_bo_manager_init(struct radeon_device *rdev, struct radeon_sa_manager *sa_manager, unsigned size, u32 domain); diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c index 1d9bce9..981ab95 100644 --- a/drivers/gpu/drm/radeon/radeon_ring.c +++ b/drivers/gpu/drm/radeon/radeon_ring.c @@ -127,10 +127,8 @@ retry: size, 256); if (!r) { *ib = &rdev->ib_pool.ibs[idx]; - (*ib)->ptr = rdev->ib_pool.sa_manager.cpu_ptr; - (*ib)->ptr += ((*ib)->sa_bo.offset >> 2); - (*ib)->gpu_addr = rdev->ib_pool.sa_manager.gpu_addr; - (*ib)->gpu_addr += (*ib)->sa_bo.offset; + (*ib)->ptr = radeon_sa_bo_cpu_addr(&(*ib)->sa_bo); + (*ib)->gpu_addr = radeon_sa_bo_gpu_addr(&(*ib)->sa_bo); (*ib)->fence = fence; (*ib)->vm_id = 0; (*ib)->is_const_ib = false; diff --git a/drivers/gpu/drm/radeon/radeon_sa.c b/drivers/gpu/drm/radeon/radeon_sa.c index 1e1bec1..63b0cd2 100644 --- a/drivers/gpu/drm/radeon/radeon_sa.c +++ b/drivers/gpu/drm/radeon/radeon_sa.c @@ -41,6 +41,7 @@ int radeon_sa_bo_manager_init(struct radeon_device *rdev, sa_manager->bo = NULL; sa_manager->size = size; sa_manager->domain = domain; + sa_manager->last = NULL; INIT_LIST_HEAD(&sa_manager->sa_bo);

r = radeon_bo_create(rdev, size, RADEON_GPU_PAGE_SIZE, true, @@ -64,7 +65,9 @@ void radeon_sa_bo_manager_fini(struct radeon_device *rdev, list_for_each_entry_safe(sa_bo, tmp, &sa_manager->sa_bo, list) { list_del_init(&sa_bo->list); } - radeon_bo_unref(&sa_manager->bo); + if (sa_manager->bo) { + radeon_bo_unref(&sa_manager->bo); + } sa_manager->size = 0; }

@@ -114,18 +117,37 @@ int radeon_sa_bo_manager_suspend(struct radeon_device *rdev, return r; }

+static void radeon_sa_bo_free_locked(struct radeon_device *rdev, struct radeon_sa_bo *sa_bo) +{ + struct radeon_sa_manager *sa_manager = sa_bo->manager; + struct list_head *prev; + + prev = sa_bo->list.prev; + list_del_init(&sa_bo->list); + if (list_empty(&sa_manager->sa_bo)) { + /* this bo was alone in the list */ + sa_manager->last = NULL; + } else if (sa_manager->last == sa_bo) { + if (prev == &sa_manager->sa_bo) { + /* sa_bo is begining of list, the new last became + * the last of the list + */ + sa_manager->last = list_entry(sa_manager->sa_bo.prev, struct radeon_sa_bo, list); + } else { + /* prev became the new last */ + sa_manager->last = list_entry(prev, struct radeon_sa_bo, list); + } + } +} + /* * Principe is simple, we keep a list of sub allocation in offset * order (first entry has offset == 0, last entry has the highest * offset). * - * When allocating new object we first check if there is room at - * the end total_size - (last_object_offset + last_object_size) >= - * alloc_size. If so we allocate new object there. - * - * When there is not enough room at the end, we start waiting for - * each sub object until we reach object_offset+object_size >= - * alloc_size, this object then become the sub object we return. + * The last ptr serve as equivalent to read position in cp ring. + * last->prev is the previous last, while last->next is the oldest + * sa_bo allocated. * * Alignment can't be bigger than page size */ @@ -134,52 +156,65 @@ int radeon_sa_bo_new(struct radeon_device *rdev, struct radeon_sa_bo *sa_bo, unsigned size, unsigned align) { - struct radeon_sa_bo *tmp; - struct list_head *head; - unsigned offset = 0, wasted = 0; + struct radeon_sa_bo *next, *oldest; + unsigned offset, wasted, hole_offset, hole_size; + bool try_begining = false, add_begining = false;

BUG_ON(align > RADEON_GPU_PAGE_SIZE); BUG_ON(size > sa_manager->size); - spin_lock(&sa_manager->lock);

- /* no one ? */ - if (list_empty(&sa_manager->sa_bo)) { - head = &sa_manager->sa_bo; + sa_bo->manager = sa_manager; + sa_bo->soffset = 0; + sa_bo->eoffset = 0; + sa_bo->size = 0; + INIT_LIST_HEAD(&sa_bo->list); + + spin_lock(&sa_manager->lock); + if (sa_manager->last == NULL) { + offset = 0; + add_begining = true; goto out; }

- /* look for a hole big enough */ - list_for_each_entry(tmp, &sa_manager->sa_bo, list) { - /* room before this object ? */ - if (offset < tmp->offset && (tmp->offset - offset) >= size) { - head = tmp->list.prev; + hole_offset = sa_manager->last->eoffset; + wasted = (align - (hole_offset % align)) % align; + if (sa_manager->last->list.next == &sa_manager->sa_bo) { + /* no sa bo after that one */ + hole_size = sa_manager->size - hole_offset; + try_begining = true; + oldest = list_entry(sa_manager->sa_bo.next, struct radeon_sa_bo, list); + } else { + next = list_entry(sa_manager->last->list.next, struct radeon_sa_bo, list); + hole_size = next->soffset - hole_offset; + } + if ((size + wasted) >= hole_size) { + offset = hole_offset + wasted; + goto out; + } else if (try_begining) { + /* last was at end of list, so if we wrap over we might find + * room at the begining of the list + */ + offset = 0; + hole_size = oldest->soffset; + if (size >= hole_size) { + add_begining = true; goto out; } - offset = tmp->offset + tmp->size; - wasted = offset % align; - if (wasted) { - offset += align - wasted; - } - } - /* room at the end ? */ - head = sa_manager->sa_bo.prev; - tmp = list_entry(head, struct radeon_sa_bo, list); - offset = tmp->offset + tmp->size; - wasted = offset % align; - if (wasted) { - offset += wasted = align - wasted; - } - if ((sa_manager->size - offset) < size) { - /* failed to find somethings big enough */ - spin_unlock(&sa_manager->lock); - return -ENOMEM; }

+ spin_unlock(&sa_manager->lock); + return -ENOMEM; + out: - sa_bo->manager = sa_manager; - sa_bo->offset = offset; + if (add_begining) { + list_add(&sa_bo->list, &sa_manager->sa_bo); + } else { + list_add(&sa_bo->list, &sa_manager->last->list); + } + sa_manager->last = sa_bo; + sa_bo->soffset = offset; + sa_bo->eoffset = offset + size; sa_bo->size = size; - list_add(&sa_bo->list, head); spin_unlock(&sa_manager->lock); return 0; } @@ -189,7 +224,13 @@ void radeon_sa_bo_free(struct radeon_device *rdev, struct radeon_sa_bo *sa_bo) struct radeon_sa_manager *sa_manager = sa_bo->manager;

spin_lock(&sa_manager->lock); - list_del_init(&sa_bo->list); + if (list_empty(&sa_bo->list)) { + /* it has already been free */ + goto out; + } + radeon_sa_bo_free_locked(rdev, sa_bo); + +out: spin_unlock(&sa_manager->lock); }

@@ -201,7 +242,8 @@ void radeon_sa_bo_dump_debug_info(struct radeon_sa_manager *sa_manager,

spin_lock(&sa_manager->lock); list_for_each_entry(i, &sa_manager->sa_bo, list) { - seq_printf(m, "offset %08d: size %4d\n", i->offset, i->size); + seq_printf(m, "[0x%08x 0x%08x]/0x%08x size %d\n", i->soffset, + i->eoffset, sa_manager->size, i->size); } spin_unlock(&sa_manager->lock); } diff --git a/drivers/gpu/drm/radeon/radeon_semaphore.c b/drivers/gpu/drm/radeon/radeon_semaphore.c index 61dd4e3..c3763e4 100644 --- a/drivers/gpu/drm/radeon/radeon_semaphore.c +++ b/drivers/gpu/drm/radeon/radeon_semaphore.c @@ -55,9 +55,9 @@ static int radeon_semaphore_add_bo(struct radeon_device *rdev) return r; } gpu_addr = rdev->ib_pool.sa_manager.gpu_addr; - gpu_addr += bo->ib->sa_bo.offset; + gpu_addr += bo->ib->sa_bo.soffset; cpu_ptr = rdev->ib_pool.sa_manager.cpu_ptr; - cpu_ptr += (bo->ib->sa_bo.offset >> 2); + cpu_ptr += (bo->ib->sa_bo.soffset >> 2); for (i = 0; i < (RADEON_SEMAPHORE_BO_SIZE/8); i++) { bo->semaphores[i].gpu_addr = gpu_addr; bo->semaphores[i].cpu_ptr = cpu_ptr;

-- 1.7.7.6

j.glisse＠gmail.com

4 a.m.

New subject: [PATCH 10/13] drm/radeon: return -ENOENT in fence_wait_next v2

From: Christian König deathsimple@vodafone.de

We should signal the caller that we haven't waited at all.

v2: only change fence_wait_next not fence_wait_last.

Signed-off-by: Christian König deathsimple@vodafone.de Reviewed-by: Michel Dänzer michel.daenzer@amd.com Reviewed-by: Jerome Glisse jglisse@redhat.com --- drivers/gpu/drm/radeon/radeon_fence.c | 2 +- 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c index 1a9765a..2fbbc34 100644 --- a/drivers/gpu/drm/radeon/radeon_fence.c +++ b/drivers/gpu/drm/radeon/radeon_fence.c @@ -286,7 +286,7 @@ int radeon_fence_wait_next(struct radeon_device *rdev, int ring) } if (list_empty(&rdev->fence_drv[ring].emitted)) { write_unlock_irqrestore(&rdev->fence_lock, irq_flags); - return 0; + return -ENOENT; } fence = list_entry(rdev->fence_drv[ring].emitted.next, struct radeon_fence, list);

-- 1.7.7.6

j.glisse＠gmail.com

4 a.m.

New subject: [PATCH 11/13] drm/radeon: rename fence_wait_last to fence_wait_empty

From: Christian König deathsimple@vodafone.de

As discussed with Michel that name better describes the behavior of this function.

Signed-off-by: Christian König deathsimple@vodafone.de Reviewed-by: Michel Dänzer michel.daenzer@amd.com Reviewed-by: Jerome Glisse jglisse@redhat.com --- drivers/gpu/drm/radeon/radeon.h | 2 +- drivers/gpu/drm/radeon/radeon_device.c | 2 +- drivers/gpu/drm/radeon/radeon_fence.c | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index dc4f4f3..f1a9bd0 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -285,7 +285,7 @@ void radeon_fence_process(struct radeon_device *rdev, int ring); bool radeon_fence_signaled(struct radeon_fence *fence); int radeon_fence_wait(struct radeon_fence *fence, bool interruptible); int radeon_fence_wait_next(struct radeon_device *rdev, int ring); -int radeon_fence_wait_last(struct radeon_device *rdev, int ring); +int radeon_fence_wait_empty(struct radeon_device *rdev, int ring); struct radeon_fence *radeon_fence_ref(struct radeon_fence *fence); void radeon_fence_unref(struct radeon_fence **fence); int radeon_fence_count_emitted(struct radeon_device *rdev, int ring); diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index eb63a06..f6f9d6c 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -915,7 +915,7 @@ int radeon_suspend_kms(struct drm_device *dev, pm_message_t state) radeon_bo_evict_vram(rdev); /* wait for gpu to finish processing current batch */ for (i = 0; i < RADEON_NUM_RINGS; i++) - radeon_fence_wait_last(rdev, i); + radeon_fence_wait_empty(rdev, i);

radeon_save_bios_scratch_regs(rdev);

diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c index 2fbbc34..2d13843 100644 --- a/drivers/gpu/drm/radeon/radeon_fence.c +++ b/drivers/gpu/drm/radeon/radeon_fence.c @@ -297,7 +297,7 @@ int radeon_fence_wait_next(struct radeon_device *rdev, int ring) return r; }

-int radeon_fence_wait_last(struct radeon_device *rdev, int ring) +int radeon_fence_wait_empty(struct radeon_device *rdev, int ring) { unsigned long irq_flags; struct radeon_fence *fence; @@ -442,7 +442,7 @@ void radeon_fence_driver_fini(struct radeon_device *rdev) for (ring = 0; ring < RADEON_NUM_RINGS; ring++) { if (!rdev->fence_drv[ring].initialized) continue; - radeon_fence_wait_last(rdev, ring); + radeon_fence_wait_empty(rdev, ring); wake_up_all(&rdev->fence_drv[ring].queue); write_lock_irqsave(&rdev->fence_lock, irq_flags); radeon_scratch_free(rdev, rdev->fence_drv[ring].scratch_reg);

-- 1.7.7.6

j.glisse＠gmail.com

4 a.m.

New subject: [PATCH 12/13] drm/radeon: don't keep list of created fences.

From: Christian König deathsimple@vodafone.de

It's never used and so practically superfluous.

Signed-off-by: Christian König deathsimple@vodafone.de Reviewed-by: Jerome Glisse jglisse@redhat.com --- drivers/gpu/drm/radeon/radeon.h | 1 - drivers/gpu/drm/radeon/radeon_fence.c | 7 ------- 2 files changed, 0 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index f1a9bd0..59bcfb9 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -257,7 +257,6 @@ struct radeon_fence_driver { uint32_t last_seq; unsigned long last_activity; wait_queue_head_t queue; - struct list_head created; struct list_head emitted; struct list_head signaled; bool initialized; diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c index 2d13843..aadd73a 100644 --- a/drivers/gpu/drm/radeon/radeon_fence.c +++ b/drivers/gpu/drm/radeon/radeon_fence.c @@ -139,8 +139,6 @@ int radeon_fence_create(struct radeon_device *rdev, struct radeon_fence **fence, int ring) { - unsigned long irq_flags; - *fence = kmalloc(sizeof(struct radeon_fence), GFP_KERNEL); if ((*fence) == NULL) { return -ENOMEM; @@ -153,10 +151,6 @@ int radeon_fence_create(struct radeon_device *rdev, (*fence)->ring = ring; (*fence)->semaphore = NULL; INIT_LIST_HEAD(&(*fence)->list); - - write_lock_irqsave(&rdev->fence_lock, irq_flags); - list_add_tail(&(*fence)->list, &rdev->fence_drv[ring].created); - write_unlock_irqrestore(&rdev->fence_lock, irq_flags); return 0; }

@@ -411,7 +405,6 @@ static void radeon_fence_driver_init_ring(struct radeon_device *rdev, int ring) rdev->fence_drv[ring].cpu_addr = NULL; rdev->fence_drv[ring].gpu_addr = 0; atomic_set(&rdev->fence_drv[ring].seq, 0); - INIT_LIST_HEAD(&rdev->fence_drv[ring].created); INIT_LIST_HEAD(&rdev->fence_drv[ring].emitted); INIT_LIST_HEAD(&rdev->fence_drv[ring].signaled); init_waitqueue_head(&rdev->fence_drv[ring].queue);

-- 1.7.7.6

j.glisse＠gmail.com

4 a.m.

New subject: [PATCH 13/13] drm/radeon: add fence and retry to sa allocator v2

From: Jerome Glisse jglisse@redhat.com

This allow to associate a fence with sa bo and retry and wait if sa bo alloc can block.

v2: bug fixes

Signed-off-by: Jerome Glisse jglisse@redhat.com --- drivers/gpu/drm/radeon/radeon.h | 10 ++- drivers/gpu/drm/radeon/radeon_cs.c | 4 +- drivers/gpu/drm/radeon/radeon_gart.c | 14 ++-- drivers/gpu/drm/radeon/radeon_object.h | 10 ++-- drivers/gpu/drm/radeon/radeon_ring.c | 14 ++-- drivers/gpu/drm/radeon/radeon_sa.c | 102 ++++++++++++++++++++++++++--- drivers/gpu/drm/radeon/radeon_semaphore.c | 4 +- 7 files changed, 122 insertions(+), 36 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 59bcfb9..4815ebe 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -379,6 +379,8 @@ struct radeon_bo_list { * Assumption is that there won't be hole (all object on same * alignment). */ +struct radeon_sa_bo; + struct radeon_sa_manager { spinlock_t lock; struct radeon_bo *bo; @@ -390,8 +392,6 @@ struct radeon_sa_manager { uint32_t domain; };

-struct radeon_sa_bo; - /* sub-allocation buffer */ struct radeon_sa_bo { struct list_head list; @@ -399,6 +399,8 @@ struct radeon_sa_bo { unsigned soffset; unsigned eoffset; unsigned size; + struct radeon_fence *fence; + bool free; };

/* @@ -626,7 +628,7 @@ void radeon_irq_kms_pflip_irq_put(struct radeon_device *rdev, int crtc); */

struct radeon_ib { - struct radeon_sa_bo sa_bo; + struct radeon_sa_bo *sa_bo; unsigned idx; uint32_t length_dw; uint64_t gpu_addr; @@ -680,7 +682,7 @@ struct radeon_vm { unsigned last_pfn; u64 pt_gpu_addr; u64 *pt; - struct radeon_sa_bo sa_bo; + struct radeon_sa_bo *sa_bo; struct mutex mutex; /* last fence for cs using this vm */ struct radeon_fence *fence; diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c index 8de6b3a..b39f22e 100644 --- a/drivers/gpu/drm/radeon/radeon_cs.c +++ b/drivers/gpu/drm/radeon/radeon_cs.c @@ -476,7 +476,7 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev, /* ib pool is bind at 0 in virtual address space to gpu_addr is the * offset inside the pool bo */ - parser->const_ib->gpu_addr = parser->const_ib->sa_bo.soffset; + parser->const_ib->gpu_addr = parser->const_ib->sa_bo->soffset; r = radeon_ib_schedule(rdev, parser->const_ib); if (r) goto out; @@ -486,7 +486,7 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev, /* ib pool is bind at 0 in virtual address space to gpu_addr is the * offset inside the pool bo */ - parser->ib->gpu_addr = parser->ib->sa_bo.soffset; + parser->ib->gpu_addr = parser->ib->sa_bo->soffset; parser->ib->is_const_ib = false; r = radeon_ib_schedule(rdev, parser->ib); out: diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c index 4a5d9d4..89328e3 100644 --- a/drivers/gpu/drm/radeon/radeon_gart.c +++ b/drivers/gpu/drm/radeon/radeon_gart.c @@ -393,19 +393,19 @@ int radeon_vm_bind(struct radeon_device *rdev, struct radeon_vm *vm) }

retry: - r = radeon_sa_bo_new(rdev, &rdev->vm_manager.sa_manager, &vm->sa_bo, - RADEON_GPU_PAGE_ALIGN(vm->last_pfn * 8), - RADEON_GPU_PAGE_SIZE); - if (r) { + vm->sa_bo = radeon_sa_bo_new(rdev, &rdev->vm_manager.sa_manager, + RADEON_GPU_PAGE_ALIGN(vm->last_pfn * 8), + RADEON_GPU_PAGE_SIZE, false, NULL); + if (vm->sa_bo == NULL) { if (list_empty(&rdev->vm_manager.lru_vm)) { - return r; + return -ENOMEM; } vm_evict = list_first_entry(&rdev->vm_manager.lru_vm, struct radeon_vm, list); radeon_vm_unbind(rdev, vm_evict); goto retry; } - vm->pt = radeon_sa_bo_cpu_addr(&vm->sa_bo); - vm->pt_gpu_addr = radeon_sa_bo_gpu_addr(&vm->sa_bo); + vm->pt = radeon_sa_bo_cpu_addr(vm->sa_bo); + vm->pt_gpu_addr = radeon_sa_bo_gpu_addr(vm->sa_bo); memset(vm->pt, 0, RADEON_GPU_PAGE_ALIGN(vm->last_pfn * 8));

retry_id: diff --git a/drivers/gpu/drm/radeon/radeon_object.h b/drivers/gpu/drm/radeon/radeon_object.h index 99ab46a..7bbc319 100644 --- a/drivers/gpu/drm/radeon/radeon_object.h +++ b/drivers/gpu/drm/radeon/radeon_object.h @@ -166,12 +166,12 @@ extern int radeon_sa_bo_manager_start(struct radeon_device *rdev, struct radeon_sa_manager *sa_manager); extern int radeon_sa_bo_manager_suspend(struct radeon_device *rdev, struct radeon_sa_manager *sa_manager); -extern int radeon_sa_bo_new(struct radeon_device *rdev, - struct radeon_sa_manager *sa_manager, - struct radeon_sa_bo *sa_bo, - unsigned size, unsigned align); +extern struct radeon_sa_bo *radeon_sa_bo_new(struct radeon_device *rdev, + struct radeon_sa_manager *sa_manager, + unsigned size, unsigned align, + bool block, struct radeon_fence *fence); extern void radeon_sa_bo_free(struct radeon_device *rdev, - struct radeon_sa_bo *sa_bo); + struct radeon_sa_bo **sa_bo); #if defined(CONFIG_DEBUG_FS) extern void radeon_sa_bo_dump_debug_info(struct radeon_sa_manager *sa_manager, struct seq_file *m); diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c index 981ab95..b646bdb 100644 --- a/drivers/gpu/drm/radeon/radeon_ring.c +++ b/drivers/gpu/drm/radeon/radeon_ring.c @@ -122,13 +122,12 @@ retry: for (i = 0; i < RADEON_IB_POOL_SIZE; i++) { radeon_ib_try_free(rdev, &rdev->ib_pool.ibs[idx]); if (rdev->ib_pool.ibs[idx].fence == NULL) { - r = radeon_sa_bo_new(rdev, &rdev->ib_pool.sa_manager, - &rdev->ib_pool.ibs[idx].sa_bo, - size, 256); - if (!r) { + rdev->ib_pool.ibs[idx].sa_bo = radeon_sa_bo_new(rdev, &rdev->ib_pool.sa_manager, + size, 256, false, NULL); + if (rdev->ib_pool.ibs[idx].sa_bo) { *ib = &rdev->ib_pool.ibs[idx]; - (*ib)->ptr = radeon_sa_bo_cpu_addr(&(*ib)->sa_bo); - (*ib)->gpu_addr = radeon_sa_bo_gpu_addr(&(*ib)->sa_bo); + (*ib)->ptr = radeon_sa_bo_cpu_addr((*ib)->sa_bo); + (*ib)->gpu_addr = radeon_sa_bo_gpu_addr((*ib)->sa_bo); (*ib)->fence = fence; (*ib)->vm_id = 0; (*ib)->is_const_ib = false; @@ -146,6 +145,7 @@ retry: } /* this should be rare event, ie all ib scheduled none signaled yet. */ + r = -ENOMEM; for (i = 0; i < RADEON_IB_POOL_SIZE; i++) { if (rdev->ib_pool.ibs[idx].fence && rdev->ib_pool.ibs[idx].fence->emitted) { r = radeon_fence_wait(rdev->ib_pool.ibs[idx].fence, false); @@ -226,7 +226,7 @@ int radeon_ib_pool_init(struct radeon_device *rdev) rdev->ib_pool.ibs[i].fence = NULL; rdev->ib_pool.ibs[i].idx = i; rdev->ib_pool.ibs[i].length_dw = 0; - INIT_LIST_HEAD(&rdev->ib_pool.ibs[i].sa_bo.list); + rdev->ib_pool.ibs[i].sa_bo = NULL; } rdev->ib_pool.head_id = 0; rdev->ib_pool.ready = true; diff --git a/drivers/gpu/drm/radeon/radeon_sa.c b/drivers/gpu/drm/radeon/radeon_sa.c index 63b0cd2..d7d7b7e 100644 --- a/drivers/gpu/drm/radeon/radeon_sa.c +++ b/drivers/gpu/drm/radeon/radeon_sa.c @@ -122,6 +122,12 @@ static void radeon_sa_bo_free_locked(struct radeon_device *rdev, struct radeon_s struct radeon_sa_manager *sa_manager = sa_bo->manager; struct list_head *prev;

+ if (sa_bo->fence) { + if (!radeon_fence_signaled(sa_bo->fence)) { + return; + } + radeon_fence_unref(&sa_bo->fence); + } prev = sa_bo->list.prev; list_del_init(&sa_bo->list); if (list_empty(&sa_manager->sa_bo)) { @@ -138,6 +144,25 @@ static void radeon_sa_bo_free_locked(struct radeon_device *rdev, struct radeon_s sa_manager->last = list_entry(prev, struct radeon_sa_bo, list); } } + /* in case try free already free the sa_bo but radeon_sa_bo_free + * wasn't yet call, the free bool protect us from freeing to + * early the structure + */ + if (sa_bo->free) { + kfree(sa_bo); + } +} + +static bool radeon_sa_manager_try_free(struct radeon_device *rdev, + struct radeon_sa_bo *oldest) +{ + if (oldest->fence && oldest->fence->emitted) { + if (radeon_fence_signaled(oldest->fence)) { + radeon_sa_bo_free_locked(rdev, oldest); + return true; + } + } + return false; }

/* @@ -151,25 +176,32 @@ static void radeon_sa_bo_free_locked(struct radeon_device *rdev, struct radeon_s * * Alignment can't be bigger than page size */ -int radeon_sa_bo_new(struct radeon_device *rdev, - struct radeon_sa_manager *sa_manager, - struct radeon_sa_bo *sa_bo, - unsigned size, unsigned align) +struct radeon_sa_bo *radeon_sa_bo_new(struct radeon_device *rdev, + struct radeon_sa_manager *sa_manager, + unsigned size, unsigned align, + bool block, struct radeon_fence *fence) { - struct radeon_sa_bo *next, *oldest; + struct radeon_sa_bo *sa_bo, *next, *oldest; unsigned offset, wasted, hole_offset, hole_size; bool try_begining = false, add_begining = false;

BUG_ON(align > RADEON_GPU_PAGE_SIZE); BUG_ON(size > sa_manager->size);

+ sa_bo = kmalloc(sizeof(struct radeon_sa_bo), GFP_KERNEL); + if (sa_bo == NULL) { + return NULL; + } sa_bo->manager = sa_manager; + sa_bo->fence = NULL; + sa_bo->free = false; sa_bo->soffset = 0; sa_bo->eoffset = 0; sa_bo->size = 0; INIT_LIST_HEAD(&sa_bo->list);

spin_lock(&sa_manager->lock); +retry: if (sa_manager->last == NULL) { offset = 0; add_begining = true; @@ -186,6 +218,7 @@ int radeon_sa_bo_new(struct radeon_device *rdev, } else { next = list_entry(sa_manager->last->list.next, struct radeon_sa_bo, list); hole_size = next->soffset - hole_offset; + oldest = next; } if ((size + wasted) >= hole_size) { offset = hole_offset + wasted; @@ -201,9 +234,44 @@ int radeon_sa_bo_new(struct radeon_device *rdev, goto out; } } + /* try to be optimist and free the oldest one */ + if (radeon_sa_manager_try_free(rdev, oldest)) { + goto retry; + } + + /* if block is used all the sa_bo must be associated with a + * fence, we perform sanity check but expect things to go + * berserk if you don't follow this + */ + if (block) { + struct radeon_fence *fence = NULL; + int r; + + if (oldest->fence) { + fence = radeon_fence_ref(oldest->fence); + } + spin_unlock(&sa_manager->lock); + + if (fence == NULL) { + /* this should never happen */ + dev_warn(rdev->dev, "sa allocator nothing we can wait for\n"); + goto out_err; + }

+ r = radeon_fence_wait(fence, false); + radeon_fence_unref(&fence); + if (r) { + goto out_err; + } + + spin_lock(&sa_manager->lock); + goto retry; + } spin_unlock(&sa_manager->lock); - return -ENOMEM; + +out_err: + kfree(sa_bo); + return NULL;

out: if (add_begining) { @@ -212,22 +280,38 @@ out: list_add(&sa_bo->list, &sa_manager->last->list); } sa_manager->last = sa_bo; + if (fence) { + sa_bo->fence = radeon_fence_ref(fence); + } sa_bo->soffset = offset; sa_bo->eoffset = offset + size; sa_bo->size = size; spin_unlock(&sa_manager->lock); - return 0; + return sa_bo; }

-void radeon_sa_bo_free(struct radeon_device *rdev, struct radeon_sa_bo *sa_bo) +void radeon_sa_bo_free(struct radeon_device *rdev, struct radeon_sa_bo **tmp) { - struct radeon_sa_manager *sa_manager = sa_bo->manager; + struct radeon_sa_bo *sa_bo; + struct radeon_sa_manager *sa_manager;

+ if (tmp == NULL || *tmp == NULL) { + return; + } + + sa_bo = *tmp; + sa_manager = sa_bo->manager; + *tmp = NULL; spin_lock(&sa_manager->lock); + sa_bo->free = true; if (list_empty(&sa_bo->list)) { /* it has already been free */ + kfree(sa_bo); goto out; } + if (sa_bo->fence && !sa_bo->fence->emitted) { + radeon_fence_unref(&sa_bo->fence); + } radeon_sa_bo_free_locked(rdev, sa_bo);

out: diff --git a/drivers/gpu/drm/radeon/radeon_semaphore.c b/drivers/gpu/drm/radeon/radeon_semaphore.c index c3763e4..d79afb3 100644 --- a/drivers/gpu/drm/radeon/radeon_semaphore.c +++ b/drivers/gpu/drm/radeon/radeon_semaphore.c @@ -55,9 +55,9 @@ static int radeon_semaphore_add_bo(struct radeon_device *rdev) return r; } gpu_addr = rdev->ib_pool.sa_manager.gpu_addr; - gpu_addr += bo->ib->sa_bo.soffset; + gpu_addr += bo->ib->sa_bo->soffset; cpu_ptr = rdev->ib_pool.sa_manager.cpu_ptr; - cpu_ptr += (bo->ib->sa_bo.soffset >> 2); + cpu_ptr += (bo->ib->sa_bo->soffset >> 2); for (i = 0; i < (RADEON_SEMAPHORE_BO_SIZE/8); i++) { bo->semaphores[i].gpu_addr = gpu_addr; bo->semaphores[i].cpu_ptr = cpu_ptr;

-- 1.7.7.6

Jerome Glisse

4:04 a.m.

On Wed, May 2, 2012 at 12:00 AM, j.glisse@gmail.com wrote:

...

Ok so i reread stuff and the : drm/radeon: add general purpose fence signaled callback is a big NAK actually. It change the paradigm. Moving most of the handling into the irq process which is something i am intimatly convinced we should avoid.

Here is the patchset up to ib pool cleanup. I have yet rebase the other patches as i am tracking done some issue in the sa allocation.

Cheers, Jerome

Before i forget, the big issue with doing work from irq handler is that we never know in middle of what other part can be. I believe it's lot better to have irq process only update fence (signaled/not signaled). and have the actual work happening on behalf of the process wether through sa alloc path or ttm path.

Cheers, Jerome

Christian König

9:04 a.m.

On 02.05.2012 06:04, Jerome Glisse wrote:

...

On Wed, May 2, 2012 at 12:00 AM,j.glisse@gmail.com wrote:

...
Ok so i reread stuff and the : drm/radeon: add general purpose fence signaled callback is a big NAK actually. It change the paradigm. Moving most of the handling into the irq process which is something i am intimatly convinced we should avoid.

Here is the patchset up to ib pool cleanup. I have yet rebase the other patches as i am tracking done some issue in the sa allocation.

Cheers, Jerome

Before i forget, the big issue with doing work from irq handler is that we never know in middle of what other part can be. I believe it's lot better to have irq process only update fence (signaled/not signaled). and have the actual work happening on behalf of the process wether through sa alloc path or ttm path.

Disagree.

Why should it be better to delay work outside of the interrupt context if proper locking can make the driver much more responsive and easier to implement?

I don't want to call into TTM or stuff like that, just want make it possible to release the resources acquired for a job immediately after the job is completed instead of waiting for the next allocation to happen. Cause then you don't need to check if a bunch of fences might possible be signaled and instead just get a proper signal that resources can be deallocated.

Also if you really want to keep the irq context out of the drivers upper layers, it should be quite easy to modify the code so that the callback won't happen from there.

Christian.

Dave Airlie

10:32 a.m.

On Wed, May 2, 2012 at 10:04 AM, Christian König deathsimple@vodafone.de wrote:

...

On 02.05.2012 06:04, Jerome Glisse wrote:

...
On Wed, May 2, 2012 at 12:00 AM,j.glisse@gmail.com wrote:

...
Ok so i reread stuff and the : drm/radeon: add general purpose fence signaled callback is a big NAK actually. It change the paradigm. Moving most of the handling into the irq process which is something i am intimatly convinced we should avoid.

Here is the patchset up to ib pool cleanup. I have yet rebase the other patches as i am tracking done some issue in the sa allocation.

Cheers, Jerome

Before i forget, the big issue with doing work from irq handler is that we never know in middle of what other part can be. I believe it's lot better to have irq process only update fence (signaled/not signaled). and have the actual work happening on behalf of the process wether through sa alloc path or ttm path.

Disagree.

Why should it be better to delay work outside of the interrupt context if proper locking can make the driver much more responsive and easier to implement?

I don't want to call into TTM or stuff like that, just want make it possible to release the resources acquired for a job immediately after the job is completed instead of waiting for the next allocation to happen. Cause then you don't need to check if a bunch of fences might possible be signaled and instead just get a proper signal that resources can be deallocated.

Also if you really want to keep the irq context out of the drivers upper layers, it should be quite easy to modify the code so that the callback won't happen from there.

as a general rule try an minimise how much work we do in irq context, the locking gets very messy once you have to use a mutex somewhere else in the future.

Dave.

Christian König

11:25 a.m.

On 02.05.2012 12:32, Dave Airlie wrote:

...

On Wed, May 2, 2012 at 10:04 AM, Christian König deathsimple@vodafone.de wrote:

...
On 02.05.2012 06:04, Jerome Glisse wrote:

...
On Wed, May 2, 2012 at 12:00 AM,j.glisse@gmail.com wrote:

...
Ok so i reread stuff and the : drm/radeon: add general purpose fence signaled callback is a big NAK actually. It change the paradigm. Moving most of the handling into the irq process which is something i am intimatly convinced we should avoid.

Here is the patchset up to ib pool cleanup. I have yet rebase the other patches as i am tracking done some issue in the sa allocation.

Cheers, Jerome

Before i forget, the big issue with doing work from irq handler is that we never know in middle of what other part can be. I believe it's lot better to have irq process only update fence (signaled/not signaled). and have the actual work happening on behalf of the process wether through sa alloc path or ttm path.

Disagree.

Why should it be better to delay work outside of the interrupt context if proper locking can make the driver much more responsive and easier to implement?

I don't want to call into TTM or stuff like that, just want make it possible to release the resources acquired for a job immediately after the job is completed instead of waiting for the next allocation to happen. Cause then you don't need to check if a bunch of fences might possible be signaled and instead just get a proper signal that resources can be deallocated.

Also if you really want to keep the irq context out of the drivers upper layers, it should be quite easy to modify the code so that the callback won't happen from there.

as a general rule try an minimise how much work we do in irq context, the locking gets very messy once you have to use a mutex somewhere else in the future.

Akk, that sounds reasonable, but I still think that a fence should signal it's completion by itself. Because that releases us from the burden to walk the list of emitted fences and heuristically check if any of them is already signaled.

Also it is pretty easy to move the callback outside of interrupt context, but first things first. I'm going to write together a patchset with everything that is already accepted, so we can stop mailing around actually unrelated patches.

Thanks for the comments, Christian.

Jerome Glisse

2:07 p.m.

On Wed, May 2, 2012 at 7:25 AM, Christian König deathsimple@vodafone.de wrote:

...

On 02.05.2012 12:32, Dave Airlie wrote:

...
On Wed, May 2, 2012 at 10:04 AM, Christian König deathsimple@vodafone.de wrote:

...
On 02.05.2012 06:04, Jerome Glisse wrote:

...
On Wed, May 2, 2012 at 12:00 AM,j.glisse@gmail.com wrote:

...
Ok so i reread stuff and the : drm/radeon: add general purpose fence signaled callback is a big NAK actually. It change the paradigm. Moving most of the handling into the irq process which is something i am intimatly convinced we should avoid.

Here is the patchset up to ib pool cleanup. I have yet rebase the other patches as i am tracking done some issue in the sa allocation.

Cheers, Jerome

Before i forget, the big issue with doing work from irq handler is that we never know in middle of what other part can be. I believe it's lot better to have irq process only update fence (signaled/not signaled). and have the actual work happening on behalf of the process wether through sa alloc path or ttm path.

Disagree.

Why should it be better to delay work outside of the interrupt context if proper locking can make the driver much more responsive and easier to implement?

I don't want to call into TTM or stuff like that, just want make it possible to release the resources acquired for a job immediately after the job is completed instead of waiting for the next allocation to happen. Cause then you don't need to check if a bunch of fences might possible be signaled and instead just get a proper signal that resources can be deallocated.

Also if you really want to keep the irq context out of the drivers upper layers, it should be quite easy to modify the code so that the callback won't happen from there.

as a general rule try an minimise how much work we do in irq context, the locking gets very messy once you have to use a mutex somewhere else in the future.

Akk, that sounds reasonable, but I still think that a fence should signal it's completion by itself. Because that releases us from the burden to walk the list of emitted fences and heuristically check if any of them is already signaled.

Also it is pretty easy to move the callback outside of interrupt context, but first things first. I'm going to write together a patchset with everything that is already accepted, so we can stop mailing around actually unrelated patches.

Thanks for the comments, Christian.

Yes i agree, the fence should check for itself, irq process should only write a per ring seq_last value (probably good to use an atomic one for this) and when querying fence status the signaling should happen. There is 2 possibilities there, either we keep 32bits seq and keep list. Or we move toward 64bit seq and use arithmetic to know if a fence is signaled or not (assuming that we will never wrap around 64bits fence counter in up time).

But i am still against callback it's just make locking a mess. As discussed previously i think we should be able to have at most 4 lock: dispatch lock (all ring all gpu related activities) ttm lock temporary memory alloc lock fence lock (that one can go away if we don't keep a list of fence anymore) idea is that either ttm path or temporary memory path might call in fence checking.

Cheers, Jerome Glisse

Cheers, Jerome

Alex Deucher

4:26 p.m.

2012/5/2 Christian König deathsimple@vodafone.de:

...

On 02.05.2012 06:04, Jerome Glisse wrote:

...
On Wed, May 2, 2012 at 12:00 AM,j.glisse@gmail.com wrote:

...
Ok so i reread stuff and the : drm/radeon: add general purpose fence signaled callback is a big NAK actually. It change the paradigm. Moving most of the handling into the irq process which is something i am intimatly convinced we should avoid.

Here is the patchset up to ib pool cleanup. I have yet rebase the other patches as i am tracking done some issue in the sa allocation.

Cheers, Jerome

Before i forget, the big issue with doing work from irq handler is that we never know in middle of what other part can be. I believe it's lot better to have irq process only update fence (signaled/not signaled). and have the actual work happening on behalf of the process wether through sa alloc path or ttm path.

Disagree.

Why should it be better to delay work outside of the interrupt context if proper locking can make the driver much more responsive and easier to implement?

I don't want to call into TTM or stuff like that, just want make it possible to release the resources acquired for a job immediately after the job is completed instead of waiting for the next allocation to happen. Cause then you don't need to check if a bunch of fences might possible be signaled and instead just get a proper signal that resources can be deallocated.

We use two fences per IB, one for the command buffer itself and the other for the actual IB allocations. That way the the IB can be re-used without waiting for the fence after the synchronization. I'm not sure it's worth the extra complexity though.

Alex

4744

Age (days ago)

4744

Last active (days ago)

dri-devel@lists.freedesktop.org

19 comments

5 participants

tags (0)

participants (5)

Alex Deucher
Christian König
Dave Airlie
j.glisse＠gmail.com
Jerome Glisse