Change-Id: I6c891515d93a6f1a597e762090c3530a6810c6c6 --- drivers/gpu/drm/msm/adreno/a4xx_gpu.c | 50 ++++++++++++++++++++++++++---- drivers/gpu/drm/msm/adreno/adreno_device.c | 8 +++++ drivers/gpu/drm/msm/adreno/adreno_gpu.c | 41 ++++++++++++++++-------- drivers/gpu/drm/msm/adreno/adreno_gpu.h | 5 +++ 4 files changed, 85 insertions(+), 19 deletions(-)
diff --git a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c b/drivers/gpu/drm/msm/adreno/a4xx_gpu.c index ef20cb5..9ed3abe 100644 --- a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a4xx_gpu.c @@ -111,11 +111,17 @@ static void a4xx_enable_hwcg(struct msm_gpu *gpu) gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00000222); gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_HLSQ , 0x00000000); gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_HLSQ, 0x00000000); - gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_HLSQ, 0x00020000); - gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL, 0xAAAAAAAA); + gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_HLSQ, 0x00220000); + /* Early A430's have a timing issue with SP/TP power collapse; + disabling HW clock gating prevents it. */ + if (adreno_is_a430(adreno_gpu) && adreno_gpu->rev.patchid < 2) + gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL, 0); + else + gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL, 0xAAAAAAAA); gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2, 0); }
+ static void a4xx_me_init(struct msm_gpu *gpu) { struct msm_ringbuffer *ring = gpu->rb; @@ -150,7 +156,7 @@ static int a4xx_hw_init(struct msm_gpu *gpu) uint32_t *ptr, len; int i, ret;
- if (adreno_is_a4xx(adreno_gpu)) { + if (adreno_is_a420(adreno_gpu)) { gpu_write(gpu, REG_A4XX_VBIF_ABIT_SORT, 0x0001001F); gpu_write(gpu, REG_A4XX_VBIF_ABIT_SORT_CONF, 0x000000A4); gpu_write(gpu, REG_A4XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000001); @@ -159,6 +165,13 @@ static int a4xx_hw_init(struct msm_gpu *gpu) gpu_write(gpu, REG_A4XX_VBIF_IN_WR_LIM_CONF0, 0x18181818); gpu_write(gpu, REG_A4XX_VBIF_IN_WR_LIM_CONF1, 0x00000018); gpu_write(gpu, REG_A4XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003); + } else if (adreno_is_a430(adreno_gpu)) { + gpu_write(gpu, REG_A4XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000001); + gpu_write(gpu, REG_A4XX_VBIF_IN_RD_LIM_CONF0, 0x18181818); + gpu_write(gpu, REG_A4XX_VBIF_IN_RD_LIM_CONF1, 0x00000018); + gpu_write(gpu, REG_A4XX_VBIF_IN_WR_LIM_CONF0, 0x18181818); + gpu_write(gpu, REG_A4XX_VBIF_IN_WR_LIM_CONF1, 0x00000018); + gpu_write(gpu, REG_A4XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003); } else { BUG(); } @@ -170,6 +183,10 @@ static int a4xx_hw_init(struct msm_gpu *gpu) gpu_write(gpu, REG_A4XX_RBBM_SP_HYST_CNT, 0x10); gpu_write(gpu, REG_A4XX_RBBM_WAIT_IDLE_CLOCKS_CTL, 0x10);
+ if (adreno_is_a430(adreno_gpu)) { + gpu_write(gpu, REG_A4XX_RBBM_WAIT_IDLE_CLOCKS_CTL2, 0x30); + } + /* Enable the RBBM error reporting bits */ gpu_write(gpu, REG_A4XX_RBBM_AHB_CTL0, 0x00000001);
@@ -192,6 +209,9 @@ static int a4xx_hw_init(struct msm_gpu *gpu) /* Turn on performance counters: */ gpu_write(gpu, REG_A4XX_RBBM_PERFCTR_CTL, 0x01);
+ if (adreno_is_a430(adreno_gpu)) + gpu_write(gpu, REG_A4XX_UCHE_CACHE_WAYS_VFD, 0x07); + /* Disable L2 bypass to avoid UCHE out of bounds errors */ gpu_write(gpu, REG_A4XX_UCHE_TRAP_BASE_LO, 0xffff0000); gpu_write(gpu, REG_A4XX_UCHE_TRAP_BASE_HI, 0xffff0000); @@ -199,6 +219,15 @@ static int a4xx_hw_init(struct msm_gpu *gpu) gpu_write(gpu, REG_A4XX_CP_DEBUG, (1 << 25) | (adreno_is_a420(adreno_gpu) ? (1 << 29) : 0));
+ /* On A430 enable SP regfile sleep for power savings */ + /* TODO downstream does this for !420, so maybe applies for 405 too? */ + if (!adreno_is_a420(adreno_gpu)) { + gpu_write(gpu, REG_A4XX_RBBM_SP_REGFILE_SLEEP_CNTL_0, + 0x00000441); + gpu_write(gpu, REG_A4XX_RBBM_SP_REGFILE_SLEEP_CNTL_1, + 0x00000441); + } + a4xx_enable_hwcg(gpu);
/* @@ -213,9 +242,11 @@ static int a4xx_hw_init(struct msm_gpu *gpu) gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_HLSQ, val); }
- ret = adreno_hw_init(gpu); - if (ret) - return ret; + if (!adreno_is_a430(adreno_gpu)) { + ret = adreno_hw_init(gpu); + if (ret) + return ret; + }
/* setup access protection: */ gpu_write(gpu, REG_A4XX_CP_PROTECT_CTRL, 0x00000007); @@ -326,6 +357,13 @@ static irqreturn_t a4xx_irq(struct msm_gpu *gpu) status = gpu_read(gpu, REG_A4XX_RBBM_INT_0_STATUS); DBG("%s: Int status %08x", gpu->name, status);
+ if (status & (1 << A4XX_INT_CP_REG_PROTECT_FAULT)) { + uint32_t reg = gpu_read(gpu, REG_A4XX_CP_PROTECT_STATUS); + printk("CP | Protected mode error| %s | addr=%x\n", + reg & (1 << 24) ? "WRITE" : "READ", + (reg & 0xFFFFF) >> 2); + } + gpu_write(gpu, REG_A4XX_RBBM_INT_CLEAR_CMD, status);
msm_gpu_retire(gpu); diff --git a/drivers/gpu/drm/msm/adreno/adreno_device.c b/drivers/gpu/drm/msm/adreno/adreno_device.c index 1ea2df5..43d5aaa 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_device.c +++ b/drivers/gpu/drm/msm/adreno/adreno_device.c @@ -73,6 +73,14 @@ static const struct adreno_info gpulist[] = { .pfpfw = "a420_pfp.fw", .gmem = (SZ_1M + SZ_512K), .init = a4xx_gpu_init, + }, { + .rev = ADRENO_REV(4, 3, 0, ANY_ID), + .revn = 430, + .name = "A430", + .pm4fw = "a420_pm4.fw", + .pfpfw = "a420_pfp.fw", + .gmem = (SZ_1M + SZ_512K), + .init = a4xx_gpu_init, }, };
diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c index a3b54cc..33c96d0d 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c @@ -68,18 +68,15 @@ int adreno_hw_init(struct msm_gpu *gpu) adreno_gpu_write(adreno_gpu, REG_ADRENO_CP_RB_CNTL, /* size is log2(quad-words): */ AXXX_CP_RB_CNTL_BUFSZ(ilog2(gpu->rb->size / 8)) | - AXXX_CP_RB_CNTL_BLKSZ(ilog2(RB_BLKSIZE / 8))); + AXXX_CP_RB_CNTL_BLKSZ(ilog2(RB_BLKSIZE / 8)) | + (adreno_is_a430(adreno_gpu) ? (1 << 27) : 0));
/* Setup ringbuffer address: */ adreno_gpu_write(adreno_gpu, REG_ADRENO_CP_RB_BASE, gpu->rb_iova); - adreno_gpu_write(adreno_gpu, REG_ADRENO_CP_RB_RPTR_ADDR, - rbmemptr(adreno_gpu, rptr));
- /* Setup scratch/timestamp: */ - adreno_gpu_write(adreno_gpu, REG_ADRENO_SCRATCH_ADDR, - rbmemptr(adreno_gpu, fence)); - - adreno_gpu_write(adreno_gpu, REG_ADRENO_SCRATCH_UMSK, 0x1); + if (!adreno_is_a430(adreno_gpu)) + adreno_gpu_write(adreno_gpu, REG_ADRENO_CP_RB_RPTR_ADDR, + rbmemptr(adreno_gpu, rptr));
return 0; } @@ -89,6 +86,16 @@ static uint32_t get_wptr(struct msm_ringbuffer *ring) return ring->cur - ring->start; }
+/* Use this helper to read rptr, since a430 doesn't update rptr in memory */ +static uint32_t get_rptr(struct adreno_gpu *adreno_gpu) +{ + if (adreno_is_a430(adreno_gpu)) + return adreno_gpu->memptrs->rptr = adreno_gpu_read( + adreno_gpu, REG_ADRENO_CP_RB_RPTR); + else + return adreno_gpu->memptrs->rptr; +} + uint32_t adreno_last_fence(struct msm_gpu *gpu) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); @@ -137,7 +144,8 @@ int adreno_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, if (priv->lastctx == ctx) break; case MSM_SUBMIT_CMD_BUF: - OUT_PKT3(ring, CP_INDIRECT_BUFFER_PFD, 2); + OUT_PKT3(ring, adreno_is_a430(adreno_gpu) ? + CP_INDIRECT_BUFFER_PFE : CP_INDIRECT_BUFFER_PFD, 2); OUT_RING(ring, submit->cmd[i].iova); OUT_RING(ring, submit->cmd[i].size); ibs++; @@ -216,9 +224,16 @@ void adreno_idle(struct msm_gpu *gpu) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); uint32_t wptr = get_wptr(gpu->rb); + int ret;
/* wait for CP to drain ringbuffer: */ - if (spin_until(adreno_gpu->memptrs->rptr == wptr)) + if (adreno_is_a430(adreno_gpu)) + ret = spin_until((adreno_gpu->memptrs->rptr = adreno_gpu_read( + adreno_gpu, REG_ADRENO_CP_RB_RPTR)) == wptr); + else + ret = spin_until(adreno_gpu->memptrs->rptr == wptr); + + if (ret) DRM_ERROR("%s: timeout waiting to drain ringbuffer!\n", gpu->name);
/* TODO maybe we need to reset GPU here to recover from hang? */ @@ -237,7 +252,7 @@ void adreno_show(struct msm_gpu *gpu, struct seq_file *m)
seq_printf(m, "fence: %d/%d\n", adreno_gpu->memptrs->fence, gpu->submitted_fence); - seq_printf(m, "rptr: %d\n", adreno_gpu->memptrs->rptr); + seq_printf(m, "rptr: %d\n", get_rptr(adreno_gpu)); seq_printf(m, "wptr: %d\n", adreno_gpu->memptrs->wptr); seq_printf(m, "rb wptr: %d\n", get_wptr(gpu->rb));
@@ -278,7 +293,7 @@ void adreno_dump_info(struct msm_gpu *gpu)
printk("fence: %d/%d\n", adreno_gpu->memptrs->fence, gpu->submitted_fence); - printk("rptr: %d\n", adreno_gpu->memptrs->rptr); + printk("rptr: %d\n", get_rptr(adreno_gpu)); printk("wptr: %d\n", adreno_gpu->memptrs->wptr); printk("rb wptr: %d\n", get_wptr(gpu->rb));
@@ -313,7 +328,7 @@ static uint32_t ring_freewords(struct msm_gpu *gpu) struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); uint32_t size = gpu->rb->size / 4; uint32_t wptr = get_wptr(gpu->rb); - uint32_t rptr = adreno_gpu->memptrs->rptr; + uint32_t rptr = get_rptr(adreno_gpu); return (rptr + (size - 1) - wptr) % size; }
diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.h b/drivers/gpu/drm/msm/adreno/adreno_gpu.h index 0a312e9..c26aea1 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.h +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.h @@ -228,6 +228,11 @@ static inline int adreno_is_a420(struct adreno_gpu *gpu) return gpu->revn == 420; }
+static inline int adreno_is_a430(struct adreno_gpu *gpu) +{ + return gpu->revn == 430; +} + int adreno_get_param(struct msm_gpu *gpu, uint32_t param, uint64_t *value); int adreno_hw_init(struct msm_gpu *gpu); uint32_t adreno_last_fence(struct msm_gpu *gpu);
Change-Id: Ife53627e9985e1204cc319da0221338e5272eb81 --- drivers/gpu/drm/msm/adreno/a4xx_gpu.c | 39 +++++++++++++++++++++++++++++++++-- 1 file changed, 37 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c b/drivers/gpu/drm/msm/adreno/a4xx_gpu.c index 9ed3abe..496732d 100644 --- a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a4xx_gpu.c @@ -559,12 +559,47 @@ static void a4xx_dump(struct msm_gpu *gpu) adreno_dump(gpu); }
+static int a4xx_pm_resume(struct msm_gpu *gpu) { + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + int ret; + + ret = msm_gpu_pm_resume(gpu); + if (ret) + return ret; + + if (adreno_is_a430(adreno_gpu)) { + unsigned int reg; + /* Set the default register values; set SW_COLLAPSE to 0 */ + gpu_write(gpu, REG_A4XX_RBBM_POWER_CNTL_IP, 0x778000); + do { + udelay(5); + reg = gpu_read(gpu, REG_A4XX_RBBM_POWER_STATUS); + } while (!(reg & SP_TP_PWR_ON)); + } + return 0; +} + +static int a4xx_pm_suspend(struct msm_gpu *gpu) { + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + int ret; + + ret = msm_gpu_pm_suspend(gpu); + if (ret) + return ret; + + if (adreno_is_a430(adreno_gpu)) { + /* Set the default register values; set SW_COLLAPSE to 1 */ + gpu_write(gpu, REG_A4XX_RBBM_POWER_CNTL_IP, 0x778001); + } + return 0; +} + static const struct adreno_gpu_funcs funcs = { .base = { .get_param = adreno_get_param, .hw_init = a4xx_hw_init, - .pm_suspend = msm_gpu_pm_suspend, - .pm_resume = msm_gpu_pm_resume, + .pm_suspend = a4xx_pm_suspend, + .pm_resume = a4xx_pm_resume, .recover = a4xx_recover, .last_fence = adreno_last_fence, .submit = adreno_submit,
On Tue, Feb 2, 2016 at 4:06 PM, C Stout cstout@chromium.org wrote:
Change-Id: Ife53627e9985e1204cc319da0221338e5272eb81
thanks, looks good. I should mention we usually drop the change-id tag's upstream. But I can fix that up when I apply the patch, so no need to re-send.
BR, -R
drivers/gpu/drm/msm/adreno/a4xx_gpu.c | 39 +++++++++++++++++++++++++++++++++-- 1 file changed, 37 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c b/drivers/gpu/drm/msm/adreno/a4xx_gpu.c index 9ed3abe..496732d 100644 --- a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a4xx_gpu.c @@ -559,12 +559,47 @@ static void a4xx_dump(struct msm_gpu *gpu) adreno_dump(gpu); }
+static int a4xx_pm_resume(struct msm_gpu *gpu) {
struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
int ret;
ret = msm_gpu_pm_resume(gpu);
if (ret)
return ret;
if (adreno_is_a430(adreno_gpu)) {
unsigned int reg;
/* Set the default register values; set SW_COLLAPSE to 0 */
gpu_write(gpu, REG_A4XX_RBBM_POWER_CNTL_IP, 0x778000);
do {
udelay(5);
reg = gpu_read(gpu, REG_A4XX_RBBM_POWER_STATUS);
} while (!(reg & SP_TP_PWR_ON));
}
return 0;
+}
+static int a4xx_pm_suspend(struct msm_gpu *gpu) {
struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
int ret;
ret = msm_gpu_pm_suspend(gpu);
if (ret)
return ret;
if (adreno_is_a430(adreno_gpu)) {
/* Set the default register values; set SW_COLLAPSE to 1 */
gpu_write(gpu, REG_A4XX_RBBM_POWER_CNTL_IP, 0x778001);
}
return 0;
+}
static const struct adreno_gpu_funcs funcs = { .base = { .get_param = adreno_get_param, .hw_init = a4xx_hw_init,
.pm_suspend = msm_gpu_pm_suspend,
.pm_resume = msm_gpu_pm_resume,
.pm_suspend = a4xx_pm_suspend,
.pm_resume = a4xx_pm_resume, .recover = a4xx_recover, .last_fence = adreno_last_fence, .submit = adreno_submit,
-- 2.1.4
dri-devel mailing list dri-devel@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/dri-devel
overall, looks pretty good, few small comments inline
On Tue, Feb 2, 2016 at 4:06 PM, C Stout cstout@chromium.org wrote:
Change-Id: I6c891515d93a6f1a597e762090c3530a6810c6c6
drivers/gpu/drm/msm/adreno/a4xx_gpu.c | 50 ++++++++++++++++++++++++++---- drivers/gpu/drm/msm/adreno/adreno_device.c | 8 +++++ drivers/gpu/drm/msm/adreno/adreno_gpu.c | 41 ++++++++++++++++-------- drivers/gpu/drm/msm/adreno/adreno_gpu.h | 5 +++ 4 files changed, 85 insertions(+), 19 deletions(-)
diff --git a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c b/drivers/gpu/drm/msm/adreno/a4xx_gpu.c index ef20cb5..9ed3abe 100644 --- a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a4xx_gpu.c @@ -111,11 +111,17 @@ static void a4xx_enable_hwcg(struct msm_gpu *gpu) gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00000222); gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_HLSQ , 0x00000000); gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_HLSQ, 0x00000000);
gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_HLSQ, 0x00020000);
gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL, 0xAAAAAAAA);
gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_HLSQ, 0x00220000);
/* Early A430's have a timing issue with SP/TP power collapse;
disabling HW clock gating prevents it. */
if (adreno_is_a430(adreno_gpu) && adreno_gpu->rev.patchid < 2)
gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL, 0);
else
gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL, 0xAAAAAAAA); gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2, 0);
}
static void a4xx_me_init(struct msm_gpu *gpu) { struct msm_ringbuffer *ring = gpu->rb; @@ -150,7 +156,7 @@ static int a4xx_hw_init(struct msm_gpu *gpu) uint32_t *ptr, len; int i, ret;
if (adreno_is_a4xx(adreno_gpu)) {
if (adreno_is_a420(adreno_gpu)) { gpu_write(gpu, REG_A4XX_VBIF_ABIT_SORT, 0x0001001F); gpu_write(gpu, REG_A4XX_VBIF_ABIT_SORT_CONF, 0x000000A4); gpu_write(gpu, REG_A4XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000001);
@@ -159,6 +165,13 @@ static int a4xx_hw_init(struct msm_gpu *gpu) gpu_write(gpu, REG_A4XX_VBIF_IN_WR_LIM_CONF0, 0x18181818); gpu_write(gpu, REG_A4XX_VBIF_IN_WR_LIM_CONF1, 0x00000018); gpu_write(gpu, REG_A4XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);
} else if (adreno_is_a430(adreno_gpu)) {
gpu_write(gpu, REG_A4XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000001);
gpu_write(gpu, REG_A4XX_VBIF_IN_RD_LIM_CONF0, 0x18181818);
gpu_write(gpu, REG_A4XX_VBIF_IN_RD_LIM_CONF1, 0x00000018);
gpu_write(gpu, REG_A4XX_VBIF_IN_WR_LIM_CONF0, 0x18181818);
gpu_write(gpu, REG_A4XX_VBIF_IN_WR_LIM_CONF1, 0x00000018);
gpu_write(gpu, REG_A4XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003); } else { BUG(); }
@@ -170,6 +183,10 @@ static int a4xx_hw_init(struct msm_gpu *gpu) gpu_write(gpu, REG_A4XX_RBBM_SP_HYST_CNT, 0x10); gpu_write(gpu, REG_A4XX_RBBM_WAIT_IDLE_CLOCKS_CTL, 0x10);
if (adreno_is_a430(adreno_gpu)) {
gpu_write(gpu, REG_A4XX_RBBM_WAIT_IDLE_CLOCKS_CTL2, 0x30);
}
/* Enable the RBBM error reporting bits */ gpu_write(gpu, REG_A4XX_RBBM_AHB_CTL0, 0x00000001);
@@ -192,6 +209,9 @@ static int a4xx_hw_init(struct msm_gpu *gpu) /* Turn on performance counters: */ gpu_write(gpu, REG_A4XX_RBBM_PERFCTR_CTL, 0x01);
if (adreno_is_a430(adreno_gpu))
gpu_write(gpu, REG_A4XX_UCHE_CACHE_WAYS_VFD, 0x07);
/* Disable L2 bypass to avoid UCHE out of bounds errors */ gpu_write(gpu, REG_A4XX_UCHE_TRAP_BASE_LO, 0xffff0000); gpu_write(gpu, REG_A4XX_UCHE_TRAP_BASE_HI, 0xffff0000);
@@ -199,6 +219,15 @@ static int a4xx_hw_init(struct msm_gpu *gpu) gpu_write(gpu, REG_A4XX_CP_DEBUG, (1 << 25) | (adreno_is_a420(adreno_gpu) ? (1 << 29) : 0));
/* On A430 enable SP regfile sleep for power savings */
/* TODO downstream does this for !420, so maybe applies for 405 too? */
if (!adreno_is_a420(adreno_gpu)) {
gpu_write(gpu, REG_A4XX_RBBM_SP_REGFILE_SLEEP_CNTL_0,
0x00000441);
gpu_write(gpu, REG_A4XX_RBBM_SP_REGFILE_SLEEP_CNTL_1,
0x00000441);
}
a4xx_enable_hwcg(gpu); /*
@@ -213,9 +242,11 @@ static int a4xx_hw_init(struct msm_gpu *gpu) gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_HLSQ, val); }
ret = adreno_hw_init(gpu);
if (ret)
return ret;
hmm, it seems a bit wrong to never call adreno_hw_init() for a430 (esp. when below you have some changes in adreno_hw_init()).. am I not seeing something, or did you miss a hunk?
if (!adreno_is_a430(adreno_gpu)) {
ret = adreno_hw_init(gpu);
if (ret)
return ret;
} /* setup access protection: */ gpu_write(gpu, REG_A4XX_CP_PROTECT_CTRL, 0x00000007);
@@ -326,6 +357,13 @@ static irqreturn_t a4xx_irq(struct msm_gpu *gpu) status = gpu_read(gpu, REG_A4XX_RBBM_INT_0_STATUS); DBG("%s: Int status %08x", gpu->name, status);
if (status & (1 << A4XX_INT_CP_REG_PROTECT_FAULT)) {
uint32_t reg = gpu_read(gpu, REG_A4XX_CP_PROTECT_STATUS);
printk("CP | Protected mode error| %s | addr=%x\n",
reg & (1 << 24) ? "WRITE" : "READ",
(reg & 0xFFFFF) >> 2);
}
this looks pretty useful, but also unrelated to a430. Mind splitting it out into it's own patch?
gpu_write(gpu, REG_A4XX_RBBM_INT_CLEAR_CMD, status); msm_gpu_retire(gpu);
diff --git a/drivers/gpu/drm/msm/adreno/adreno_device.c b/drivers/gpu/drm/msm/adreno/adreno_device.c index 1ea2df5..43d5aaa 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_device.c +++ b/drivers/gpu/drm/msm/adreno/adreno_device.c @@ -73,6 +73,14 @@ static const struct adreno_info gpulist[] = { .pfpfw = "a420_pfp.fw", .gmem = (SZ_1M + SZ_512K), .init = a4xx_gpu_init,
}, {
.rev = ADRENO_REV(4, 3, 0, ANY_ID),
.revn = 430,
.name = "A430",
.pm4fw = "a420_pm4.fw",
.pfpfw = "a420_pfp.fw",
.gmem = (SZ_1M + SZ_512K),
.init = a4xx_gpu_init, },
};
diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c index a3b54cc..33c96d0d 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c @@ -68,18 +68,15 @@ int adreno_hw_init(struct msm_gpu *gpu) adreno_gpu_write(adreno_gpu, REG_ADRENO_CP_RB_CNTL, /* size is log2(quad-words): */ AXXX_CP_RB_CNTL_BUFSZ(ilog2(gpu->rb->size / 8)) |
AXXX_CP_RB_CNTL_BLKSZ(ilog2(RB_BLKSIZE / 8)));
AXXX_CP_RB_CNTL_BLKSZ(ilog2(RB_BLKSIZE / 8)) |
(adreno_is_a430(adreno_gpu) ? (1 << 27) : 0));
So.. from the a3xx (apq8016) docs which I do have, bit 27 here is 'RB_NO_UPDATE - Disable writes of Read Pointer to memory'. And actually looks like we already have AXXX_CP_RB_CNTL_NO_UPDATE in the .xml.h so no need to open code it.
Downstream commit which makes this change mentions "Sometimes the RPTR shadow memory is unreliable causing timeouts in adreno_idle(). Read it directly from the register instead." This might be a bit easier to understand in the git history if we split the logical equivalent of downstream cdcc02a68649f9929e3f3a5a84bfcac5e480894e into one patch, and then the remaining a430 changes as preceding patch. I think the RB_NO_UPDATE change we could just make unconditional (although if you want to take the safe route, make it conditional on a430 for now and once I have a chance to test it on others I can flip it to unconditional).
/* Setup ringbuffer address: */ adreno_gpu_write(adreno_gpu, REG_ADRENO_CP_RB_BASE, gpu->rb_iova);
adreno_gpu_write(adreno_gpu, REG_ADRENO_CP_RB_RPTR_ADDR,
rbmemptr(adreno_gpu, rptr));
/* Setup scratch/timestamp: */
adreno_gpu_write(adreno_gpu, REG_ADRENO_SCRATCH_ADDR,
rbmemptr(adreno_gpu, fence));
adreno_gpu_write(adreno_gpu, REG_ADRENO_SCRATCH_UMSK, 0x1);
if (!adreno_is_a430(adreno_gpu))
adreno_gpu_write(adreno_gpu, REG_ADRENO_CP_RB_RPTR_ADDR,
rbmemptr(adreno_gpu, rptr)); return 0;
} @@ -89,6 +86,16 @@ static uint32_t get_wptr(struct msm_ringbuffer *ring) return ring->cur - ring->start; }
+/* Use this helper to read rptr, since a430 doesn't update rptr in memory */ +static uint32_t get_rptr(struct adreno_gpu *adreno_gpu) +{
if (adreno_is_a430(adreno_gpu))
return adreno_gpu->memptrs->rptr = adreno_gpu_read(
adreno_gpu, REG_ADRENO_CP_RB_RPTR);
else
return adreno_gpu->memptrs->rptr;
+}
uint32_t adreno_last_fence(struct msm_gpu *gpu) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); @@ -137,7 +144,8 @@ int adreno_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, if (priv->lastctx == ctx) break; case MSM_SUBMIT_CMD_BUF:
OUT_PKT3(ring, CP_INDIRECT_BUFFER_PFD, 2);
OUT_PKT3(ring, adreno_is_a430(adreno_gpu) ?
CP_INDIRECT_BUFFER_PFE : CP_INDIRECT_BUFFER_PFD, 2);
so far, other than a420 which I still need to test, I think we can switch to 'prefetch enabled' version of the IB packet across the board. But I think it's fine to leave it, I can either drop the hunk if I've had a chance to test a420 or remove it later.
OUT_RING(ring, submit->cmd[i].iova); OUT_RING(ring, submit->cmd[i].size); ibs++;
@@ -216,9 +224,16 @@ void adreno_idle(struct msm_gpu *gpu) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); uint32_t wptr = get_wptr(gpu->rb);
int ret; /* wait for CP to drain ringbuffer: */
if (spin_until(adreno_gpu->memptrs->rptr == wptr))
if (adreno_is_a430(adreno_gpu))
ret = spin_until((adreno_gpu->memptrs->rptr = adreno_gpu_read(
adreno_gpu, REG_ADRENO_CP_RB_RPTR)) == wptr);
else
ret = spin_until(adreno_gpu->memptrs->rptr == wptr);
I think just use get_rptr() here?
BR, -R
if (ret) DRM_ERROR("%s: timeout waiting to drain ringbuffer!\n", gpu->name); /* TODO maybe we need to reset GPU here to recover from hang? */
@@ -237,7 +252,7 @@ void adreno_show(struct msm_gpu *gpu, struct seq_file *m)
seq_printf(m, "fence: %d/%d\n", adreno_gpu->memptrs->fence, gpu->submitted_fence);
seq_printf(m, "rptr: %d\n", adreno_gpu->memptrs->rptr);
seq_printf(m, "rptr: %d\n", get_rptr(adreno_gpu)); seq_printf(m, "wptr: %d\n", adreno_gpu->memptrs->wptr); seq_printf(m, "rb wptr: %d\n", get_wptr(gpu->rb));
@@ -278,7 +293,7 @@ void adreno_dump_info(struct msm_gpu *gpu)
printk("fence: %d/%d\n", adreno_gpu->memptrs->fence, gpu->submitted_fence);
printk("rptr: %d\n", adreno_gpu->memptrs->rptr);
printk("rptr: %d\n", get_rptr(adreno_gpu)); printk("wptr: %d\n", adreno_gpu->memptrs->wptr); printk("rb wptr: %d\n", get_wptr(gpu->rb));
@@ -313,7 +328,7 @@ static uint32_t ring_freewords(struct msm_gpu *gpu) struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); uint32_t size = gpu->rb->size / 4; uint32_t wptr = get_wptr(gpu->rb);
uint32_t rptr = adreno_gpu->memptrs->rptr;
uint32_t rptr = get_rptr(adreno_gpu); return (rptr + (size - 1) - wptr) % size;
}
diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.h b/drivers/gpu/drm/msm/adreno/adreno_gpu.h index 0a312e9..c26aea1 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.h +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.h @@ -228,6 +228,11 @@ static inline int adreno_is_a420(struct adreno_gpu *gpu) return gpu->revn == 420; }
+static inline int adreno_is_a430(struct adreno_gpu *gpu) +{
return gpu->revn == 430;
+}
int adreno_get_param(struct msm_gpu *gpu, uint32_t param, uint64_t *value); int adreno_hw_init(struct msm_gpu *gpu); uint32_t adreno_last_fence(struct msm_gpu *gpu); -- 2.1.4
dri-devel mailing list dri-devel@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/dri-devel
dri-devel@lists.freedesktop.org