[PATCH v2 1/6] drm/v3d: Document cache flushing ABI.

List overview All Threads
Download

newer

older

[PATCH v2 0/2] Two AST driver fixes

[PATCH -next] drm/vkms: Remove set...

Eric Anholt

3 Dec 2018 3 Dec '18

10:24 p.m.

Right now, userspace doesn't do any L2T writes, but we should lay out our expectations for how it works.

v2: Explicitly mention the VCD cache flushing requirements and that we'll flush the other caches before each of the CLs.

Signed-off-by: Eric Anholt eric@anholt.net --- include/uapi/drm/v3d_drm.h | 8 ++++++++ 1 file changed, 8 insertions(+)

diff --git a/include/uapi/drm/v3d_drm.h b/include/uapi/drm/v3d_drm.h index 35c7d813c66e..ea70669d2138 100644 --- a/include/uapi/drm/v3d_drm.h +++ b/include/uapi/drm/v3d_drm.h @@ -52,6 +52,14 @@ extern "C" { * * This asks the kernel to have the GPU execute an optional binner * command list, and a render command list. + * + * The L1T, slice, L2C, L2T, and GCA caches will be flushed before + * each CL executes. The VCD cache should be flushed (if necessary) + * by the submitted CLs. The TLB writes are guaranteed to have been + * flushed by the time the render done IRQ happens, which is the + * trigger for out_sync. Any dirtying of cachelines by the job (only + * possible using TMU writes) must be flushed by the caller using the + * CL's cache flush commands. */ struct drm_v3d_submit_cl { /* Pointer to the binner command list.

-- 2.20.0.rc1

Show replies by date

Eric Anholt

3 Dec 3 Dec

10:24 p.m.

New subject: [PATCH v2 2/6] drm/v3d: Drop unused v3d_flush_caches().

Now that I've specified how the end-of-pipeline flushing should work, we're never going to use this function.

Signed-off-by: Eric Anholt eric@anholt.net Reviewed-by: Dave Emett david.emett@broadcom.com --- drivers/gpu/drm/v3d/v3d_drv.h | 1 - drivers/gpu/drm/v3d/v3d_gem.c | 21 --------------------- 2 files changed, 22 deletions(-)

diff --git a/drivers/gpu/drm/v3d/v3d_drv.h b/drivers/gpu/drm/v3d/v3d_drv.h index bcd3d567bec2..239b56d76f3e 100644 --- a/drivers/gpu/drm/v3d/v3d_drv.h +++ b/drivers/gpu/drm/v3d/v3d_drv.h @@ -314,7 +314,6 @@ void v3d_exec_put(struct v3d_exec_info *exec); void v3d_tfu_job_put(struct v3d_tfu_job *exec); void v3d_reset(struct v3d_dev *v3d); void v3d_invalidate_caches(struct v3d_dev *v3d); -void v3d_flush_caches(struct v3d_dev *v3d);

/* v3d_irq.c */ void v3d_irq_init(struct v3d_dev *v3d); diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c index f565b197cba9..92413cbcf92c 100644 --- a/drivers/gpu/drm/v3d/v3d_gem.c +++ b/drivers/gpu/drm/v3d/v3d_gem.c @@ -175,20 +175,6 @@ v3d_invalidate_slices(struct v3d_dev *v3d, int core) V3D_SET_FIELD(0xf, V3D_SLCACTL_ICC)); }

-/* Invalidates texture L2 cachelines */ -static void -v3d_invalidate_l2t(struct v3d_dev *v3d, int core) -{ - V3D_CORE_WRITE(core, - V3D_CTL_L2TCACTL, - V3D_L2TCACTL_L2TFLS | - V3D_SET_FIELD(V3D_L2TCACTL_FLM_CLEAR, V3D_L2TCACTL_FLM)); - if (wait_for(!(V3D_CORE_READ(core, V3D_CTL_L2TCACTL) & - V3D_L2TCACTL_L2TFLS), 100)) { - DRM_ERROR("Timeout waiting for L2T invalidate\n"); - } -} - void v3d_invalidate_caches(struct v3d_dev *v3d) { @@ -199,13 +185,6 @@ v3d_invalidate_caches(struct v3d_dev *v3d) v3d_flush_l2t(v3d, 0); }

-void -v3d_flush_caches(struct v3d_dev *v3d) -{ - v3d_invalidate_l1td(v3d, 0); - v3d_invalidate_l2t(v3d, 0); -} - static void v3d_attach_object_fences(struct v3d_bo **bos, int bo_count, struct dma_fence *fence)

-- 2.20.0.rc1

Eric Anholt

10:24 p.m.

New subject: [PATCH v2 3/6] drm/v3d: Don't bother flushing L1TD at job start.

This is the write combiner for TMU writes. You're supposed to flush that at job end if you had dirtied any cachelines. Flushing it at job start then doesn't make any sense.

Signed-off-by: Eric Anholt eric@anholt.net Fixes: 57692c94dcbe ("drm/v3d: Introduce a new DRM driver for Broadcom V3D V3.x+") Reviewed-by: Dave Emett david.emett@broadcom.com --- drivers/gpu/drm/v3d/v3d_gem.c | 12 ------------ 1 file changed, 12 deletions(-)

diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c index 92413cbcf92c..01e879c71cad 100644 --- a/drivers/gpu/drm/v3d/v3d_gem.c +++ b/drivers/gpu/drm/v3d/v3d_gem.c @@ -139,22 +139,10 @@ v3d_invalidate_l2(struct v3d_dev *v3d, int core) V3D_L2CACTL_L2CENA); }

-static void -v3d_invalidate_l1td(struct v3d_dev *v3d, int core) -{ - V3D_CORE_WRITE(core, V3D_CTL_L2TCACTL, V3D_L2TCACTL_TMUWCF); - if (wait_for(!(V3D_CORE_READ(core, V3D_CTL_L2TCACTL) & - V3D_L2TCACTL_L2TFLS), 100)) { - DRM_ERROR("Timeout waiting for L1T write combiner flush\n"); - } -} - /* Invalidates texture L2 cachelines */ static void v3d_flush_l2t(struct v3d_dev *v3d, int core) { - v3d_invalidate_l1td(v3d, core); - V3D_CORE_WRITE(core, V3D_CTL_L2TCACTL, V3D_L2TCACTL_L2TFLS | V3D_SET_FIELD(V3D_L2TCACTL_FLM_FLUSH, V3D_L2TCACTL_FLM));

-- 2.20.0.rc1

Eric Anholt

10:24 p.m.

New subject: [PATCH v2 4/6] drm/v3d: Drop the wait for L2T flush to complete.

According to Dave, once you've started an L2T flush, all L2T accesses will be blocked until the flush completes. This fixes a consistent 3-4ms stall between the ioctl and running the job, and 3DMMES Taiji goes from 27fps to 110fps.

v2: Leave a note about why we don't need to wait for completion.

Signed-off-by: Eric Anholt eric@anholt.net Fixes: 57692c94dcbe ("drm/v3d: Introduce a new DRM driver for Broadcom V3D V3.x+") Reviewed-by: Dave Emett david.emett@broadcom.com (v1, comment requested) --- drivers/gpu/drm/v3d/v3d_gem.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c index 01e879c71cad..c268c7c79566 100644 --- a/drivers/gpu/drm/v3d/v3d_gem.c +++ b/drivers/gpu/drm/v3d/v3d_gem.c @@ -143,13 +143,13 @@ v3d_invalidate_l2(struct v3d_dev *v3d, int core) static void v3d_flush_l2t(struct v3d_dev *v3d, int core) { + /* While there is a busy bit (V3D_L2TCACTL_L2TFLS), we don't + * need to wait for completion before dispatching the job -- + * L2T accesses will be stalled until the flush has completed. + */ V3D_CORE_WRITE(core, V3D_CTL_L2TCACTL, V3D_L2TCACTL_L2TFLS | V3D_SET_FIELD(V3D_L2TCACTL_FLM_FLUSH, V3D_L2TCACTL_FLM)); - if (wait_for(!(V3D_CORE_READ(core, V3D_CTL_L2TCACTL) & - V3D_L2TCACTL_L2TFLS), 100)) { - DRM_ERROR("Timeout waiting for L2T flush\n"); - } }

/* Invalidates the slice caches. These are read-only caches. */

-- 2.20.0.rc1

Dave Emett

4 Dec 4 Dec

11:53 a.m.

New subject: [PATCH v2 4/6] drm/v3d: Drop the wait for L2T flush to complete.

On Mon, 3 Dec 2018 at 22:24, Eric Anholt eric@anholt.net wrote:

...

According to Dave, once you've started an L2T flush, all L2T accesses will be blocked until the flush completes. This fixes a consistent 3-4ms stall between the ioctl and running the job, and 3DMMES Taiji goes from 27fps to 110fps.

v2: Leave a note about why we don't need to wait for completion.

Signed-off-by: Eric Anholt eric@anholt.net

Reviewed-by: Dave Emett david.emett@broadcom.com

...

Fixes: 57692c94dcbe ("drm/v3d: Introduce a new DRM driver for Broadcom V3D V3.x+") Reviewed-by: Dave Emett david.emett@broadcom.com (v1, comment requested)

drivers/gpu/drm/v3d/v3d_gem.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c index 01e879c71cad..c268c7c79566 100644 --- a/drivers/gpu/drm/v3d/v3d_gem.c +++ b/drivers/gpu/drm/v3d/v3d_gem.c @@ -143,13 +143,13 @@ v3d_invalidate_l2(struct v3d_dev *v3d, int core) static void v3d_flush_l2t(struct v3d_dev *v3d, int core) {
+ /* While there is a busy bit (V3D_L2TCACTL_L2TFLS), we don't
+  * need to wait for completion before dispatching the job --
+  * L2T accesses will be stalled until the flush has completed.
+  */
  V3D_CORE_WRITE(core, V3D_CTL_L2TCACTL,
                 V3D_L2TCACTL_L2TFLS |
                 V3D_SET_FIELD(V3D_L2TCACTL_FLM_FLUSH, V3D_L2TCACTL_FLM));
- if (wait_for(!(V3D_CORE_READ(core, V3D_CTL_L2TCACTL) &
-                V3D_L2TCACTL_L2TFLS), 100)) {
-         DRM_ERROR("Timeout waiting for L2T flush\n");
- }
}

/* Invalidates the slice caches. These are read-only caches. */

2.20.0.rc1

Eric Anholt

3 Dec 3 Dec

10:24 p.m.

New subject: [PATCH v2 5/6] drm/v3d: Stop trying to flush L2C on V3D 3.3+

This cache was replaced with the slice accessing the L2T in the newer generations. Noted by Dave during review.

Signed-off-by: Eric Anholt eric@anholt.net --- drivers/gpu/drm/v3d/v3d_gem.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c index c268c7c79566..8a4be9515179 100644 --- a/drivers/gpu/drm/v3d/v3d_gem.c +++ b/drivers/gpu/drm/v3d/v3d_gem.c @@ -130,10 +130,15 @@ v3d_flush_l3(struct v3d_dev *v3d) } }

-/* Invalidates the (read-only) L2 cache. */ +/* Invalidates the (read-only) L2C cache. This was the L2 cache for + * uniforms and instructions on V3D 3.2. + */ static void -v3d_invalidate_l2(struct v3d_dev *v3d, int core) +v3d_invalidate_l2c(struct v3d_dev *v3d, int core) { + if (v3d->ver > 32) + return; + V3D_CORE_WRITE(core, V3D_CTL_L2CACTL, V3D_L2CACTL_L2CCLR | V3D_L2CACTL_L2CENA); @@ -168,7 +173,7 @@ v3d_invalidate_caches(struct v3d_dev *v3d) { v3d_flush_l3(v3d);

- v3d_invalidate_l2(v3d, 0); + v3d_invalidate_l2c(v3d, 0); v3d_invalidate_slices(v3d, 0); v3d_flush_l2t(v3d, 0); }

-- 2.20.0.rc1

Dave Emett

4 Dec 4 Dec

11:54 a.m.

New subject: [PATCH v2 5/6] drm/v3d: Stop trying to flush L2C on V3D 3.3+

On Mon, 3 Dec 2018 at 22:24, Eric Anholt eric@anholt.net wrote:

...

This cache was replaced with the slice accessing the L2T in the newer generations. Noted by Dave during review.

Signed-off-by: Eric Anholt eric@anholt.net

Reviewed-by: Dave Emett david.emett@broadcom.com

...

drivers/gpu/drm/v3d/v3d_gem.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c index c268c7c79566..8a4be9515179 100644 --- a/drivers/gpu/drm/v3d/v3d_gem.c +++ b/drivers/gpu/drm/v3d/v3d_gem.c @@ -130,10 +130,15 @@ v3d_flush_l3(struct v3d_dev *v3d) } }

-/* Invalidates the (read-only) L2 cache. */
+/* Invalidates the (read-only) L2C cache.  This was the L2 cache for
+ * uniforms and instructions on V3D 3.2.
+ */
 static void
-v3d_invalidate_l2(struct v3d_dev *v3d, int core)
+v3d_invalidate_l2c(struct v3d_dev *v3d, int core)
 {
+ if (v3d->ver > 32)
+         return;
+
  V3D_CORE_WRITE(core, V3D_CTL_L2CACTL,
                 V3D_L2CACTL_L2CCLR |
                 V3D_L2CACTL_L2CENA);
@@ -168,7 +173,7 @@ v3d_invalidate_caches(struct v3d_dev *v3d) { v3d_flush_l3(v3d);
- v3d_invalidate_l2(v3d, 0);
+ v3d_invalidate_l2c(v3d, 0);
  v3d_invalidate_slices(v3d, 0);
  v3d_flush_l2t(v3d, 0);
}

2.20.0.rc1

Eric Anholt

3 Dec 3 Dec

10:24 p.m.

New subject: [PATCH v2 6/6] drm/v3d: Invalidate the caches from the outside in.

This would be a fairly obscure race, but let's make sure we don't ever lose it.

Signed-off-by: Eric Anholt eric@anholt.net --- drivers/gpu/drm/v3d/v3d_gem.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c index 8a4be9515179..443b1c53117a 100644 --- a/drivers/gpu/drm/v3d/v3d_gem.c +++ b/drivers/gpu/drm/v3d/v3d_gem.c @@ -171,11 +171,15 @@ v3d_invalidate_slices(struct v3d_dev *v3d, int core) void v3d_invalidate_caches(struct v3d_dev *v3d) { + /* Invalidate the caches from the outside in. That way if + * another CL's concurrent use of nearby memory were to pull + * an invalidated cacheline back in, we wouldn't leave stale + * data in the inner cache. + */ v3d_flush_l3(v3d); - v3d_invalidate_l2c(v3d, 0); - v3d_invalidate_slices(v3d, 0); v3d_flush_l2t(v3d, 0); + v3d_invalidate_slices(v3d, 0); }

static void

-- 2.20.0.rc1

Dave Emett

4 Dec 4 Dec

12:01 p.m.

New subject: [PATCH v2 6/6] drm/v3d: Invalidate the caches from the outside in.

On Mon, 3 Dec 2018 at 22:24, Eric Anholt eric@anholt.net wrote:

...

This would be a fairly obscure race, but let's make sure we don't ever lose it.

Signed-off-by: Eric Anholt eric@anholt.net

Reviewed-by: Dave Emett david.emett@broadcom.com

...

drivers/gpu/drm/v3d/v3d_gem.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c index 8a4be9515179..443b1c53117a 100644 --- a/drivers/gpu/drm/v3d/v3d_gem.c +++ b/drivers/gpu/drm/v3d/v3d_gem.c @@ -171,11 +171,15 @@ v3d_invalidate_slices(struct v3d_dev *v3d, int core) void v3d_invalidate_caches(struct v3d_dev *v3d) {
+ /* Invalidate the caches from the outside in.  That way if
+  * another CL's concurrent use of nearby memory were to pull
+  * an invalidated cacheline back in, we wouldn't leave stale
+  * data in the inner cache.
+  */
  v3d_flush_l3(v3d);
-
  v3d_invalidate_l2c(v3d, 0);
- v3d_invalidate_slices(v3d, 0);
  v3d_flush_l2t(v3d, 0);
+ v3d_invalidate_slices(v3d, 0);
}

static void

2.20.0.rc1

Dave Emett

11:51 a.m.

On Mon, 3 Dec 2018 at 22:24, Eric Anholt eric@anholt.net wrote:

...

Right now, userspace doesn't do any L2T writes, but we should lay out our expectations for how it works.

v2: Explicitly mention the VCD cache flushing requirements and that we'll flush the other caches before each of the CLs.

Signed-off-by: Eric Anholt eric@anholt.net

Reviewed-by: Dave Emett david.emett@broadcom.com

...

include/uapi/drm/v3d_drm.h | 8 ++++++++ 1 file changed, 8 insertions(+)

diff --git a/include/uapi/drm/v3d_drm.h b/include/uapi/drm/v3d_drm.h index 35c7d813c66e..ea70669d2138 100644 --- a/include/uapi/drm/v3d_drm.h +++ b/include/uapi/drm/v3d_drm.h @@ -52,6 +52,14 @@ extern "C" {

  * This asks the kernel to have the GPU execute an optional binner
  * command list, and a render command list.
+ *
+ * The L1T, slice, L2C, L2T, and GCA caches will be flushed before
+ * each CL executes.  The VCD cache should be flushed (if necessary)
+ * by the submitted CLs.  The TLB writes are guaranteed to have been
+ * flushed by the time the render done IRQ happens, which is the
+ * trigger for out_sync.  Any dirtying of cachelines by the job (only
+ * possible using TMU writes) must be flushed by the caller using the
+ * CL's cache flush commands.
  */

struct drm_v3d_submit_cl { /* Pointer to the binner command list. -- 2.20.0.rc1

2331

Age (days ago)

2332

Last active (days ago)

dri-devel@lists.freedesktop.org

9 comments

2 participants

tags (0)

participants (2)

Dave Emett
Eric Anholt