>From 195a9ce19dbe8c4af7e10fa8475d932826434441 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Glisse?=
Date: Thu, 18 Sep 2014 22:51:21 -0400
Subject: [PATCH] drm/radeon: cs sequence id and cs completion query.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This reports back to userspace the ring id and sequence number that can
be used by userspace to query for the completion of the cs on the
hardware. This also adds a new ioctl to perform such query.

This patch is aimed to introduce the necessary ground work for user
space explicit synchronization. By allowing userspace to query about cs
completion on hardware, user space can perform operation and
synchronization on buffer by itself without having the cs ioctl to
implicitly wait for older cs completion before scheduling new cs. This
part is however left to a follow-up patch.

Signed-off-by: Jérôme Glisse
---
 drivers/gpu/drm/radeon/radeon_cs.c  | 52 +++++++++++++++++++++++++++++++++++++
 drivers/gpu/drm/radeon/radeon_kms.c |  1 +
 include/uapi/drm/radeon_drm.h       | 10 ++++++++
 3 files changed, 63 insertions(+)

diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c
index 83f382e..50fcc09 100644
--- a/drivers/gpu/drm/radeon/radeon_cs.c
+++ b/drivers/gpu/drm/radeon/radeon_cs.c
@@ -404,6 +404,22 @@ static void radeon_cs_parser_fini(struct radeon_cs_parser *parser, int error, bo
 
 		ttm_eu_fence_buffer_objects(&parser->ticket, &parser->validated,
 					    parser->ib.fence);
+		/* Report the 64bit fence sequence id (dw 3-4) and the ring id
+		 * (dw 5) back into the flags chunk so userspace can query for
+		 * cs completion. Writing dw[5] requires length_dw >= 6.
+		 */
+		if (parser->chunk_flags && parser->chunk_flags->length_dw > 5) {
+			uint32_t __user *to = parser->chunk_flags->user_ptr;
+			uint64_t seq = parser->ib.fence->seq;
+			uint32_t ring = parser->ib.fence->ring;
+
+			/* Failure is not fatal to the submission, userspace
+			 * simply can not query this cs for completion.
+			 */
+			if (copy_to_user(&to[3], &seq, sizeof(seq)) ||
+			    copy_to_user(&to[5], &ring, sizeof(ring)))
+				DRM_ERROR("failed to report cs sequence id\n");
+		}
 	} else if (backoff) {
 		ttm_eu_backoff_reservation(&parser->ticket,
 					   &parser->validated);
@@ -823,3 +839,39 @@ int radeon_cs_packet_next_reloc(struct radeon_cs_parser *p,
 	*cs_reloc = p->relocs_ptr[(idx / 4)];
 	return 0;
 }
+
+int radeon_cs_done_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
+{
+	struct radeon_device *rdev = dev->dev_private;
+	struct drm_radeon_cs_done *args = data;
+	unsigned ring = args->ring;
+	int64_t sync_seq, last_seq;
+
+	/* Reject out of range ring id from userspace, it would otherwise
+	 * index fence_drv[] out of bounds.
+	 */
+	if (ring >= RADEON_NUM_RINGS)
+		return -EINVAL;
+
+	/*
+	 * If we consider that last_seq and sync_seq form a range (adjusting
+	 * for wrap around) ]last_seq, sync_seq]. Then queried cs is done if
+	 * its seq value is outside that range.
+	 *
+	 * This gracefully handles wrap around ie case where userspace queries
+	 * a very old cs seq value, if seq value is bigger than current
+	 * sync_seq then we know it's an old and signaled fence. If it's
+	 * smaller or equal to last_seq we know it is signaled. If it's inside
+	 * the range ]last_seq, sync_seq] we can not know yet but we can be
+	 * sure that it will signal as at the very least the hardware will
+	 * increase last_seq up to sync_seq.
+	 */
+	sync_seq = ACCESS_ONCE(rdev->fence_drv[ring].sync_seq[ring]);
+	last_seq = atomic64_read(&rdev->fence_drv[ring].last_seq);
+	if ((last_seq - args->seq) >= 0)
+		return 1;
+	if ((sync_seq - args->seq) < 0)
+		return 1;
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c
index eb7164d..c9cfcf5 100644
--- a/drivers/gpu/drm/radeon/radeon_kms.c
+++ b/drivers/gpu/drm/radeon/radeon_kms.c
@@ -885,5 +885,6 @@ const struct drm_ioctl_desc radeon_ioctls_kms[] = {
 	DRM_IOCTL_DEF_DRV(RADEON_GEM_BUSY, radeon_gem_busy_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(RADEON_GEM_VA, radeon_gem_va_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(RADEON_GEM_OP, radeon_gem_op_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(RADEON_CS_DONE, radeon_cs_done_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW),
 };
 int radeon_max_kms_ioctl = ARRAY_SIZE(radeon_ioctls_kms);
diff --git a/include/uapi/drm/radeon_drm.h b/include/uapi/drm/radeon_drm.h
index fea6099..ed246d9 100644
--- a/include/uapi/drm/radeon_drm.h
+++ b/include/uapi/drm/radeon_drm.h
@@ -554,6 +554,9 @@ typedef struct {
 #define DRM_IOCTL_RADEON_GEM_BUSY	DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_BUSY, struct drm_radeon_gem_busy)
 #define DRM_IOCTL_RADEON_GEM_VA		DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_VA, struct drm_radeon_gem_va)
 #define DRM_IOCTL_RADEON_GEM_OP		DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_OP, struct drm_radeon_gem_op)
+/* NOTE(review): 0x2d assumed to be the next free command number — verify */
+#define DRM_RADEON_CS_DONE		0x2d
+#define DRM_IOCTL_RADEON_CS_DONE	DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_CS_DONE, struct drm_radeon_cs_done)
 
 typedef struct drm_radeon_init {
 	enum {
@@ -936,6 +939,7 @@ struct drm_radeon_gem_va {
 #define RADEON_CS_RING_VCE    4
 /* The third dword of RADEON_CHUNK_ID_FLAGS is a sint32 that sets the priority */
 /* 0 = normal, + = higher priority, - = lower priority */
+/* The fourth and fifth dwords return the 64bit fence sequence id (lo, hi), the sixth dword returns the ring id */
 
 struct drm_radeon_cs_chunk {
 	uint32_t chunk_id;
@@ -1038,4 +1042,10 @@ struct drm_radeon_info {
 #define CIK_TILE_MODE_DEPTH_STENCIL_1D	5
 
+struct drm_radeon_cs_done {
+	int64_t		seq;
+	int32_t		ring;
+	int32_t		pad;
+};
+
 #endif
-- 
1.9.3