From b95fe503dad89875da9db2d11d05f93eca41232d Mon Sep 17 00:00:00 2001
From: Jérôme Glisse
Date: Thu, 18 Sep 2014 22:51:21 -0400
Subject: [PATCH] drm/radeon: cs sequence id and cs completion query.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This reports back to userspace the ring id and sequence number that
userspace can use to query for completion of the cs on the hardware.
It also adds a new ioctl to perform such a query.

An extra value is supplied to userspace, the wrap counter, which is
used to detect wrap around and to gracefully handle the case where
userspace queries about a very, very, very old cs executed a long time
ago in a far far away universe.

This patch introduces the necessary ground work for userspace explicit
synchronization. By allowing userspace to query about cs completion on
the hardware, userspace can perform operations and synchronization on
buffers by itself, without the cs ioctl implicitly waiting for older cs
completion before scheduling a new cs. That part is however left to a
follow-up patch.

Signed-off-by: Jérôme Glisse
---
 drivers/gpu/drm/radeon/radeon.h       |  2 ++
 drivers/gpu/drm/radeon/radeon_cs.c    | 69 ++++++++++++++++++++++++++++++++++
 drivers/gpu/drm/radeon/radeon_fence.c | 10 ++++++
 drivers/gpu/drm/radeon/radeon_kms.c   |  1 +
 include/uapi/drm/radeon_drm.h         | 10 +++++++
 5 files changed, 92 insertions(+)

diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 5f05b4c..1f1dd1f 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -118,6 +118,7 @@ extern int radeon_bapm;
 #define RADEON_DEBUGFS_MAX_COMPONENTS	32
 #define RADEONFB_CONN_LIMIT		4
 #define RADEON_BIOS_NUM_SCRATCH		8
+#define RADEON_SEQ_WRAP_VALUE		(1 << 30)
 
 /* fence seq are set to this number when signaled */
 #define RADEON_FENCE_SIGNALED_SEQ		0LL
@@ -355,6 +356,7 @@ struct radeon_fence_driver {
 	/* sync_seq is protected by ring emission lock */
 	uint64_t			sync_seq[RADEON_NUM_RINGS];
 	atomic64_t			last_seq;
+	int32_t				wrap_seq;
 	bool				initialized;
 };
 
diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c
index 83f382e..be4ae25 100644
--- a/drivers/gpu/drm/radeon/radeon_cs.c
+++ b/drivers/gpu/drm/radeon/radeon_cs.c
@@ -404,6 +404,17 @@ static void radeon_cs_parser_fini(struct radeon_cs_parser *parser, int error, bo
 		ttm_eu_fence_buffer_objects(&parser->ticket,
 					    &parser->validated,
 					    parser->ib.fence);
+		if (parser->chunk_flags && parser->chunk_flags->length_dw > 5) {
+			uint32_t __user *to = parser->chunk_flags->user_ptr;
+			uint32_t tmp[3];
+
+			tmp[0] = lower_32_bits(parser->ib.fence->seq);
+			tmp[1] = parser->ib.fence->ring;
+			tmp[2] = parser->rdev->fence_drv[tmp[1]].wrap_seq;
+
+			if (copy_to_user(&to[3], tmp, sizeof(tmp)))
+				DRM_ERROR("Failed to copy cs id back to userspace\n");
+		}
 	} else if (backoff) {
 		ttm_eu_backoff_reservation(&parser->ticket,
 					   &parser->validated);
@@ -823,3 +834,61 @@ int radeon_cs_packet_next_reloc(struct radeon_cs_parser *p,
 	*cs_reloc = p->relocs_ptr[(idx / 4)];
 	return 0;
 }
+
+int radeon_cs_done_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
+{
+	struct radeon_device *rdev = dev->dev_private;
+	struct drm_radeon_cs_done *args = data;
+	unsigned i = args->ring;
+	int32_t last_seq, sync_seq, wrap_seq;
+
+	if (i >= RADEON_NUM_RINGS)
+		return -EINVAL;
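+
+	/*
+	 * Note on the arithmetic below: seq values are compared with 32bit
+	 * signed differences, e.g. (last_seq - args->seq) >= 0, so the
+	 * tests keep working across the 32bit wrap as long as the two
+	 * values are less than 2^31 apart; the wrap counter resolves the
+	 * remaining, much coarser, ambiguity.
+	 */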
+
+	/*
+	 * The memory barrier matches the one in radeon_fence_emit() and
+	 * ensures that we read a matching pair of wrap_seq and sync_seq.
+	 *
+	 * Note that there is no need to protect fence_drv.sync_seq here;
+	 * the barrier gives us all the coherency we need.
+	 */
+	wrap_seq = ACCESS_ONCE(rdev->fence_drv[i].wrap_seq);
+	smp_rmb();
+	sync_seq = lower_32_bits(ACCESS_ONCE(rdev->fence_drv[i].sync_seq[i]));
+
+	/*
+	 * If the current wrap_seq and the one we are queried with differ
+	 * by more than one, we are being queried about a very old fence
+	 * seq value and we can assume it is long done.
+	 *
+	 * Well, this is not entirely true; for it to be true we would need
+	 * to stall when incrementing the wrap counter while cs from the
+	 * previous wrap are not yet completed, but that is highly unlikely.
+	 * So live with the thrill of it going wrong!
+	 */
+	if (abs(wrap_seq - args->wrap) > 1)
+		return 1;
+	/* Now check if the currently reported fence seq is done or not. */
+	radeon_fence_process(rdev, i);
+	last_seq = lower_32_bits(atomic64_read(&rdev->fence_drv[i].last_seq));
+	if ((last_seq - args->seq) >= 0)
+		return 1;
+	/*
+	 * Last failsafe to handle the horrible case where userspace held
+	 * on to a wrap and seq value for so long without querying that we
+	 * wrapped all the way around. If the wrap matches but the seq value
+	 * is above the current sync_seq[ring], the seq can only come from a
+	 * long gone previous wrap, so report it as done.
+	 */
+	if (((sync_seq - args->seq) < 0) && args->wrap == wrap_seq)
+		return 1;
+	return 0;
+}
diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c
index 9137870..a6adcf6 100644
--- a/drivers/gpu/drm/radeon/radeon_fence.c
+++ b/drivers/gpu/drm/radeon/radeon_fence.c
@@ -119,6 +119,16 @@ int radeon_fence_emit(struct radeon_device *rdev,
 	kref_init(&((*fence)->kref));
 	(*fence)->rdev = rdev;
 	(*fence)->seq = ++rdev->fence_drv[ring].sync_seq[ring];
+	/* This barrier pairs with the smp_rmb() in radeon_cs_done_ioctl(). */
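+	/*
+	 * That ioctl reads wrap_seq first, issues smp_rmb(), then reads
+	 * sync_seq, so the smp_wmb() below guarantees that a reader who
+	 * sees the new wrap_seq value also sees the sync_seq value that
+	 * was just written above.
+	 */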
+	smp_wmb();
+	if (!(rdev->fence_drv[ring].sync_seq[ring] & (RADEON_SEQ_WRAP_VALUE - 1)))
+		rdev->fence_drv[ring].wrap_seq++;
 	(*fence)->ring = ring;
 	radeon_fence_ring_emit(rdev, ring, *fence);
 	trace_radeon_fence_emit(rdev->ddev, ring, (*fence)->seq);
diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c
index eb7164d..c9cfcf5 100644
--- a/drivers/gpu/drm/radeon/radeon_kms.c
+++ b/drivers/gpu/drm/radeon/radeon_kms.c
@@ -885,5 +885,6 @@ const struct drm_ioctl_desc radeon_ioctls_kms[] = {
 	DRM_IOCTL_DEF_DRV(RADEON_GEM_BUSY, radeon_gem_busy_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(RADEON_GEM_VA, radeon_gem_va_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(RADEON_GEM_OP, radeon_gem_op_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(RADEON_CS_DONE, radeon_cs_done_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW),
 };
 int radeon_max_kms_ioctl = ARRAY_SIZE(radeon_ioctls_kms);
diff --git a/include/uapi/drm/radeon_drm.h b/include/uapi/drm/radeon_drm.h
index fea6099..a0c215d 100644
--- a/include/uapi/drm/radeon_drm.h
+++ b/include/uapi/drm/radeon_drm.h
@@ -554,6 +554,8 @@ typedef struct {
 #define DRM_IOCTL_RADEON_GEM_BUSY	DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_BUSY, struct drm_radeon_gem_busy)
 #define DRM_IOCTL_RADEON_GEM_VA		DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_VA, struct drm_radeon_gem_va)
 #define DRM_IOCTL_RADEON_GEM_OP		DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_OP, struct drm_radeon_gem_op)
+#define DRM_RADEON_CS_DONE		0x2d
+#define DRM_IOCTL_RADEON_CS_DONE	DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_CS_DONE, struct drm_radeon_cs_done)
 
 typedef struct drm_radeon_init {
 	enum {
@@ -936,6 +938,7 @@ struct drm_radeon_gem_va {
 #define RADEON_CS_RING_VCE	4
 /* The third dword of RADEON_CHUNK_ID_FLAGS is a sint32 that sets the priority */
 /* 0 = normal, + = higher priority, - = lower priority */
+/* The fourth, fifth and sixth dwords are the 32bit fence seq, ring id and wrap counter of this CS */
 
 struct drm_radeon_cs_chunk {
 	uint32_t		chunk_id;
@@ -1038,3 +1041,10 @@
 #define CIK_TILE_MODE_DEPTH_STENCIL_1D		5
 
+struct drm_radeon_cs_done {
+	int32_t			seq;
+	int32_t			ring;
+	int32_t			wrap;
+	int32_t			pad;
+};
+
 #endif
-- 
1.9.3
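
For illustration, here is a minimal sketch of how userspace could use
the new interface; it is not part of the patch. It assumes libdrm's
drmIoctl() helper plus the uapi additions above. The function name
radeon_cs_is_done() and the convention of capturing seq/ring/wrap from
dwords 3, 4 and 5 (counting from 0) of the RADEON_CHUNK_ID_FLAGS chunk
after the cs ioctl returns are illustrative only.

#include <stdint.h>
#include <string.h>
#include <xf86drm.h>
#include "radeon_drm.h"

/*
 * Query whether the cs identified by (seq, ring, wrap) has completed.
 * The three values are the ones the cs ioctl wrote back into dwords
 * 3, 4 and 5 of the RADEON_CHUNK_ID_FLAGS chunk at submission time.
 *
 * Returns 1 if the cs is done, 0 if it is still pending, and a
 * negative value on error (errno is set by the failed ioctl).
 */
static int radeon_cs_is_done(int fd, int32_t seq, int32_t ring, int32_t wrap)
{
	struct drm_radeon_cs_done args;

	memset(&args, 0, sizeof(args));
	args.seq = seq;
	args.ring = ring;
	args.wrap = wrap;

	/* The kernel returns 1 (done) or 0 (pending) as the ioctl result. */
	return drmIoctl(fd, DRM_IOCTL_RADEON_CS_DONE, &args);
}

A caller could poll this before mapping a buffer touched by the cs,
instead of relying on the implicit wait the cs ioctl performs today.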