Am 17.08.19 um 16:47 schrieb Chris Wilson:
The timestamp and the cb_list are mutually exclusive, the cb_list can only be added to prior to being signaled (and once signaled we drain), while the timestamp is only valid upon being signaled. Both the timestamp and the cb_list are only valid while the fence is alive, and as soon as no references are held can be replaced by the rcu_head.
By reusing the union for the timestamp, we squeeze the base dma_fence struct to 64 bytes on x86-64.
Suggested-by: Christian König christian.koenig@amd.com Signed-off-by: Chris Wilson chris@chris-wilson.co.uk Cc: Christian König christian.koenig@amd.com
drivers/dma-buf/dma-fence.c | 16 +++++++++------- drivers/gpu/drm/i915/gt/intel_breadcrumbs.c | 13 +++++++------ drivers/gpu/drm/vmwgfx/vmwgfx_fence.c | 3 +++ include/linux/dma-fence.h | 17 +++++++++++++---- 4 files changed, 32 insertions(+), 17 deletions(-)
diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c index 89d96e3e6df6..2c21115b1a37 100644 --- a/drivers/dma-buf/dma-fence.c +++ b/drivers/dma-buf/dma-fence.c @@ -129,6 +129,7 @@ EXPORT_SYMBOL(dma_fence_context_alloc); int dma_fence_signal_locked(struct dma_fence *fence) { struct dma_fence_cb *cur, *tmp;
struct list_head cb_list;
lockdep_assert_held(fence->lock);
@@ -136,16 +137,16 @@ int dma_fence_signal_locked(struct dma_fence *fence) &fence->flags))) return -EINVAL;
- /* Stash the cb_list before replacing it with the timestamp */
- list_replace(&fence->cb_list, &cb_list);
Stashing the timestamp instead is probably less bytes to modify.
- fence->timestamp = ktime_get(); set_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &fence->flags); trace_dma_fence_signaled(fence);
- if (!list_empty(&fence->cb_list)) {
list_for_each_entry_safe(cur, tmp, &fence->cb_list, node) {
INIT_LIST_HEAD(&cur->node);
cur->func(fence, cur);
}
INIT_LIST_HEAD(&fence->cb_list);
list_for_each_entry_safe(cur, tmp, &cb_list, node) {
INIT_LIST_HEAD(&cur->node);
cur->func(fence, cur);
}
return 0;
@@ -234,7 +235,8 @@ void dma_fence_release(struct kref *kref)
trace_dma_fence_destroy(fence);
- if (WARN(!list_empty(&fence->cb_list),
- if (WARN(!list_empty(&fence->cb_list) &&
"Fence %s:%s:%llx:%llx released with pending signals!\n", fence->ops->get_driver_name(fence), fence->ops->get_timeline_name(fence),!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags),
diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c index 2bc9c460e78d..09c68dda2098 100644 --- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c @@ -114,18 +114,18 @@ __dma_fence_signal__timestamp(struct dma_fence *fence, ktime_t timestamp) }
static void -__dma_fence_signal__notify(struct dma_fence *fence) +__dma_fence_signal__notify(struct dma_fence *fence,
const struct list_head *list)
{ struct dma_fence_cb *cur, *tmp;
lockdep_assert_held(fence->lock); lockdep_assert_irqs_disabled();
- list_for_each_entry_safe(cur, tmp, &fence->cb_list, node) {
- list_for_each_entry_safe(cur, tmp, list, node) { INIT_LIST_HEAD(&cur->node); cur->func(fence, cur); }
INIT_LIST_HEAD(&fence->cb_list); }
void intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine)
@@ -187,11 +187,12 @@ void intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine) list_for_each_safe(pos, next, &signal) { struct i915_request *rq = list_entry(pos, typeof(*rq), signal_link);
__dma_fence_signal__timestamp(&rq->fence, timestamp);
struct list_head cb_list;
spin_lock(&rq->lock);
__dma_fence_signal__notify(&rq->fence);
list_replace(&rq->fence.cb_list, &cb_list);
__dma_fence_signal__timestamp(&rq->fence, timestamp);
__dma_fence_signal__notify(&rq->fence, &cb_list);
spin_unlock(&rq->lock);
i915_request_put(rq);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c index 434dfadb0e52..178a6cd1a06f 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c @@ -185,6 +185,9 @@ static long vmw_fence_wait(struct dma_fence *f, bool intr, signed long timeout)
spin_lock(f->lock);
- if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &f->flags))
goto out;
- if (intr && signal_pending(current)) { ret = -ERESTARTSYS; goto out;
diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h index 2ce4d877d33e..6c36502a0562 100644 --- a/include/linux/dma-fence.h +++ b/include/linux/dma-fence.h @@ -65,17 +65,26 @@ struct dma_fence_cb; struct dma_fence { spinlock_t *lock; const struct dma_fence_ops *ops;
- /* We clear the callback list on kref_put so that by the time we
* release the fence it is unused. No one should be adding to the cb_list
* that they don't themselves hold a reference for.
- /*
* We clear the callback list on kref_put so that by the time we
* release the fence it is unused. No one should be adding to the
* cb_list that they don't themselves hold a reference for.
*
* The lifetime of the timestamp is similarly tied to both the
* rcu freelist and the cb_list. The timestamp is only set upon
* signaling while simultaneously notifying the cb_list. Ergo, we
* only use either the cb_list of timestamp. Upon destruction,
* neither are accessible, and so we can use the rcu. This means
* that the cb_list is *only* valid until the signal bit is set,
*/ union { struct rcu_head rcu; struct list_head cb_list;* and to read either you *must* hold a reference to the fence.
ktime_t timestamp;
Maybe sort this cb_list, timestamp, rcu to better note in which order it is used in the lifetime of a fence.
Apart from that looks good to me, Christian.
}; u64 context; u64 seqno;
- ktime_t timestamp; unsigned long flags; struct kref refcount; int error;