Quoting Matthew Auld (2019-08-09 23:26:18)
struct i915_vma *intel_emit_vma_copy_blt(struct intel_engine_pool_node **p,
					 struct intel_context *ce,
					 struct i915_vma *src,
					 struct i915_vma *dst)
{
	struct drm_i915_private *i915 = ce->vm->i915;
	const u32 block_size = S16_MAX * PAGE_SIZE;
	struct intel_engine_pool_node *pool;
	struct i915_vma *batch;
	u64 src_offset, dst_offset;
	u64 count;
	u64 rem;
	u32 size;
	u32 *cmd;
	int err;

	GEM_BUG_ON(src->size != dst->size);

	count = div_u64(dst->size, block_size);
	size = (1 + 11 * count) * sizeof(u32);
	size = round_up(size, PAGE_SIZE);
	pool = intel_engine_pool_get(&ce->engine->pool, size);
	if (IS_ERR(pool))
		return ERR_CAST(pool);

	cmd = i915_gem_object_pin_map(pool->obj, I915_MAP_WC);
	if (IS_ERR(cmd)) {
		err = PTR_ERR(cmd);
		goto out_put;
	}

	rem = src->size;
	src_offset = src->node.start;
	dst_offset = dst->node.start;

	do {
		u32 size = min_t(u64, rem, block_size);

		GEM_BUG_ON(size >> PAGE_SHIFT > S16_MAX);

		if (INTEL_GEN(i915) >= 9) {
			*cmd++ = GEN9_XY_FAST_COPY_BLT_CMD | (10 - 2);
			*cmd++ = BLT_DEPTH_32 | PAGE_SIZE;
			*cmd++ = 0;
			*cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
			*cmd++ = lower_32_bits(dst_offset);
			*cmd++ = upper_32_bits(dst_offset);
			*cmd++ = 0;
			*cmd++ = PAGE_SIZE;
			*cmd++ = lower_32_bits(src_offset);
			*cmd++ = upper_32_bits(src_offset);
		} else if (INTEL_GEN(i915) >= 8) {
			*cmd++ = XY_SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (10 - 2);
			*cmd++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | PAGE_SIZE;
			*cmd++ = 0;
			*cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
			*cmd++ = lower_32_bits(dst_offset);
			*cmd++ = upper_32_bits(dst_offset);
			*cmd++ = 0;
			*cmd++ = PAGE_SIZE;
			*cmd++ = lower_32_bits(src_offset);
			*cmd++ = upper_32_bits(src_offset);
		} else {
			*cmd++ = SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (6 - 2);
			*cmd++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | PAGE_SIZE;
			*cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE;
			*cmd++ = dst_offset;
			*cmd++ = PAGE_SIZE;
			*cmd++ = src_offset;
		}

		/* Allow ourselves to be preempted in between blocks. */
		*cmd++ = MI_ARB_CHECK;

		src_offset += size;
		dst_offset += size;
		rem -= size;
	} while (rem);

	*cmd = MI_BATCH_BUFFER_END;
	intel_gt_chipset_flush(ce->vm->gt);

	i915_gem_object_unpin_map(pool->obj);

	batch = i915_vma_instance(pool->obj, ce->vm, NULL);
	if (IS_ERR(batch)) {
		err = PTR_ERR(batch);
		goto out_put;
	}

	err = i915_vma_pin(batch, 0, 0, PIN_USER);
	if (unlikely(err))
		goto out_put;

	*p = pool;
	return batch;

out_put:
	intel_engine_pool_put(pool);
	return ERR_PTR(err);
}
int i915_gem_object_copy_blt(struct drm_i915_gem_object *src,
			     struct drm_i915_gem_object *dst,
			     struct intel_context *ce)
{
	struct drm_gem_object *objs[] = { &src->base, &dst->base };
	struct i915_address_space *vm = ce->vm;
	struct intel_engine_pool_node *pool;
	struct ww_acquire_ctx acquire;
	struct i915_vma *vma_src, *vma_dst;
	struct i915_vma *batch;
	struct i915_request *rq;
	int err;

	vma_src = i915_vma_instance(src, vm, NULL);
	if (IS_ERR(vma_src))
		return PTR_ERR(vma_src);

	err = i915_vma_pin(vma_src, 0, 0, PIN_USER);
	if (unlikely(err))
		return err;

	vma_dst = i915_vma_instance(dst, vm, NULL);
	if (IS_ERR(vma_dst)) {
		err = PTR_ERR(vma_dst);
		goto out_unpin_src;
	}

	err = i915_vma_pin(vma_dst, 0, 0, PIN_USER);
	if (unlikely(err))
		goto out_unpin_src;

	intel_engine_pm_get(ce->engine);
	batch = intel_emit_vma_copy_blt(&pool, ce, vma_src, vma_dst);
	if (IS_ERR(batch)) {
		err = PTR_ERR(batch);
		goto out_unpin_dst;
	}

	rq = intel_context_create_request(ce);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto out_batch;
	}

	i915_vma_lock(batch);
	err = i915_vma_move_to_active(batch, rq, 0);
	i915_vma_unlock(batch);
	if (unlikely(err))
		goto out_request;

	err = intel_engine_pool_mark_active(pool, rq);
	if (unlikely(err))
		goto out_request;

	err = drm_gem_lock_reservations(objs, ARRAY_SIZE(objs), &acquire);
	if (unlikely(err))
		goto out_request;

	if (src->cache_dirty & ~src->cache_coherent)
		i915_gem_clflush_object(src, 0);

	if (dst->cache_dirty & ~dst->cache_coherent)
		i915_gem_clflush_object(dst, 0);

	err = i915_request_await_object(rq, src, false);
	if (unlikely(err))
		goto out_unlock;

	err = i915_vma_move_to_active(vma_src, rq, 0);
	if (unlikely(err))
		goto out_unlock;

	err = i915_request_await_object(rq, dst, true);
	if (unlikely(err))
		goto out_unlock;

	err = i915_vma_move_to_active(vma_dst, rq, EXEC_OBJECT_WRITE);
	if (unlikely(err))
		goto out_unlock;

Strictly, wait on all objects first, then set up all the signals. That
avoids any nasty cycles in the dependency graph, such as if someone
passes in src == dst. Time for another selftest ;)
for (i = 0; i < ARRAY_SIZE(obj); i++) { clflush_object(obj[i]); await_object(rq, obj[i]); }
for (i = 0; i < ARRAY_SIZE(obj); i++) move_to_active(obj[i]);
if (ce->engine->emit_init_breadcrumb) {
err = ce->engine->emit_init_breadcrumb(rq);
if (unlikely(err))
goto out_unlock;
}
err = ce->engine->emit_bb_start(rq,
batch->node.start, batch->node.size,
0);
+out_unlock:
drm_gem_unlock_reservations(objs, ARRAY_SIZE(objs), &acquire);
+out_request:
if (unlikely(err))
i915_request_skip(rq, err);
i915_request_add(rq);
+out_batch:
i915_vma_unpin(batch);
intel_engine_pool_put(pool);
+out_unpin_dst:
i915_vma_unpin(vma_dst);
intel_engine_pm_put(ce->engine);
+out_unpin_src:
i915_vma_unpin(vma_src);
return err;
+}