On 09/06/2021 07:34, Thomas Hellström wrote:
From: Chris Wilson chris@chris-wilson.co.uk
Set up a default migration context on the GT and use it from the selftests. Add a perf selftest and make sure we exercise LMEM if available.
Signed-off-by: Chris Wilson chris@chris-wilson.co.uk Co-developed-by: Thomas Hellström thomas.hellstrom@linux.intel.com Signed-off-by: Thomas Hellström thomas.hellstrom@linux.intel.com
drivers/gpu/drm/i915/gt/intel_gt.c | 4 + drivers/gpu/drm/i915/gt/intel_gt_types.h | 3 + drivers/gpu/drm/i915/gt/intel_migrate.c | 4 +- drivers/gpu/drm/i915/gt/selftest_migrate.c | 227 +++++++++++++++++- .../drm/i915/selftests/i915_perf_selftests.h | 1 + 5 files changed, 232 insertions(+), 7 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index 2161bf01ef8b..67ef057ae918 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -13,6 +13,7 @@ #include "intel_gt_clock_utils.h" #include "intel_gt_pm.h" #include "intel_gt_requests.h" +#include "intel_migrate.h" #include "intel_mocs.h" #include "intel_rc6.h" #include "intel_renderstate.h" @@ -626,6 +627,8 @@ int intel_gt_init(struct intel_gt *gt) if (err) goto err_gt;
- intel_migrate_init(&gt->migrate, gt);
- goto out_fw; err_gt: __intel_gt_disable(gt);
@@ -649,6 +652,7 @@ void intel_gt_driver_remove(struct intel_gt *gt) { __intel_gt_disable(gt);
intel_migrate_fini(&gt->migrate); intel_uc_driver_remove(&gt->uc);
intel_engines_release(gt);
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h index fecfacf551d5..7450935f2ca8 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h @@ -24,6 +24,7 @@ #include "intel_reset_types.h" #include "intel_rc6_types.h" #include "intel_rps_types.h" +#include "intel_migrate_types.h" #include "intel_wakeref.h"
struct drm_i915_private; @@ -145,6 +146,8 @@ struct intel_gt {
struct i915_vma *scratch;
- struct intel_migrate migrate;
- struct intel_gt_info { intel_engine_mask_t engine_mask; u8 num_engines;
diff --git a/drivers/gpu/drm/i915/gt/intel_migrate.c b/drivers/gpu/drm/i915/gt/intel_migrate.c index fda05ce3eb9c..935b5f14ff21 100644 --- a/drivers/gpu/drm/i915/gt/intel_migrate.c +++ b/drivers/gpu/drm/i915/gt/intel_migrate.c @@ -418,10 +418,9 @@ intel_context_migrate_copy(struct intel_context *ce, struct i915_request *rq; int err;
- GEM_BUG_ON(ce->vm != ce->engine->gt->migrate.context->vm); *out = NULL;
/* GEM_BUG_ON(ce->vm != migrate_vm); */
GEM_BUG_ON(ce->ring->size < SZ_64K);
do {
@@ -536,6 +535,7 @@ intel_context_migrate_clear(struct intel_context *ce, struct i915_request *rq; int err;
GEM_BUG_ON(ce->vm != ce->engine->gt->migrate.context->vm); *out = NULL;
GEM_BUG_ON(ce->ring->size < SZ_64K);
diff --git a/drivers/gpu/drm/i915/gt/selftest_migrate.c b/drivers/gpu/drm/i915/gt/selftest_migrate.c index 159c8656e1b0..396c81364399 100644 --- a/drivers/gpu/drm/i915/gt/selftest_migrate.c +++ b/drivers/gpu/drm/i915/gt/selftest_migrate.c @@ -3,6 +3,8 @@
- Copyright © 2020 Intel Corporation
*/
+#include <linux/sort.h>
#include "selftests/i915_random.h"
static const unsigned int sizes[] = {
@@ -441,14 +443,229 @@ int intel_migrate_live_selftests(struct drm_i915_private *i915) SUBTEST(thread_global_copy), SUBTEST(thread_global_clear), };
- struct intel_migrate m;
- struct intel_gt *gt = &i915->gt;
- if (!gt->migrate.context)
return 0;
- return i915_subtests(tests, &gt->migrate);
+}
+static struct drm_i915_gem_object * +create_init_lmem_internal(struct intel_gt *gt, size_t sz, bool try_lmem) +{
- struct drm_i915_gem_object *obj = NULL; int err;
- if (intel_migrate_init(&m, &i915->gt))
- if (try_lmem && HAS_LMEM(gt->i915))
Could drop the HAS_LMEM() check here.
Elsewhere in the driver it is normal to just do: obj = create_lmem(); if (IS_ERR(obj)) obj = create_internal();
Reviewed-by: Matthew Auld matthew.auld@intel.com
obj = i915_gem_object_create_lmem(gt->i915, sz, 0);
- if (IS_ERR_OR_NULL(obj)) {
obj = i915_gem_object_create_internal(gt->i915, sz);
if (IS_ERR(obj))
return obj;
- }
- i915_gem_object_trylock(obj);
- err = i915_gem_object_pin_pages(obj);
- if (err) {
i915_gem_object_unlock(obj);
i915_gem_object_put(obj);
return ERR_PTR(err);
- }
- return obj;
+}
+static int wrap_ktime_compare(const void *A, const void *B) +{
- const ktime_t *a = A, *b = B;
- return ktime_compare(*a, *b);
+}
+static int __perf_clear_blt(struct intel_context *ce,
struct scatterlist *sg,
enum i915_cache_level cache_level,
bool is_lmem,
size_t sz)
+{
- ktime_t t[5];
- int pass;
- int err = 0;
- for (pass = 0; pass < ARRAY_SIZE(t); pass++) {
struct i915_request *rq;
ktime_t t0, t1;
t0 = ktime_get();
err = intel_context_migrate_clear(ce, NULL, sg, cache_level,
is_lmem, 0, &rq);
if (rq) {
if (i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT) < 0)
err = -EIO;
i915_request_put(rq);
}
if (err)
break;
t1 = ktime_get();
t[pass] = ktime_sub(t1, t0);
- }
- if (err)
return err;
- sort(t, ARRAY_SIZE(t), sizeof(*t), wrap_ktime_compare, NULL);
- pr_info("%s: %zd KiB fill: %lld MiB/s\n",
ce->engine->name, sz >> 10,
div64_u64(mul_u32_u32(4 * sz,
1000 * 1000 * 1000),
t[1] + 2 * t[2] + t[3]) >> 20);
- return 0;
+}
+static int perf_clear_blt(void *arg) +{
- struct intel_gt *gt = arg;
- static const unsigned long sizes[] = {
SZ_4K,
SZ_64K,
SZ_2M,
SZ_64M
- };
- int i;
- for (i = 0; i < ARRAY_SIZE(sizes); i++) {
struct drm_i915_gem_object *dst;
int err;
dst = create_init_lmem_internal(gt, sizes[i], true);
if (IS_ERR(dst))
return PTR_ERR(dst);
err = __perf_clear_blt(gt->migrate.context,
dst->mm.pages->sgl,
I915_CACHE_NONE,
i915_gem_object_is_lmem(dst),
sizes[i]);
i915_gem_object_unlock(dst);
i915_gem_object_put(dst);
if (err)
return err;
- }
- return 0;
+}
+static int __perf_copy_blt(struct intel_context *ce,
struct scatterlist *src,
enum i915_cache_level src_cache_level,
bool src_is_lmem,
struct scatterlist *dst,
enum i915_cache_level dst_cache_level,
bool dst_is_lmem,
size_t sz)
+{
- ktime_t t[5];
- int pass;
- int err = 0;
- for (pass = 0; pass < ARRAY_SIZE(t); pass++) {
struct i915_request *rq;
ktime_t t0, t1;
t0 = ktime_get();
err = intel_context_migrate_copy(ce, NULL,
src, src_cache_level,
src_is_lmem,
dst, dst_cache_level,
dst_is_lmem,
&rq);
if (rq) {
if (i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT) < 0)
err = -EIO;
i915_request_put(rq);
}
if (err)
break;
t1 = ktime_get();
t[pass] = ktime_sub(t1, t0);
- }
- if (err)
return err;
- sort(t, ARRAY_SIZE(t), sizeof(*t), wrap_ktime_compare, NULL);
- pr_info("%s: %zd KiB copy: %lld MiB/s\n",
ce->engine->name, sz >> 10,
div64_u64(mul_u32_u32(4 * sz,
1000 * 1000 * 1000),
t[1] + 2 * t[2] + t[3]) >> 20);
- return 0;
+}
+static int perf_copy_blt(void *arg) +{
- struct intel_gt *gt = arg;
- static const unsigned long sizes[] = {
SZ_4K,
SZ_64K,
SZ_2M,
SZ_64M
- };
- int i;
- for (i = 0; i < ARRAY_SIZE(sizes); i++) {
struct drm_i915_gem_object *src, *dst;
int err;
src = create_init_lmem_internal(gt, sizes[i], true);
if (IS_ERR(src))
return PTR_ERR(src);
dst = create_init_lmem_internal(gt, sizes[i], false);
if (IS_ERR(dst)) {
err = PTR_ERR(dst);
goto err_src;
}
err = __perf_copy_blt(gt->migrate.context,
src->mm.pages->sgl,
I915_CACHE_NONE,
i915_gem_object_is_lmem(src),
dst->mm.pages->sgl,
I915_CACHE_NONE,
i915_gem_object_is_lmem(dst),
sizes[i]);
i915_gem_object_unlock(dst);
i915_gem_object_put(dst);
+err_src:
i915_gem_object_unlock(src);
i915_gem_object_put(src);
if (err)
return err;
- }
- return 0;
+}
+int intel_migrate_perf_selftests(struct drm_i915_private *i915) +{
- static const struct i915_subtest tests[] = {
SUBTEST(perf_clear_blt),
SUBTEST(perf_copy_blt),
- };
- struct intel_gt *gt = &i915->gt;
- if (intel_gt_is_wedged(gt)) return 0;
- err = i915_subtests(tests, &m);
- intel_migrate_fini(&m);
- if (!gt->migrate.context)
return 0;
- return err;
- return intel_gt_live_subtests(tests, gt); }
diff --git a/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h b/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h index c2389f8a257d..5077dc3c3b8c 100644 --- a/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_perf_selftests.h @@ -17,5 +17,6 @@ */ selftest(engine_cs, intel_engine_cs_perf_selftests) selftest(request, i915_request_perf_selftests) +selftest(migrate, intel_migrate_perf_selftests) selftest(blt, i915_gem_object_blt_perf_selftests) selftest(region, intel_memory_region_perf_selftests)