Re: [PATCH v7 13/15] drm/tegra: Implement job submission part of new UAPI

15 Jun 2021

      On 10/06/2021 12:04, Mikko Perttunen wrote:
...
Implement the job submission IOCTL with a minimum feature set.
Signed-off-by: Mikko Perttunen mperttunen@nvidia.com
v7:

Allocate gather BO with DMA API to get page-aligned
memory
Add error prints to a few places where they were missing

v6:

Remove sgt bypass path in gather_bo - this would cause
cache maintenance to be skipped and is unnecessary in
general.
Changes related to moving to using syncpoint IDs
Add syncobj related code
Print warning on submit failure describing the issue
Use ARCH_DMA_ADDR_T_64BIT to check if that is indeed
the case
Add support for relative syncpoint wait
Use pm_runtime_resume_and_get
Only try to resume engines that support runtime PM
Removed uapi subdirectory
Don't use "copy_err" variables for copy_from_user
return value
Fix setting of blocklinear flag

v5:

Add 16K size limit to copies from userspace.
Guard RELOC_BLOCKLINEAR flag handling to only exist in ARM64
to prevent oversized shift on 32-bit platforms.

v4:

Remove all features that are not strictly necessary.
Split into two patches.

v3:

Remove WRITE_RELOC. Relocations are now patched implicitly
when patching is needed.
Directly call PM runtime APIs on devices instead of using
power_on/power_off callbacks.
Remove incorrect mutex unlock in tegra_drm_ioctl_channel_open
Use XA_FLAGS_ALLOC1 instead of XA_FLAGS_ALLOC
Accommodate for removal of timeout field and inlining of
syncpt_incrs array.
Copy entire user arrays at a time instead of going through
elements one-by-one.
Implement waiting of DMA reservations.
Split out gather_bo implementation into a separate file.
Fix length parameter passed to sg_init_one in gather_bo
Cosmetic cleanup.

drivers/gpu/drm/tegra/Makefile    |   2 +
 drivers/gpu/drm/tegra/drm.c       |   4 +-
 drivers/gpu/drm/tegra/gather_bo.c |  82 +++++
 drivers/gpu/drm/tegra/gather_bo.h |  24 ++
 drivers/gpu/drm/tegra/submit.c    | 549 ++++++++++++++++++++++++++++++
 drivers/gpu/drm/tegra/submit.h    |  17 +
 6 files changed, 677 insertions(+), 1 deletion(-)
 create mode 100644 drivers/gpu/drm/tegra/gather_bo.c
 create mode 100644 drivers/gpu/drm/tegra/gather_bo.h
 create mode 100644 drivers/gpu/drm/tegra/submit.c
 create mode 100644 drivers/gpu/drm/tegra/submit.h
...
...
+int tegra_drm_ioctl_channel_submit(struct drm_device *drm, void *data,

		   struct drm_file *file)

+{

struct tegra_drm_file *fpriv = file->driver_priv;
struct drm_tegra_channel_submit *args = data;
struct tegra_drm_submit_data *job_data;
struct drm_syncobj *syncobj = NULL;
struct tegra_drm_context *ctx;
struct host1x_job *job;
struct gather_bo *bo;
u32 i;
int err;

mutex_lock(&fpriv->lock);
ctx = xa_load(&fpriv->contexts, args->channel_ctx);
if (!ctx) {
mutex_unlock(&fpriv->lock);

pr_err_ratelimited("%s: %s: invalid channel_ctx '%d'", __func__,

	current->comm, args->channel_ctx);

return -EINVAL;

}

if (args->syncobj_in) {
struct dma_fence *fence;

err = drm_syncobj_find_fence(file, args->syncobj_in, 0, 0, &fence);

if (err) {

	SUBMIT_ERR(ctx, "invalid syncobj_in '%d'", args->syncobj_in);

	goto unlock;

}

err = dma_fence_wait_timeout(fence, true, msecs_to_jiffies(10000));

dma_fence_put(fence);

if (err) {

	SUBMIT_ERR(ctx, "wait for syncobj_in timed out");

	goto unlock;

}

}

if (args->syncobj_out) {
syncobj = drm_syncobj_find(file, args->syncobj_out);

if (!syncobj) {

	SUBMIT_ERR(ctx, "invalid syncobj_out '%d'", args->syncobj_out);

	err = -ENOENT;

	goto unlock;

}

}

/* Allocate gather BO and copy gather words in. */
err = submit_copy_gather_data(&bo, drm->dev, ctx, args);
if (err)
goto unlock;

job_data = kzalloc(sizeof(*job_data), GFP_KERNEL);
if (!job_data) {
SUBMIT_ERR(ctx, "failed to allocate memory for job data");

err = -ENOMEM;

goto put_bo;

}

/* Get data buffer mappings and do relocation patching. */
err = submit_process_bufs(ctx, bo, args, job_data);
if (err)
goto free_job_data;

/* Allocate host1x_job and add gathers and waits to it. */
err = submit_create_job(&job, ctx, bo, args, job_data,
		&fpriv->syncpoints);

if (err)
goto free_job_data;

/* Map gather data for Host1x. */
err = host1x_job_pin(job, ctx->client->base.dev);
if (err) {
SUBMIT_ERR(ctx, "failed to pin job: %d", err);

goto put_job;

}

/* Boot engine. */
if (pm_runtime_enabled(ctx->client->base.dev)) {
err = pm_runtime_resume_and_get(ctx->client->base.dev);

if (err < 0) {

	SUBMIT_ERR(ctx, "could not power up engine: %d", err);

	goto unpin_job;

}

}

job->user_data = job_data;
job->release = release_job;
job->timeout = 10000;

/*
* job_data is now part of job reference counting, so don't release

* it from here.

*/

job_data = NULL;

/* Submit job to hardware. */
err = host1x_job_submit(job);
if (err) {
SUBMIT_ERR(ctx, "host1x job submission failed: %d", err);

goto unpin_job;

}

If we fail here, it appears that we may leave rpm enabled. Should we be
calling pm_runtime_put() for any failures from here on?
Cheers
Jon
-- 
nvpublic

2025

2024

2023

2022

2021

2020

2019

2018

2017

2016

2015

2014

2013

2012

2011

2010

Re: [PATCH v7 13/15] drm/tegra: Implement job submission part of new UAPI

Signed-off-by: Mikko Perttunen mperttunen@nvidia.com