From: Thierry Reding <treding@nvidia.com>
Add a few helpers to count the number of contiguous DMA chunks found in an SG table. This is useful to determine whether or not a mapping can be used by drivers whose devices need contiguous memory.
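For illustration (not part of this patch), a driver whose device needs contiguous memory could reject unsuitable imports with the new helper; the surrounding foo_import() function is hypothetical:

	/* Hypothetical import path for a device that needs contiguous memory. */
	static int foo_import(struct device *dev, struct sg_table *sgt)
	{
		/*
		 * Even a multi-entry SG table can map to a single contiguous
		 * range of I/O virtual addresses (e.g. behind an IOMMU), so
		 * check the number of DMA chunks rather than sgt->nents.
		 */
		if (!sgt_dma_contiguous(sgt))
			return -EINVAL;

		return 0;
	}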
Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 include/linux/scatterlist.h | 11 +++++++++++
 lib/scatterlist.c           | 29 +++++++++++++++++++++++++++++
 2 files changed, 40 insertions(+)
diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h
index 6eec50fb36c8..51a8416623a9 100644
--- a/include/linux/scatterlist.h
+++ b/include/linux/scatterlist.h
@@ -262,6 +262,7 @@ int sg_split(struct scatterlist *in, const int in_mapped_nents,
 	     const size_t *split_sizes,
 	     struct scatterlist **out, int *out_mapped_nents,
 	     gfp_t gfp_mask);
+unsigned int sg_dma_count_chunks(struct scatterlist *sgl, unsigned int nents);
 
 typedef struct scatterlist *(sg_alloc_fn)(unsigned int, gfp_t);
 typedef void (sg_free_fn)(struct scatterlist *, unsigned int);
@@ -306,6 +307,16 @@ size_t sg_pcopy_to_buffer(struct scatterlist *sgl, unsigned int nents,
 size_t sg_zero_buffer(struct scatterlist *sgl, unsigned int nents,
 		      size_t buflen, off_t skip);
 
+static inline unsigned int sgt_dma_count_chunks(struct sg_table *sgt)
+{
+	return sg_dma_count_chunks(sgt->sgl, sgt->nents);
+}
+
+static inline bool sgt_dma_contiguous(struct sg_table *sgt)
+{
+	return sgt_dma_count_chunks(sgt) == 1;
+}
+
 /*
  * Maximum number of entries that will be allocated in one piece, if
  * a list larger than this is required then chaining will be utilized.
diff --git a/lib/scatterlist.c b/lib/scatterlist.c
index c2cf2c311b7d..4c0d11968f8d 100644
--- a/lib/scatterlist.c
+++ b/lib/scatterlist.c
@@ -142,6 +142,35 @@ void sg_init_one(struct scatterlist *sg, const void *buf, unsigned int buflen)
 }
 EXPORT_SYMBOL(sg_init_one);
 
+/**
+ * sg_dma_count_chunks - return number of contiguous DMA chunks in scatterlist
+ * @sgl: SG table
+ * @nents: number of entries in SG table
+ */
+unsigned int sg_dma_count_chunks(struct scatterlist *sgl, unsigned int nents)
+{
+	dma_addr_t next = ~(dma_addr_t)0;
+	unsigned int count = 0, i;
+	struct scatterlist *s;
+
+	for_each_sg(sgl, s, nents, i) {
+		/*
+		 * sg_dma_address(s) is only valid for entries that have
+		 * sg_dma_len(s) != 0.
+		 */
+		if (!sg_dma_len(s))
+			continue;
+
+		if (sg_dma_address(s) != next)
+			count++;
+
+		next = sg_dma_address(s) + sg_dma_len(s);
+	}
+
+	return count;
+}
+EXPORT_SYMBOL(sg_dma_count_chunks);
+
 /*
  * The default behaviour of sg_alloc_table() is to use these kmalloc/kfree
  * helpers.
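For illustration (not part of the patch), a hypothetical caller that maps a buffer itself could use the helper to verify that the resulting mapping is contiguous; foo_map_contiguous() is a made-up name:

	/* Hypothetical helper: map @sgt for @dev and require one DMA chunk. */
	static int foo_map_contiguous(struct device *dev, struct sg_table *sgt)
	{
		int count;

		count = dma_map_sg(dev, sgt->sgl, sgt->nents, DMA_TO_DEVICE);
		if (count == 0)
			return -ENOMEM;

		/* one chunk means the mapping is contiguous for this device */
		if (sg_dma_count_chunks(sgt->sgl, sgt->nents) != 1) {
			dma_unmap_sg(dev, sgt->sgl, sgt->nents, DMA_TO_DEVICE);
			return -EINVAL;
		}

		return 0;
	}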
From: Thierry Reding <treding@nvidia.com>
Functionality to count the number of contiguous DMA chunks in an SG table has been added to the scatterlist library. Use this functionality to replace an open-coded version in the GEM/CMA helpers.
Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/drm/drm_gem_cma_helper.c | 22 ++--------------------
 1 file changed, 2 insertions(+), 20 deletions(-)
diff --git a/drivers/gpu/drm/drm_gem_cma_helper.c b/drivers/gpu/drm/drm_gem_cma_helper.c
index 12e98fb28229..5bbae9654cc6 100644
--- a/drivers/gpu/drm/drm_gem_cma_helper.c
+++ b/drivers/gpu/drm/drm_gem_cma_helper.c
@@ -471,26 +471,8 @@ drm_gem_cma_prime_import_sg_table(struct drm_device *dev,
 {
 	struct drm_gem_cma_object *cma_obj;
 
-	if (sgt->nents != 1) {
-		/* check if the entries in the sg_table are contiguous */
-		dma_addr_t next_addr = sg_dma_address(sgt->sgl);
-		struct scatterlist *s;
-		unsigned int i;
-
-		for_each_sg(sgt->sgl, s, sgt->nents, i) {
-			/*
-			 * sg_dma_address(s) is only valid for entries
-			 * that have sg_dma_len(s) != 0
-			 */
-			if (!sg_dma_len(s))
-				continue;
-
-			if (sg_dma_address(s) != next_addr)
-				return ERR_PTR(-EINVAL);
-
-			next_addr = sg_dma_address(s) + sg_dma_len(s);
-		}
-	}
+	if (!sgt_dma_contiguous(sgt))
+		return ERR_PTR(-EINVAL);
 
 	/* Create a CMA GEM buffer. */
 	cma_obj = __drm_gem_cma_create(dev, attach->dmabuf->size);
On Tue, Dec 03, 2019 at 05:32:02PM +0100, Thierry Reding wrote:
> From: Thierry Reding <treding@nvidia.com>
>
> Functionality to count the number of contiguous DMA chunks in an SG
> table has been added to the scatterlist library. Use this functionality
> to replace an open-coded version in the GEM/CMA helpers.
>
> Signed-off-by: Thierry Reding <treding@nvidia.com>
On the first 2 patches:
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
From: Thierry Reding <treding@nvidia.com>
DMA-BUF requires that each device that accesses a DMA-BUF attaches to it separately. To do so, the host1x_bo_pin() and host1x_bo_unpin() functions need to be reimplemented so that they can return a mapping, which represents either an attachment or a map of the driver's own GEM object.
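As an illustration of the new calling convention (not part of the patch; program_dma_address() stands in for whatever the client does with the address):

	struct host1x_bo_mapping *map;

	map = host1x_bo_pin(dev, bo, DMA_TO_DEVICE);
	if (IS_ERR(map))
		return PTR_ERR(map);

	/*
	 * map->phys is the DMA address of the mapping and map->chunks the
	 * number of contiguous DMA chunks it consists of; clients that
	 * need contiguous buffers can check map->chunks == 1.
	 */
	program_dma_address(map->phys);

	host1x_bo_unpin(map);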
Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/drm/tegra/gem.c   | 131 +++++++++++++++++++++-------------
 drivers/gpu/drm/tegra/plane.c |  47 ++++--------
 drivers/gpu/drm/tegra/plane.h |   2 +-
 drivers/gpu/host1x/job.c      | 127 ++++++++++++--------------------
 drivers/gpu/host1x/job.h      |   6 +-
 include/linux/host1x.h        |  32 ++++++---
 6 files changed, 162 insertions(+), 183 deletions(-)
diff --git a/drivers/gpu/drm/tegra/gem.c b/drivers/gpu/drm/tegra/gem.c
index 1237df157e05..c085499067ed 100644
--- a/drivers/gpu/drm/tegra/gem.c
+++ b/drivers/gpu/drm/tegra/gem.c
@@ -27,71 +27,77 @@ static void tegra_bo_put(struct host1x_bo *bo)
 	drm_gem_object_put_unlocked(&obj->gem);
 }
 
-/* XXX move this into lib/scatterlist.c? */
-static int sg_alloc_table_from_sg(struct sg_table *sgt, struct scatterlist *sg,
-				  unsigned int nents, gfp_t gfp_mask)
+static struct host1x_bo_mapping *
+tegra_bo_pin(struct device *dev, struct host1x_bo *bo,
+	     enum dma_data_direction direction)
 {
-	struct scatterlist *dst;
-	unsigned int i;
+	struct tegra_bo *obj = host1x_to_tegra_bo(bo);
+	struct drm_gem_object *gem = &obj->gem;
+	struct host1x_bo_mapping *map;
 	int err;
 
-	err = sg_alloc_table(sgt, nents, gfp_mask);
-	if (err < 0)
-		return err;
-
-	dst = sgt->sgl;
+	map = kzalloc(sizeof(*map), GFP_KERNEL);
+	if (!map)
+		return ERR_PTR(-ENOMEM);
 
-	for (i = 0; i < nents; i++) {
-		sg_set_page(dst, sg_page(sg), sg->length, 0);
-		dst = sg_next(dst);
-		sg = sg_next(sg);
-	}
-
-	return 0;
-}
-
-static struct sg_table *tegra_bo_pin(struct device *dev, struct host1x_bo *bo,
-				     dma_addr_t *phys)
-{
-	struct tegra_bo *obj = host1x_to_tegra_bo(bo);
-	struct sg_table *sgt;
-	int err;
+	map->direction = direction;
+	map->dev = dev;
+	map->bo = bo;
 
 	/*
 	 * If we've manually mapped the buffer object through the IOMMU, make
 	 * sure to return the IOVA address of our mapping.
 	 */
-	if (phys && obj->mm) {
-		*phys = obj->iova;
-		return NULL;
+	if (obj->mm) {
+		map->phys = obj->iova;
+		return map;
+	}
+
+	/*
+	 * Imported buffers need special treatment to satisfy the semantics of
+	 * DMA-BUF.
+	 */
+	if (gem->import_attach) {
+		struct dma_buf *buf = gem->import_attach->dmabuf;
+
+		map->attach = dma_buf_attach(buf, dev);
+		if (IS_ERR(map->attach)) {
+			err = PTR_ERR(map->attach);
+			goto free;
+		}
+
+		map->sgt = dma_buf_map_attachment(map->attach, direction);
+		if (IS_ERR(map->sgt)) {
+			dma_buf_detach(buf, map->attach);
+			err = PTR_ERR(map->sgt);
+			goto free;
+		}
+
+		map->chunks = sgt_dma_count_chunks(map->sgt);
+		map->phys = sg_dma_address(map->sgt->sgl);
+		map->size = gem->size;
+
+		return map;
 	}
 
 	/*
 	 * If we don't have a mapping for this buffer yet, return an SG table
 	 * so that host1x can do the mapping for us via the DMA API.
 	 */
-	sgt = kzalloc(sizeof(*sgt), GFP_KERNEL);
-	if (!sgt)
-		return ERR_PTR(-ENOMEM);
+	map->sgt = kzalloc(sizeof(*map->sgt), GFP_KERNEL);
+	if (!map->sgt) {
+		err = -ENOMEM;
+		goto free;
+	}
 
 	if (obj->pages) {
 		/*
 		 * If the buffer object was allocated from the explicit IOMMU
 		 * API code paths, construct an SG table from the pages.
 		 */
-		err = sg_alloc_table_from_pages(sgt, obj->pages, obj->num_pages,
-						0, obj->gem.size, GFP_KERNEL);
-		if (err < 0)
-			goto free;
-	} else if (obj->sgt) {
-		/*
-		 * If the buffer object already has an SG table but no pages
-		 * were allocated for it, it means the buffer was imported and
-		 * the SG table needs to be copied to avoid overwriting any
-		 * other potential users of the original SG table.
-		 */
-		err = sg_alloc_table_from_sg(sgt, obj->sgt->sgl, obj->sgt->nents,
-					     GFP_KERNEL);
+		err = sg_alloc_table_from_pages(map->sgt, obj->pages,
+						obj->num_pages, 0,
+						obj->gem.size, GFP_KERNEL);
 		if (err < 0)
 			goto free;
 	} else {
@@ -100,25 +106,48 @@ static struct sg_table *tegra_bo_pin(struct device *dev, struct host1x_bo *bo,
 		 * not imported, it had to be allocated with the DMA API, so
 		 * the DMA API helper can be used.
 		 */
-		err = dma_get_sgtable(dev, sgt, obj->vaddr, obj->iova,
+		err = dma_get_sgtable(dev, map->sgt, obj->vaddr, obj->iova,
 				      obj->gem.size);
 		if (err < 0)
 			goto free;
 	}
 
-	return sgt;
+	err = dma_map_sg(dev, map->sgt->sgl, map->sgt->nents, direction);
+	if (!err) {
+		err = -ENOMEM;
+		goto free_sgt;
+	}
+
+	map->phys = sg_dma_address(map->sgt->sgl);
+	map->size = gem->size;
+	map->chunks = err;
+
+	return map;
 
+free_sgt:
+	sg_free_table(map->sgt);
 free:
-	kfree(sgt);
+	kfree(map->sgt);
+	kfree(map);
 	return ERR_PTR(err);
 }
 
-static void tegra_bo_unpin(struct device *dev, struct sg_table *sgt)
+static void tegra_bo_unpin(struct host1x_bo_mapping *map)
 {
-	if (sgt) {
-		sg_free_table(sgt);
-		kfree(sgt);
+	if (!map)
+		return;
+
+	if (map->attach) {
+		dma_buf_unmap_attachment(map->attach, map->sgt, map->direction);
+		dma_buf_detach(map->attach->dmabuf, map->attach);
+	} else if (map->sgt) {
+		dma_unmap_sg(map->dev, map->sgt->sgl, map->sgt->nents,
+			     map->direction);
+		sg_free_table(map->sgt);
+		kfree(map->sgt);
 	}
+
+	kfree(map);
 }
 
 static void *tegra_bo_mmap(struct host1x_bo *bo)
diff --git a/drivers/gpu/drm/tegra/plane.c b/drivers/gpu/drm/tegra/plane.c
index cadcdd9ea427..28cf668026da 100644
--- a/drivers/gpu/drm/tegra/plane.c
+++ b/drivers/gpu/drm/tegra/plane.c
@@ -67,7 +67,7 @@ tegra_plane_atomic_duplicate_state(struct drm_plane *plane)
 
 	for (i = 0; i < 3; i++) {
 		copy->iova[i] = DMA_MAPPING_ERROR;
-		copy->sgt[i] = NULL;
+		copy->map[i] = NULL;
 	}
 
 	return &copy->base;
@@ -114,18 +114,11 @@ static int tegra_dc_pin(struct tegra_dc *dc, struct tegra_plane_state *state)
 		struct tegra_bo *bo = tegra_fb_get_plane(state->base.fb, i);
 
 		if (!dc->client.group) {
-			struct sg_table *sgt;
+			struct host1x_bo_mapping *map;
 
-			sgt = host1x_bo_pin(dc->dev, &bo->base, NULL);
-			if (IS_ERR(sgt)) {
-				err = PTR_ERR(sgt);
-				goto unpin;
-			}
-
-			err = dma_map_sg(dc->dev, sgt->sgl, sgt->nents,
-					 DMA_TO_DEVICE);
-			if (err == 0) {
-				err = -ENOMEM;
+			map = host1x_bo_pin(dc->dev, &bo->base, DMA_TO_DEVICE);
+			if (IS_ERR(map)) {
+				err = PTR_ERR(map);
 				goto unpin;
 			}
 
@@ -135,13 +128,13 @@ static int tegra_dc_pin(struct tegra_dc *dc, struct tegra_plane_state *state)
 			 * map its SG table to a single contiguous chunk of
 			 * I/O virtual memory.
 			 */
-			if (err > 1) {
+			if (map->chunks > 1) {
 				err = -EINVAL;
 				goto unpin;
 			}
 
-			state->iova[i] = sg_dma_address(sgt->sgl);
-			state->sgt[i] = sgt;
+			state->iova[i] = map->phys;
+			state->map[i] = map;
 		} else {
 			state->iova[i] = bo->iova;
 		}
@@ -153,14 +146,9 @@ static int tegra_dc_pin(struct tegra_dc *dc, struct tegra_plane_state *state)
 	dev_err(dc->dev, "failed to map plane %u: %d\n", i, err);
 
 	while (i--) {
-		struct tegra_bo *bo = tegra_fb_get_plane(state->base.fb, i);
-		struct sg_table *sgt = state->sgt[i];
-
-		dma_unmap_sg(dc->dev, sgt->sgl, sgt->nents, DMA_TO_DEVICE);
-		host1x_bo_unpin(dc->dev, &bo->base, sgt);
-
+		host1x_bo_unpin(state->map[i]);
 		state->iova[i] = DMA_MAPPING_ERROR;
-		state->sgt[i] = NULL;
+		state->map[i] = NULL;
 	}
 
 	return err;
@@ -171,20 +159,11 @@ static void tegra_dc_unpin(struct tegra_dc *dc, struct tegra_plane_state *state)
 	unsigned int i;
 
 	for (i = 0; i < state->base.fb->format->num_planes; i++) {
-		struct tegra_bo *bo = tegra_fb_get_plane(state->base.fb, i);
-
-		if (!dc->client.group) {
-			struct sg_table *sgt = state->sgt[i];
-
-			if (sgt) {
-				dma_unmap_sg(dc->dev, sgt->sgl, sgt->nents,
-					     DMA_TO_DEVICE);
-				host1x_bo_unpin(dc->dev, &bo->base, sgt);
-			}
-		}
+		if (!dc->client.group)
+			host1x_bo_unpin(state->map[i]);
 
 		state->iova[i] = DMA_MAPPING_ERROR;
-		state->sgt[i] = NULL;
+		state->map[i] = NULL;
 	}
 }
 
diff --git a/drivers/gpu/drm/tegra/plane.h b/drivers/gpu/drm/tegra/plane.h
index a158a915109a..08df1ba1f567 100644
--- a/drivers/gpu/drm/tegra/plane.h
+++ b/drivers/gpu/drm/tegra/plane.h
@@ -39,7 +39,7 @@ struct tegra_plane_legacy_blending_state {
 struct tegra_plane_state {
 	struct drm_plane_state base;
 
-	struct sg_table *sgt[3];
+	struct host1x_bo_mapping *map[3];
 	dma_addr_t iova[3];
 
 	struct tegra_bo_tiling tiling;
diff --git a/drivers/gpu/host1x/job.c b/drivers/gpu/host1x/job.c
index 60b2fedd0061..88e445553df3 100644
--- a/drivers/gpu/host1x/job.c
+++ b/drivers/gpu/host1x/job.c
@@ -99,6 +99,7 @@ EXPORT_SYMBOL(host1x_job_add_gather);
 
 static unsigned int pin_job(struct host1x *host, struct host1x_job *job)
 {
+	unsigned long mask = HOST1X_RELOC_READ | HOST1X_RELOC_WRITE;
 	struct host1x_client *client = job->client;
 	struct device *dev = client->dev;
 	unsigned int i;
@@ -108,8 +109,8 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job)
 
 	for (i = 0; i < job->num_relocs; i++) {
 		struct host1x_reloc *reloc = &job->relocs[i];
-		dma_addr_t phys_addr, *phys;
-		struct sg_table *sgt;
+		enum dma_data_direction direction;
+		struct host1x_bo_mapping *map;
 
 		reloc->target.bo = host1x_bo_get(reloc->target.bo);
 		if (!reloc->target.bo) {
@@ -117,63 +118,40 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job)
 			goto unpin;
 		}
 
-		if (client->group)
-			phys = &phys_addr;
-		else
-			phys = NULL;
-
-		sgt = host1x_bo_pin(dev, reloc->target.bo, phys);
-		if (IS_ERR(sgt)) {
-			err = PTR_ERR(sgt);
-			goto unpin;
-		}
-
-		if (sgt) {
-			unsigned long mask = HOST1X_RELOC_READ |
-					     HOST1X_RELOC_WRITE;
-			enum dma_data_direction dir;
-
-			switch (reloc->flags & mask) {
-			case HOST1X_RELOC_READ:
-				dir = DMA_TO_DEVICE;
-				break;
-
-			case HOST1X_RELOC_WRITE:
-				dir = DMA_FROM_DEVICE;
-				break;
+		switch (reloc->flags & mask) {
+		case HOST1X_RELOC_READ:
+			direction = DMA_TO_DEVICE;
+			break;
 
-			case HOST1X_RELOC_READ | HOST1X_RELOC_WRITE:
-				dir = DMA_BIDIRECTIONAL;
-				break;
+		case HOST1X_RELOC_WRITE:
+			direction = DMA_FROM_DEVICE;
+			break;
 
-			default:
-				err = -EINVAL;
-				goto unpin;
-			}
+		case HOST1X_RELOC_READ | HOST1X_RELOC_WRITE:
+			direction = DMA_BIDIRECTIONAL;
+			break;
 
-			err = dma_map_sg(dev, sgt->sgl, sgt->nents, dir);
-			if (!err) {
-				err = -ENOMEM;
-				goto unpin;
-			}
+		default:
+			err = -EINVAL;
+			goto unpin;
+		}
 
-			job->unpins[job->num_unpins].dev = dev;
-			job->unpins[job->num_unpins].dir = dir;
-			phys_addr = sg_dma_address(sgt->sgl);
+		map = host1x_bo_pin(dev, reloc->target.bo, direction);
+		if (IS_ERR(map)) {
+			err = PTR_ERR(map);
+			goto unpin;
 		}
 
-		job->addr_phys[job->num_unpins] = phys_addr;
-		job->unpins[job->num_unpins].bo = reloc->target.bo;
-		job->unpins[job->num_unpins].sgt = sgt;
+		job->addr_phys[job->num_unpins] = map->phys;
+		job->unpins[job->num_unpins].map = map;
 		job->num_unpins++;
 	}
 
 	for (i = 0; i < job->num_gathers; i++) {
 		struct host1x_job_gather *g = &job->gathers[i];
+		struct host1x_bo_mapping *map;
 		size_t gather_size = 0;
 		struct scatterlist *sg;
-		struct sg_table *sgt;
-		dma_addr_t phys_addr;
 		unsigned long shift;
 		struct iova *alloc;
 		unsigned int j;
@@ -184,15 +162,16 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job)
 			goto unpin;
 		}
 
-		sgt = host1x_bo_pin(host->dev, g->bo, NULL);
-		if (IS_ERR(sgt)) {
-			err = PTR_ERR(sgt);
+		map = host1x_bo_pin(host->dev, g->bo, DMA_TO_DEVICE);
+		if (IS_ERR(map)) {
+			err = PTR_ERR(map);
 			goto unpin;
 		}
 
 		if (!IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL) && host->domain) {
-			for_each_sg(sgt->sgl, sg, sgt->nents, j)
+			for_each_sg(map->sgt->sgl, sg, map->sgt->nents, j)
 				gather_size += sg->length;
+
 			gather_size = iova_align(&host->iova, gather_size);
 
 			shift = iova_shift(&host->iova);
@@ -204,35 +183,24 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job)
 			}
 
 			err = iommu_map_sg(host->domain,
-					   iova_dma_addr(&host->iova, alloc),
-					   sgt->sgl, sgt->nents, IOMMU_READ);
+					   iova_dma_addr(&host->iova, alloc),
+					   map->sgt->sgl, map->sgt->nents,
+					   IOMMU_READ);
 			if (err == 0) {
 				__free_iova(&host->iova, alloc);
 				err = -EINVAL;
 				goto unpin;
 			}
 
-			job->unpins[job->num_unpins].size = gather_size;
-			phys_addr = iova_dma_addr(&host->iova, alloc);
-		} else {
-			err = dma_map_sg(host->dev, sgt->sgl, sgt->nents,
-					 DMA_TO_DEVICE);
-			if (!err) {
-				err = -ENOMEM;
-				goto unpin;
-			}
-
-			job->unpins[job->num_unpins].dev = host->dev;
-			phys_addr = sg_dma_address(sgt->sgl);
+			map->phys = iova_dma_addr(&host->iova, alloc);
+			map->size = gather_size;
 		}
 
-		job->addr_phys[job->num_unpins] = phys_addr;
-		job->gather_addr_phys[i] = phys_addr;
-
-		job->unpins[job->num_unpins].dir = DMA_TO_DEVICE;
-		job->unpins[job->num_unpins].bo = g->bo;
-		job->unpins[job->num_unpins].sgt = sgt;
+		job->addr_phys[job->num_unpins] = map->phys;
+		job->unpins[job->num_unpins].map = map;
 		job->num_unpins++;
+
+		job->gather_addr_phys[i] = map->phys;
 	}
 
 	return 0;
@@ -608,24 +576,19 @@ void host1x_job_unpin(struct host1x_job *job)
 	unsigned int i;
 
 	for (i = 0; i < job->num_unpins; i++) {
-		struct host1x_job_unpin_data *unpin = &job->unpins[i];
-		struct device *dev = unpin->dev ?: host->dev;
-		struct sg_table *sgt = unpin->sgt;
+		struct host1x_bo_mapping *map = job->unpins[i].map;
+		struct host1x_bo *bo = map->bo;
 
 		if (!IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL) &&
-		    unpin->size && host->domain) {
+		    map->size && host->domain) {
 			iommu_unmap(host->domain, job->addr_phys[i],
-				    unpin->size);
+				    map->size);
 			free_iova(&host->iova,
-				  iova_pfn(&host->iova, job->addr_phys[i]));
+				  iova_pfn(&host->iova, job->addr_phys[i]));
 		}
 
-		if (unpin->dev && sgt)
-			dma_unmap_sg(unpin->dev, sgt->sgl, sgt->nents,
-				     unpin->dir);
-
-		host1x_bo_unpin(dev, unpin->bo, sgt);
-		host1x_bo_put(unpin->bo);
+		host1x_bo_unpin(map);
+		host1x_bo_put(bo);
 	}
 
 	job->num_unpins = 0;
diff --git a/drivers/gpu/host1x/job.h b/drivers/gpu/host1x/job.h
index 94bc2e4ae241..c28eebb8ce14 100644
--- a/drivers/gpu/host1x/job.h
+++ b/drivers/gpu/host1x/job.h
@@ -19,11 +19,7 @@ struct host1x_job_gather {
 };
 
 struct host1x_job_unpin_data {
-	struct host1x_bo *bo;
-	struct sg_table *sgt;
-	struct device *dev;
-	size_t size;
-	enum dma_data_direction dir;
+	struct host1x_bo_mapping *map;
 };
 
 /*
diff --git a/include/linux/host1x.h b/include/linux/host1x.h
index 0254ebcdc0a7..f1e6fad19289 100644
--- a/include/linux/host1x.h
+++ b/include/linux/host1x.h
@@ -7,6 +7,7 @@
 #define __LINUX_HOST1X_H
 
 #include <linux/device.h>
+#include <linux/dma-direction.h>
 #include <linux/types.h>
 
 enum host1x_class {
@@ -72,12 +73,24 @@ struct host1x_client {
 struct host1x_bo;
 struct sg_table;
 
+struct host1x_bo_mapping {
+	struct dma_buf_attachment *attach;
+	enum dma_data_direction direction;
+	struct host1x_bo *bo;
+	struct sg_table *sgt;
+	unsigned int chunks;
+	struct device *dev;
+	dma_addr_t phys;
+	size_t size;
+};
+
 struct host1x_bo_ops {
 	struct host1x_bo *(*get)(struct host1x_bo *bo);
 	void (*put)(struct host1x_bo *bo);
-	struct sg_table *(*pin)(struct device *dev, struct host1x_bo *bo,
-				dma_addr_t *phys);
-	void (*unpin)(struct device *dev, struct sg_table *sgt);
+	struct host1x_bo_mapping *(*pin)(struct device *dev,
+					 struct host1x_bo *bo,
+					 enum dma_data_direction dir);
+	void (*unpin)(struct host1x_bo_mapping *map);
 	void *(*mmap)(struct host1x_bo *bo);
 	void (*munmap)(struct host1x_bo *bo, void *addr);
 };
@@ -102,17 +115,16 @@ static inline void host1x_bo_put(struct host1x_bo *bo)
 	bo->ops->put(bo);
 }
 
-static inline struct sg_table *host1x_bo_pin(struct device *dev,
-					     struct host1x_bo *bo,
-					     dma_addr_t *phys)
+static inline struct host1x_bo_mapping *
+host1x_bo_pin(struct device *dev, struct host1x_bo *bo,
+	      enum dma_data_direction dir)
 {
-	return bo->ops->pin(dev, bo, phys);
+	return bo->ops->pin(dev, bo, dir);
 }
 
-static inline void host1x_bo_unpin(struct device *dev, struct host1x_bo *bo,
-				   struct sg_table *sgt)
+static inline void host1x_bo_unpin(struct host1x_bo_mapping *map)
 {
-	bo->ops->unpin(dev, sgt);
+	map->bo->ops->unpin(map);
 }
 
 static inline void *host1x_bo_mmap(struct host1x_bo *bo)
On Tue, Dec 03, 2019 at 05:32:03PM +0100, Thierry Reding wrote:
> From: Thierry Reding <treding@nvidia.com>
>
> DMA-BUF requires that each device that accesses a DMA-BUF attaches to
> it separately. To do so, the host1x_bo_pin() and host1x_bo_unpin()
> functions need to be reimplemented so that they can return a mapping,
> which represents either an attachment or a map of the driver's own GEM
> object.
>
> Signed-off-by: Thierry Reding <treding@nvidia.com>
I didn't check the full details, but this looks like it's doing things as intended.
Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Cheers, Daniel
On Tue, Dec 03, 2019 at 06:33:08PM +0100, Daniel Vetter wrote:
> On Tue, Dec 03, 2019 at 05:32:03PM +0100, Thierry Reding wrote:
> > From: Thierry Reding <treding@nvidia.com>
> >
> > DMA-BUF requires that each device that accesses a DMA-BUF attaches to
> > it separately. To do so, the host1x_bo_pin() and host1x_bo_unpin()
> > functions need to be reimplemented so that they can return a mapping,
> > which represents either an attachment or a map of the driver's own
> > GEM object.
> >
> > Signed-off-by: Thierry Reding <treding@nvidia.com>
>
> I didn't check the full details, but this looks like it's doing things
> as intended.
>
> Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>
I forgot to add: I guess this could conflict with the cleanup I pushed into drm-misc-next for 5.6. If that's the case, either put these there too, or wait until drm-misc-next has landed in drm-next before merging them to some tegra tree.

-Daniel
> Cheers, Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch