From: Thierry Reding <treding@nvidia.com>
Add a few helpers to count the number of contiguous DMA chunks found in an SG table. This is useful to determine whether or not a mapping can be used by drivers whose devices need contiguous memory.
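For illustration only (not part of this patch), a driver whose device requires contiguous memory might use the new helpers roughly like this; the function is hypothetical and assumes <linux/dma-mapping.h> and <linux/scatterlist.h>:

/* Hypothetical caller: reject buffers that did not map to a single DMA chunk. */
static int foo_require_contiguous(struct device *dev, struct sg_table *sgt)
{
	int err;

	err = dma_map_sgtable(dev, sgt, DMA_TO_DEVICE, 0);
	if (err < 0)
		return err;

	/*
	 * An IOMMU may have coalesced the entries into a single IOVA range,
	 * in which case sgt_dma_count_chunks() returns 1 and
	 * sgt_dma_contiguous() is true.
	 */
	if (!sgt_dma_contiguous(sgt)) {
		dma_unmap_sgtable(dev, sgt, DMA_TO_DEVICE, 0);
		return -EINVAL;
	}

	return 0;
}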
Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 include/linux/scatterlist.h | 11 +++++++++++
 lib/scatterlist.c           | 26 ++++++++++++++++++++++++++
 2 files changed, 37 insertions(+)
diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h
index 266754a55327..cca235ff2d0a 100644
--- a/include/linux/scatterlist.h
+++ b/include/linux/scatterlist.h
@@ -281,6 +281,7 @@ int sg_split(struct scatterlist *in, const int in_mapped_nents,
 	     const size_t *split_sizes,
 	     struct scatterlist **out, int *out_mapped_nents,
 	     gfp_t gfp_mask);
+unsigned int sg_dma_count_chunks(struct scatterlist *sgl, unsigned int nents);
 
 typedef struct scatterlist *(sg_alloc_fn)(unsigned int, gfp_t);
 typedef void (sg_free_fn)(struct scatterlist *, unsigned int);
@@ -358,6 +359,16 @@ size_t sg_pcopy_to_buffer(struct scatterlist *sgl, unsigned int nents,
 size_t sg_zero_buffer(struct scatterlist *sgl, unsigned int nents,
 		      size_t buflen, off_t skip);
 
+static inline unsigned int sgt_dma_count_chunks(struct sg_table *sgt)
+{
+	return sg_dma_count_chunks(sgt->sgl, sgt->nents);
+}
+
+static inline bool sgt_dma_contiguous(struct sg_table *sgt)
+{
+	return sgt_dma_count_chunks(sgt) == 1;
+}
+
 /*
  * Maximum number of entries that will be allocated in one piece, if
  * a list larger than this is required then chaining will be utilized.
diff --git a/lib/scatterlist.c b/lib/scatterlist.c
index abb3432ed744..fae2179a218a 100644
--- a/lib/scatterlist.c
+++ b/lib/scatterlist.c
@@ -142,6 +142,32 @@ void sg_init_one(struct scatterlist *sg, const void *buf, unsigned int buflen)
 }
 EXPORT_SYMBOL(sg_init_one);
 
+/**
+ * sg_dma_count_chunks - return number of contiguous DMA chunks in scatterlist
+ * @sgl: SG table
+ * @nents: number of entries in SG table
+ */
+unsigned int sg_dma_count_chunks(struct scatterlist *sgl, unsigned int nents)
+{
+	dma_addr_t next = ~(dma_addr_t)0;
+	unsigned int count = 0, i;
+	struct scatterlist *s;
+
+	for_each_sg(sgl, s, nents, i) {
+		/* sg_dma_address(s) is only valid for entries that have sg_dma_len(s) != 0. */
+		if (!sg_dma_len(s))
+			continue;
+
+		if (sg_dma_address(s) != next) {
+			next = sg_dma_address(s) + sg_dma_len(s);
+			count++;
+		}
+	}
+
+	return count;
+}
+EXPORT_SYMBOL(sg_dma_count_chunks);
+
 /*
  * The default behaviour of sg_alloc_table() is to use these kmalloc/kfree
  * helpers.
From: Thierry Reding <treding@nvidia.com>
DMA-BUF requires that each device that accesses a DMA-BUF attaches to it separately. To support this, the host1x_bo_pin() and host1x_bo_unpin() functions need to be reimplemented so that they operate on a mapping object, which represents either a DMA-BUF attachment or a mapping of the driver's own GEM object.
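For context, the DMA-BUF rule being honored is roughly the following sequence, performed once per importing device. This is a condensed sketch of what the new tegra_bo_pin() does for imported buffers, with error handling elided; buf, dev and direction stand for the DMA-BUF, the importing device and the DMA direction:

/* Sketch: per-device import sequence required by DMA-BUF (no error handling). */
struct dma_buf_attachment *attach;
struct sg_table *sgt;

attach = dma_buf_attach(buf, dev);               /* one attachment per device */
sgt = dma_buf_map_attachment(attach, direction); /* device-specific DMA mapping */

/* ... program the device using sg_dma_address()/sg_dma_len() ... */

dma_buf_unmap_attachment(attach, sgt, direction);
dma_buf_detach(buf, attach);

The mapping object returned by host1x_bo_pin() bundles this state so that host1x_bo_unpin() can tear down exactly what was set up.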
Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/drm/tegra/gem.c    | 138 +++++++++++++++------------
 drivers/gpu/drm/tegra/plane.c  |  60 ++++---------
 drivers/gpu/drm/tegra/plane.h  |   2 +-
 drivers/gpu/drm/tegra/submit.c |  63 +++++++++----
 drivers/gpu/drm/tegra/uapi.c   |  68 ++++++--------
 drivers/gpu/drm/tegra/uapi.h   |   5 +-
 drivers/gpu/host1x/job.c       | 158 ++++++++++++---------------------
 drivers/gpu/host1x/job.h       |   6 +-
 include/linux/host1x.h         |  30 ++++---
 9 files changed, 244 insertions(+), 286 deletions(-)
diff --git a/drivers/gpu/drm/tegra/gem.c b/drivers/gpu/drm/tegra/gem.c
index 6ec598f5d5b3..79a609f981d1 100644
--- a/drivers/gpu/drm/tegra/gem.c
+++ b/drivers/gpu/drm/tegra/gem.c
@@ -27,79 +27,64 @@ static void tegra_bo_put(struct host1x_bo *bo)
 	drm_gem_object_put(&obj->gem);
 }
 
-/* XXX move this into lib/scatterlist.c? */
-static int sg_alloc_table_from_sg(struct sg_table *sgt, struct scatterlist *sg,
-				  unsigned int nents, gfp_t gfp_mask)
+static struct host1x_bo_mapping *tegra_bo_pin(struct device *dev, struct host1x_bo *bo,
+					      enum dma_data_direction direction)
 {
-	struct scatterlist *dst;
-	unsigned int i;
+	struct tegra_bo *obj = host1x_to_tegra_bo(bo);
+	struct drm_gem_object *gem = &obj->gem;
+	struct host1x_bo_mapping *map;
 	int err;
 
-	err = sg_alloc_table(sgt, nents, gfp_mask);
-	if (err < 0)
-		return err;
+	map = kzalloc(sizeof(*map), GFP_KERNEL);
+	if (!map)
+		return ERR_PTR(-ENOMEM);
 
-	dst = sgt->sgl;
+	map->bo = host1x_bo_get(bo);
+	map->direction = direction;
+	map->dev = dev;
 
-	for (i = 0; i < nents; i++) {
-		sg_set_page(dst, sg_page(sg), sg->length, 0);
-		dst = sg_next(dst);
-		sg = sg_next(sg);
-	}
+	/*
	 * Imported buffers need special treatment to satisfy the semantics of DMA-BUF.
+	 */
+	if (gem->import_attach) {
+		struct dma_buf *buf = gem->import_attach->dmabuf;
 
-	return 0;
-}
+		map->attach = dma_buf_attach(buf, dev);
+		if (IS_ERR(map->attach)) {
+			err = PTR_ERR(map->attach);
+			goto free;
+		}
 
-static struct sg_table *tegra_bo_pin(struct device *dev, struct host1x_bo *bo,
-				     dma_addr_t *phys)
-{
-	struct tegra_bo *obj = host1x_to_tegra_bo(bo);
-	struct sg_table *sgt;
-	int err;
+		map->sgt = dma_buf_map_attachment(map->attach, direction);
+		if (IS_ERR(map->sgt)) {
+			dma_buf_detach(buf, map->attach);
+			err = PTR_ERR(map->sgt);
+			goto free;
+		}
 
-	/*
-	 * If we've manually mapped the buffer object through the IOMMU, make
-	 * sure to return the IOVA address of our mapping.
-	 *
-	 * Similarly, for buffers that have been allocated by the DMA API the
-	 * physical address can be used for devices that are not attached to
-	 * an IOMMU. For these devices, callers must pass a valid pointer via
-	 * the @phys argument.
-	 *
-	 * Imported buffers were also already mapped at import time, so the
-	 * existing mapping can be reused.
-	 */
-	if (phys) {
-		*phys = obj->iova;
-		return NULL;
+		err = sgt_dma_count_chunks(map->sgt);
+		map->size = gem->size;
+
+		goto out;
 	}
 
 	/*
 	 * If we don't have a mapping for this buffer yet, return an SG table
 	 * so that host1x can do the mapping for us via the DMA API.
 	 */
-	sgt = kzalloc(sizeof(*sgt), GFP_KERNEL);
-	if (!sgt)
-		return ERR_PTR(-ENOMEM);
+	map->sgt = kzalloc(sizeof(*map->sgt), GFP_KERNEL);
+	if (!map->sgt) {
+		err = -ENOMEM;
+		goto free;
+	}
 
 	if (obj->pages) {
 		/*
 		 * If the buffer object was allocated from the explicit IOMMU
 		 * API code paths, construct an SG table from the pages.
 		 */
-		err = sg_alloc_table_from_pages(sgt, obj->pages, obj->num_pages,
-						0, obj->gem.size, GFP_KERNEL);
-		if (err < 0)
-			goto free;
-	} else if (obj->sgt) {
-		/*
-		 * If the buffer object already has an SG table but no pages
-		 * were allocated for it, it means the buffer was imported and
-		 * the SG table needs to be copied to avoid overwriting any
-		 * other potential users of the original SG table.
-		 */
-		err = sg_alloc_table_from_sg(sgt, obj->sgt->sgl,
-					     obj->sgt->orig_nents, GFP_KERNEL);
+		err = sg_alloc_table_from_pages(map->sgt, obj->pages, obj->num_pages, 0, gem->size,
+						GFP_KERNEL);
 		if (err < 0)
 			goto free;
 	} else {
@@ -108,25 +93,56 @@ static struct sg_table *tegra_bo_pin(struct device *dev, struct host1x_bo *bo,
 		 * not imported, it had to be allocated with the DMA API, so
 		 * the DMA API helper can be used.
 		 */
-		err = dma_get_sgtable(dev, sgt, obj->vaddr, obj->iova,
-				      obj->gem.size);
+		err = dma_get_sgtable(dev, map->sgt, obj->vaddr, obj->iova, gem->size);
 		if (err < 0)
 			goto free;
 	}
 
-	return sgt;
+	err = dma_map_sgtable(dev, map->sgt, direction, 0);
+	if (err)
+		goto free_sgt;
 
+out:
+	/*
+	 * If we've manually mapped the buffer object through the IOMMU, make sure to return the
+	 * existing IOVA address of our mapping.
+	 */
+	if (!obj->mm) {
+		map->phys = sg_dma_address(map->sgt->sgl);
+		map->chunks = err;
+	} else {
+		map->phys = obj->iova;
+		map->chunks = 1;
+	}
+
+	map->size = gem->size;
+
+	return map;
+
+free_sgt:
+	sg_free_table(map->sgt);
 free:
-	kfree(sgt);
+	kfree(map->sgt);
+	kfree(map);
 	return ERR_PTR(err);
 }
 
-static void tegra_bo_unpin(struct device *dev, struct sg_table *sgt)
+static void tegra_bo_unpin(struct host1x_bo_mapping *map)
 {
-	if (sgt) {
-		sg_free_table(sgt);
-		kfree(sgt);
+	if (!map)
+		return;
+
+	if (map->attach) {
+		dma_buf_unmap_attachment(map->attach, map->sgt, map->direction);
+		dma_buf_detach(map->attach->dmabuf, map->attach);
+	} else {
+		dma_unmap_sgtable(map->dev, map->sgt, map->direction, 0);
+		sg_free_table(map->sgt);
+		kfree(map->sgt);
 	}
+
+	host1x_bo_put(map->bo);
+	kfree(map);
 }
 
 static void *tegra_bo_mmap(struct host1x_bo *bo)
diff --git a/drivers/gpu/drm/tegra/plane.c b/drivers/gpu/drm/tegra/plane.c
index 16a1cdc28657..e579d966e8dc 100644
--- a/drivers/gpu/drm/tegra/plane.c
+++ b/drivers/gpu/drm/tegra/plane.c
@@ -74,7 +74,7 @@ tegra_plane_atomic_duplicate_state(struct drm_plane *plane)
 
 	for (i = 0; i < 3; i++) {
 		copy->iova[i] = DMA_MAPPING_ERROR;
-		copy->sgt[i] = NULL;
+		copy->map[i] = NULL;
 	}
 
 	return &copy->base;
@@ -138,55 +138,37 @@ const struct drm_plane_funcs tegra_plane_funcs = {
 
 static int tegra_dc_pin(struct tegra_dc *dc, struct tegra_plane_state *state)
 {
-	struct iommu_domain *domain = iommu_get_domain_for_dev(dc->dev);
 	unsigned int i;
 	int err;
 
 	for (i = 0; i < state->base.fb->format->num_planes; i++) {
 		struct tegra_bo *bo = tegra_fb_get_plane(state->base.fb, i);
-		dma_addr_t phys_addr, *phys;
-		struct sg_table *sgt;
+		struct host1x_bo_mapping *map;
 
-		/*
-		 * If we're not attached to a domain, we already stored the
-		 * physical address when the buffer was allocated. If we're
-		 * part of a group that's shared between all display
-		 * controllers, we've also already mapped the framebuffer
-		 * through the SMMU. In both cases we can short-circuit the
-		 * code below and retrieve the stored IOV address.
-		 */
-		if (!domain || dc->client.group)
-			phys = &phys_addr;
-		else
-			phys = NULL;
-
-		sgt = host1x_bo_pin(dc->dev, &bo->base, phys);
-		if (IS_ERR(sgt)) {
-			err = PTR_ERR(sgt);
+		map = host1x_bo_pin(dc->dev, &bo->base, DMA_TO_DEVICE);
+		if (IS_ERR(map)) {
+			err = PTR_ERR(map);
 			goto unpin;
 		}
 
-		if (sgt) {
-			err = dma_map_sgtable(dc->dev, sgt, DMA_TO_DEVICE, 0);
-			if (err)
-				goto unpin;
-
+		if (!dc->client.group) {
 			/*
 			 * The display controller needs contiguous memory, so
 			 * fail if the buffer is discontiguous and we fail to
 			 * map its SG table to a single contiguous chunk of
 			 * I/O virtual memory.
 			 */
-			if (sgt->nents > 1) {
+			if (map->chunks > 1) {
 				err = -EINVAL;
 				goto unpin;
 			}
 
-			state->iova[i] = sg_dma_address(sgt->sgl);
-			state->sgt[i] = sgt;
+			state->iova[i] = map->phys;
 		} else {
-			state->iova[i] = phys_addr;
+			state->iova[i] = bo->iova;
 		}
+
+		state->map[i] = map;
 	}
 
 	return 0;
@@ -195,15 +177,9 @@ static int tegra_dc_pin(struct tegra_dc *dc, struct tegra_plane_state *state)
 	dev_err(dc->dev, "failed to map plane %u: %d\n", i, err);
 
 	while (i--) {
-		struct tegra_bo *bo = tegra_fb_get_plane(state->base.fb, i);
-		struct sg_table *sgt = state->sgt[i];
-
-		if (sgt)
-			dma_unmap_sgtable(dc->dev, sgt, DMA_TO_DEVICE, 0);
-
-		host1x_bo_unpin(dc->dev, &bo->base, sgt);
+		host1x_bo_unpin(state->map[i]);
 		state->iova[i] = DMA_MAPPING_ERROR;
-		state->sgt[i] = NULL;
+		state->map[i] = NULL;
 	}
 
 	return err;
@@ -214,15 +190,9 @@ static void tegra_dc_unpin(struct tegra_dc *dc, struct tegra_plane_state *state)
 	unsigned int i;
 
 	for (i = 0; i < state->base.fb->format->num_planes; i++) {
-		struct tegra_bo *bo = tegra_fb_get_plane(state->base.fb, i);
-		struct sg_table *sgt = state->sgt[i];
-
-		if (sgt)
-			dma_unmap_sgtable(dc->dev, sgt, DMA_TO_DEVICE, 0);
-
-		host1x_bo_unpin(dc->dev, &bo->base, sgt);
+		host1x_bo_unpin(state->map[i]);
 		state->iova[i] = DMA_MAPPING_ERROR;
-		state->sgt[i] = NULL;
+		state->map[i] = NULL;
 	}
 }
 
diff --git a/drivers/gpu/drm/tegra/plane.h b/drivers/gpu/drm/tegra/plane.h
index d9470780c803..dfb20712fbd7 100644
--- a/drivers/gpu/drm/tegra/plane.h
+++ b/drivers/gpu/drm/tegra/plane.h
@@ -43,7 +43,7 @@ struct tegra_plane_legacy_blending_state {
 struct tegra_plane_state {
 	struct drm_plane_state base;
 
-	struct sg_table *sgt[3];
+	struct host1x_bo_mapping *map[3];
 	dma_addr_t iova[3];
 
 	struct tegra_bo_tiling tiling;
diff --git a/drivers/gpu/drm/tegra/submit.c b/drivers/gpu/drm/tegra/submit.c
index 776f825df52f..c32698404e36 100644
--- a/drivers/gpu/drm/tegra/submit.c
+++ b/drivers/gpu/drm/tegra/submit.c
@@ -64,33 +64,62 @@ static void gather_bo_put(struct host1x_bo *host_bo)
 	kref_put(&bo->ref, gather_bo_release);
 }
 
-static struct sg_table *
-gather_bo_pin(struct device *dev, struct host1x_bo *host_bo, dma_addr_t *phys)
+static struct host1x_bo_mapping *
+gather_bo_pin(struct device *dev, struct host1x_bo *bo, enum dma_data_direction direction)
 {
-	struct gather_bo *bo = container_of(host_bo, struct gather_bo, base);
-	struct sg_table *sgt;
+	struct gather_bo *gather = container_of(bo, struct gather_bo, base);
+	struct host1x_bo_mapping *map;
 	int err;
 
-	sgt = kzalloc(sizeof(*sgt), GFP_KERNEL);
-	if (!sgt)
+	map = kzalloc(sizeof(*map), GFP_KERNEL);
+	if (!map)
 		return ERR_PTR(-ENOMEM);
 
-	err = dma_get_sgtable(bo->dev, sgt, bo->gather_data, bo->gather_data_dma,
-			      bo->gather_data_words * 4);
-	if (err) {
-		kfree(sgt);
-		return ERR_PTR(err);
+	kref_init(&map->ref);
+	map->bo = host1x_bo_get(bo);
+	map->direction = direction;
+	map->dev = dev;
+
+	map->sgt = kzalloc(sizeof(*map->sgt), GFP_KERNEL);
+	if (!map->sgt) {
+		err = -ENOMEM;
+		goto free;
 	}
 
-	return sgt;
+	err = dma_get_sgtable(gather->dev, map->sgt, gather->gather_data, gather->gather_data_dma,
+			      gather->gather_data_words * 4);
+	if (err)
+		goto free_sgt;
+
+	err = dma_map_sgtable(dev, map->sgt, direction, 0);
+	if (err)
+		goto free_sgt;
+
+	map->phys = sg_dma_address(map->sgt->sgl);
+	map->size = gather->gather_data_words * 4;
+	map->chunks = err;
+
+	return map;
+
+free_sgt:
+	sg_free_table(map->sgt);
+	kfree(map->sgt);
+free:
+	kfree(map);
+	return ERR_PTR(err);
 }
 
-static void gather_bo_unpin(struct device *dev, struct sg_table *sgt)
+static void gather_bo_unpin(struct host1x_bo_mapping *map)
 {
-	if (sgt) {
-		sg_free_table(sgt);
-		kfree(sgt);
-	}
+	if (!map)
+		return;
+
+	dma_unmap_sgtable(map->dev, map->sgt, map->direction, 0);
+	sg_free_table(map->sgt);
+	kfree(map->sgt);
+	host1x_bo_put(map->bo);
+
+	kfree(map);
 }
 
 static void *gather_bo_mmap(struct host1x_bo *host_bo)
diff --git a/drivers/gpu/drm/tegra/uapi.c b/drivers/gpu/drm/tegra/uapi.c
index dc16a24f4dbe..794c400c38b1 100644
--- a/drivers/gpu/drm/tegra/uapi.c
+++ b/drivers/gpu/drm/tegra/uapi.c
@@ -17,11 +17,7 @@ static void tegra_drm_mapping_release(struct kref *ref)
 	struct tegra_drm_mapping *mapping =
 		container_of(ref, struct tegra_drm_mapping, ref);
 
-	if (mapping->sgt)
-		dma_unmap_sgtable(mapping->dev, mapping->sgt, mapping->direction,
-				  DMA_ATTR_SKIP_CPU_SYNC);
-
-	host1x_bo_unpin(mapping->dev, mapping->bo, mapping->sgt);
+	host1x_bo_unpin(mapping->map);
 	host1x_bo_put(mapping->bo);
 
 	kfree(mapping);
@@ -159,6 +155,7 @@ int tegra_drm_ioctl_channel_map(struct drm_device *drm, void *data, struct drm_f
 	struct drm_tegra_channel_map *args = data;
 	struct tegra_drm_mapping *mapping;
 	struct tegra_drm_context *context;
+	enum dma_data_direction direction;
 	int err = 0;
 
 	if (args->flags & ~DRM_TEGRA_CHANNEL_MAP_READ_WRITE)
@@ -180,68 +177,53 @@ int tegra_drm_ioctl_channel_map(struct drm_device *drm, void *data, struct drm_f
 
 	kref_init(&mapping->ref);
 
-	mapping->dev = context->client->base.dev;
 	mapping->bo = tegra_gem_lookup(file, args->handle);
 	if (!mapping->bo) {
 		err = -EINVAL;
-		goto unlock;
+		goto free;
 	}
 
-	if (context->client->base.group) {
-		/* IOMMU domain managed directly using IOMMU API */
-		host1x_bo_pin(mapping->dev, mapping->bo, &mapping->iova);
-	} else {
-		switch (args->flags & DRM_TEGRA_CHANNEL_MAP_READ_WRITE) {
-		case DRM_TEGRA_CHANNEL_MAP_READ_WRITE:
-			mapping->direction = DMA_BIDIRECTIONAL;
-			break;
-
-		case DRM_TEGRA_CHANNEL_MAP_WRITE:
-			mapping->direction = DMA_FROM_DEVICE;
-			break;
-
-		case DRM_TEGRA_CHANNEL_MAP_READ:
-			mapping->direction = DMA_TO_DEVICE;
-			break;
+	switch (args->flags & DRM_TEGRA_CHANNEL_MAP_READ_WRITE) {
+	case DRM_TEGRA_CHANNEL_MAP_READ_WRITE:
+		direction = DMA_BIDIRECTIONAL;
+		break;
 
-		default:
-			return -EINVAL;
-		}
+	case DRM_TEGRA_CHANNEL_MAP_WRITE:
+		direction = DMA_FROM_DEVICE;
+		break;
 
-		mapping->sgt = host1x_bo_pin(mapping->dev, mapping->bo, NULL);
-		if (IS_ERR(mapping->sgt)) {
-			err = PTR_ERR(mapping->sgt);
-			goto put_gem;
-		}
+	case DRM_TEGRA_CHANNEL_MAP_READ:
+		direction = DMA_TO_DEVICE;
+		break;
 
-		err = dma_map_sgtable(mapping->dev, mapping->sgt, mapping->direction,
-				      DMA_ATTR_SKIP_CPU_SYNC);
-		if (err)
-			goto unpin;
+	default:
+		err = -EINVAL;
+		goto put_gem;
+	}
 
-		mapping->iova = sg_dma_address(mapping->sgt->sgl);
+	mapping->map = host1x_bo_pin(context->client->base.dev, mapping->bo, direction);
+	if (IS_ERR(mapping->map)) {
+		err = PTR_ERR(mapping->map);
+		goto put_gem;
 	}
 
+	mapping->iova = mapping->map->phys;
 	mapping->iova_end = mapping->iova + host1x_to_tegra_bo(mapping->bo)->size;
 
 	err = xa_alloc(&context->mappings, &args->mapping, mapping, XA_LIMIT(1, U32_MAX),
 		       GFP_KERNEL);
 	if (err < 0)
-		goto unmap;
+		goto unpin;
 
 	mutex_unlock(&fpriv->lock);
 
 	return 0;
 
-unmap:
-	if (mapping->sgt) {
-		dma_unmap_sgtable(mapping->dev, mapping->sgt, mapping->direction,
-				  DMA_ATTR_SKIP_CPU_SYNC);
-	}
 unpin:
-	host1x_bo_unpin(mapping->dev, mapping->bo, mapping->sgt);
+	host1x_bo_unpin(mapping->map);
 put_gem:
 	host1x_bo_put(mapping->bo);
+free:
 	kfree(mapping);
 unlock:
 	mutex_unlock(&fpriv->lock);
diff --git a/drivers/gpu/drm/tegra/uapi.h b/drivers/gpu/drm/tegra/uapi.h
index 12adad770ad3..92ff1e44ff15 100644
--- a/drivers/gpu/drm/tegra/uapi.h
+++ b/drivers/gpu/drm/tegra/uapi.h
@@ -27,10 +27,9 @@ struct tegra_drm_file {
 struct tegra_drm_mapping {
 	struct kref ref;
 
-	struct device *dev;
+	struct host1x_bo_mapping *map;
 	struct host1x_bo *bo;
-	struct sg_table *sgt;
-	enum dma_data_direction direction;
+
 	dma_addr_t iova;
 	dma_addr_t iova_end;
 };
diff --git a/drivers/gpu/host1x/job.c b/drivers/gpu/host1x/job.c
index 0eef6df7c89e..7a27895e0d28 100644
--- a/drivers/gpu/host1x/job.c
+++ b/drivers/gpu/host1x/job.c
@@ -134,11 +134,11 @@ EXPORT_SYMBOL(host1x_job_add_wait);
 
 static unsigned int pin_job(struct host1x *host, struct host1x_job *job)
 {
+	unsigned long mask = HOST1X_RELOC_READ | HOST1X_RELOC_WRITE;
 	struct host1x_client *client = job->client;
 	struct device *dev = client->dev;
 	struct host1x_job_gather *g;
 	struct iommu_domain *domain;
-	struct sg_table *sgt;
 	unsigned int i;
 	int err;
 
@@ -147,7 +147,9 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job)
 
 	for (i = 0; i < job->num_relocs; i++) {
 		struct host1x_reloc *reloc = &job->relocs[i];
-		dma_addr_t phys_addr, *phys;
+		enum dma_data_direction direction;
+		struct host1x_bo_mapping *map;
+		struct host1x_bo *bo;
 
 		reloc->target.bo = host1x_bo_get(reloc->target.bo);
 		if (!reloc->target.bo) {
@@ -155,64 +157,44 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job)
 			goto unpin;
 		}
 
-		/*
-		 * If the client device is not attached to an IOMMU, the
-		 * physical address of the buffer object can be used.
-		 *
-		 * Similarly, when an IOMMU domain is shared between all
-		 * host1x clients, the IOVA is already available, so no
-		 * need to map the buffer object again.
-		 *
-		 * XXX Note that this isn't always safe to do because it
-		 * relies on an assumption that no cache maintenance is
-		 * needed on the buffer objects.
-		 */
-		if (!domain || client->group)
-			phys = &phys_addr;
-		else
-			phys = NULL;
-
-		sgt = host1x_bo_pin(dev, reloc->target.bo, phys);
-		if (IS_ERR(sgt)) {
-			err = PTR_ERR(sgt);
-			goto unpin;
-		}
+		bo = reloc->target.bo;
 
-		if (sgt) {
-			unsigned long mask = HOST1X_RELOC_READ |
-					     HOST1X_RELOC_WRITE;
-			enum dma_data_direction dir;
-
-			switch (reloc->flags & mask) {
-			case HOST1X_RELOC_READ:
-				dir = DMA_TO_DEVICE;
-				break;
+		switch (reloc->flags & mask) {
+		case HOST1X_RELOC_READ:
+			direction = DMA_TO_DEVICE;
+			break;
 
-			case HOST1X_RELOC_WRITE:
-				dir = DMA_FROM_DEVICE;
-				break;
+		case HOST1X_RELOC_WRITE:
+			direction = DMA_FROM_DEVICE;
+			break;
 
-			case HOST1X_RELOC_READ | HOST1X_RELOC_WRITE:
-				dir = DMA_BIDIRECTIONAL;
-				break;
+		case HOST1X_RELOC_READ | HOST1X_RELOC_WRITE:
+			direction = DMA_BIDIRECTIONAL;
+			break;
 
-			default:
-				err = -EINVAL;
-				goto unpin;
-			}
+		default:
+			err = -EINVAL;
+			goto unpin;
+		}
 
-			err = dma_map_sgtable(dev, sgt, dir, 0);
-			if (err)
-				goto unpin;
+		map = host1x_bo_pin(dev, bo, direction);
+		if (IS_ERR(map)) {
+			err = PTR_ERR(map);
+			goto unpin;
+		}
 
-			job->unpins[job->num_unpins].dev = dev;
-			job->unpins[job->num_unpins].dir = dir;
-			phys_addr = sg_dma_address(sgt->sgl);
+		/*
+		 * host1x clients are generally not able to do scatter-gather themselves, so fail
+		 * if the buffer is discontiguous and we fail to map its SG table to a single
+		 * contiguous chunk of I/O virtual memory.
+		 */
+		if (map->chunks > 1) {
+			err = -EINVAL;
+			goto unpin;
 		}
 
-		job->addr_phys[job->num_unpins] = phys_addr;
-		job->unpins[job->num_unpins].bo = reloc->target.bo;
-		job->unpins[job->num_unpins].sgt = sgt;
+		job->addr_phys[job->num_unpins] = map->phys;
+		job->unpins[job->num_unpins].map = map;
 		job->num_unpins++;
 	}
 
@@ -224,12 +206,11 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job)
 		return 0;
 
 	for (i = 0; i < job->num_cmds; i++) {
+		struct host1x_bo_mapping *map;
 		size_t gather_size = 0;
 		struct scatterlist *sg;
-		dma_addr_t phys_addr;
 		unsigned long shift;
 		struct iova *alloc;
-		dma_addr_t *phys;
 		unsigned int j;
 
 		if (job->cmds[i].is_wait)
@@ -243,25 +224,16 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job)
 			goto unpin;
 		}
 
-		/**
-		 * If the host1x is not attached to an IOMMU, there is no need
-		 * to map the buffer object for the host1x, since the physical
-		 * address can simply be used.
-		 */
-		if (!iommu_get_domain_for_dev(host->dev))
-			phys = &phys_addr;
-		else
-			phys = NULL;
-
-		sgt = host1x_bo_pin(host->dev, g->bo, phys);
-		if (IS_ERR(sgt)) {
-			err = PTR_ERR(sgt);
-			goto put;
+		map = host1x_bo_pin(host->dev, g->bo, DMA_TO_DEVICE);
+		if (IS_ERR(map)) {
+			err = PTR_ERR(map);
+			goto unpin;
 		}
 
 		if (host->domain) {
-			for_each_sgtable_sg(sgt, sg, j)
+			for_each_sgtable_sg(map->sgt, sg, j)
 				gather_size += sg->length;
+
 			gather_size = iova_align(&host->iova, gather_size);
 
 			shift = iova_shift(&host->iova);
@@ -272,33 +244,23 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job)
 				goto put;
 			}
 
-			err = iommu_map_sgtable(host->domain,
-						iova_dma_addr(&host->iova, alloc),
-						sgt, IOMMU_READ);
+			err = iommu_map_sgtable(host->domain, iova_dma_addr(&host->iova, alloc),
+						map->sgt, IOMMU_READ);
 			if (err == 0) {
 				__free_iova(&host->iova, alloc);
 				err = -EINVAL;
 				goto put;
 			}
 
-			job->unpins[job->num_unpins].size = gather_size;
-			phys_addr = iova_dma_addr(&host->iova, alloc);
-		} else if (sgt) {
-			err = dma_map_sgtable(host->dev, sgt, DMA_TO_DEVICE, 0);
-			if (err)
-				goto put;
-
-			job->unpins[job->num_unpins].dir = DMA_TO_DEVICE;
-			job->unpins[job->num_unpins].dev = host->dev;
-			phys_addr = sg_dma_address(sgt->sgl);
+			map->phys = iova_dma_addr(&host->iova, alloc);
+			map->size = gather_size;
 		}
 
-		job->addr_phys[job->num_unpins] = phys_addr;
-		job->gather_addr_phys[i] = phys_addr;
-
-		job->unpins[job->num_unpins].bo = g->bo;
-		job->unpins[job->num_unpins].sgt = sgt;
+		job->addr_phys[job->num_unpins] = map->phys;
+		job->unpins[job->num_unpins].map = map;
 		job->num_unpins++;
+
+		job->gather_addr_phys[i] = map->phys;
 	}
 
 	return 0;
@@ -690,22 +652,16 @@ void host1x_job_unpin(struct host1x_job *job)
 	unsigned int i;
 
 	for (i = 0; i < job->num_unpins; i++) {
-		struct host1x_job_unpin_data *unpin = &job->unpins[i];
-		struct device *dev = unpin->dev ?: host->dev;
-		struct sg_table *sgt = unpin->sgt;
-
-		if (!job->enable_firewall && unpin->size && host->domain) {
-			iommu_unmap(host->domain, job->addr_phys[i],
-				    unpin->size);
-			free_iova(&host->iova,
-				  iova_pfn(&host->iova, job->addr_phys[i]));
-		}
+		struct host1x_bo_mapping *map = job->unpins[i].map;
+		struct host1x_bo *bo = map->bo;
 
-		if (unpin->dev && sgt)
-			dma_unmap_sgtable(unpin->dev, sgt, unpin->dir, 0);
+		if (!job->enable_firewall && map->size && host->domain) {
+			iommu_unmap(host->domain, job->addr_phys[i], map->size);
+			free_iova(&host->iova, iova_pfn(&host->iova, job->addr_phys[i]));
+		}
 
-		host1x_bo_unpin(dev, unpin->bo, sgt);
-		host1x_bo_put(unpin->bo);
+		host1x_bo_unpin(map);
+		host1x_bo_put(bo);
 	}
 
 	job->num_unpins = 0;
diff --git a/drivers/gpu/host1x/job.h b/drivers/gpu/host1x/job.h
index b4428c5495c9..dad5a1946693 100644
--- a/drivers/gpu/host1x/job.h
+++ b/drivers/gpu/host1x/job.h
@@ -35,11 +35,7 @@ struct host1x_job_cmd {
 };
 
 struct host1x_job_unpin_data {
-	struct host1x_bo *bo;
-	struct sg_table *sgt;
-	struct device *dev;
-	size_t size;
-	enum dma_data_direction dir;
+	struct host1x_bo_mapping *map;
 };
 
 /*
diff --git a/include/linux/host1x.h b/include/linux/host1x.h
index 2a1b53ebee77..56851523378a 100644
--- a/include/linux/host1x.h
+++ b/include/linux/host1x.h
@@ -7,6 +7,7 @@
 #define __LINUX_HOST1X_H
 
 #include <linux/device.h>
+#include <linux/dma-direction.h>
 #include <linux/types.h>
 
 enum host1x_class {
@@ -84,12 +85,23 @@ struct host1x_client {
 struct host1x_bo;
 struct sg_table;
 
+struct host1x_bo_mapping {
+	struct dma_buf_attachment *attach;
+	enum dma_data_direction direction;
+	struct host1x_bo *bo;
+	struct sg_table *sgt;
+	unsigned int chunks;
+	struct device *dev;
+	dma_addr_t phys;
+	size_t size;
+};
+
 struct host1x_bo_ops {
 	struct host1x_bo *(*get)(struct host1x_bo *bo);
 	void (*put)(struct host1x_bo *bo);
-	struct sg_table *(*pin)(struct device *dev, struct host1x_bo *bo,
-				dma_addr_t *phys);
-	void (*unpin)(struct device *dev, struct sg_table *sgt);
+	struct host1x_bo_mapping *(*pin)(struct device *dev, struct host1x_bo *bo,
+					 enum dma_data_direction dir);
+	void (*unpin)(struct host1x_bo_mapping *map);
 	void *(*mmap)(struct host1x_bo *bo);
 	void (*munmap)(struct host1x_bo *bo, void *addr);
 };
@@ -114,17 +126,15 @@ static inline void host1x_bo_put(struct host1x_bo *bo)
 	bo->ops->put(bo);
 }
 
-static inline struct sg_table *host1x_bo_pin(struct device *dev,
-					     struct host1x_bo *bo,
-					     dma_addr_t *phys)
+static inline struct host1x_bo_mapping *host1x_bo_pin(struct device *dev, struct host1x_bo *bo,
+						      enum dma_data_direction dir)
 {
-	return bo->ops->pin(dev, bo, phys);
+	return bo->ops->pin(dev, bo, dir);
 }
 
-static inline void host1x_bo_unpin(struct device *dev, struct host1x_bo *bo,
-				   struct sg_table *sgt)
+static inline void host1x_bo_unpin(struct host1x_bo_mapping *map)
 {
-	bo->ops->unpin(dev, sgt);
+	map->bo->ops->unpin(map);
 }
 
 static inline void *host1x_bo_mmap(struct host1x_bo *bo)
From: Thierry Reding <treding@nvidia.com>
Add a cache of buffer object mappings, used to avoid mapping and unmapping buffer objects unnecessarily. Mappings are cached per client and stay hot until the buffer object is destroyed.
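As an illustration of the intended semantics (a sketch, not code from this patch): pinning the same buffer twice through the same cache returns the cached mapping with an extra reference, and the cache itself holds one reference that keeps the mapping alive until the buffer object is destroyed:

/* Sketch: repeated pins hit the cache; unpins only drop references. */
struct host1x_bo_mapping *a, *b;

a = host1x_bo_pin(client->dev, bo, DMA_TO_DEVICE, &client->cache);
b = host1x_bo_pin(client->dev, bo, DMA_TO_DEVICE, &client->cache); /* a == b */

host1x_bo_unpin(b); /* drops one reference; the mapping stays cached */
host1x_bo_unpin(a); /* likewise; the cache's own reference persists */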
Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/drm/tegra/gem.c   | 14 +++++--
 drivers/gpu/drm/tegra/plane.c |  2 +-
 drivers/gpu/drm/tegra/uapi.c  |  3 +-
 drivers/gpu/host1x/bus.c      | 78 +++++++++++++++++++++++++++++++++++
 drivers/gpu/host1x/dev.c      |  2 +
 drivers/gpu/host1x/dev.h      |  2 +
 drivers/gpu/host1x/job.c      |  4 +-
 include/linux/host1x.h        | 53 +++++++++++++++++++-----
 8 files changed, 141 insertions(+), 17 deletions(-)
diff --git a/drivers/gpu/drm/tegra/gem.c b/drivers/gpu/drm/tegra/gem.c
index 79a609f981d1..e3f0578fc411 100644
--- a/drivers/gpu/drm/tegra/gem.c
+++ b/drivers/gpu/drm/tegra/gem.c
@@ -39,6 +39,7 @@ static struct host1x_bo_mapping *tegra_bo_pin(struct device *dev, struct host1x_
 	if (!map)
 		return ERR_PTR(-ENOMEM);
 
+	kref_init(&map->ref);
 	map->bo = host1x_bo_get(bo);
 	map->direction = direction;
 	map->dev = dev;
@@ -129,9 +130,6 @@ static struct host1x_bo_mapping *tegra_bo_pin(struct device *dev, struct host1x_
 
 static void tegra_bo_unpin(struct host1x_bo_mapping *map)
 {
-	if (!map)
-		return;
-
 	if (map->attach) {
 		dma_buf_unmap_attachment(map->attach, map->sgt, map->direction);
 		dma_buf_detach(map->attach->dmabuf, map->attach);
@@ -465,8 +463,18 @@ static struct tegra_bo *tegra_bo_import(struct drm_device *drm,
 void tegra_bo_free_object(struct drm_gem_object *gem)
 {
 	struct tegra_drm *tegra = gem->dev->dev_private;
+	struct host1x_bo_mapping *mapping, *tmp;
 	struct tegra_bo *bo = to_tegra_bo(gem);
 
+	/* remove all mappings of this buffer object from any caches */
+	list_for_each_entry_safe(mapping, tmp, &bo->base.mappings, list) {
+		if (mapping->cache)
+			host1x_bo_unpin(mapping);
+		else
+			dev_err(gem->dev->dev, "mapping %p stale for device %s\n", mapping,
+				dev_name(mapping->dev));
+	}
+
 	if (tegra->domain)
 		tegra_bo_iommu_unmap(tegra, bo);
 
diff --git a/drivers/gpu/drm/tegra/plane.c b/drivers/gpu/drm/tegra/plane.c
index e579d966e8dc..f8e8afcbcbf2 100644
--- a/drivers/gpu/drm/tegra/plane.c
+++ b/drivers/gpu/drm/tegra/plane.c
@@ -145,7 +145,7 @@ static int tegra_dc_pin(struct tegra_dc *dc, struct tegra_plane_state *state)
 		struct tegra_bo *bo = tegra_fb_get_plane(state->base.fb, i);
 		struct host1x_bo_mapping *map;
 
-		map = host1x_bo_pin(dc->dev, &bo->base, DMA_TO_DEVICE);
+		map = host1x_bo_pin(dc->dev, &bo->base, DMA_TO_DEVICE, &dc->client.cache);
 		if (IS_ERR(map)) {
 			err = PTR_ERR(map);
 			goto unpin;
diff --git a/drivers/gpu/drm/tegra/uapi.c b/drivers/gpu/drm/tegra/uapi.c
index 794c400c38b1..66fe8717e747 100644
--- a/drivers/gpu/drm/tegra/uapi.c
+++ b/drivers/gpu/drm/tegra/uapi.c
@@ -201,7 +201,8 @@ int tegra_drm_ioctl_channel_map(struct drm_device *drm, void *data, struct drm_f
 		goto put_gem;
 	}
 
-	mapping->map = host1x_bo_pin(context->client->base.dev, mapping->bo, direction);
+	mapping->map = host1x_bo_pin(context->client->base.dev, mapping->bo, direction,
+				     &context->client->base.cache);
 	if (IS_ERR(mapping->map)) {
 		err = PTR_ERR(mapping->map);
 		goto put_gem;
diff --git a/drivers/gpu/host1x/bus.c b/drivers/gpu/host1x/bus.c
index 218e3718fd68..0b67d894470d 100644
--- a/drivers/gpu/host1x/bus.c
+++ b/drivers/gpu/host1x/bus.c
@@ -742,6 +742,7 @@ EXPORT_SYMBOL(host1x_driver_unregister);
  */
 void __host1x_client_init(struct host1x_client *client, struct lock_class_key *key)
 {
+	host1x_bo_cache_init(&client->cache);
 	INIT_LIST_HEAD(&client->list);
 	__mutex_init(&client->lock, "host1x client lock", key);
 	client->usecount = 0;
@@ -830,6 +831,8 @@ int host1x_client_unregister(struct host1x_client *client)
 
 	mutex_unlock(&clients_lock);
 
+	host1x_bo_cache_destroy(&client->cache);
+
 	return 0;
 }
 EXPORT_SYMBOL(host1x_client_unregister);
@@ -904,3 +907,78 @@ int host1x_client_resume(struct host1x_client *client)
 	return err;
 }
 EXPORT_SYMBOL(host1x_client_resume);
+
+struct host1x_bo_mapping *host1x_bo_pin(struct device *dev, struct host1x_bo *bo,
+					enum dma_data_direction dir,
+					struct host1x_bo_cache *cache)
+{
+	struct host1x_bo_mapping *mapping;
+
+	if (cache) {
+		mutex_lock(&cache->lock);
+
+		list_for_each_entry(mapping, &cache->mappings, entry) {
+			if (mapping->bo == bo && mapping->direction == dir) {
+				kref_get(&mapping->ref);
+				goto unlock;
+			}
+		}
+	}
+
+	mapping = bo->ops->pin(dev, bo, dir);
+	if (IS_ERR(mapping))
+		goto unlock;
+
+	spin_lock(&mapping->bo->lock);
+	list_add_tail(&mapping->list, &bo->mappings);
+	spin_unlock(&mapping->bo->lock);
+
+	if (cache) {
+		INIT_LIST_HEAD(&mapping->entry);
+		mapping->cache = cache;
+
+		list_add_tail(&mapping->entry, &cache->mappings);
+
+		/* bump reference count to track the copy in the cache */
+		kref_get(&mapping->ref);
+	}
+
+unlock:
+	if (cache)
+		mutex_unlock(&cache->lock);
+
+	return mapping;
+}
+EXPORT_SYMBOL(host1x_bo_pin);
+
+static void __host1x_bo_unpin(struct kref *ref)
+{
+	struct host1x_bo_mapping *mapping = to_host1x_bo_mapping(ref);
+
+	/*
+	 * When the last reference of the mapping goes away, make sure to remove the mapping from
+	 * the cache.
+	 */
+	if (mapping->cache)
+		list_del(&mapping->entry);
+
+	spin_lock(&mapping->bo->lock);
+	list_del(&mapping->list);
+	spin_unlock(&mapping->bo->lock);
+
+	mapping->bo->ops->unpin(mapping);
+}
+
+void host1x_bo_unpin(struct host1x_bo_mapping *mapping)
+{
+	struct host1x_bo_cache *cache = mapping->cache;
+
+	if (cache)
+		mutex_lock(&cache->lock);
+
+	kref_put(&mapping->ref, __host1x_bo_unpin);
+
+	if (cache)
+		mutex_unlock(&cache->lock);
+}
+EXPORT_SYMBOL(host1x_bo_unpin);
diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c
index e2ddf3fcaa9a..3d4cabdbc78d 100644
--- a/drivers/gpu/host1x/dev.c
+++ b/drivers/gpu/host1x/dev.c
@@ -404,6 +404,7 @@ static int host1x_probe(struct platform_device *pdev)
 	if (syncpt_irq < 0)
 		return syncpt_irq;
 
+	host1x_bo_cache_init(&host->cache);
 	mutex_init(&host->devices_lock);
 	INIT_LIST_HEAD(&host->devices);
 	INIT_LIST_HEAD(&host->list);
@@ -530,6 +531,7 @@ static int host1x_remove(struct platform_device *pdev)
 	reset_control_assert(host->rst);
 	clk_disable_unprepare(host->clk);
 	host1x_iommu_exit(host);
+	host1x_bo_cache_destroy(&host->cache);
 
 	return 0;
 }
diff --git a/drivers/gpu/host1x/dev.h b/drivers/gpu/host1x/dev.h
index fa6d4bc46e98..5b7fdea5d169 100644
--- a/drivers/gpu/host1x/dev.h
+++ b/drivers/gpu/host1x/dev.h
@@ -149,6 +149,8 @@ struct host1x {
 	struct list_head list;
 
 	struct device_dma_parameters dma_parms;
+
+	struct host1x_bo_cache cache;
 };
 
 void host1x_hypervisor_writel(struct host1x *host1x, u32 r, u32 v);
diff --git a/drivers/gpu/host1x/job.c b/drivers/gpu/host1x/job.c
index 7a27895e0d28..5b7dcbdefcab 100644
--- a/drivers/gpu/host1x/job.c
+++ b/drivers/gpu/host1x/job.c
@@ -177,7 +177,7 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job)
 			goto unpin;
 		}
 
-		map = host1x_bo_pin(dev, bo, direction);
+		map = host1x_bo_pin(dev, bo, direction, &client->cache);
 		if (IS_ERR(map)) {
 			err = PTR_ERR(map);
 			goto unpin;
@@ -224,7 +224,7 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job)
 			goto unpin;
 		}
 
-		map = host1x_bo_pin(host->dev, g->bo, DMA_TO_DEVICE);
+		map = host1x_bo_pin(host->dev, g->bo, DMA_TO_DEVICE, &host->cache);
 		if (IS_ERR(map)) {
 			err = PTR_ERR(map);
 			goto unpin;
diff --git a/include/linux/host1x.h b/include/linux/host1x.h
index 56851523378a..2ca53d7ed7ca 100644
--- a/include/linux/host1x.h
+++ b/include/linux/host1x.h
@@ -8,6 +8,7 @@
 #include <linux/device.h>
 #include <linux/dma-direction.h>
+#include <linux/spinlock.h>
 #include <linux/types.h>
 
 enum host1x_class {
@@ -26,6 +27,28 @@ struct iommu_group;
 
 u64 host1x_get_dma_mask(struct host1x *host1x);
 
+/**
+ * struct host1x_bo_cache - host1x buffer object cache
+ * @mappings: list of mappings
+ * @lock: synchronizes accesses to the list of mappings
+ */
+struct host1x_bo_cache {
+	struct list_head mappings;
+	struct mutex lock;
+};
+
+static inline void host1x_bo_cache_init(struct host1x_bo_cache *cache)
+{
+	INIT_LIST_HEAD(&cache->mappings);
+	mutex_init(&cache->lock);
+}
+
+static inline void host1x_bo_cache_destroy(struct host1x_bo_cache *cache)
+{
+	/* XXX warn if not empty? */
+	mutex_destroy(&cache->lock);
+}
+
 /**
  * struct host1x_client_ops - host1x client operations
  * @early_init: host1x client early initialization code
@@ -76,6 +99,8 @@ struct host1x_client {
 	struct host1x_client *parent;
 	unsigned int usecount;
 	struct mutex lock;
+
+	struct host1x_bo_cache cache;
 };
 
 /*
@@ -86,16 +111,26 @@ struct host1x_bo;
 struct sg_table;
 
 struct host1x_bo_mapping {
+	struct kref ref;
 	struct dma_buf_attachment *attach;
 	enum dma_data_direction direction;
+	struct list_head list;
 	struct host1x_bo *bo;
 	struct sg_table *sgt;
 	unsigned int chunks;
 	struct device *dev;
 	dma_addr_t phys;
 	size_t size;
+
+	struct host1x_bo_cache *cache;
+	struct list_head entry;
 };
 
+static inline struct host1x_bo_mapping *to_host1x_bo_mapping(struct kref *ref)
+{
+	return container_of(ref, struct host1x_bo_mapping, ref);
+}
+
 struct host1x_bo_ops {
 	struct host1x_bo *(*get)(struct host1x_bo *bo);
 	void (*put)(struct host1x_bo *bo);
@@ -108,11 +143,15 @@ struct host1x_bo_ops {
 
 struct host1x_bo {
 	const struct host1x_bo_ops *ops;
+	struct list_head mappings;
+	spinlock_t lock;
 };
 
 static inline void host1x_bo_init(struct host1x_bo *bo,
 				  const struct host1x_bo_ops *ops)
 {
+	INIT_LIST_HEAD(&bo->mappings);
+	spin_lock_init(&bo->lock);
 	bo->ops = ops;
 }
 
@@ -126,16 +165,10 @@ static inline void host1x_bo_put(struct host1x_bo *bo)
 	bo->ops->put(bo);
 }
 
-static inline struct host1x_bo_mapping *host1x_bo_pin(struct device *dev, struct host1x_bo *bo,
-						      enum dma_data_direction dir)
-{
-	return bo->ops->pin(dev, bo, dir);
-}
-
-static inline void host1x_bo_unpin(struct host1x_bo_mapping *map)
-{
-	map->bo->ops->unpin(map);
-}
+struct host1x_bo_mapping *host1x_bo_pin(struct device *dev, struct host1x_bo *bo,
+					enum dma_data_direction dir,
+					struct host1x_bo_cache *cache);
+void host1x_bo_unpin(struct host1x_bo_mapping *map);
 
 static inline void *host1x_bo_mmap(struct host1x_bo *bo)
 {
On 9/16/21 12:44 PM, Thierry Reding wrote:
> From: Thierry Reding <treding@nvidia.com>
> ...
> diff --git a/drivers/gpu/drm/tegra/uapi.c b/drivers/gpu/drm/tegra/uapi.c
> index 794c400c38b1..66fe8717e747 100644
> --- a/drivers/gpu/drm/tegra/uapi.c
> +++ b/drivers/gpu/drm/tegra/uapi.c
> @@ -201,7 +201,8 @@ int tegra_drm_ioctl_channel_map(struct drm_device *drm, void *data, struct drm_f
>  		goto put_gem;
>  	}
>  
> -	mapping->map = host1x_bo_pin(context->client->base.dev, mapping->bo, direction);
> +	mapping->map = host1x_bo_pin(context->client->base.dev, mapping->bo, direction,
> +				     &context->client->base.cache);
Do we need caching here? The map/unmap operation is explicit and should not be on the hot path, and this will complicate context isolation support where we cannot have an engine-specific cache.
Mikko
On Thu, Sep 16, 2021 at 01:39:49PM +0300, Mikko Perttunen wrote:
> On 9/16/21 12:44 PM, Thierry Reding wrote:
> > From: Thierry Reding <treding@nvidia.com>
> > ...
> > diff --git a/drivers/gpu/drm/tegra/uapi.c b/drivers/gpu/drm/tegra/uapi.c
> > index 794c400c38b1..66fe8717e747 100644
> > --- a/drivers/gpu/drm/tegra/uapi.c
> > +++ b/drivers/gpu/drm/tegra/uapi.c
> > @@ -201,7 +201,8 @@ int tegra_drm_ioctl_channel_map(struct drm_device *drm, void *data, struct drm_f
> >  		goto put_gem;
> >  	}
> >  
> > -	mapping->map = host1x_bo_pin(context->client->base.dev, mapping->bo, direction);
> > +	mapping->map = host1x_bo_pin(context->client->base.dev, mapping->bo, direction,
> > +				     &context->client->base.cache);
> 
> Do we need caching here? The map/unmap operation is explicit and should not
> be on the hot path, and this will complicate context isolation support
> where we cannot have an engine-specific cache.
Yeah, we can probably leave it out here. I more or less blindly added caches for everything, but really they're the most useful for cursors and large framebuffers that will otherwise get mapped/unmapped once every frame.
Thierry
From: Thierry Reding <treding@nvidia.com>
Instead of referencing the tegra_plane_funcs struct directly, use each plane's vtable. This makes the code more future-proof in case any of the planes ever use a different set of functions.
Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/drm/tegra/dc.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/tegra/dc.c b/drivers/gpu/drm/tegra/dc.c
index 16c7aabb94d3..2025b18d2f63 100644
--- a/drivers/gpu/drm/tegra/dc.c
+++ b/drivers/gpu/drm/tegra/dc.c
@@ -1267,9 +1267,9 @@ static struct drm_plane *tegra_dc_add_planes(struct drm_device *drm,
 			err = PTR_ERR(planes[i]);
 
 			while (i--)
-				tegra_plane_funcs.destroy(planes[i]);
+				planes[i]->funcs->destroy(planes[i]);
 
-			tegra_plane_funcs.destroy(primary);
+			primary->funcs->destroy(primary);
 			return ERR_PTR(err);
 		}
 	}
From: Thierry Reding <treding@nvidia.com>
Currently drm_gem_plane_helper_prepare_fb() doesn't return an error, but that may change in the future, so make sure to propagate any error codes that it might return.
Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/drm/tegra/plane.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/tegra/plane.c b/drivers/gpu/drm/tegra/plane.c
index f8e8afcbcbf2..321cb1f13da6 100644
--- a/drivers/gpu/drm/tegra/plane.c
+++ b/drivers/gpu/drm/tegra/plane.c
@@ -200,11 +200,14 @@ int tegra_plane_prepare_fb(struct drm_plane *plane,
 			   struct drm_plane_state *state)
 {
 	struct tegra_dc *dc = to_tegra_dc(state->crtc);
+	int err;
 
 	if (!state->fb)
 		return 0;
 
-	drm_gem_plane_helper_prepare_fb(plane, state);
+	err = drm_gem_plane_helper_prepare_fb(plane, state);
+	if (err < 0)
+		return err;
 
 	return tegra_dc_pin(dc, to_tegra_plane_state(state));
 }
From: Thierry Reding <treding@nvidia.com>
Add support for asynchronously updating the cursor plane; this enables support for the legacy cursor IOCTLs.
Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/drm/tegra/dc.c | 80 ++++++++++++++++++++++++++++++++++++--
 1 file changed, 76 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/tegra/dc.c b/drivers/gpu/drm/tegra/dc.c
index 2025b18d2f63..d9a8df51e3e2 100644
--- a/drivers/gpu/drm/tegra/dc.c
+++ b/drivers/gpu/drm/tegra/dc.c
@@ -890,11 +890,9 @@ static int tegra_cursor_atomic_check(struct drm_plane *plane,
 	return 0;
 }
 
-static void tegra_cursor_atomic_update(struct drm_plane *plane,
-				       struct drm_atomic_state *state)
+static void __tegra_cursor_atomic_update(struct drm_plane *plane,
+					 struct drm_plane_state *new_state)
 {
-	struct drm_plane_state *new_state = drm_atomic_get_new_plane_state(state,
-									   plane);
 	struct tegra_plane_state *tegra_plane_state = to_tegra_plane_state(new_state);
 	struct tegra_dc *dc = to_tegra_dc(new_state->crtc);
 	struct tegra_drm *tegra = plane->dev->dev_private;
@@ -990,6 +988,14 @@ static void tegra_cursor_atomic_update(struct drm_plane *plane,
 	tegra_dc_writel(dc, value, DC_DISP_CURSOR_POSITION);
 }
 
+static void tegra_cursor_atomic_update(struct drm_plane *plane,
+				       struct drm_atomic_state *state)
+{
+	struct drm_plane_state *new_state = drm_atomic_get_new_plane_state(state, plane);
+
+	__tegra_cursor_atomic_update(plane, new_state);
+}
+
 static void tegra_cursor_atomic_disable(struct drm_plane *plane,
 					struct drm_atomic_state *state)
 {
@@ -1009,12 +1015,78 @@ static void tegra_cursor_atomic_disable(struct drm_plane *plane,
 	tegra_dc_writel(dc, value, DC_DISP_DISP_WIN_OPTIONS);
 }
 
+static int tegra_cursor_atomic_async_check(struct drm_plane *plane, struct drm_atomic_state *state)
+{
+	struct drm_plane_state *new_state = drm_atomic_get_new_plane_state(state, plane);
+	struct drm_crtc_state *crtc_state;
+	int min_scale, max_scale;
+	int err;
+
+	crtc_state = drm_atomic_get_existing_crtc_state(state, new_state->crtc);
+	if (WARN_ON(!crtc_state))
+		return -EINVAL;
+
+	if (!crtc_state->active)
+		return -EINVAL;
+
+	if (plane->state->crtc != new_state->crtc ||
+	    plane->state->src_w != new_state->src_w ||
+	    plane->state->src_h != new_state->src_h ||
+	    plane->state->crtc_w != new_state->crtc_w ||
+	    plane->state->crtc_h != new_state->crtc_h ||
+	    plane->state->fb != new_state->fb ||
+	    plane->state->fb == NULL)
+		return -EINVAL;
+
+	min_scale = (1 << 16) / 8;
+	max_scale = (8 << 16) / 1;
+
+	err = drm_atomic_helper_check_plane_state(new_state, crtc_state, min_scale, max_scale,
+						  true, true);
+	if (err < 0)
+		return err;
+
+	if (new_state->visible != plane->state->visible)
+		return -EINVAL;
+
+	return 0;
+}
+
+static void tegra_cursor_atomic_async_update(struct drm_plane *plane,
+					     struct drm_atomic_state *state)
+{
+	struct drm_plane_state *new_state = drm_atomic_get_new_plane_state(state, plane);
+	struct tegra_dc *dc = to_tegra_dc(new_state->crtc);
+
+	plane->state->src_x = new_state->src_x;
+	plane->state->src_y = new_state->src_y;
+	plane->state->crtc_x = new_state->crtc_x;
+	plane->state->crtc_y = new_state->crtc_y;
+
+	if (new_state->visible) {
+		struct tegra_plane *p = to_tegra_plane(plane);
+		u32 value;
+
+		__tegra_cursor_atomic_update(plane, new_state);
+
+		value = (WIN_A_ACT_REQ << p->index) << 8 | GENERAL_UPDATE;
+		tegra_dc_writel(dc, value, DC_CMD_STATE_CONTROL);
+		(void)tegra_dc_readl(dc, DC_CMD_STATE_CONTROL);
+
+		value = (WIN_A_ACT_REQ << p->index) | GENERAL_ACT_REQ;
+		tegra_dc_writel(dc, value, DC_CMD_STATE_CONTROL);
+		(void)tegra_dc_readl(dc, DC_CMD_STATE_CONTROL);
+	}
+}
+
 static const struct drm_plane_helper_funcs tegra_cursor_plane_helper_funcs = {
 	.prepare_fb = tegra_plane_prepare_fb,
 	.cleanup_fb = tegra_plane_cleanup_fb,
 	.atomic_check = tegra_cursor_atomic_check,
 	.atomic_update = tegra_cursor_atomic_update,
 	.atomic_disable = tegra_cursor_atomic_disable,
+	.atomic_async_check = tegra_cursor_atomic_async_check,
+	.atomic_async_update = tegra_cursor_atomic_async_update,
 };
 
 static const uint64_t linear_modifiers[] = {
On Thu, Sep 16, 2021 at 11:43:59AM +0200, Thierry Reding wrote:
> From: Thierry Reding <treding@nvidia.com>
> 
> Add a few helpers to count the number of contiguous DMA chunks found
> in an SG table. This is useful to determine whether or not a mapping
> can be used by drivers whose devices need contiguous memory.
[...]
Is counting all of the blocks necessary, if all that needs to be checked is whether there is more than one contiguous block?
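For reference, an early-exit variant along the lines being suggested might look like this (the name sg_dma_is_contiguous() is hypothetical, not part of the series):

/* Hypothetical variant: bail out as soon as a second DMA chunk is found. */
static bool sg_dma_is_contiguous(struct scatterlist *sgl, unsigned int nents)
{
	struct scatterlist *s;
	dma_addr_t next = 0;
	bool seen = false;
	unsigned int i;

	for_each_sg(sgl, s, nents, i) {
		if (!sg_dma_len(s))
			continue;

		if (seen && sg_dma_address(s) != next)
			return false; /* found a second chunk */

		next = sg_dma_address(s) + sg_dma_len(s);
		seen = true;
	}

	return true;
}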
Best Regards
Michał Mirosław