Some clients have a requirement to sandbox memory mappings for security and for advanced features like SVM. This series enables per-instance pagetables as auxiliary domains in the arm-smmu driver and adds per-instance pagetable support for the Adreno GPU.
This patchset builds on the split pagetable support from [1]. In that series the TTBR1 address space is programmed for the default ("master") domain, which enables support for auxiliary domains. Each new auxiliary domain allocates a pagetable that the leaf driver can program through the usual IOMMU APIs, and the physical address of that pagetable can be queried.
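To make the flow concrete, here is a rough sketch of how a leaf driver is expected to consume an auxiliary domain through the standard IOMMU API (simplified from the msm changes later in this series; error handling trimmed, and 'dev' stands in for the client device):

	struct iommu_domain *domain;
	u64 ptbase;
	int ret;

	domain = iommu_domain_alloc(&platform_bus_type);

	/* Attach as an auxiliary domain rather than as the master domain */
	ret = iommu_aux_attach_device(domain, dev);

	/* Query the physical base address of the per-instance pagetable */
	ret = iommu_domain_get_attr(domain, DOMAIN_ATTR_PTBASE, &ptbase);

	/* Buffers are then mapped with the usual iommu_map()/iommu_unmap() */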
In the SMMU driver, the first auxiliary domain will enable and program the TTBR0 space; subsequent auxiliary domains won't touch the hardware. Similarly, when the last auxiliary domain is detached, the TTBR0 region will be disabled again.
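For illustration, the intended enable/disable behavior looks roughly like this (pseudocode only - the helper names here are made up, and the real logic lives in the arm-smmu.c changes in this series):

	/* On attach: only the first auxiliary domain programs TTBR0 */
	if (smmu_domain->aux_count++ == 0)
		program_ttbr0(smmu_domain, pgtbl_cfg);

	/* On detach: the last auxiliary domain to leave disables TTBR0 */
	if (--smmu_domain->aux_count == 0)
		disable_ttbr0(smmu_domain);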
In the Adreno driver, each new file descriptor instance will create a new auxiliary domain / pagetable and use it for all the memory allocations of that instance. The driver will query the base address of each pagetable and switch between them dynamically using the built-in table switch capability of the GPU. If any of these features fails, the driver automatically falls back to using the default (global) pagetable.
This patchset had previously been submitted as [2] but has been significantly modified since then.
Jordan
[1] https://lists.linuxfoundation.org/pipermail/iommu/2020-January/041438.html
[2] https://patchwork.freedesktop.org/series/57441/
Jordan Crouse (6):
  iommu: Add DOMAIN_ATTR_PTBASE
  arm/smmu: Add auxiliary domain support for arm-smmuv2
  drm/msm/adreno: Add support for IOMMU auxiliary domains
  drm/msm: Add support to create target specific address spaces
  drm/msm/gpu: Add ttbr0 to the memptrs
  drm/msm/a6xx: Support per-instance pagetables
 drivers/gpu/drm/msm/adreno/a6xx_gpu.c |  89 +++++++++++++
 drivers/gpu/drm/msm/msm_drv.c         |  22 +++-
 drivers/gpu/drm/msm/msm_gpu.h         |   2 +
 drivers/gpu/drm/msm/msm_iommu.c       |  72 +++++++++++
 drivers/gpu/drm/msm/msm_mmu.h         |   3 +
 drivers/gpu/drm/msm/msm_ringbuffer.h  |   1 +
 drivers/iommu/arm-smmu.c              | 230 +++++++++++++++++++++++++++++++---
 drivers/iommu/arm-smmu.h              |   3 +
 include/linux/iommu.h                 |   2 +
 9 files changed, 405 insertions(+), 19 deletions(-)
Add support for creating an auxiliary domain from the IOMMU device to implement per-instance pagetables. Also add a helper function to return the pagetable base address (ttbr) and asid to the caller so that the GPU target code can set up the pagetable switch.
Signed-off-by: Jordan Crouse <jcrouse@codeaurora.org>
---
 drivers/gpu/drm/msm/msm_iommu.c | 72 +++++++++++++++++++++++++++++++++++++++++
 drivers/gpu/drm/msm/msm_mmu.h   |  3 ++
 2 files changed, 75 insertions(+)
diff --git a/drivers/gpu/drm/msm/msm_iommu.c b/drivers/gpu/drm/msm/msm_iommu.c
index e773ef8..df0d70a 100644
--- a/drivers/gpu/drm/msm/msm_iommu.c
+++ b/drivers/gpu/drm/msm/msm_iommu.c
@@ -7,9 +7,17 @@
 #include "msm_drv.h"
 #include "msm_mmu.h"
 
+/*
+ * It is up to us to assign ASIDs for our instances. Start at 32 to give a
+ * cushion to account for ASIDs assigned to real context banks.
+ */
+static int msm_iommu_asid = 32;
+
 struct msm_iommu {
 	struct msm_mmu base;
 	struct iommu_domain *domain;
+	u64 ttbr;
+	int asid;
 };
 
 #define to_msm_iommu(x) container_of(x, struct msm_iommu, base)
@@ -58,6 +66,20 @@ static void msm_iommu_destroy(struct msm_mmu *mmu)
 	kfree(iommu);
 }
 
+static void msm_iommu_aux_detach(struct msm_mmu *mmu)
+{
+	struct msm_iommu *iommu = to_msm_iommu(mmu);
+
+	iommu_aux_detach_device(iommu->domain, mmu->dev);
+}
+
+static const struct msm_mmu_funcs aux_funcs = {
+	.detach = msm_iommu_aux_detach,
+	.map = msm_iommu_map,
+	.unmap = msm_iommu_unmap,
+	.destroy = msm_iommu_destroy,
+};
+
 static const struct msm_mmu_funcs funcs = {
 	.detach = msm_iommu_detach,
 	.map = msm_iommu_map,
@@ -65,6 +87,56 @@ static const struct msm_mmu_funcs funcs = {
 	.destroy = msm_iommu_destroy,
 };
 
+bool msm_iommu_get_ptinfo(struct msm_mmu *mmu, u64 *ttbr, u32 *asid)
+{
+	struct msm_iommu *iommu = to_msm_iommu(mmu);
+
+	if (!iommu->ttbr)
+		return false;
+
+	if (ttbr)
+		*ttbr = iommu->ttbr;
+	if (asid)
+		*asid = iommu->asid;
+
+	return true;
+}
+
+struct msm_mmu *msm_iommu_new_instance(struct device *dev,
+		struct iommu_domain *domain)
+{
+	struct msm_iommu *iommu;
+	u64 ptbase;
+	int ret;
+
+	ret = iommu_aux_attach_device(domain, dev);
+	if (ret)
+		return ERR_PTR(ret);
+
+	ret = iommu_domain_get_attr(domain, DOMAIN_ATTR_PTBASE, &ptbase);
+	if (ret) {
+		iommu_aux_detach_device(domain, dev);
+		return ERR_PTR(ret);
+	}
+
+	iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
+	if (!iommu) {
+		iommu_aux_detach_device(domain, dev);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	iommu->domain = domain;
+	iommu->ttbr = ptbase;
+	iommu->asid = msm_iommu_asid++;
+
+	if (msm_iommu_asid > 0xff)
+		msm_iommu_asid = 32;
+
+	msm_mmu_init(&iommu->base, dev, &aux_funcs);
+
+	return &iommu->base;
+}
+
 struct msm_mmu *msm_iommu_new(struct device *dev, struct iommu_domain *domain)
 {
 	struct msm_iommu *iommu;
diff --git a/drivers/gpu/drm/msm/msm_mmu.h b/drivers/gpu/drm/msm/msm_mmu.h
index bae9e8e..65a5cb2 100644
--- a/drivers/gpu/drm/msm/msm_mmu.h
+++ b/drivers/gpu/drm/msm/msm_mmu.h
@@ -32,6 +32,9 @@ static inline void msm_mmu_init(struct msm_mmu *mmu, struct device *dev,
 }
 
 struct msm_mmu *msm_iommu_new(struct device *dev, struct iommu_domain *domain);
+struct msm_mmu *msm_iommu_new_instance(struct device *dev,
+		struct iommu_domain *domain);
+bool msm_iommu_get_ptinfo(struct msm_mmu *mmu, u64 *ttbr, u32 *asid);
 struct msm_mmu *msm_gpummu_new(struct device *dev, struct msm_gpu *gpu);
 
 static inline void msm_mmu_set_fault_handler(struct msm_mmu *mmu, void *arg,
Add support to create a GPU target specific address space for a context. Targets that support per-instance pagetables will return a new address space set up for the instance if possible; otherwise the default global device pagetable is used.
Signed-off-by: Jordan Crouse <jcrouse@codeaurora.org>
---
 drivers/gpu/drm/msm/msm_drv.c | 22 +++++++++++++++++++---
 drivers/gpu/drm/msm/msm_gpu.h |  2 ++
 2 files changed, 21 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
index e4b750b..e485dc1 100644
--- a/drivers/gpu/drm/msm/msm_drv.c
+++ b/drivers/gpu/drm/msm/msm_drv.c
@@ -585,6 +585,18 @@ static void load_gpu(struct drm_device *dev)
 	mutex_unlock(&init_lock);
 }
 
+static struct msm_gem_address_space *context_address_space(struct msm_gpu *gpu)
+{
+	if (!gpu)
+		return NULL;
+
+	if (gpu->funcs->create_instance_space)
+		return gpu->funcs->create_instance_space(gpu);
+
+	/* If all else fails use the default global space */
+	return gpu->aspace;
+}
+
 static int context_init(struct drm_device *dev, struct drm_file *file)
 {
 	struct msm_drm_private *priv = dev->dev_private;
@@ -596,7 +608,7 @@ static int context_init(struct drm_device *dev, struct drm_file *file)
 
 	msm_submitqueue_init(dev, ctx);
 
-	ctx->aspace = priv->gpu ? priv->gpu->aspace : NULL;
+	ctx->aspace = context_address_space(priv->gpu);
 	file->driver_priv = ctx;
 
 	return 0;
@@ -612,8 +624,12 @@ static int msm_open(struct drm_device *dev, struct drm_file *file)
 	return context_init(dev, file);
 }
 
-static void context_close(struct msm_file_private *ctx)
+static void context_close(struct msm_drm_private *priv,
+		struct msm_file_private *ctx)
 {
+	if (priv->gpu && ctx->aspace != priv->gpu->aspace)
+		msm_gem_address_space_put(ctx->aspace);
+
 	msm_submitqueue_close(ctx);
 	kfree(ctx);
 }
@@ -628,7 +644,7 @@ static void msm_postclose(struct drm_device *dev, struct drm_file *file)
 		priv->lastctx = NULL;
 	mutex_unlock(&dev->struct_mutex);
 
-	context_close(ctx);
+	context_close(priv, ctx);
 }
 
 static irqreturn_t msm_irq(int irq, void *arg)
diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h
index d496b68..76636da 100644
--- a/drivers/gpu/drm/msm/msm_gpu.h
+++ b/drivers/gpu/drm/msm/msm_gpu.h
@@ -64,6 +64,8 @@ struct msm_gpu_funcs {
 	void (*gpu_set_freq)(struct msm_gpu *gpu, unsigned long freq);
 	struct msm_gem_address_space *(*create_address_space)
 		(struct msm_gpu *gpu, struct platform_device *pdev);
+	struct msm_gem_address_space *(*create_instance_space)
+		(struct msm_gpu *gpu);
 };
 
 struct msm_gpu {
Targets that support per-instance pagetable switching will have to keep track of which pagetable belongs to each instance to be able to recover it after preemption.
Signed-off-by: Jordan Crouse <jcrouse@codeaurora.org>
---
 drivers/gpu/drm/msm/msm_ringbuffer.h | 1 +
 1 file changed, 1 insertion(+)
diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.h b/drivers/gpu/drm/msm/msm_ringbuffer.h
index 7764373..c5822bd 100644
--- a/drivers/gpu/drm/msm/msm_ringbuffer.h
+++ b/drivers/gpu/drm/msm/msm_ringbuffer.h
@@ -29,6 +29,7 @@ struct msm_gpu_submit_stats {
 struct msm_rbmemptrs {
 	volatile uint32_t rptr;
 	volatile uint32_t fence;
+	volatile uint64_t ttbr0;
 
 	volatile struct msm_gpu_submit_stats stats[MSM_GPU_SUBMIT_STATS_COUNT];
 };
Add support for per-instance pagetables for a6xx targets. Handle split pagetables, create a new pagetable instance when the needed IOMMU support exists, and insert the necessary PM4 commands to trigger a pagetable switch at the beginning of a user command.
Signed-off-by: Jordan Crouse <jcrouse@codeaurora.org>
---
 drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 89 +++++++++++++++++++++++++++++++++++
 1 file changed, 89 insertions(+)
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
index 9bec603c..e1a257e 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
@@ -12,6 +12,62 @@
 
 #define GPU_PAS_ID 13
 
+static void a6xx_set_pagetable(struct msm_gpu *gpu, struct msm_ringbuffer *ring,
+		struct msm_file_private *ctx)
+{
+	u64 ttbr;
+	u32 asid;
+
+	if (!msm_iommu_get_ptinfo(ctx->aspace->mmu, &ttbr, &asid))
+		return;
+
+	ttbr = ttbr | ((u64) asid) << 48;
+
+	/* Turn off protected mode */
+	OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
+	OUT_RING(ring, 0);
+
+	/* Turn on APRIV mode to access critical regions */
+	OUT_PKT4(ring, REG_A6XX_CP_MISC_CNTL, 1);
+	OUT_RING(ring, 1);
+
+	/* Make sure the ME is synchronized before starting the update */
+	OUT_PKT7(ring, CP_WAIT_FOR_ME, 0);
+
+	/* Execute the table update */
+	OUT_PKT7(ring, CP_SMMU_TABLE_UPDATE, 4);
+	OUT_RING(ring, lower_32_bits(ttbr));
+	OUT_RING(ring, upper_32_bits(ttbr));
+	/* CONTEXTIDR is currently unused */
+	OUT_RING(ring, 0);
+	/* CONTEXTBANK is currently unused */
+	OUT_RING(ring, 0);
+
+	/*
+	 * Write the new TTBR0 to the preemption records - this will be used to
+	 * reload the pagetable if the current ring gets preempted out.
+	 */
+	OUT_PKT7(ring, CP_MEM_WRITE, 4);
+	OUT_RING(ring, lower_32_bits(rbmemptr(ring, ttbr0)));
+	OUT_RING(ring, upper_32_bits(rbmemptr(ring, ttbr0)));
+	OUT_RING(ring, lower_32_bits(ttbr));
+	OUT_RING(ring, upper_32_bits(ttbr));
+
+	/* Invalidate the draw state so we start off fresh */
+	OUT_PKT7(ring, CP_SET_DRAW_STATE, 3);
+	OUT_RING(ring, 0x40000);
+	OUT_RING(ring, 1);
+	OUT_RING(ring, 0);
+
+	/* Turn off APRIV */
+	OUT_PKT4(ring, REG_A6XX_CP_MISC_CNTL, 1);
+	OUT_RING(ring, 0);
+
+	/* Turn protected mode back on */
+	OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
+	OUT_RING(ring, 1);
+}
+
 static inline bool _a6xx_check_idle(struct msm_gpu *gpu)
 {
 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
@@ -89,6 +145,8 @@ static void a6xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
 	struct msm_ringbuffer *ring = submit->ring;
 	unsigned int i;
 
+	a6xx_set_pagetable(gpu, ring, ctx);
+
 	get_stats_counter(ring, REG_A6XX_RBBM_PERFCTR_CP_0_LO,
 		rbmemptr_stats(ring, index, cpcycles_start));
@@ -878,6 +936,36 @@ static unsigned long a6xx_gpu_busy(struct msm_gpu *gpu)
 	return (unsigned long)busy_time;
 }
 
+static struct msm_gem_address_space*
+a6xx_create_instance_space(struct msm_gpu *gpu)
+{
+	struct msm_gem_address_space *aspace;
+	struct iommu_domain *iommu;
+	struct msm_mmu *mmu;
+
+	if (!iommu_dev_has_feature(&gpu->pdev->dev, IOMMU_DEV_FEAT_AUX))
+		return gpu->aspace;
+
+	iommu = iommu_domain_alloc(&platform_bus_type);
+	if (!iommu)
+		return gpu->aspace;
+
+	mmu = msm_iommu_new_instance(&gpu->pdev->dev, iommu);
+	if (IS_ERR(mmu)) {
+		iommu_domain_free(iommu);
+		return gpu->aspace;
+	}
+
+	aspace = msm_gem_address_space_create(mmu, "gpu",
+		0x100000000ULL, 0x1ffffffffULL);
+	if (IS_ERR(aspace)) {
+		mmu->funcs->destroy(mmu);
+		return gpu->aspace;
+	}
+
+	return aspace;
+}
+
 static struct msm_gem_address_space *
 a6xx_create_address_space(struct msm_gpu *gpu, struct platform_device *pdev)
 {
@@ -951,6 +1039,7 @@ static const struct adreno_gpu_funcs funcs = {
 		.gpu_state_put = a6xx_gpu_state_put,
 #endif
 		.create_address_space = a6xx_create_address_space,
+		.create_instance_space = a6xx_create_instance_space,
 	},
 	.get_timestamp = a6xx_get_timestamp,
 };