This is another iteration for the split pagetable support based on the suggestions from Robin and Will [1].
Background: In order to support per-context pagetables the GPU needs to enable split tables so that we can store global buffers in the TTBR1 space leaving the GPU free to program the TTBR0 register with the address of a context specific pagetable.
If the DOMAIN_ATTR_SPLIT_TABLES attribute is set on the domain before attaching, the context bank assigned to the domain will be programmed to allow translations in the TTBR1 space. Translations in the TTBR0 region will be disallowed because, as Robin pointe out, having a un-programmed TTBR0 register is dangerous.
The driver can determine if TTBR1 was successfully programmed by querying DOMAIN_ATTR_SPLIT_TABLES after attaching. The domain geometry will also be updated to reflect the virtual address space for the TTBR1 range.
Upcoming changes will allow auxiliary domains to be attached to the device which will enable and program TTBR0.
This patchset is based on top of linux-next-20200409
Change log:
v6: Cleanups for the arm-smmu TTBR1 patch from Will Deacon v4: Only program TTBR1 when split pagetables are requested. TTBR0 will be enabled later when an auxiliary domain is attached v3: Remove the implementation specific and make split pagetable support part of the generic configuration
[1] https://lists.linuxfoundation.org/pipermail/iommu/2020-January/041373.html
Jordan Crouse (5): iommu: Add DOMAIN_ATTR_SPLIT_TABLES iommu/arm-smmu: Add support for TTBR1 drm/msm: Attach the IOMMU device during initialization drm/msm: Refactor address space initialization drm/msm/a6xx: Support split pagetables
drivers/gpu/drm/msm/adreno/a2xx_gpu.c | 16 ++++++++ drivers/gpu/drm/msm/adreno/a3xx_gpu.c | 1 + drivers/gpu/drm/msm/adreno/a4xx_gpu.c | 1 + drivers/gpu/drm/msm/adreno/a5xx_gpu.c | 1 + drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 51 ++++++++++++++++++++++++ drivers/gpu/drm/msm/adreno/adreno_gpu.c | 23 ++++++++--- drivers/gpu/drm/msm/adreno/adreno_gpu.h | 8 ++++ drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c | 18 +++------ drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c | 18 ++++----- drivers/gpu/drm/msm/disp/mdp5/mdp5_cfg.c | 4 -- drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c | 18 ++++----- drivers/gpu/drm/msm/msm_drv.h | 8 +--- drivers/gpu/drm/msm/msm_gem_vma.c | 36 +++-------------- drivers/gpu/drm/msm/msm_gpu.c | 49 +---------------------- drivers/gpu/drm/msm/msm_gpu.h | 4 +- drivers/gpu/drm/msm/msm_gpummu.c | 6 --- drivers/gpu/drm/msm/msm_iommu.c | 18 +++++---- drivers/gpu/drm/msm/msm_mmu.h | 1 - drivers/iommu/arm-smmu.c | 48 ++++++++++++++++++---- drivers/iommu/arm-smmu.h | 24 ++++++++--- include/linux/iommu.h | 2 + 21 files changed, 200 insertions(+), 155 deletions(-)
Everywhere an IOMMU object is created by msm_gpu_create_address_space the IOMMU device is attached immediately after. Instead of carrying around the infrastructure to do the attach from the device specific code do it directly in the msm_iommu_init() function. This gets it out of the way for more aggressive cleanups that follow.
Reviewed-by: Rob Clark robdclark@gmail.com Signed-off-by: Jordan Crouse jcrouse@codeaurora.org ---
drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c | 8 -------- drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c | 4 ---- drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c | 7 ------- drivers/gpu/drm/msm/msm_gem_vma.c | 23 +++++++++++++++++++---- drivers/gpu/drm/msm/msm_gpu.c | 11 +---------- drivers/gpu/drm/msm/msm_gpummu.c | 6 ------ drivers/gpu/drm/msm/msm_iommu.c | 15 +++++++-------- drivers/gpu/drm/msm/msm_mmu.h | 1 - 8 files changed, 27 insertions(+), 48 deletions(-)
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c index ce19f1d39367..6629a142574e 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c @@ -772,7 +772,6 @@ static int _dpu_kms_mmu_init(struct dpu_kms *dpu_kms) { struct iommu_domain *domain; struct msm_gem_address_space *aspace; - int ret;
domain = iommu_domain_alloc(&platform_bus_type); if (!domain) @@ -788,13 +787,6 @@ static int _dpu_kms_mmu_init(struct dpu_kms *dpu_kms) return PTR_ERR(aspace); }
- ret = aspace->mmu->funcs->attach(aspace->mmu); - if (ret) { - DPU_ERROR("failed to attach iommu %d\n", ret); - msm_gem_address_space_put(aspace); - return ret; - } - dpu_kms->base.aspace = aspace; return 0; } diff --git a/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c b/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c index dda05436f716..9dba37c6344f 100644 --- a/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c +++ b/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c @@ -518,10 +518,6 @@ struct msm_kms *mdp4_kms_init(struct drm_device *dev) }
kms->aspace = aspace; - - ret = aspace->mmu->funcs->attach(aspace->mmu); - if (ret) - goto fail; } else { DRM_DEV_INFO(dev->dev, "no iommu, fallback to phys " "contig buffers for scanout\n"); diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c index 47b989834af1..1e9ba99fd9eb 100644 --- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c +++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c @@ -644,13 +644,6 @@ struct msm_kms *mdp5_kms_init(struct drm_device *dev) }
kms->aspace = aspace; - - ret = aspace->mmu->funcs->attach(aspace->mmu); - if (ret) { - DRM_DEV_ERROR(&pdev->dev, "failed to attach iommu: %d\n", - ret); - goto fail; - } } else { DRM_DEV_INFO(&pdev->dev, "no iommu, fallback to phys contig buffers for scanout\n"); diff --git a/drivers/gpu/drm/msm/msm_gem_vma.c b/drivers/gpu/drm/msm/msm_gem_vma.c index 1af5354bcd46..91d993a16850 100644 --- a/drivers/gpu/drm/msm/msm_gem_vma.c +++ b/drivers/gpu/drm/msm/msm_gem_vma.c @@ -131,8 +131,8 @@ msm_gem_address_space_create(struct device *dev, struct iommu_domain *domain, const char *name) { struct msm_gem_address_space *aspace; - u64 size = domain->geometry.aperture_end - - domain->geometry.aperture_start; + u64 start = domain->geometry.aperture_start; + u64 size = domain->geometry.aperture_end - start;
aspace = kzalloc(sizeof(*aspace), GFP_KERNEL); if (!aspace) @@ -141,9 +141,18 @@ msm_gem_address_space_create(struct device *dev, struct iommu_domain *domain, spin_lock_init(&aspace->lock); aspace->name = name; aspace->mmu = msm_iommu_new(dev, domain); + if (IS_ERR(aspace->mmu)) { + int ret = PTR_ERR(aspace->mmu);
- drm_mm_init(&aspace->mm, (domain->geometry.aperture_start >> PAGE_SHIFT), - size >> PAGE_SHIFT); + kfree(aspace); + return ERR_PTR(ret); + } + + /* + * Attaching the IOMMU device changes the aperture values so use the + * cached values instead + */ + drm_mm_init(&aspace->mm, start >> PAGE_SHIFT, size >> PAGE_SHIFT);
kref_init(&aspace->kref);
@@ -164,6 +173,12 @@ msm_gem_address_space_create_a2xx(struct device *dev, struct msm_gpu *gpu, spin_lock_init(&aspace->lock); aspace->name = name; aspace->mmu = msm_gpummu_new(dev, gpu); + if (IS_ERR(aspace->mmu)) { + int ret = PTR_ERR(aspace->mmu); + + kfree(aspace); + return ERR_PTR(ret); + }
drm_mm_init(&aspace->mm, (va_start >> PAGE_SHIFT), size >> PAGE_SHIFT); diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c index 615c5cda5389..b6f0d7204da9 100644 --- a/drivers/gpu/drm/msm/msm_gpu.c +++ b/drivers/gpu/drm/msm/msm_gpu.c @@ -826,7 +826,6 @@ msm_gpu_create_address_space(struct msm_gpu *gpu, struct platform_device *pdev, uint64_t va_start, uint64_t va_end) { struct msm_gem_address_space *aspace; - int ret;
/* * Setup IOMMU.. eventually we will (I think) do this once per context @@ -851,17 +850,9 @@ msm_gpu_create_address_space(struct msm_gpu *gpu, struct platform_device *pdev, va_start, va_end); }
- if (IS_ERR(aspace)) { + if (IS_ERR(aspace)) DRM_DEV_ERROR(gpu->dev->dev, "failed to init mmu: %ld\n", PTR_ERR(aspace)); - return ERR_CAST(aspace); - } - - ret = aspace->mmu->funcs->attach(aspace->mmu); - if (ret) { - msm_gem_address_space_put(aspace); - return ERR_PTR(ret); - }
return aspace; } diff --git a/drivers/gpu/drm/msm/msm_gpummu.c b/drivers/gpu/drm/msm/msm_gpummu.c index 34980d8eb7ad..0ad0f848560a 100644 --- a/drivers/gpu/drm/msm/msm_gpummu.c +++ b/drivers/gpu/drm/msm/msm_gpummu.c @@ -21,11 +21,6 @@ struct msm_gpummu { #define GPUMMU_PAGE_SIZE SZ_4K #define TABLE_SIZE (sizeof(uint32_t) * GPUMMU_VA_RANGE / GPUMMU_PAGE_SIZE)
-static int msm_gpummu_attach(struct msm_mmu *mmu) -{ - return 0; -} - static void msm_gpummu_detach(struct msm_mmu *mmu) { } @@ -85,7 +80,6 @@ static void msm_gpummu_destroy(struct msm_mmu *mmu) }
static const struct msm_mmu_funcs funcs = { - .attach = msm_gpummu_attach, .detach = msm_gpummu_detach, .map = msm_gpummu_map, .unmap = msm_gpummu_unmap, diff --git a/drivers/gpu/drm/msm/msm_iommu.c b/drivers/gpu/drm/msm/msm_iommu.c index ad58cfe5998e..544c51955717 100644 --- a/drivers/gpu/drm/msm/msm_iommu.c +++ b/drivers/gpu/drm/msm/msm_iommu.c @@ -23,13 +23,6 @@ static int msm_fault_handler(struct iommu_domain *domain, struct device *dev, return 0; }
-static int msm_iommu_attach(struct msm_mmu *mmu) -{ - struct msm_iommu *iommu = to_msm_iommu(mmu); - - return iommu_attach_device(iommu->domain, mmu->dev); -} - static void msm_iommu_detach(struct msm_mmu *mmu) { struct msm_iommu *iommu = to_msm_iommu(mmu); @@ -66,7 +59,6 @@ static void msm_iommu_destroy(struct msm_mmu *mmu) }
static const struct msm_mmu_funcs funcs = { - .attach = msm_iommu_attach, .detach = msm_iommu_detach, .map = msm_iommu_map, .unmap = msm_iommu_unmap, @@ -76,6 +68,7 @@ static const struct msm_mmu_funcs funcs = { struct msm_mmu *msm_iommu_new(struct device *dev, struct iommu_domain *domain) { struct msm_iommu *iommu; + int ret;
iommu = kzalloc(sizeof(*iommu), GFP_KERNEL); if (!iommu) @@ -85,5 +78,11 @@ struct msm_mmu *msm_iommu_new(struct device *dev, struct iommu_domain *domain) msm_mmu_init(&iommu->base, dev, &funcs); iommu_set_fault_handler(domain, msm_fault_handler, iommu);
+ ret = iommu_attach_device(iommu->domain, dev); + if (ret) { + kfree(iommu); + return ERR_PTR(ret); + } + return &iommu->base; } diff --git a/drivers/gpu/drm/msm/msm_mmu.h b/drivers/gpu/drm/msm/msm_mmu.h index 67a623f14319..bae9e8e67ec1 100644 --- a/drivers/gpu/drm/msm/msm_mmu.h +++ b/drivers/gpu/drm/msm/msm_mmu.h @@ -10,7 +10,6 @@ #include <linux/iommu.h>
struct msm_mmu_funcs { - int (*attach)(struct msm_mmu *mmu); void (*detach)(struct msm_mmu *mmu); int (*map)(struct msm_mmu *mmu, uint64_t iova, struct sg_table *sgt, unsigned len, int prot);
Refactor how address space initialization works. Instead of having the address space function create the MMU object (and thus require separate but equal functions for gpummu and iommu) use a single function and pass the MMU struct in. Make the generic code cleaner by using target specific functions to create the address space so a2xx can do its own thing in its own space. For all the other targets use a generic helper to initialize IOMMU but leave the door open for newer targets to use customization if they need it.
Reviewed-by: Rob Clark robdclark@gmail.com Signed-off-by: Jordan Crouse jcrouse@codeaurora.org ---
drivers/gpu/drm/msm/adreno/a2xx_gpu.c | 16 ++++++++ drivers/gpu/drm/msm/adreno/a3xx_gpu.c | 1 + drivers/gpu/drm/msm/adreno/a4xx_gpu.c | 1 + drivers/gpu/drm/msm/adreno/a5xx_gpu.c | 1 + drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 1 + drivers/gpu/drm/msm/adreno/adreno_gpu.c | 23 ++++++++--- drivers/gpu/drm/msm/adreno/adreno_gpu.h | 8 ++++ drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c | 10 ++--- drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c | 14 ++++--- drivers/gpu/drm/msm/disp/mdp5/mdp5_cfg.c | 4 -- drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c | 11 ++++- drivers/gpu/drm/msm/msm_drv.h | 8 +--- drivers/gpu/drm/msm/msm_gem_vma.c | 51 +++--------------------- drivers/gpu/drm/msm/msm_gpu.c | 40 +------------------ drivers/gpu/drm/msm/msm_gpu.h | 4 +- drivers/gpu/drm/msm/msm_iommu.c | 3 ++ 16 files changed, 82 insertions(+), 114 deletions(-)
diff --git a/drivers/gpu/drm/msm/adreno/a2xx_gpu.c b/drivers/gpu/drm/msm/adreno/a2xx_gpu.c index 1f83bc18d500..60f6472a3e58 100644 --- a/drivers/gpu/drm/msm/adreno/a2xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a2xx_gpu.c @@ -401,6 +401,21 @@ static struct msm_gpu_state *a2xx_gpu_state_get(struct msm_gpu *gpu) return state; }
+static struct msm_gem_address_space * +a2xx_create_address_space(struct msm_gpu *gpu, struct platform_device *pdev) +{ + struct msm_mmu *mmu = msm_gpummu_new(&pdev->dev, gpu); + struct msm_gem_address_space *aspace; + + aspace = msm_gem_address_space_create(mmu, "gpu", SZ_16M, + SZ_16M + 0xfff * SZ_64K); + + if (IS_ERR(aspace) && !IS_ERR(mmu)) + mmu->funcs->destroy(mmu); + + return aspace; +} + /* Register offset defines for A2XX - copy of A3XX */ static const unsigned int a2xx_register_offsets[REG_ADRENO_REGISTER_MAX] = { REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE, REG_AXXX_CP_RB_BASE), @@ -429,6 +444,7 @@ static const struct adreno_gpu_funcs funcs = { #endif .gpu_state_get = a2xx_gpu_state_get, .gpu_state_put = adreno_gpu_state_put, + .create_address_space = a2xx_create_address_space, }, };
diff --git a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c index b67f88872726..0a5ea9f56cb8 100644 --- a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c @@ -441,6 +441,7 @@ static const struct adreno_gpu_funcs funcs = { #endif .gpu_state_get = a3xx_gpu_state_get, .gpu_state_put = adreno_gpu_state_put, + .create_address_space = adreno_iommu_create_address_space, }, };
diff --git a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c b/drivers/gpu/drm/msm/adreno/a4xx_gpu.c index 253d8d85daad..b626afb0627d 100644 --- a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a4xx_gpu.c @@ -532,6 +532,7 @@ static const struct adreno_gpu_funcs funcs = { #endif .gpu_state_get = a4xx_gpu_state_get, .gpu_state_put = adreno_gpu_state_put, + .create_address_space = adreno_iommu_create_address_space, }, .get_timestamp = a4xx_get_timestamp, }; diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c index 724024a2243a..e81b1deaf535 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c @@ -1439,6 +1439,7 @@ static const struct adreno_gpu_funcs funcs = { .gpu_busy = a5xx_gpu_busy, .gpu_state_get = a5xx_gpu_state_get, .gpu_state_put = a5xx_gpu_state_put, + .create_address_space = adreno_iommu_create_address_space, }, .get_timestamp = a5xx_get_timestamp, }; diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c index 68af24150de5..02ade43d6335 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c @@ -847,6 +847,7 @@ static const struct adreno_gpu_funcs funcs = { .gpu_state_get = a6xx_gpu_state_get, .gpu_state_put = a6xx_gpu_state_put, #endif + .create_address_space = adreno_iommu_create_address_space, }, .get_timestamp = a6xx_get_timestamp, }; diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c index 1d5c43c22269..4f0ef431a8ea 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c @@ -185,6 +185,23 @@ int adreno_zap_shader_load(struct msm_gpu *gpu, u32 pasid) return zap_shader_load_mdt(gpu, adreno_gpu->info->zapfw, pasid); }
+struct msm_gem_address_space * +adreno_iommu_create_address_space(struct msm_gpu *gpu, + struct platform_device *pdev) +{ + struct iommu_domain *iommu = iommu_domain_alloc(&platform_bus_type); + struct msm_mmu *mmu = msm_iommu_new(&pdev->dev, iommu); + struct msm_gem_address_space *aspace; + + aspace = msm_gem_address_space_create(mmu, "gpu", SZ_16M, + 0xfffffff); + + if (IS_ERR(aspace) && !IS_ERR(mmu)) + mmu->funcs->destroy(mmu); + + return aspace; +} + int adreno_get_param(struct msm_gpu *gpu, uint32_t param, uint64_t *value) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); @@ -988,12 +1005,6 @@ int adreno_gpu_init(struct drm_device *drm, struct platform_device *pdev,
adreno_gpu_config.ioname = "kgsl_3d0_reg_memory";
- adreno_gpu_config.va_start = SZ_16M; - adreno_gpu_config.va_end = 0xffffffff; - /* maximum range of a2xx mmu */ - if (adreno_is_a2xx(adreno_gpu)) - adreno_gpu_config.va_end = SZ_16M + 0xfff * SZ_64K; - adreno_gpu_config.nr_rings = nr_rings;
adreno_get_pwrlevels(&pdev->dev, gpu); diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.h b/drivers/gpu/drm/msm/adreno/adreno_gpu.h index 9ff4e550e7bd..2256f2b46f39 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.h +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.h @@ -272,6 +272,14 @@ void adreno_gpu_state_destroy(struct msm_gpu_state *state); int adreno_gpu_state_get(struct msm_gpu *gpu, struct msm_gpu_state *state); int adreno_gpu_state_put(struct msm_gpu_state *state);
+/* + * Common helper function to initialize the default address space for arm-smmu + * attached targets + */ +struct msm_gem_address_space * +adreno_iommu_create_address_space(struct msm_gpu *gpu, + struct platform_device *pdev); + /* * For a5xx and a6xx targets load the zap shader that is used to pull the GPU * out of secure mode diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c index 6629a142574e..b8615d4fe8a3 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c @@ -772,18 +772,18 @@ static int _dpu_kms_mmu_init(struct dpu_kms *dpu_kms) { struct iommu_domain *domain; struct msm_gem_address_space *aspace; + struct msm_mmu *mmu;
domain = iommu_domain_alloc(&platform_bus_type); if (!domain) return 0;
- domain->geometry.aperture_start = 0x1000; - domain->geometry.aperture_end = 0xffffffff; + mmu = msm_iommu_new(dpu_kms->dev->dev, domain); + aspace = msm_gem_address_space_create(mmu, "dpu1", + 0x1000, 0xfffffff);
- aspace = msm_gem_address_space_create(dpu_kms->dev->dev, - domain, "dpu1"); if (IS_ERR(aspace)) { - iommu_domain_free(domain); + mmu->funcs->destroy(mmu); return PTR_ERR(aspace); }
diff --git a/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c b/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c index 9dba37c6344f..08897184b1d9 100644 --- a/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c +++ b/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c @@ -510,9 +510,15 @@ struct msm_kms *mdp4_kms_init(struct drm_device *dev) mdelay(16);
if (config->iommu) { - aspace = msm_gem_address_space_create(&pdev->dev, - config->iommu, "mdp4"); + struct msm_mmu *mmu = msm_iommu_new(&pdev->dev, + config->iommu); + + aspace = msm_gem_address_space_create(mmu, + "mdp4", 0x1000, 0xffffffff); + if (IS_ERR(aspace)) { + if (!IS_ERR(mmu)) + mmu->funcs->destroy(mmu); ret = PTR_ERR(aspace); goto fail; } @@ -565,10 +571,6 @@ static struct mdp4_platform_config *mdp4_get_config(struct platform_device *dev) /* TODO: Chips that aren't apq8064 have a 200 Mhz max_clk */ config.max_clk = 266667000; config.iommu = iommu_domain_alloc(&platform_bus_type); - if (config.iommu) { - config.iommu->geometry.aperture_start = 0x1000; - config.iommu->geometry.aperture_end = 0xffffffff; - }
return &config; } diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_cfg.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_cfg.c index e3c4c250238b..247f381b60fa 100644 --- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_cfg.c +++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_cfg.c @@ -941,10 +941,6 @@ static struct mdp5_cfg_platform *mdp5_get_config(struct platform_device *dev) static struct mdp5_cfg_platform config = {};
config.iommu = iommu_domain_alloc(&platform_bus_type); - if (config.iommu) { - config.iommu->geometry.aperture_start = 0x1000; - config.iommu->geometry.aperture_end = 0xffffffff; - }
return &config; } diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c index 1e9ba99fd9eb..afd676155a9c 100644 --- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c +++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c @@ -632,13 +632,20 @@ struct msm_kms *mdp5_kms_init(struct drm_device *dev) mdelay(16);
if (config->platform.iommu) { + struct msm_mmu *mmu; + iommu_dev = &pdev->dev; if (!dev_iommu_fwspec_get(iommu_dev)) iommu_dev = iommu_dev->parent;
- aspace = msm_gem_address_space_create(iommu_dev, - config->platform.iommu, "mdp5"); + mmu = msm_iommu_new(iommu_dev, config->platform.iommu); + + aspace = msm_gem_address_space_create(mmu, "mdp5", + 0x1000, 0xffffffff); + if (IS_ERR(aspace)) { + if (!IS_ERR(mmu)) + mmu->funcs->destroy(mmu); ret = PTR_ERR(aspace); goto fail; } diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h index 194d900a460e..4788adcb3d9c 100644 --- a/drivers/gpu/drm/msm/msm_drv.h +++ b/drivers/gpu/drm/msm/msm_drv.h @@ -250,12 +250,8 @@ void msm_gem_close_vma(struct msm_gem_address_space *aspace, void msm_gem_address_space_put(struct msm_gem_address_space *aspace);
struct msm_gem_address_space * -msm_gem_address_space_create(struct device *dev, struct iommu_domain *domain, - const char *name); - -struct msm_gem_address_space * -msm_gem_address_space_create_a2xx(struct device *dev, struct msm_gpu *gpu, - const char *name, uint64_t va_start, uint64_t va_end); +msm_gem_address_space_create(struct msm_mmu *mmu, const char *name, + u64 va_start, u64 size);
int msm_register_mmu(struct drm_device *dev, struct msm_mmu *mmu); void msm_unregister_mmu(struct drm_device *dev, struct msm_mmu *mmu); diff --git a/drivers/gpu/drm/msm/msm_gem_vma.c b/drivers/gpu/drm/msm/msm_gem_vma.c index 91d993a16850..d0c355a21aca 100644 --- a/drivers/gpu/drm/msm/msm_gem_vma.c +++ b/drivers/gpu/drm/msm/msm_gem_vma.c @@ -125,46 +125,14 @@ int msm_gem_init_vma(struct msm_gem_address_space *aspace, return 0; }
- struct msm_gem_address_space * -msm_gem_address_space_create(struct device *dev, struct iommu_domain *domain, - const char *name) +msm_gem_address_space_create(struct msm_mmu *mmu, const char *name, + u64 va_start, u64 size) { struct msm_gem_address_space *aspace; - u64 start = domain->geometry.aperture_start; - u64 size = domain->geometry.aperture_end - start; - - aspace = kzalloc(sizeof(*aspace), GFP_KERNEL); - if (!aspace) - return ERR_PTR(-ENOMEM);
- spin_lock_init(&aspace->lock); - aspace->name = name; - aspace->mmu = msm_iommu_new(dev, domain); - if (IS_ERR(aspace->mmu)) { - int ret = PTR_ERR(aspace->mmu); - - kfree(aspace); - return ERR_PTR(ret); - } - - /* - * Attaching the IOMMU device changes the aperture values so use the - * cached values instead - */ - drm_mm_init(&aspace->mm, start >> PAGE_SHIFT, size >> PAGE_SHIFT); - - kref_init(&aspace->kref); - - return aspace; -} - -struct msm_gem_address_space * -msm_gem_address_space_create_a2xx(struct device *dev, struct msm_gpu *gpu, - const char *name, uint64_t va_start, uint64_t va_end) -{ - struct msm_gem_address_space *aspace; - u64 size = va_end - va_start; + if (IS_ERR(mmu)) + return ERR_CAST(mmu);
aspace = kzalloc(sizeof(*aspace), GFP_KERNEL); if (!aspace) @@ -172,16 +140,9 @@ msm_gem_address_space_create_a2xx(struct device *dev, struct msm_gpu *gpu,
spin_lock_init(&aspace->lock); aspace->name = name; - aspace->mmu = msm_gpummu_new(dev, gpu); - if (IS_ERR(aspace->mmu)) { - int ret = PTR_ERR(aspace->mmu); - - kfree(aspace); - return ERR_PTR(ret); - } + aspace->mmu = mmu;
- drm_mm_init(&aspace->mm, (va_start >> PAGE_SHIFT), - size >> PAGE_SHIFT); + drm_mm_init(&aspace->mm, va_start >> PAGE_SHIFT, size >> PAGE_SHIFT);
kref_init(&aspace->kref);
diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c index b6f0d7204da9..a22d30622306 100644 --- a/drivers/gpu/drm/msm/msm_gpu.c +++ b/drivers/gpu/drm/msm/msm_gpu.c @@ -821,42 +821,6 @@ static int get_clocks(struct platform_device *pdev, struct msm_gpu *gpu) return 0; }
-static struct msm_gem_address_space * -msm_gpu_create_address_space(struct msm_gpu *gpu, struct platform_device *pdev, - uint64_t va_start, uint64_t va_end) -{ - struct msm_gem_address_space *aspace; - - /* - * Setup IOMMU.. eventually we will (I think) do this once per context - * and have separate page tables per context. For now, to keep things - * simple and to get something working, just use a single address space: - */ - if (!adreno_is_a2xx(to_adreno_gpu(gpu))) { - struct iommu_domain *iommu = iommu_domain_alloc(&platform_bus_type); - if (!iommu) - return NULL; - - iommu->geometry.aperture_start = va_start; - iommu->geometry.aperture_end = va_end; - - DRM_DEV_INFO(gpu->dev->dev, "%s: using IOMMU\n", gpu->name); - - aspace = msm_gem_address_space_create(&pdev->dev, iommu, "gpu"); - if (IS_ERR(aspace)) - iommu_domain_free(iommu); - } else { - aspace = msm_gem_address_space_create_a2xx(&pdev->dev, gpu, "gpu", - va_start, va_end); - } - - if (IS_ERR(aspace)) - DRM_DEV_ERROR(gpu->dev->dev, "failed to init mmu: %ld\n", - PTR_ERR(aspace)); - - return aspace; -} - int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev, struct msm_gpu *gpu, const struct msm_gpu_funcs *funcs, const char *name, struct msm_gpu_config *config) @@ -929,8 +893,8 @@ int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev,
msm_devfreq_init(gpu);
- gpu->aspace = msm_gpu_create_address_space(gpu, pdev, - config->va_start, config->va_end); + + gpu->aspace = gpu->funcs->create_address_space(gpu, pdev);
if (gpu->aspace == NULL) DRM_DEV_INFO(drm->dev, "%s: no IOMMU, fallback to VRAM carveout!\n", name); diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h index be5bc2e8425c..d496b68e58a9 100644 --- a/drivers/gpu/drm/msm/msm_gpu.h +++ b/drivers/gpu/drm/msm/msm_gpu.h @@ -21,8 +21,6 @@ struct msm_gpu_state;
struct msm_gpu_config { const char *ioname; - uint64_t va_start; - uint64_t va_end; unsigned int nr_rings; };
@@ -64,6 +62,8 @@ struct msm_gpu_funcs { int (*gpu_state_put)(struct msm_gpu_state *state); unsigned long (*gpu_get_freq)(struct msm_gpu *gpu); void (*gpu_set_freq)(struct msm_gpu *gpu, unsigned long freq); + struct msm_gem_address_space *(*create_address_space) + (struct msm_gpu *gpu, struct platform_device *pdev); };
struct msm_gpu { diff --git a/drivers/gpu/drm/msm/msm_iommu.c b/drivers/gpu/drm/msm/msm_iommu.c index 544c51955717..e773ef8c7203 100644 --- a/drivers/gpu/drm/msm/msm_iommu.c +++ b/drivers/gpu/drm/msm/msm_iommu.c @@ -70,6 +70,9 @@ struct msm_mmu *msm_iommu_new(struct device *dev, struct iommu_domain *domain) struct msm_iommu *iommu; int ret;
+ if (!domain) + return ERR_PTR(-ENODEV); + iommu = kzalloc(sizeof(*iommu), GFP_KERNEL); if (!iommu) return ERR_PTR(-ENOMEM);
On Thu, Apr 9, 2020 at 4:34 PM Jordan Crouse jcrouse@codeaurora.org wrote:
Refactor how address space initialization works. Instead of having the address space function create the MMU object (and thus require separate but equal functions for gpummu and iommu) use a single function and pass the MMU struct in. Make the generic code cleaner by using target specific functions to create the address space so a2xx can do its own thing in its own space. For all the other targets use a generic helper to initialize IOMMU but leave the door open for newer targets to use customization if they need it.
I'm seeing regressions in dEQP-VK.memory.allocation.random.* on cheza after this commit. The symptom is that large allocations fail with -ENOSPC from MSM_GEM_INFO(IOVA).
Possibly relevant change from having stuffed some debug info in:
before: [ 3.791436] [drm:msm_gem_address_space_create] *ERROR* msmgem address space create: 0x1000000 + 0xfeffffff [ 3.801672] platform 506a000.gmu: Adding to iommu group 6 [ 3.807359] [drm:msm_gem_address_space_create] *ERROR* msmgem address space create: 0x0 + 0x7fffffff [ 3.817140] msm ae00000.mdss: bound 5000000.gpu (ops a3xx_ops) [ 3.823212] msm_dpu ae01000.mdp: [drm:msm_ioremap] *ERROR* failed to get memory resource: vbif_nrt [ 3.832429] msm_dpu ae01000.mdp: [drm:msm_ioremap] *ERROR* failed to get memory resource: regdma [ 3.841478] [drm:dpu_kms_hw_init:878] dpu hardware revision:0x40000000 [ 3.848193] [drm:msm_gem_address_space_create] *ERROR* msmgem address space create: 0x1000 + 0xffffefff
after:
[ 3.798707] [drm:msm_gem_address_space_create] *ERROR* msmgem address space create: 0x1000000 + 0xfffffff [ 3.808731] platform 506a000.gmu: Adding to iommu group 6 [ 3.814440] [drm:msm_gem_address_space_create] *ERROR* msmgem address space create: 0x0 + 0x7fffffff [ 3.820494] hub 2-1:1.0: USB hub found [ 3.824108] msm ae00000.mdss: bound 5000000.gpu (ops a3xx_ops) [ 3.828554] hub 2-1:1.0: 4 ports detected [ 3.833756] msm_dpu ae01000.mdp: [drm:msm_ioremap] *ERROR* failed to get memory resource: vbif_nrt [ 3.847038] msm_dpu ae01000.mdp: [drm:msm_ioremap] *ERROR* failed to get memory resource: regdma [ 3.856095] [drm:dpu_kms_hw_init:878] dpu hardware revision:0x40000000 [ 3.862840] [drm:msm_gem_address_space_create] *ERROR* msmgem address space create: 0x1000 + 0xfffffff
256MB for GMU address space?
On Wed, Jun 17, 2020 at 1:16 PM Eric Anholt eric@anholt.net wrote:
On Thu, Apr 9, 2020 at 4:34 PM Jordan Crouse jcrouse@codeaurora.org wrote:
Refactor how address space initialization works. Instead of having the address space function create the MMU object (and thus require separate but equal functions for gpummu and iommu) use a single function and pass the MMU struct in. Make the generic code cleaner by using target specific functions to create the address space so a2xx can do its own thing in its own space. For all the other targets use a generic helper to initialize IOMMU but leave the door open for newer targets to use customization if they need it.
I'm seeing regressions in dEQP-VK.memory.allocation.random.* on cheza after this commit. The symptom is that large allocations fail with -ENOSPC from MSM_GEM_INFO(IOVA).
Possibly relevant change from having stuffed some debug info in:
before: [ 3.791436] [drm:msm_gem_address_space_create] *ERROR* msmgem address space create: 0x1000000 + 0xfeffffff [ 3.801672] platform 506a000.gmu: Adding to iommu group 6 [ 3.807359] [drm:msm_gem_address_space_create] *ERROR* msmgem address space create: 0x0 + 0x7fffffff [ 3.817140] msm ae00000.mdss: bound 5000000.gpu (ops a3xx_ops) [ 3.823212] msm_dpu ae01000.mdp: [drm:msm_ioremap] *ERROR* failed to get memory resource: vbif_nrt [ 3.832429] msm_dpu ae01000.mdp: [drm:msm_ioremap] *ERROR* failed to get memory resource: regdma [ 3.841478] [drm:dpu_kms_hw_init:878] dpu hardware revision:0x40000000 [ 3.848193] [drm:msm_gem_address_space_create] *ERROR* msmgem address space create: 0x1000 + 0xffffefff
after:
[ 3.798707] [drm:msm_gem_address_space_create] *ERROR* msmgem address space create: 0x1000000 + 0xfffffff [ 3.808731] platform 506a000.gmu: Adding to iommu group 6 [ 3.814440] [drm:msm_gem_address_space_create] *ERROR* msmgem address space create: 0x0 + 0x7fffffff [ 3.820494] hub 2-1:1.0: USB hub found [ 3.824108] msm ae00000.mdss: bound 5000000.gpu (ops a3xx_ops) [ 3.828554] hub 2-1:1.0: 4 ports detected [ 3.833756] msm_dpu ae01000.mdp: [drm:msm_ioremap] *ERROR* failed to get memory resource: vbif_nrt [ 3.847038] msm_dpu ae01000.mdp: [drm:msm_ioremap] *ERROR* failed to get memory resource: regdma [ 3.856095] [drm:dpu_kms_hw_init:878] dpu hardware revision:0x40000000 [ 3.862840] [drm:msm_gem_address_space_create] *ERROR* msmgem address space create: 0x1000 + 0xfffffff
256MB for GMU address space?
Found the bug, fixes at the last 2 commits of https://github.com/anholt/linux/tree/drm-msm-address-space
I'm going to try having another go at convincing gmail to let git send-email through, but all the howtos in the world didn't work last time (gsuite has different behavior from normal gmail).
Attempt to enable split pagetables if the arm-smmu driver supports it. This will move the default address space from the default region to the address range assigned to TTBR1. The behavior should be transparent to the driver for now but it gets the default buffers out of the way when we want to start swapping TTBR0 for context-specific pagetables.
Signed-off-by: Jordan Crouse jcrouse@codeaurora.org ---
drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 52 ++++++++++++++++++++++++++- 1 file changed, 51 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c index 02ade43d6335..b27daa77723c 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c @@ -825,6 +825,56 @@ static unsigned long a6xx_gpu_busy(struct msm_gpu *gpu) return (unsigned long)busy_time; }
+static struct msm_gem_address_space * +a6xx_create_address_space(struct msm_gpu *gpu, struct platform_device *pdev) +{ + struct iommu_domain *iommu = iommu_domain_alloc(&platform_bus_type); + struct msm_gem_address_space *aspace; + struct msm_mmu *mmu; + u64 start, size; + u32 val = 1; + int ret; + + if (!iommu) + return ERR_PTR(-ENOMEM); + + /* + * Try to request split pagetables - the request has to be made before + * the domian is attached + */ + iommu_domain_set_attr(iommu, DOMAIN_ATTR_SPLIT_TABLES, &val); + + mmu = msm_iommu_new(&pdev->dev, iommu); + if (IS_ERR(mmu)) { + iommu_domain_free(iommu); + return ERR_CAST(mmu); + } + + /* + * After the domain is attached, see if the split tables were actually + * successful. + */ + ret = iommu_domain_get_attr(iommu, DOMAIN_ATTR_SPLIT_TABLES, &val); + if (!ret && val) { + /* + * The aperture start will be at the beginning of the TTBR1 + * space so use that as a base + */ + start = iommu->geometry.aperture_start; + size = 0xffffffff; + } else { + /* Otherwise use the legacy 32 bit region */ + start = SZ_16M; + size = 0xffffffff - SZ_16M; + } + + aspace = msm_gem_address_space_create(mmu, "gpu", start, size); + if (IS_ERR(aspace)) + iommu_domain_free(iommu); + + return aspace; +} + static const struct adreno_gpu_funcs funcs = { .base = { .get_param = adreno_get_param, @@ -847,7 +897,7 @@ static const struct adreno_gpu_funcs funcs = { .gpu_state_get = a6xx_gpu_state_get, .gpu_state_put = a6xx_gpu_state_put, #endif - .create_address_space = adreno_iommu_create_address_space, + .create_address_space = a6xx_create_address_space, }, .get_timestamp = a6xx_get_timestamp, };
dri-devel@lists.freedesktop.org