This is a new refresh of support for auxiliary domains for arm-smmu-v2 and per-instance pagetables for drm/msm. The big change here from past efforts is that outside of creating a single aux-domain to enable TTBR0 all of the per-instance pagetables are created and managed exclusively in drm/msm without involving the arm-smmu driver. This fits in with the suggested model of letting the GPU hardware do what it needs and leave the arm-smmu driver blissfully unaware.
Almost. In order to set up the io-pgtable properly in drm/msm we need to query the pagetable configuration from the current active domain and we need to rely on the iommu API to flush TLBs after a unmap. In the future we can optimize this in the drm/msm driver to track the state of the TLBs but for now the big hammer lets us get off the ground.
This series is built on the split pagetable support [1].
[1] https://patchwork.kernel.org/patch/11628543/
v2: Remove unneeded cruft in the a6xx page switch sequence
Jordan Crouse (6): iommu/arm-smmu: Add auxiliary domain support for arm-smmuv2 iommu/io-pgtable: Allow a pgtable implementation to skip TLB operations iommu/arm-smmu: Add a domain attribute to pass the pagetable config drm/msm: Add support to create a local pagetable drm/msm: Add support for address space instances drm/msm/a6xx: Add support for per-instance pagetables
drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 43 +++++ drivers/gpu/drm/msm/msm_drv.c | 15 +- drivers/gpu/drm/msm/msm_drv.h | 4 + drivers/gpu/drm/msm/msm_gem_vma.c | 9 + drivers/gpu/drm/msm/msm_gpu.c | 17 ++ drivers/gpu/drm/msm/msm_gpu.h | 5 + drivers/gpu/drm/msm/msm_gpummu.c | 2 +- drivers/gpu/drm/msm/msm_iommu.c | 180 +++++++++++++++++++- drivers/gpu/drm/msm/msm_mmu.h | 16 +- drivers/gpu/drm/msm/msm_ringbuffer.h | 1 + drivers/iommu/arm-smmu.c | 231 ++++++++++++++++++++++++-- drivers/iommu/arm-smmu.h | 1 + include/linux/io-pgtable.h | 11 +- include/linux/iommu.h | 1 + 14 files changed, 507 insertions(+), 29 deletions(-)
Add support to create a io-pgtable for use by targets that support per-instance pagetables. In order to support per-instance pagetables the GPU SMMU device needs to have the qcom,adreno-smmu compatible string and split pagetables and auxiliary domains need to be supported and enabled.
Signed-off-by: Jordan Crouse jcrouse@codeaurora.org ---
drivers/gpu/drm/msm/msm_gpummu.c | 2 +- drivers/gpu/drm/msm/msm_iommu.c | 180 ++++++++++++++++++++++++++++++- drivers/gpu/drm/msm/msm_mmu.h | 16 ++- 3 files changed, 195 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/msm/msm_gpummu.c b/drivers/gpu/drm/msm/msm_gpummu.c index 310a31b05faa..aab121f4beb7 100644 --- a/drivers/gpu/drm/msm/msm_gpummu.c +++ b/drivers/gpu/drm/msm/msm_gpummu.c @@ -102,7 +102,7 @@ struct msm_mmu *msm_gpummu_new(struct device *dev, struct msm_gpu *gpu) }
gpummu->gpu = gpu; - msm_mmu_init(&gpummu->base, dev, &funcs); + msm_mmu_init(&gpummu->base, dev, &funcs, MSM_MMU_GPUMMU);
return &gpummu->base; } diff --git a/drivers/gpu/drm/msm/msm_iommu.c b/drivers/gpu/drm/msm/msm_iommu.c index 1b6635504069..f455c597f76d 100644 --- a/drivers/gpu/drm/msm/msm_iommu.c +++ b/drivers/gpu/drm/msm/msm_iommu.c @@ -4,15 +4,192 @@ * Author: Rob Clark robdclark@gmail.com */
+#include <linux/io-pgtable.h> #include "msm_drv.h" #include "msm_mmu.h"
struct msm_iommu { struct msm_mmu base; struct iommu_domain *domain; + struct iommu_domain *aux_domain; }; + #define to_msm_iommu(x) container_of(x, struct msm_iommu, base)
+struct msm_iommu_pagetable { + struct msm_mmu base; + struct msm_mmu *parent; + struct io_pgtable_ops *pgtbl_ops; + phys_addr_t ttbr; + u32 asid; +}; + +static struct msm_iommu_pagetable *to_pagetable(struct msm_mmu *mmu) +{ + return container_of(mmu, struct msm_iommu_pagetable, base); +} + +static int msm_iommu_pagetable_unmap(struct msm_mmu *mmu, u64 iova, + size_t size) +{ + struct msm_iommu_pagetable *pagetable = to_pagetable(mmu); + struct io_pgtable_ops *ops = pagetable->pgtbl_ops; + size_t unmapped = 0; + + /* Unmap the block one page at a time */ + while (size) { + unmapped += ops->unmap(ops, iova, 4096, NULL); + iova += 4096; + size -= 4096; + } + + iommu_flush_tlb_all(to_msm_iommu(pagetable->parent)->domain); + + return (unmapped == size) ? 0 : -EINVAL; +} + +static int msm_iommu_pagetable_map(struct msm_mmu *mmu, u64 iova, + struct sg_table *sgt, size_t len, int prot) +{ + struct msm_iommu_pagetable *pagetable = to_pagetable(mmu); + struct io_pgtable_ops *ops = pagetable->pgtbl_ops; + struct scatterlist *sg; + size_t mapped = 0; + u64 addr = iova; + unsigned int i; + + for_each_sg(sgt->sgl, sg, sgt->nents, i) { + size_t size = sg->length; + phys_addr_t phys = sg_phys(sg); + + /* Map the block one page at a time */ + while (size) { + if (ops->map(ops, addr, phys, 4096, prot)) { + msm_iommu_pagetable_unmap(mmu, iova, mapped); + return -EINVAL; + } + + phys += 4096; + addr += 4096; + size -= 4096; + mapped += 4096; + } + } + + return 0; +} + +static void msm_iommu_pagetable_destroy(struct msm_mmu *mmu) +{ + struct msm_iommu_pagetable *pagetable = to_pagetable(mmu); + + free_io_pgtable_ops(pagetable->pgtbl_ops); + kfree(pagetable); +} + +/* + * Given a parent device, create and return an aux domain. This will enable the + * TTBR0 region + */ +static struct iommu_domain *msm_iommu_get_aux_domain(struct msm_mmu *parent) +{ + struct msm_iommu *iommu = to_msm_iommu(parent); + struct iommu_domain *domain; + int ret; + + if (iommu->aux_domain) + return iommu->aux_domain; + + if (!iommu_dev_has_feature(parent->dev, IOMMU_DEV_FEAT_AUX)) + return ERR_PTR(-ENODEV); + + domain = iommu_domain_alloc(&platform_bus_type); + if (!domain) + return ERR_PTR(-ENODEV); + + ret = iommu_aux_attach_device(domain, parent->dev); + if (ret) { + iommu_domain_free(domain); + return ERR_PTR(ret); + } + + iommu->aux_domain = domain; + return domain; +} + +int msm_iommu_pagetable_params(struct msm_mmu *mmu, + phys_addr_t *ttbr, int *asid) +{ + struct msm_iommu_pagetable *pagetable; + + if (mmu->type != MSM_MMU_IOMMU_PAGETABLE) + return -EINVAL; + + pagetable = to_pagetable(mmu); + + if (ttbr) + *ttbr = pagetable->ttbr; + + if (asid) + *asid = pagetable->asid; + + return 0; +} + +static const struct msm_mmu_funcs pagetable_funcs = { + .map = msm_iommu_pagetable_map, + .unmap = msm_iommu_pagetable_unmap, + .destroy = msm_iommu_pagetable_destroy, +}; + +struct msm_mmu *msm_iommu_pagetable_create(struct msm_mmu *parent) +{ + static int next_asid = 16; + struct msm_iommu_pagetable *pagetable; + struct iommu_domain *aux_domain; + struct io_pgtable_cfg cfg; + int ret; + + /* Make sure that the parent has a aux domain attached */ + aux_domain = msm_iommu_get_aux_domain(parent); + if (IS_ERR(aux_domain)) + return ERR_CAST(aux_domain); + + /* Get the pagetable configuration from the aux domain */ + ret = iommu_domain_get_attr(aux_domain, DOMAIN_ATTR_PGTABLE_CFG, &cfg); + if (ret) + return ERR_PTR(ret); + + pagetable = kzalloc(sizeof(*pagetable), GFP_KERNEL); + if (!pagetable) + return ERR_PTR(-ENOMEM); + + msm_mmu_init(&pagetable->base, parent->dev, &pagetable_funcs, + MSM_MMU_IOMMU_PAGETABLE); + + cfg.tlb = NULL; + + pagetable->pgtbl_ops = alloc_io_pgtable_ops(ARM_64_LPAE_S1, + &cfg, aux_domain); + + if (!pagetable->pgtbl_ops) { + kfree(pagetable); + return ERR_PTR(-ENOMEM); + } + + + /* Needed later for TLB flush */ + pagetable->parent = parent; + pagetable->ttbr = cfg.arm_lpae_s1_cfg.ttbr; + + pagetable->asid = next_asid; + next_asid = (next_asid + 1) % 255; + if (next_asid < 16) + next_asid = 16; + + return &pagetable->base; +} + static int msm_fault_handler(struct iommu_domain *domain, struct device *dev, unsigned long iova, int flags, void *arg) { @@ -40,6 +217,7 @@ static int msm_iommu_map(struct msm_mmu *mmu, uint64_t iova, if (iova & BIT_ULL(48)) iova |= GENMASK_ULL(63, 49);
+ ret = iommu_map_sg(iommu->domain, iova, sgt->sgl, sgt->nents, prot); WARN_ON(!ret);
@@ -85,7 +263,7 @@ struct msm_mmu *msm_iommu_new(struct device *dev, struct iommu_domain *domain) return ERR_PTR(-ENOMEM);
iommu->domain = domain; - msm_mmu_init(&iommu->base, dev, &funcs); + msm_mmu_init(&iommu->base, dev, &funcs, MSM_MMU_IOMMU); iommu_set_fault_handler(domain, msm_fault_handler, iommu);
ret = iommu_attach_device(iommu->domain, dev); diff --git a/drivers/gpu/drm/msm/msm_mmu.h b/drivers/gpu/drm/msm/msm_mmu.h index 3a534ee59bf6..61ade89d9e48 100644 --- a/drivers/gpu/drm/msm/msm_mmu.h +++ b/drivers/gpu/drm/msm/msm_mmu.h @@ -17,18 +17,26 @@ struct msm_mmu_funcs { void (*destroy)(struct msm_mmu *mmu); };
+enum msm_mmu_type { + MSM_MMU_GPUMMU, + MSM_MMU_IOMMU, + MSM_MMU_IOMMU_PAGETABLE, +}; + struct msm_mmu { const struct msm_mmu_funcs *funcs; struct device *dev; int (*handler)(void *arg, unsigned long iova, int flags); void *arg; + enum msm_mmu_type type; };
static inline void msm_mmu_init(struct msm_mmu *mmu, struct device *dev, - const struct msm_mmu_funcs *funcs) + const struct msm_mmu_funcs *funcs, enum msm_mmu_type type) { mmu->dev = dev; mmu->funcs = funcs; + mmu->type = type; }
struct msm_mmu *msm_iommu_new(struct device *dev, struct iommu_domain *domain); @@ -41,7 +49,13 @@ static inline void msm_mmu_set_fault_handler(struct msm_mmu *mmu, void *arg, mmu->handler = handler; }
+struct msm_mmu *msm_iommu_pagetable_create(struct msm_mmu *parent); + void msm_gpummu_params(struct msm_mmu *mmu, dma_addr_t *pt_base, dma_addr_t *tran_error);
+ +int msm_iommu_pagetable_params(struct msm_mmu *mmu, phys_addr_t *ttbr, + int *asid); + #endif /* __MSM_MMU_H__ */
On 2020-06-26 21:04, Jordan Crouse wrote:
Add support to create a io-pgtable for use by targets that support per-instance pagetables. In order to support per-instance pagetables the GPU SMMU device needs to have the qcom,adreno-smmu compatible string and split pagetables and auxiliary domains need to be supported and enabled.
Signed-off-by: Jordan Crouse jcrouse@codeaurora.org
drivers/gpu/drm/msm/msm_gpummu.c | 2 +- drivers/gpu/drm/msm/msm_iommu.c | 180 ++++++++++++++++++++++++++++++- drivers/gpu/drm/msm/msm_mmu.h | 16 ++- 3 files changed, 195 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/msm/msm_gpummu.c b/drivers/gpu/drm/msm/msm_gpummu.c index 310a31b05faa..aab121f4beb7 100644 --- a/drivers/gpu/drm/msm/msm_gpummu.c +++ b/drivers/gpu/drm/msm/msm_gpummu.c @@ -102,7 +102,7 @@ struct msm_mmu *msm_gpummu_new(struct device *dev, struct msm_gpu *gpu) }
gpummu->gpu = gpu;
- msm_mmu_init(&gpummu->base, dev, &funcs);
msm_mmu_init(&gpummu->base, dev, &funcs, MSM_MMU_GPUMMU);
return &gpummu->base; }
diff --git a/drivers/gpu/drm/msm/msm_iommu.c b/drivers/gpu/drm/msm/msm_iommu.c index 1b6635504069..f455c597f76d 100644 --- a/drivers/gpu/drm/msm/msm_iommu.c +++ b/drivers/gpu/drm/msm/msm_iommu.c @@ -4,15 +4,192 @@
- Author: Rob Clark robdclark@gmail.com
*/
+#include <linux/io-pgtable.h> #include "msm_drv.h" #include "msm_mmu.h"
struct msm_iommu { struct msm_mmu base; struct iommu_domain *domain;
- struct iommu_domain *aux_domain; };
- #define to_msm_iommu(x) container_of(x, struct msm_iommu, base)
+struct msm_iommu_pagetable {
- struct msm_mmu base;
- struct msm_mmu *parent;
- struct io_pgtable_ops *pgtbl_ops;
- phys_addr_t ttbr;
- u32 asid;
+};
+static struct msm_iommu_pagetable *to_pagetable(struct msm_mmu *mmu) +{
- return container_of(mmu, struct msm_iommu_pagetable, base);
+}
+static int msm_iommu_pagetable_unmap(struct msm_mmu *mmu, u64 iova,
size_t size)
+{
- struct msm_iommu_pagetable *pagetable = to_pagetable(mmu);
- struct io_pgtable_ops *ops = pagetable->pgtbl_ops;
- size_t unmapped = 0;
- /* Unmap the block one page at a time */
- while (size) {
unmapped += ops->unmap(ops, iova, 4096, NULL);
iova += 4096;
size -= 4096;
- }
- iommu_flush_tlb_all(to_msm_iommu(pagetable->parent)->domain);
- return (unmapped == size) ? 0 : -EINVAL;
+}
Remember in patch #1 when you said "Then 'domain' can be used like any other iommu domain to map and unmap iova addresses in the pagetable."?
This appears to be very much not that :/
Robin.
+static int msm_iommu_pagetable_map(struct msm_mmu *mmu, u64 iova,
struct sg_table *sgt, size_t len, int prot)
+{
- struct msm_iommu_pagetable *pagetable = to_pagetable(mmu);
- struct io_pgtable_ops *ops = pagetable->pgtbl_ops;
- struct scatterlist *sg;
- size_t mapped = 0;
- u64 addr = iova;
- unsigned int i;
- for_each_sg(sgt->sgl, sg, sgt->nents, i) {
size_t size = sg->length;
phys_addr_t phys = sg_phys(sg);
/* Map the block one page at a time */
while (size) {
if (ops->map(ops, addr, phys, 4096, prot)) {
msm_iommu_pagetable_unmap(mmu, iova, mapped);
return -EINVAL;
}
phys += 4096;
addr += 4096;
size -= 4096;
mapped += 4096;
}
- }
- return 0;
+}
+static void msm_iommu_pagetable_destroy(struct msm_mmu *mmu) +{
- struct msm_iommu_pagetable *pagetable = to_pagetable(mmu);
- free_io_pgtable_ops(pagetable->pgtbl_ops);
- kfree(pagetable);
+}
+/*
- Given a parent device, create and return an aux domain. This will enable the
- TTBR0 region
- */
+static struct iommu_domain *msm_iommu_get_aux_domain(struct msm_mmu *parent) +{
- struct msm_iommu *iommu = to_msm_iommu(parent);
- struct iommu_domain *domain;
- int ret;
- if (iommu->aux_domain)
return iommu->aux_domain;
- if (!iommu_dev_has_feature(parent->dev, IOMMU_DEV_FEAT_AUX))
return ERR_PTR(-ENODEV);
- domain = iommu_domain_alloc(&platform_bus_type);
- if (!domain)
return ERR_PTR(-ENODEV);
- ret = iommu_aux_attach_device(domain, parent->dev);
- if (ret) {
iommu_domain_free(domain);
return ERR_PTR(ret);
- }
- iommu->aux_domain = domain;
- return domain;
+}
+int msm_iommu_pagetable_params(struct msm_mmu *mmu,
phys_addr_t *ttbr, int *asid)
+{
- struct msm_iommu_pagetable *pagetable;
- if (mmu->type != MSM_MMU_IOMMU_PAGETABLE)
return -EINVAL;
- pagetable = to_pagetable(mmu);
- if (ttbr)
*ttbr = pagetable->ttbr;
- if (asid)
*asid = pagetable->asid;
- return 0;
+}
+static const struct msm_mmu_funcs pagetable_funcs = {
.map = msm_iommu_pagetable_map,
.unmap = msm_iommu_pagetable_unmap,
.destroy = msm_iommu_pagetable_destroy,
+};
+struct msm_mmu *msm_iommu_pagetable_create(struct msm_mmu *parent) +{
- static int next_asid = 16;
- struct msm_iommu_pagetable *pagetable;
- struct iommu_domain *aux_domain;
- struct io_pgtable_cfg cfg;
- int ret;
- /* Make sure that the parent has a aux domain attached */
- aux_domain = msm_iommu_get_aux_domain(parent);
- if (IS_ERR(aux_domain))
return ERR_CAST(aux_domain);
- /* Get the pagetable configuration from the aux domain */
- ret = iommu_domain_get_attr(aux_domain, DOMAIN_ATTR_PGTABLE_CFG, &cfg);
- if (ret)
return ERR_PTR(ret);
- pagetable = kzalloc(sizeof(*pagetable), GFP_KERNEL);
- if (!pagetable)
return ERR_PTR(-ENOMEM);
- msm_mmu_init(&pagetable->base, parent->dev, &pagetable_funcs,
MSM_MMU_IOMMU_PAGETABLE);
- cfg.tlb = NULL;
- pagetable->pgtbl_ops = alloc_io_pgtable_ops(ARM_64_LPAE_S1,
&cfg, aux_domain);
- if (!pagetable->pgtbl_ops) {
kfree(pagetable);
return ERR_PTR(-ENOMEM);
- }
- /* Needed later for TLB flush */
- pagetable->parent = parent;
- pagetable->ttbr = cfg.arm_lpae_s1_cfg.ttbr;
- pagetable->asid = next_asid;
- next_asid = (next_asid + 1) % 255;
- if (next_asid < 16)
next_asid = 16;
- return &pagetable->base;
+}
- static int msm_fault_handler(struct iommu_domain *domain, struct device *dev, unsigned long iova, int flags, void *arg) {
@@ -40,6 +217,7 @@ static int msm_iommu_map(struct msm_mmu *mmu, uint64_t iova, if (iova & BIT_ULL(48)) iova |= GENMASK_ULL(63, 49);
- ret = iommu_map_sg(iommu->domain, iova, sgt->sgl, sgt->nents, prot); WARN_ON(!ret);
@@ -85,7 +263,7 @@ struct msm_mmu *msm_iommu_new(struct device *dev, struct iommu_domain *domain) return ERR_PTR(-ENOMEM);
iommu->domain = domain;
- msm_mmu_init(&iommu->base, dev, &funcs);
msm_mmu_init(&iommu->base, dev, &funcs, MSM_MMU_IOMMU); iommu_set_fault_handler(domain, msm_fault_handler, iommu);
ret = iommu_attach_device(iommu->domain, dev);
diff --git a/drivers/gpu/drm/msm/msm_mmu.h b/drivers/gpu/drm/msm/msm_mmu.h index 3a534ee59bf6..61ade89d9e48 100644 --- a/drivers/gpu/drm/msm/msm_mmu.h +++ b/drivers/gpu/drm/msm/msm_mmu.h @@ -17,18 +17,26 @@ struct msm_mmu_funcs { void (*destroy)(struct msm_mmu *mmu); };
+enum msm_mmu_type {
- MSM_MMU_GPUMMU,
- MSM_MMU_IOMMU,
- MSM_MMU_IOMMU_PAGETABLE,
+};
struct msm_mmu { const struct msm_mmu_funcs *funcs; struct device *dev; int (*handler)(void *arg, unsigned long iova, int flags); void *arg;
enum msm_mmu_type type; };
static inline void msm_mmu_init(struct msm_mmu *mmu, struct device *dev,
const struct msm_mmu_funcs *funcs)
const struct msm_mmu_funcs *funcs, enum msm_mmu_type type)
{ mmu->dev = dev; mmu->funcs = funcs;
mmu->type = type; }
struct msm_mmu *msm_iommu_new(struct device *dev, struct iommu_domain *domain);
@@ -41,7 +49,13 @@ static inline void msm_mmu_set_fault_handler(struct msm_mmu *mmu, void *arg, mmu->handler = handler; }
+struct msm_mmu *msm_iommu_pagetable_create(struct msm_mmu *parent);
void msm_gpummu_params(struct msm_mmu *mmu, dma_addr_t *pt_base, dma_addr_t *tran_error);
+int msm_iommu_pagetable_params(struct msm_mmu *mmu, phys_addr_t *ttbr,
int *asid);
- #endif /* __MSM_MMU_H__ */
On Tue, Jul 7, 2020 at 4:36 AM Robin Murphy robin.murphy@arm.com wrote:
On 2020-06-26 21:04, Jordan Crouse wrote:
Add support to create a io-pgtable for use by targets that support per-instance pagetables. In order to support per-instance pagetables the GPU SMMU device needs to have the qcom,adreno-smmu compatible string and split pagetables and auxiliary domains need to be supported and enabled.
Signed-off-by: Jordan Crouse jcrouse@codeaurora.org
drivers/gpu/drm/msm/msm_gpummu.c | 2 +- drivers/gpu/drm/msm/msm_iommu.c | 180 ++++++++++++++++++++++++++++++- drivers/gpu/drm/msm/msm_mmu.h | 16 ++- 3 files changed, 195 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/msm/msm_gpummu.c b/drivers/gpu/drm/msm/msm_gpummu.c index 310a31b05faa..aab121f4beb7 100644 --- a/drivers/gpu/drm/msm/msm_gpummu.c +++ b/drivers/gpu/drm/msm/msm_gpummu.c @@ -102,7 +102,7 @@ struct msm_mmu *msm_gpummu_new(struct device *dev, struct msm_gpu *gpu) }
gpummu->gpu = gpu;
msm_mmu_init(&gpummu->base, dev, &funcs);
msm_mmu_init(&gpummu->base, dev, &funcs, MSM_MMU_GPUMMU); return &gpummu->base;
}
diff --git a/drivers/gpu/drm/msm/msm_iommu.c b/drivers/gpu/drm/msm/msm_iommu.c index 1b6635504069..f455c597f76d 100644 --- a/drivers/gpu/drm/msm/msm_iommu.c +++ b/drivers/gpu/drm/msm/msm_iommu.c @@ -4,15 +4,192 @@
- Author: Rob Clark robdclark@gmail.com
*/
+#include <linux/io-pgtable.h> #include "msm_drv.h" #include "msm_mmu.h"
struct msm_iommu { struct msm_mmu base; struct iommu_domain *domain;
};struct iommu_domain *aux_domain;
- #define to_msm_iommu(x) container_of(x, struct msm_iommu, base)
+struct msm_iommu_pagetable {
struct msm_mmu base;
struct msm_mmu *parent;
struct io_pgtable_ops *pgtbl_ops;
phys_addr_t ttbr;
u32 asid;
+};
+static struct msm_iommu_pagetable *to_pagetable(struct msm_mmu *mmu) +{
return container_of(mmu, struct msm_iommu_pagetable, base);
+}
+static int msm_iommu_pagetable_unmap(struct msm_mmu *mmu, u64 iova,
size_t size)
+{
struct msm_iommu_pagetable *pagetable = to_pagetable(mmu);
struct io_pgtable_ops *ops = pagetable->pgtbl_ops;
size_t unmapped = 0;
/* Unmap the block one page at a time */
while (size) {
unmapped += ops->unmap(ops, iova, 4096, NULL);
iova += 4096;
size -= 4096;
}
iommu_flush_tlb_all(to_msm_iommu(pagetable->parent)->domain);
return (unmapped == size) ? 0 : -EINVAL;
+}
Remember in patch #1 when you said "Then 'domain' can be used like any other iommu domain to map and unmap iova addresses in the pagetable."?
This appears to be very much not that :/
I guess that comment is a bit stale.. the original plan was to create an iommu_domain per set of pgtables, but at some point we realized that by using the io-pgtable helpers directly, we would inflict a lot less GPU-crazy on the iommu drivers
BR, -R
Robin.
+static int msm_iommu_pagetable_map(struct msm_mmu *mmu, u64 iova,
struct sg_table *sgt, size_t len, int prot)
+{
struct msm_iommu_pagetable *pagetable = to_pagetable(mmu);
struct io_pgtable_ops *ops = pagetable->pgtbl_ops;
struct scatterlist *sg;
size_t mapped = 0;
u64 addr = iova;
unsigned int i;
for_each_sg(sgt->sgl, sg, sgt->nents, i) {
size_t size = sg->length;
phys_addr_t phys = sg_phys(sg);
/* Map the block one page at a time */
while (size) {
if (ops->map(ops, addr, phys, 4096, prot)) {
msm_iommu_pagetable_unmap(mmu, iova, mapped);
return -EINVAL;
}
phys += 4096;
addr += 4096;
size -= 4096;
mapped += 4096;
}
}
return 0;
+}
+static void msm_iommu_pagetable_destroy(struct msm_mmu *mmu) +{
struct msm_iommu_pagetable *pagetable = to_pagetable(mmu);
free_io_pgtable_ops(pagetable->pgtbl_ops);
kfree(pagetable);
+}
+/*
- Given a parent device, create and return an aux domain. This will enable the
- TTBR0 region
- */
+static struct iommu_domain *msm_iommu_get_aux_domain(struct msm_mmu *parent) +{
struct msm_iommu *iommu = to_msm_iommu(parent);
struct iommu_domain *domain;
int ret;
if (iommu->aux_domain)
return iommu->aux_domain;
if (!iommu_dev_has_feature(parent->dev, IOMMU_DEV_FEAT_AUX))
return ERR_PTR(-ENODEV);
domain = iommu_domain_alloc(&platform_bus_type);
if (!domain)
return ERR_PTR(-ENODEV);
ret = iommu_aux_attach_device(domain, parent->dev);
if (ret) {
iommu_domain_free(domain);
return ERR_PTR(ret);
}
iommu->aux_domain = domain;
return domain;
+}
+int msm_iommu_pagetable_params(struct msm_mmu *mmu,
phys_addr_t *ttbr, int *asid)
+{
struct msm_iommu_pagetable *pagetable;
if (mmu->type != MSM_MMU_IOMMU_PAGETABLE)
return -EINVAL;
pagetable = to_pagetable(mmu);
if (ttbr)
*ttbr = pagetable->ttbr;
if (asid)
*asid = pagetable->asid;
return 0;
+}
+static const struct msm_mmu_funcs pagetable_funcs = {
.map = msm_iommu_pagetable_map,
.unmap = msm_iommu_pagetable_unmap,
.destroy = msm_iommu_pagetable_destroy,
+};
+struct msm_mmu *msm_iommu_pagetable_create(struct msm_mmu *parent) +{
static int next_asid = 16;
struct msm_iommu_pagetable *pagetable;
struct iommu_domain *aux_domain;
struct io_pgtable_cfg cfg;
int ret;
/* Make sure that the parent has a aux domain attached */
aux_domain = msm_iommu_get_aux_domain(parent);
if (IS_ERR(aux_domain))
return ERR_CAST(aux_domain);
/* Get the pagetable configuration from the aux domain */
ret = iommu_domain_get_attr(aux_domain, DOMAIN_ATTR_PGTABLE_CFG, &cfg);
if (ret)
return ERR_PTR(ret);
pagetable = kzalloc(sizeof(*pagetable), GFP_KERNEL);
if (!pagetable)
return ERR_PTR(-ENOMEM);
msm_mmu_init(&pagetable->base, parent->dev, &pagetable_funcs,
MSM_MMU_IOMMU_PAGETABLE);
cfg.tlb = NULL;
pagetable->pgtbl_ops = alloc_io_pgtable_ops(ARM_64_LPAE_S1,
&cfg, aux_domain);
if (!pagetable->pgtbl_ops) {
kfree(pagetable);
return ERR_PTR(-ENOMEM);
}
/* Needed later for TLB flush */
pagetable->parent = parent;
pagetable->ttbr = cfg.arm_lpae_s1_cfg.ttbr;
pagetable->asid = next_asid;
next_asid = (next_asid + 1) % 255;
if (next_asid < 16)
next_asid = 16;
return &pagetable->base;
+}
- static int msm_fault_handler(struct iommu_domain *domain, struct device *dev, unsigned long iova, int flags, void *arg) {
@@ -40,6 +217,7 @@ static int msm_iommu_map(struct msm_mmu *mmu, uint64_t iova, if (iova & BIT_ULL(48)) iova |= GENMASK_ULL(63, 49);
ret = iommu_map_sg(iommu->domain, iova, sgt->sgl, sgt->nents, prot); WARN_ON(!ret);
@@ -85,7 +263,7 @@ struct msm_mmu *msm_iommu_new(struct device *dev, struct iommu_domain *domain) return ERR_PTR(-ENOMEM);
iommu->domain = domain;
msm_mmu_init(&iommu->base, dev, &funcs);
msm_mmu_init(&iommu->base, dev, &funcs, MSM_MMU_IOMMU); iommu_set_fault_handler(domain, msm_fault_handler, iommu); ret = iommu_attach_device(iommu->domain, dev);
diff --git a/drivers/gpu/drm/msm/msm_mmu.h b/drivers/gpu/drm/msm/msm_mmu.h index 3a534ee59bf6..61ade89d9e48 100644 --- a/drivers/gpu/drm/msm/msm_mmu.h +++ b/drivers/gpu/drm/msm/msm_mmu.h @@ -17,18 +17,26 @@ struct msm_mmu_funcs { void (*destroy)(struct msm_mmu *mmu); };
+enum msm_mmu_type {
MSM_MMU_GPUMMU,
MSM_MMU_IOMMU,
MSM_MMU_IOMMU_PAGETABLE,
+};
struct msm_mmu { const struct msm_mmu_funcs *funcs; struct device *dev; int (*handler)(void *arg, unsigned long iova, int flags); void *arg;
enum msm_mmu_type type;
};
static inline void msm_mmu_init(struct msm_mmu *mmu, struct device *dev,
const struct msm_mmu_funcs *funcs)
const struct msm_mmu_funcs *funcs, enum msm_mmu_type type)
{ mmu->dev = dev; mmu->funcs = funcs;
mmu->type = type;
}
struct msm_mmu *msm_iommu_new(struct device *dev, struct iommu_domain *domain);
@@ -41,7 +49,13 @@ static inline void msm_mmu_set_fault_handler(struct msm_mmu *mmu, void *arg, mmu->handler = handler; }
+struct msm_mmu *msm_iommu_pagetable_create(struct msm_mmu *parent);
void msm_gpummu_params(struct msm_mmu *mmu, dma_addr_t *pt_base, dma_addr_t *tran_error);
+int msm_iommu_pagetable_params(struct msm_mmu *mmu, phys_addr_t *ttbr,
int *asid);
- #endif /* __MSM_MMU_H__ */
Freedreno mailing list Freedreno@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/freedreno
On Tue, Jul 07, 2020 at 12:36:42PM +0100, Robin Murphy wrote:
On 2020-06-26 21:04, Jordan Crouse wrote:
Add support to create a io-pgtable for use by targets that support per-instance pagetables. In order to support per-instance pagetables the GPU SMMU device needs to have the qcom,adreno-smmu compatible string and split pagetables and auxiliary domains need to be supported and enabled.
Signed-off-by: Jordan Crouse jcrouse@codeaurora.org
drivers/gpu/drm/msm/msm_gpummu.c | 2 +- drivers/gpu/drm/msm/msm_iommu.c | 180 ++++++++++++++++++++++++++++++- drivers/gpu/drm/msm/msm_mmu.h | 16 ++- 3 files changed, 195 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/msm/msm_gpummu.c b/drivers/gpu/drm/msm/msm_gpummu.c index 310a31b05faa..aab121f4beb7 100644 --- a/drivers/gpu/drm/msm/msm_gpummu.c +++ b/drivers/gpu/drm/msm/msm_gpummu.c @@ -102,7 +102,7 @@ struct msm_mmu *msm_gpummu_new(struct device *dev, struct msm_gpu *gpu) } gpummu->gpu = gpu;
- msm_mmu_init(&gpummu->base, dev, &funcs);
- msm_mmu_init(&gpummu->base, dev, &funcs, MSM_MMU_GPUMMU); return &gpummu->base;
} diff --git a/drivers/gpu/drm/msm/msm_iommu.c b/drivers/gpu/drm/msm/msm_iommu.c index 1b6635504069..f455c597f76d 100644 --- a/drivers/gpu/drm/msm/msm_iommu.c +++ b/drivers/gpu/drm/msm/msm_iommu.c @@ -4,15 +4,192 @@
- Author: Rob Clark robdclark@gmail.com
*/ +#include <linux/io-pgtable.h> #include "msm_drv.h" #include "msm_mmu.h" struct msm_iommu { struct msm_mmu base; struct iommu_domain *domain;
- struct iommu_domain *aux_domain;
};
#define to_msm_iommu(x) container_of(x, struct msm_iommu, base) +struct msm_iommu_pagetable {
- struct msm_mmu base;
- struct msm_mmu *parent;
- struct io_pgtable_ops *pgtbl_ops;
- phys_addr_t ttbr;
- u32 asid;
+};
+static struct msm_iommu_pagetable *to_pagetable(struct msm_mmu *mmu) +{
- return container_of(mmu, struct msm_iommu_pagetable, base);
+}
+static int msm_iommu_pagetable_unmap(struct msm_mmu *mmu, u64 iova,
size_t size)
+{
- struct msm_iommu_pagetable *pagetable = to_pagetable(mmu);
- struct io_pgtable_ops *ops = pagetable->pgtbl_ops;
- size_t unmapped = 0;
- /* Unmap the block one page at a time */
- while (size) {
unmapped += ops->unmap(ops, iova, 4096, NULL);
iova += 4096;
size -= 4096;
- }
- iommu_flush_tlb_all(to_msm_iommu(pagetable->parent)->domain);
- return (unmapped == size) ? 0 : -EINVAL;
+}
Remember in patch #1 when you said "Then 'domain' can be used like any other iommu domain to map and unmap iova addresses in the pagetable."?
This appears to be very much not that :/
The code changed but the commit log stayed the same. I'll reword.
Jordan
Robin.
+static int msm_iommu_pagetable_map(struct msm_mmu *mmu, u64 iova,
struct sg_table *sgt, size_t len, int prot)
+{
- struct msm_iommu_pagetable *pagetable = to_pagetable(mmu);
- struct io_pgtable_ops *ops = pagetable->pgtbl_ops;
- struct scatterlist *sg;
- size_t mapped = 0;
- u64 addr = iova;
- unsigned int i;
- for_each_sg(sgt->sgl, sg, sgt->nents, i) {
size_t size = sg->length;
phys_addr_t phys = sg_phys(sg);
/* Map the block one page at a time */
while (size) {
if (ops->map(ops, addr, phys, 4096, prot)) {
msm_iommu_pagetable_unmap(mmu, iova, mapped);
return -EINVAL;
}
phys += 4096;
addr += 4096;
size -= 4096;
mapped += 4096;
}
- }
- return 0;
+}
+static void msm_iommu_pagetable_destroy(struct msm_mmu *mmu) +{
- struct msm_iommu_pagetable *pagetable = to_pagetable(mmu);
- free_io_pgtable_ops(pagetable->pgtbl_ops);
- kfree(pagetable);
+}
+/*
- Given a parent device, create and return an aux domain. This will enable the
- TTBR0 region
- */
+static struct iommu_domain *msm_iommu_get_aux_domain(struct msm_mmu *parent) +{
- struct msm_iommu *iommu = to_msm_iommu(parent);
- struct iommu_domain *domain;
- int ret;
- if (iommu->aux_domain)
return iommu->aux_domain;
- if (!iommu_dev_has_feature(parent->dev, IOMMU_DEV_FEAT_AUX))
return ERR_PTR(-ENODEV);
- domain = iommu_domain_alloc(&platform_bus_type);
- if (!domain)
return ERR_PTR(-ENODEV);
- ret = iommu_aux_attach_device(domain, parent->dev);
- if (ret) {
iommu_domain_free(domain);
return ERR_PTR(ret);
- }
- iommu->aux_domain = domain;
- return domain;
+}
+int msm_iommu_pagetable_params(struct msm_mmu *mmu,
phys_addr_t *ttbr, int *asid)
+{
- struct msm_iommu_pagetable *pagetable;
- if (mmu->type != MSM_MMU_IOMMU_PAGETABLE)
return -EINVAL;
- pagetable = to_pagetable(mmu);
- if (ttbr)
*ttbr = pagetable->ttbr;
- if (asid)
*asid = pagetable->asid;
- return 0;
+}
+static const struct msm_mmu_funcs pagetable_funcs = {
.map = msm_iommu_pagetable_map,
.unmap = msm_iommu_pagetable_unmap,
.destroy = msm_iommu_pagetable_destroy,
+};
+struct msm_mmu *msm_iommu_pagetable_create(struct msm_mmu *parent) +{
- static int next_asid = 16;
- struct msm_iommu_pagetable *pagetable;
- struct iommu_domain *aux_domain;
- struct io_pgtable_cfg cfg;
- int ret;
- /* Make sure that the parent has a aux domain attached */
- aux_domain = msm_iommu_get_aux_domain(parent);
- if (IS_ERR(aux_domain))
return ERR_CAST(aux_domain);
- /* Get the pagetable configuration from the aux domain */
- ret = iommu_domain_get_attr(aux_domain, DOMAIN_ATTR_PGTABLE_CFG, &cfg);
- if (ret)
return ERR_PTR(ret);
- pagetable = kzalloc(sizeof(*pagetable), GFP_KERNEL);
- if (!pagetable)
return ERR_PTR(-ENOMEM);
- msm_mmu_init(&pagetable->base, parent->dev, &pagetable_funcs,
MSM_MMU_IOMMU_PAGETABLE);
- cfg.tlb = NULL;
- pagetable->pgtbl_ops = alloc_io_pgtable_ops(ARM_64_LPAE_S1,
&cfg, aux_domain);
- if (!pagetable->pgtbl_ops) {
kfree(pagetable);
return ERR_PTR(-ENOMEM);
- }
- /* Needed later for TLB flush */
- pagetable->parent = parent;
- pagetable->ttbr = cfg.arm_lpae_s1_cfg.ttbr;
- pagetable->asid = next_asid;
- next_asid = (next_asid + 1) % 255;
- if (next_asid < 16)
next_asid = 16;
- return &pagetable->base;
+}
static int msm_fault_handler(struct iommu_domain *domain, struct device *dev, unsigned long iova, int flags, void *arg) { @@ -40,6 +217,7 @@ static int msm_iommu_map(struct msm_mmu *mmu, uint64_t iova, if (iova & BIT_ULL(48)) iova |= GENMASK_ULL(63, 49);
- ret = iommu_map_sg(iommu->domain, iova, sgt->sgl, sgt->nents, prot); WARN_ON(!ret);
@@ -85,7 +263,7 @@ struct msm_mmu *msm_iommu_new(struct device *dev, struct iommu_domain *domain) return ERR_PTR(-ENOMEM); iommu->domain = domain;
- msm_mmu_init(&iommu->base, dev, &funcs);
- msm_mmu_init(&iommu->base, dev, &funcs, MSM_MMU_IOMMU); iommu_set_fault_handler(domain, msm_fault_handler, iommu); ret = iommu_attach_device(iommu->domain, dev);
diff --git a/drivers/gpu/drm/msm/msm_mmu.h b/drivers/gpu/drm/msm/msm_mmu.h index 3a534ee59bf6..61ade89d9e48 100644 --- a/drivers/gpu/drm/msm/msm_mmu.h +++ b/drivers/gpu/drm/msm/msm_mmu.h @@ -17,18 +17,26 @@ struct msm_mmu_funcs { void (*destroy)(struct msm_mmu *mmu); }; +enum msm_mmu_type {
- MSM_MMU_GPUMMU,
- MSM_MMU_IOMMU,
- MSM_MMU_IOMMU_PAGETABLE,
+};
struct msm_mmu { const struct msm_mmu_funcs *funcs; struct device *dev; int (*handler)(void *arg, unsigned long iova, int flags); void *arg;
- enum msm_mmu_type type;
}; static inline void msm_mmu_init(struct msm_mmu *mmu, struct device *dev,
const struct msm_mmu_funcs *funcs)
const struct msm_mmu_funcs *funcs, enum msm_mmu_type type)
{ mmu->dev = dev; mmu->funcs = funcs;
- mmu->type = type;
} struct msm_mmu *msm_iommu_new(struct device *dev, struct iommu_domain *domain); @@ -41,7 +49,13 @@ static inline void msm_mmu_set_fault_handler(struct msm_mmu *mmu, void *arg, mmu->handler = handler; } +struct msm_mmu *msm_iommu_pagetable_create(struct msm_mmu *parent);
void msm_gpummu_params(struct msm_mmu *mmu, dma_addr_t *pt_base, dma_addr_t *tran_error);
+int msm_iommu_pagetable_params(struct msm_mmu *mmu, phys_addr_t *ttbr,
int *asid);
#endif /* __MSM_MMU_H__ */
Add support for allocating an address space instance. Targets that support per-instance pagetables should implement their own function to allocate a new instance. The default will return the existing generic address space.
Signed-off-by: Jordan Crouse jcrouse@codeaurora.org ---
drivers/gpu/drm/msm/msm_drv.c | 15 +++++++++------ drivers/gpu/drm/msm/msm_drv.h | 4 ++++ drivers/gpu/drm/msm/msm_gem_vma.c | 9 +++++++++ drivers/gpu/drm/msm/msm_gpu.c | 17 +++++++++++++++++ drivers/gpu/drm/msm/msm_gpu.h | 5 +++++ 5 files changed, 44 insertions(+), 6 deletions(-)
diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c index 6c57cc72d627..092c49552ddd 100644 --- a/drivers/gpu/drm/msm/msm_drv.c +++ b/drivers/gpu/drm/msm/msm_drv.c @@ -588,7 +588,7 @@ static int context_init(struct drm_device *dev, struct drm_file *file)
msm_submitqueue_init(dev, ctx);
- ctx->aspace = priv->gpu ? priv->gpu->aspace : NULL; + ctx->aspace = msm_gpu_address_space_instance(priv->gpu); file->driver_priv = ctx;
return 0; @@ -607,6 +607,8 @@ static int msm_open(struct drm_device *dev, struct drm_file *file) static void context_close(struct msm_file_private *ctx) { msm_submitqueue_close(ctx); + + msm_gem_address_space_put(ctx->aspace); kfree(ctx); }
@@ -771,18 +773,19 @@ static int msm_ioctl_gem_cpu_fini(struct drm_device *dev, void *data, }
static int msm_ioctl_gem_info_iova(struct drm_device *dev, - struct drm_gem_object *obj, uint64_t *iova) + struct drm_file *file, struct drm_gem_object *obj, + uint64_t *iova) { - struct msm_drm_private *priv = dev->dev_private; + struct msm_file_private *ctx = file->driver_priv;
- if (!priv->gpu) + if (!ctx->aspace) return -EINVAL;
/* * Don't pin the memory here - just get an address so that userspace can * be productive */ - return msm_gem_get_iova(obj, priv->gpu->aspace, iova); + return msm_gem_get_iova(obj, ctx->aspace, iova); }
static int msm_ioctl_gem_info(struct drm_device *dev, void *data, @@ -821,7 +824,7 @@ static int msm_ioctl_gem_info(struct drm_device *dev, void *data, args->value = msm_gem_mmap_offset(obj); break; case MSM_INFO_GET_IOVA: - ret = msm_ioctl_gem_info_iova(dev, obj, &args->value); + ret = msm_ioctl_gem_info_iova(dev, file, obj, &args->value); break; case MSM_INFO_SET_NAME: /* length check should leave room for terminating null: */ diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h index e2d6a6056418..983a8b7e5a74 100644 --- a/drivers/gpu/drm/msm/msm_drv.h +++ b/drivers/gpu/drm/msm/msm_drv.h @@ -249,6 +249,10 @@ int msm_gem_map_vma(struct msm_gem_address_space *aspace, void msm_gem_close_vma(struct msm_gem_address_space *aspace, struct msm_gem_vma *vma);
+ +struct msm_gem_address_space * +msm_gem_address_space_get(struct msm_gem_address_space *aspace); + void msm_gem_address_space_put(struct msm_gem_address_space *aspace);
struct msm_gem_address_space * diff --git a/drivers/gpu/drm/msm/msm_gem_vma.c b/drivers/gpu/drm/msm/msm_gem_vma.c index 5f6a11211b64..29cc1305cf37 100644 --- a/drivers/gpu/drm/msm/msm_gem_vma.c +++ b/drivers/gpu/drm/msm/msm_gem_vma.c @@ -27,6 +27,15 @@ void msm_gem_address_space_put(struct msm_gem_address_space *aspace) kref_put(&aspace->kref, msm_gem_address_space_destroy); }
+struct msm_gem_address_space * +msm_gem_address_space_get(struct msm_gem_address_space *aspace) +{ + if (!IS_ERR_OR_NULL(aspace)) + kref_get(&aspace->kref); + + return aspace; +} + /* Actually unmap memory for the vma */ void msm_gem_purge_vma(struct msm_gem_address_space *aspace, struct msm_gem_vma *vma) diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c index 86a138641477..0fa614430799 100644 --- a/drivers/gpu/drm/msm/msm_gpu.c +++ b/drivers/gpu/drm/msm/msm_gpu.c @@ -821,6 +821,23 @@ static int get_clocks(struct platform_device *pdev, struct msm_gpu *gpu) return 0; }
+/* Return a new address space instance */ +struct msm_gem_address_space * +msm_gpu_address_space_instance(struct msm_gpu *gpu) +{ + if (!gpu) + return NULL; + + /* + * If the GPU doesn't support instanced address spaces return the + * default address space + */ + if (!gpu->funcs->address_space_instance) + return msm_gem_address_space_get(gpu->aspace); + + return gpu->funcs->address_space_instance(gpu); +} + int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev, struct msm_gpu *gpu, const struct msm_gpu_funcs *funcs, const char *name, struct msm_gpu_config *config) diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h index 429cb40f7931..f1762b77bea8 100644 --- a/drivers/gpu/drm/msm/msm_gpu.h +++ b/drivers/gpu/drm/msm/msm_gpu.h @@ -64,6 +64,8 @@ struct msm_gpu_funcs { void (*gpu_set_freq)(struct msm_gpu *gpu, unsigned long freq); struct msm_gem_address_space *(*create_address_space) (struct msm_gpu *gpu, struct platform_device *pdev); + struct msm_gem_address_space *(*address_space_instance) + (struct msm_gpu *gpu); };
struct msm_gpu { @@ -286,6 +288,9 @@ int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev, struct msm_gpu *gpu, const struct msm_gpu_funcs *funcs, const char *name, struct msm_gpu_config *config);
+struct msm_gem_address_space * +msm_gpu_address_space_instance(struct msm_gpu *gpu); + void msm_gpu_cleanup(struct msm_gpu *gpu);
struct msm_gpu *adreno_load_gpu(struct drm_device *dev);
Add support for using per-instance pagetables if all the dependencies are available.
Signed-off-by: Jordan Crouse jcrouse@codeaurora.org ---
drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 43 +++++++++++++++++++++++++++ drivers/gpu/drm/msm/msm_ringbuffer.h | 1 + 2 files changed, 44 insertions(+)
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c index aa53f47b7e8b..95ed2ceac121 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c @@ -79,6 +79,34 @@ static void get_stats_counter(struct msm_ringbuffer *ring, u32 counter, OUT_RING(ring, upper_32_bits(iova)); }
+static void a6xx_set_pagetable(struct msm_gpu *gpu, struct msm_ringbuffer *ring, + struct msm_file_private *ctx) +{ + phys_addr_t ttbr; + u32 asid; + + if (msm_iommu_pagetable_params(ctx->aspace->mmu, &ttbr, &asid)) + return; + + /* Execute the table update */ + OUT_PKT7(ring, CP_SMMU_TABLE_UPDATE, 4); + OUT_RING(ring, lower_32_bits(ttbr)); + OUT_RING(ring, (((u64) asid) << 48) | upper_32_bits(ttbr)); + /* CONTEXTIDR is currently unused */ + OUT_RING(ring, 0); + /* CONTEXTBANK is currently unused */ + OUT_RING(ring, 0); + + /* + * Write the new TTBR0 to the memstore. This is good for debugging. + */ + OUT_PKT7(ring, CP_MEM_WRITE, 4); + OUT_RING(ring, lower_32_bits(rbmemptr(ring, ttbr0))); + OUT_RING(ring, upper_32_bits(rbmemptr(ring, ttbr0))); + OUT_RING(ring, lower_32_bits(ttbr)); + OUT_RING(ring, (((u64) asid) << 48) | upper_32_bits(ttbr)); +} + static void a6xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, struct msm_file_private *ctx) { @@ -89,6 +117,8 @@ static void a6xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, struct msm_ringbuffer *ring = submit->ring; unsigned int i;
+ a6xx_set_pagetable(gpu, ring, ctx); + get_stats_counter(ring, REG_A6XX_RBBM_PERFCTR_CP_0_LO, rbmemptr_stats(ring, index, cpcycles_start));
@@ -872,6 +902,18 @@ static unsigned long a6xx_gpu_busy(struct msm_gpu *gpu) return (unsigned long)busy_time; }
+struct msm_gem_address_space *a6xx_address_space_instance(struct msm_gpu *gpu) +{ + struct msm_mmu *mmu; + + mmu = msm_iommu_pagetable_create(gpu->aspace->mmu); + if (IS_ERR(mmu)) + return msm_gem_address_space_get(gpu->aspace); + + return msm_gem_address_space_create(mmu, + "gpu", 0x100000000ULL, 0x1ffffffffULL); +} + static const struct adreno_gpu_funcs funcs = { .base = { .get_param = adreno_get_param, @@ -895,6 +937,7 @@ static const struct adreno_gpu_funcs funcs = { .gpu_state_put = a6xx_gpu_state_put, #endif .create_address_space = adreno_iommu_create_address_space, + .address_space_instance = a6xx_address_space_instance, }, .get_timestamp = a6xx_get_timestamp, }; diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.h b/drivers/gpu/drm/msm/msm_ringbuffer.h index 7764373d0ed2..0987d6bf848c 100644 --- a/drivers/gpu/drm/msm/msm_ringbuffer.h +++ b/drivers/gpu/drm/msm/msm_ringbuffer.h @@ -31,6 +31,7 @@ struct msm_rbmemptrs { volatile uint32_t fence;
volatile struct msm_gpu_submit_stats stats[MSM_GPU_SUBMIT_STATS_COUNT]; + volatile u64 ttbr0; };
struct msm_ringbuffer {
On Fri, Jun 26, 2020 at 1:04 PM Jordan Crouse jcrouse@codeaurora.org wrote:
Add support for using per-instance pagetables if all the dependencies are available.
Signed-off-by: Jordan Crouse jcrouse@codeaurora.org
drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 43 +++++++++++++++++++++++++++ drivers/gpu/drm/msm/msm_ringbuffer.h | 1 + 2 files changed, 44 insertions(+)
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c index aa53f47b7e8b..95ed2ceac121 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c @@ -79,6 +79,34 @@ static void get_stats_counter(struct msm_ringbuffer *ring, u32 counter, OUT_RING(ring, upper_32_bits(iova)); }
+static void a6xx_set_pagetable(struct msm_gpu *gpu, struct msm_ringbuffer *ring,
struct msm_file_private *ctx)
+{
phys_addr_t ttbr;
u32 asid;
if (msm_iommu_pagetable_params(ctx->aspace->mmu, &ttbr, &asid))
return;
/* Execute the table update */
OUT_PKT7(ring, CP_SMMU_TABLE_UPDATE, 4);
OUT_RING(ring, lower_32_bits(ttbr));
OUT_RING(ring, (((u64) asid) << 48) | upper_32_bits(ttbr));
/* CONTEXTIDR is currently unused */
OUT_RING(ring, 0);
/* CONTEXTBANK is currently unused */
OUT_RING(ring, 0);
/*
* Write the new TTBR0 to the memstore. This is good for debugging.
*/
OUT_PKT7(ring, CP_MEM_WRITE, 4);
OUT_RING(ring, lower_32_bits(rbmemptr(ring, ttbr0)));
OUT_RING(ring, upper_32_bits(rbmemptr(ring, ttbr0)));
OUT_RING(ring, lower_32_bits(ttbr));
OUT_RING(ring, (((u64) asid) << 48) | upper_32_bits(ttbr));
+}
static void a6xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, struct msm_file_private *ctx) { @@ -89,6 +117,8 @@ static void a6xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, struct msm_ringbuffer *ring = submit->ring; unsigned int i;
a6xx_set_pagetable(gpu, ring, ctx);
get_stats_counter(ring, REG_A6XX_RBBM_PERFCTR_CP_0_LO, rbmemptr_stats(ring, index, cpcycles_start));
@@ -872,6 +902,18 @@ static unsigned long a6xx_gpu_busy(struct msm_gpu *gpu) return (unsigned long)busy_time; }
+struct msm_gem_address_space *a6xx_address_space_instance(struct msm_gpu *gpu) +{
struct msm_mmu *mmu;
mmu = msm_iommu_pagetable_create(gpu->aspace->mmu);
if (IS_ERR(mmu))
return msm_gem_address_space_get(gpu->aspace);
return msm_gem_address_space_create(mmu,
"gpu", 0x100000000ULL, 0x1ffffffffULL);
+}
static const struct adreno_gpu_funcs funcs = { .base = { .get_param = adreno_get_param, @@ -895,6 +937,7 @@ static const struct adreno_gpu_funcs funcs = { .gpu_state_put = a6xx_gpu_state_put, #endif .create_address_space = adreno_iommu_create_address_space,
.address_space_instance = a6xx_address_space_instance,
Hmm, maybe instead of .address_space_instance, something like .create_context_address_space?
Since like .create_address_space, it is creating an address space.. the difference is that it is a per context/process aspace..
BR, -R
}, .get_timestamp = a6xx_get_timestamp,
}; diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.h b/drivers/gpu/drm/msm/msm_ringbuffer.h index 7764373d0ed2..0987d6bf848c 100644 --- a/drivers/gpu/drm/msm/msm_ringbuffer.h +++ b/drivers/gpu/drm/msm/msm_ringbuffer.h @@ -31,6 +31,7 @@ struct msm_rbmemptrs { volatile uint32_t fence;
volatile struct msm_gpu_submit_stats stats[MSM_GPU_SUBMIT_STATS_COUNT];
volatile u64 ttbr0;
};
struct msm_ringbuffer {
2.17.1
On Sat, Jun 27, 2020 at 12:56 PM Rob Clark robdclark@gmail.com wrote:
On Fri, Jun 26, 2020 at 1:04 PM Jordan Crouse jcrouse@codeaurora.org wrote:
Add support for using per-instance pagetables if all the dependencies are available.
Signed-off-by: Jordan Crouse jcrouse@codeaurora.org
drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 43 +++++++++++++++++++++++++++ drivers/gpu/drm/msm/msm_ringbuffer.h | 1 + 2 files changed, 44 insertions(+)
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c index aa53f47b7e8b..95ed2ceac121 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c @@ -79,6 +79,34 @@ static void get_stats_counter(struct msm_ringbuffer *ring, u32 counter, OUT_RING(ring, upper_32_bits(iova)); }
+static void a6xx_set_pagetable(struct msm_gpu *gpu, struct msm_ringbuffer *ring,
struct msm_file_private *ctx)
+{
phys_addr_t ttbr;
u32 asid;
if (msm_iommu_pagetable_params(ctx->aspace->mmu, &ttbr, &asid))
return;
/* Execute the table update */
OUT_PKT7(ring, CP_SMMU_TABLE_UPDATE, 4);
OUT_RING(ring, lower_32_bits(ttbr));
OUT_RING(ring, (((u64) asid) << 48) | upper_32_bits(ttbr));
/* CONTEXTIDR is currently unused */
OUT_RING(ring, 0);
/* CONTEXTBANK is currently unused */
OUT_RING(ring, 0);
/*
* Write the new TTBR0 to the memstore. This is good for debugging.
*/
OUT_PKT7(ring, CP_MEM_WRITE, 4);
OUT_RING(ring, lower_32_bits(rbmemptr(ring, ttbr0)));
OUT_RING(ring, upper_32_bits(rbmemptr(ring, ttbr0)));
OUT_RING(ring, lower_32_bits(ttbr));
OUT_RING(ring, (((u64) asid) << 48) | upper_32_bits(ttbr));
+}
static void a6xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, struct msm_file_private *ctx) { @@ -89,6 +117,8 @@ static void a6xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, struct msm_ringbuffer *ring = submit->ring; unsigned int i;
a6xx_set_pagetable(gpu, ring, ctx);
get_stats_counter(ring, REG_A6XX_RBBM_PERFCTR_CP_0_LO, rbmemptr_stats(ring, index, cpcycles_start));
@@ -872,6 +902,18 @@ static unsigned long a6xx_gpu_busy(struct msm_gpu *gpu) return (unsigned long)busy_time; }
+struct msm_gem_address_space *a6xx_address_space_instance(struct msm_gpu *gpu) +{
struct msm_mmu *mmu;
mmu = msm_iommu_pagetable_create(gpu->aspace->mmu);
if (IS_ERR(mmu))
return msm_gem_address_space_get(gpu->aspace);
return msm_gem_address_space_create(mmu,
"gpu", 0x100000000ULL, 0x1ffffffffULL);
+}
static const struct adreno_gpu_funcs funcs = { .base = { .get_param = adreno_get_param, @@ -895,6 +937,7 @@ static const struct adreno_gpu_funcs funcs = { .gpu_state_put = a6xx_gpu_state_put, #endif .create_address_space = adreno_iommu_create_address_space,
.address_space_instance = a6xx_address_space_instance,
Hmm, maybe instead of .address_space_instance, something like .create_context_address_space?
Since like .create_address_space, it is creating an address space.. the difference is that it is a per context/process aspace..
or maybe just .create_pgtable and return the 'struct msm_mmu' (which is itself starting to become less of a great name)..
The only other thing a6xx_address_space_instance() adds is knowing where the split is between the kernel and user pgtables, and I suppose that isn't a thing that would really be changing between gens?
BR, -R
BR, -R
}, .get_timestamp = a6xx_get_timestamp,
}; diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.h b/drivers/gpu/drm/msm/msm_ringbuffer.h index 7764373d0ed2..0987d6bf848c 100644 --- a/drivers/gpu/drm/msm/msm_ringbuffer.h +++ b/drivers/gpu/drm/msm/msm_ringbuffer.h @@ -31,6 +31,7 @@ struct msm_rbmemptrs { volatile uint32_t fence;
volatile struct msm_gpu_submit_stats stats[MSM_GPU_SUBMIT_STATS_COUNT];
volatile u64 ttbr0;
};
struct msm_ringbuffer {
2.17.1
On Sat, Jun 27, 2020 at 01:11:14PM -0700, Rob Clark wrote:
On Sat, Jun 27, 2020 at 12:56 PM Rob Clark robdclark@gmail.com wrote:
On Fri, Jun 26, 2020 at 1:04 PM Jordan Crouse jcrouse@codeaurora.org wrote:
Add support for using per-instance pagetables if all the dependencies are available.
Signed-off-by: Jordan Crouse jcrouse@codeaurora.org
drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 43 +++++++++++++++++++++++++++ drivers/gpu/drm/msm/msm_ringbuffer.h | 1 + 2 files changed, 44 insertions(+)
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c index aa53f47b7e8b..95ed2ceac121 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c @@ -79,6 +79,34 @@ static void get_stats_counter(struct msm_ringbuffer *ring, u32 counter, OUT_RING(ring, upper_32_bits(iova)); }
+static void a6xx_set_pagetable(struct msm_gpu *gpu, struct msm_ringbuffer *ring,
struct msm_file_private *ctx)
+{
phys_addr_t ttbr;
u32 asid;
if (msm_iommu_pagetable_params(ctx->aspace->mmu, &ttbr, &asid))
return;
/* Execute the table update */
OUT_PKT7(ring, CP_SMMU_TABLE_UPDATE, 4);
OUT_RING(ring, lower_32_bits(ttbr));
OUT_RING(ring, (((u64) asid) << 48) | upper_32_bits(ttbr));
/* CONTEXTIDR is currently unused */
OUT_RING(ring, 0);
/* CONTEXTBANK is currently unused */
OUT_RING(ring, 0);
/*
* Write the new TTBR0 to the memstore. This is good for debugging.
*/
OUT_PKT7(ring, CP_MEM_WRITE, 4);
OUT_RING(ring, lower_32_bits(rbmemptr(ring, ttbr0)));
OUT_RING(ring, upper_32_bits(rbmemptr(ring, ttbr0)));
OUT_RING(ring, lower_32_bits(ttbr));
OUT_RING(ring, (((u64) asid) << 48) | upper_32_bits(ttbr));
+}
static void a6xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, struct msm_file_private *ctx) { @@ -89,6 +117,8 @@ static void a6xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, struct msm_ringbuffer *ring = submit->ring; unsigned int i;
a6xx_set_pagetable(gpu, ring, ctx);
get_stats_counter(ring, REG_A6XX_RBBM_PERFCTR_CP_0_LO, rbmemptr_stats(ring, index, cpcycles_start));
@@ -872,6 +902,18 @@ static unsigned long a6xx_gpu_busy(struct msm_gpu *gpu) return (unsigned long)busy_time; }
+struct msm_gem_address_space *a6xx_address_space_instance(struct msm_gpu *gpu) +{
struct msm_mmu *mmu;
mmu = msm_iommu_pagetable_create(gpu->aspace->mmu);
if (IS_ERR(mmu))
return msm_gem_address_space_get(gpu->aspace);
return msm_gem_address_space_create(mmu,
"gpu", 0x100000000ULL, 0x1ffffffffULL);
+}
static const struct adreno_gpu_funcs funcs = { .base = { .get_param = adreno_get_param, @@ -895,6 +937,7 @@ static const struct adreno_gpu_funcs funcs = { .gpu_state_put = a6xx_gpu_state_put, #endif .create_address_space = adreno_iommu_create_address_space,
.address_space_instance = a6xx_address_space_instance,
Hmm, maybe instead of .address_space_instance, something like .create_context_address_space?
Since like .create_address_space, it is creating an address space.. the difference is that it is a per context/process aspace..
This is a good suggestion. I'm always open to changing function names.
or maybe just .create_pgtable and return the 'struct msm_mmu' (which is itself starting to become less of a great name)..
The only other thing a6xx_address_space_instance() adds is knowing where the split is between the kernel and user pgtables, and I suppose that isn't a thing that would really be changing between gens?
In theory the split is determined by the hardware but its been the same for all a5xx/a6xx targets.
Jordan
BR, -R
BR, -R
}, .get_timestamp = a6xx_get_timestamp,
}; diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.h b/drivers/gpu/drm/msm/msm_ringbuffer.h index 7764373d0ed2..0987d6bf848c 100644 --- a/drivers/gpu/drm/msm/msm_ringbuffer.h +++ b/drivers/gpu/drm/msm/msm_ringbuffer.h @@ -31,6 +31,7 @@ struct msm_rbmemptrs { volatile uint32_t fence;
volatile struct msm_gpu_submit_stats stats[MSM_GPU_SUBMIT_STATS_COUNT];
volatile u64 ttbr0;
};
struct msm_ringbuffer {
2.17.1
Freedreno mailing list Freedreno@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/freedreno
dri-devel@lists.freedesktop.org