Re: [PATCH v6 2/2] drm/lima: driver for ARM Mali4xx GPUs

6 Mar 2019

On Wed, Mar 6, 2019 at 4:16 AM Eric Anholt eric@anholt.net wrote:
...
Qiang Yu yuq825@gmail.com writes:
...

Mali 4xx GPUs have two kinds of processors GP and PP. GP is for
OpenGL vertex shader processing and PP is for fragment shader
processing. Each processor has its own MMU so processors work in
virtual address space.
There's only one GP but multiple PP (max 4 for mali 400 and 8
for mali 450) in the same mali 4xx GPU. All PPs are grouped
together to handle a single fragment shader task divided by
FB output tiled pixels. Mali 400 user space driver is
responsible for assigning target tiled pixels to each PP, but mali
450 has a HW module called DLBU to dynamically balance each
PP's load.
User space driver allocates buffer objects and maps them into GPU
virtual address space, uploads command stream and draw data with
CPU mmap of the buffer object, then submits a task to GP/PP with
a register frame indicating where the command stream is and misc
settings.
There's no command stream validation/relocation because each user
process has its own GPU virtual address space. GP/PP's MMU switches
virtual address space before running two tasks from different
user processes. Erroneous or evil user space code just gets an MMU fault
or GP/PP error IRQ, then the HW/SW will be recovered.
Use GEM+shmem for MM. Currently just alloc and pin memory at
gem object creation. GPU vm map of the buffer is also done in
the alloc stage in kernel space. We may delay the memory
allocation and real GPU vm map to command submission stage in the
future as an improvement.
Use drm_sched for GPU task scheduling. Each OpenGL context should
have a lima context object in the kernel to distinguish tasks
from different users. drm_sched gets tasks from each lima context
in a fair way.

Given the requirement for open source userspace for new DRM kernel
drivers, it would be nice to see the link to your open source userspace
with the submission so we can see how the interfaces get used.  (I know
I found it once before, but I don't remember now)
The link is in the cover letter of this patch series, I'll add it too next time:
https://gitlab.freedesktop.org/lima/mesa
...
However, other than a concern about the _pad fields in the UABI, I'm
ready to add a reviewed-by.
...
--- drivers/gpu/drm/Kconfig | 2 + drivers/gpu/drm/Makefile | 1 +
drivers/gpu/drm/lima/Kconfig | 10 + drivers/gpu/drm/lima/Makefile | 21
++ drivers/gpu/drm/lima/lima_bcast.c | 47 +++
drivers/gpu/drm/lima/lima_bcast.h | 14 +
drivers/gpu/drm/lima/lima_ctx.c | 97 ++++++
drivers/gpu/drm/lima/lima_ctx.h | 30 ++
drivers/gpu/drm/lima/lima_device.c | 385 +++++++++++++++++++++++
drivers/gpu/drm/lima/lima_device.h | 131 ++++++++
drivers/gpu/drm/lima/lima_dlbu.c | 58 ++++
drivers/gpu/drm/lima/lima_dlbu.h | 18 ++
drivers/gpu/drm/lima/lima_drv.c | 366 ++++++++++++++++++++++
drivers/gpu/drm/lima/lima_drv.h | 45 +++
drivers/gpu/drm/lima/lima_gem.c | 381 +++++++++++++++++++++++
drivers/gpu/drm/lima/lima_gem.h | 25 ++
drivers/gpu/drm/lima/lima_gem_prime.c | 47 +++
drivers/gpu/drm/lima/lima_gem_prime.h | 13 +
drivers/gpu/drm/lima/lima_gp.c | 283 +++++++++++++++++
drivers/gpu/drm/lima/lima_gp.h | 16 +
drivers/gpu/drm/lima/lima_l2_cache.c | 80 +++++
drivers/gpu/drm/lima/lima_l2_cache.h | 14 +
drivers/gpu/drm/lima/lima_mmu.c | 142 +++++++++
drivers/gpu/drm/lima/lima_mmu.h | 16 +
drivers/gpu/drm/lima/lima_object.c | 122 ++++++++
drivers/gpu/drm/lima/lima_object.h | 36 +++
drivers/gpu/drm/lima/lima_pmu.c | 60 ++++
drivers/gpu/drm/lima/lima_pmu.h | 12 + drivers/gpu/drm/lima/lima_pp.c
| 424 ++++++++++++++++++++++++++ drivers/gpu/drm/lima/lima_pp.h | 19
++ drivers/gpu/drm/lima/lima_regs.h | 298 ++++++++++++++++++
drivers/gpu/drm/lima/lima_sched.c | 404 ++++++++++++++++++++++++
drivers/gpu/drm/lima/lima_sched.h | 104 +++++++
drivers/gpu/drm/lima/lima_vm.c | 282 +++++++++++++++++
drivers/gpu/drm/lima/lima_vm.h | 62 ++++ include/uapi/drm/lima_drm.h |
139 +++++++++ 36 files changed, 4204 insertions(+) create mode 100644
drivers/gpu/drm/lima/Kconfig create mode 100644
drivers/gpu/drm/lima/Makefile create mode 100644
drivers/gpu/drm/lima/lima_bcast.c create mode 100644
drivers/gpu/drm/lima/lima_bcast.h create mode 100644
drivers/gpu/drm/lima/lima_ctx.c create mode 100644
drivers/gpu/drm/lima/lima_ctx.h create mode 100644
drivers/gpu/drm/lima/lima_device.c create mode 100644
drivers/gpu/drm/lima/lima_device.h create mode 100644
drivers/gpu/drm/lima/lima_dlbu.c create mode 100644
drivers/gpu/drm/lima/lima_dlbu.h create mode 100644
drivers/gpu/drm/lima/lima_drv.c create mode 100644
drivers/gpu/drm/lima/lima_drv.h create mode 100644
drivers/gpu/drm/lima/lima_gem.c create mode 100644
drivers/gpu/drm/lima/lima_gem.h create mode 100644
drivers/gpu/drm/lima/lima_gem_prime.c create mode 100644
drivers/gpu/drm/lima/lima_gem_prime.h create mode 100644
drivers/gpu/drm/lima/lima_gp.c create mode 100644
drivers/gpu/drm/lima/lima_gp.h create mode 100644
drivers/gpu/drm/lima/lima_l2_cache.c create mode 100644
drivers/gpu/drm/lima/lima_l2_cache.h create mode 100644
drivers/gpu/drm/lima/lima_mmu.c create mode 100644
drivers/gpu/drm/lima/lima_mmu.h create mode 100644
drivers/gpu/drm/lima/lima_object.c create mode 100644
drivers/gpu/drm/lima/lima_object.h create mode 100644
drivers/gpu/drm/lima/lima_pmu.c create mode 100644
drivers/gpu/drm/lima/lima_pmu.h create mode 100644
drivers/gpu/drm/lima/lima_pp.c create mode 100644
drivers/gpu/drm/lima/lima_pp.h create mode 100644
drivers/gpu/drm/lima/lima_regs.h create mode 100644
drivers/gpu/drm/lima/lima_sched.c create mode 100644
drivers/gpu/drm/lima/lima_sched.h create mode 100644
drivers/gpu/drm/lima/lima_vm.c create mode 100644
drivers/gpu/drm/lima/lima_vm.h create mode 100644
include/uapi/drm/lima_drm.h
...

diff --git a/drivers/gpu/drm/lima/lima_sched.c b/drivers/gpu/drm/lima/lima_sched.c
new file mode 100644
index 000000000000..606e8aad2a82
--- /dev/null
+++ b/drivers/gpu/drm/lima/lima_sched.c
@@ -0,0 +1,404 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/* Copyright 2017-2019 Qiang Yu yuq825@gmail.com */



+#include <linux/kthread.h>
+#include <linux/slab.h>



+#include "lima_drv.h"
+#include "lima_sched.h"
+#include "lima_vm.h"
+#include "lima_mmu.h"
+#include "lima_l2_cache.h"
+#include "lima_object.h"



+struct lima_fence {

struct dma_fence base;


struct lima_sched_pipe *pipe;



+};



+static struct kmem_cache *lima_fence_slab;



+int lima_sched_slab_init(void)
+{

lima_fence_slab = kmem_cache_create(


        "lima_fence", sizeof(struct lima_fence), 0,


        SLAB_HWCACHE_ALIGN, NULL);


if (!lima_fence_slab)


        return -ENOMEM;



return 0;



+}



+void lima_sched_slab_fini(void)
+{

kmem_cache_destroy(lima_fence_slab);



+}



+static inline struct lima_fence *to_lima_fence(struct dma_fence *fence)
+{

return container_of(fence, struct lima_fence, base);



+}



+static const char *lima_fence_get_driver_name(struct dma_fence *fence)
+{

return "lima";



+}



+static const char *lima_fence_get_timeline_name(struct dma_fence *fence)
+{

struct lima_fence *f = to_lima_fence(fence);



return f->pipe->base.name;



+}



+static bool lima_fence_enable_signaling(struct dma_fence *fence)
+{

return true;



+}



+static void lima_fence_release_rcu(struct rcu_head *rcu)
+{

struct dma_fence *f = container_of(rcu, struct dma_fence, rcu);


struct lima_fence *fence = to_lima_fence(f);



kmem_cache_free(lima_fence_slab, fence);



+}



+static void lima_fence_release(struct dma_fence *fence)
+{

struct lima_fence *f = to_lima_fence(fence);



call_rcu(&f->base.rcu, lima_fence_release_rcu);



+}



+static const struct dma_fence_ops lima_fence_ops = {

.get_driver_name = lima_fence_get_driver_name,


.get_timeline_name = lima_fence_get_timeline_name,


.enable_signaling = lima_fence_enable_signaling,


.wait = dma_fence_default_wait,


.release = lima_fence_release,



+};
You can delete .enable_signaling and .wait now (See 418cc6ca0607
("dma-fence: Make ->wait callback optional"))
...
diff --git a/include/uapi/drm/lima_drm.h b/include/uapi/drm/lima_drm.h
new file mode 100644
index 000000000000..705723a64b5f
--- /dev/null
+++ b/include/uapi/drm/lima_drm.h
@@ -0,0 +1,139 @@
+/* SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) OR MIT */
+/* Copyright 2017-2018 Qiang Yu yuq825@gmail.com */



+#ifndef __LIMA_DRM_H__
+#define __LIMA_DRM_H__



+#include "drm.h"



+#if defined(__cplusplus)
+extern "C" {
+#endif



+enum drm_lima_param_gpu_id {

DRM_LIMA_PARAM_GPU_ID_UNKNOWN,


DRM_LIMA_PARAM_GPU_ID_MALI400,


DRM_LIMA_PARAM_GPU_ID_MALI450,



+};



+enum drm_lima_param {

DRM_LIMA_PARAM_GPU_ID,


DRM_LIMA_PARAM_NUM_PP,


DRM_LIMA_PARAM_GP_VERSION,


DRM_LIMA_PARAM_PP_VERSION,



+};



+struct drm_lima_get_param {

__u32 param; /* in */


__u32 pad;


__u64 value; /* out */



+};



+struct drm_lima_gem_create {

__u32 size;    /* in */


__u32 flags;   /* in */


__u32 handle;  /* out */


__u32 pad;



+};
Might be nice to pass the offset back out from create like I did in v3d.
It's convenient to not need an immediate GEM_INFO.  Totally optional
suggestion, though.
I thought about this, but gem_info can't be removed even if it is embedded
into gem_create due to the dmabuf import case, and it is better to keep the info
in a single place, so I didn't do this. But indeed an immediate gem_info
is a waste of a syscall.
...
...



+struct drm_lima_gem_info {

__u32 handle;  /* in */


__u32 va;      /* out */


__u64 offset;  /* out */



+};



+#define LIMA_SUBMIT_BO_READ   0x01
+#define LIMA_SUBMIT_BO_WRITE  0x02



+struct drm_lima_gem_submit_bo {

__u32 handle;  /* in */


__u32 flags;   /* in */



+};



+#define LIMA_GP_FRAME_REG_NUM 6



+struct drm_lima_gp_frame {

__u32 frame[LIMA_GP_FRAME_REG_NUM];



+};



+#define LIMA_PP_FRAME_REG_NUM 23
+#define LIMA_PP_WB_REG_NUM 12



+struct drm_lima_m400_pp_frame {

__u32 frame[LIMA_PP_FRAME_REG_NUM];


__u32 num_pp;


__u32 wb[3 * LIMA_PP_WB_REG_NUM];


__u32 plbu_array_address[4];


__u32 fragment_stack_address[4];



+};



+struct drm_lima_m450_pp_frame {

__u32 frame[LIMA_PP_FRAME_REG_NUM];


__u32 num_pp;


__u32 wb[3 * LIMA_PP_WB_REG_NUM];


__u32 use_dlbu;


__u32 _pad;


union {


        __u32 plbu_array_address[8];


        __u32 dlbu_regs[4];


};


__u32 fragment_stack_address[8];



+};



+#define LIMA_PIPE_GP  0x00
+#define LIMA_PIPE_PP  0x01



+#define LIMA_SUBMIT_FLAG_EXPLICIT_FENCE (1 << 0)



+struct drm_lima_gem_submit {

__u32 ctx;         /* in */


__u32 pipe;        /* in */


__u32 nr_bos;      /* in */


__u32 frame_size;  /* in */


__u64 bos;         /* in */


__u64 frame;       /* in */


__u32 flags;       /* in */


__u32 out_sync;    /* in */


__u32 in_sync[2];  /* in */



+};



+#define LIMA_GEM_WAIT_READ   0x01
+#define LIMA_GEM_WAIT_WRITE  0x02



+struct drm_lima_gem_wait {

__u32 handle;      /* in */


__u32 op;          /* in */


__s64 timeout_ns;  /* in */



Add a comment that it's an absolute ns?
...
+};



+struct drm_lima_ctx_create {

__u32 id;          /* out */


__u32 _pad;



+};



+struct drm_lima_ctx_free {

__u32 id;          /* in */


__u32 _pad;



+};
I don't think you need the _pad fields here, and they're actually a bad
idea because the lack of checking in your ioctls means you can't trust
that userspace has initialized them to 0 when you want to redefine them
as a flags field later.
Could I drop the _pad? I thought there is a rule that all drm ioctl arg size
should be kept 64bit, is there?
Regards,
Qiang

    

2025

2024

2023

2022

2021

2020

2019

2018

2017

2016

2015

2014

2013

2012

2011

2010

Re: [PATCH v6 2/2] drm/lima: driver for ARM Mali4xx GPUs