On Wed, Mar 6, 2019 at 4:16 AM Eric Anholt eric@anholt.net wrote:
Qiang Yu yuq825@gmail.com writes:
- Mali 4xx GPUs have two kinds of processors GP and PP. GP is for OpenGL vertex shader processing and PP is for fragment shader processing. Each processor has its own MMU so prcessors work in virtual address space.
- There's only one GP but multiple PP (max 4 for mali 400 and 8 for mali 450) in the same mali 4xx GPU. All PPs are grouped togather to handle a single fragment shader task divided by FB output tiled pixels. Mali 400 user space driver is responsible for assign target tiled pixels to each PP, but mali 450 has a HW module called DLBU to dynamically balance each PP's load.
- User space driver allocate buffer object and map into GPU virtual address space, upload command stream and draw data with CPU mmap of the buffer object, then submit task to GP/PP with a register frame indicating where is the command stream and misc settings.
- There's no command stream validation/relocation due to each user process has its own GPU virtual address space. GP/PP's MMU switch virtual address space before running two tasks from different user process. Error or evil user space code just get MMU fault or GP/PP error IRQ, then the HW/SW will be recovered.
- Use GEM+shmem for MM. Currently just alloc and pin memory when gem object creation. GPU vm map of the buffer is also done in the alloc stage in kernel space. We may delay the memory allocation and real GPU vm map to command submission stage in the furture as improvement.
- Use drm_sched for GPU task schedule. Each OpenGL context should have a lima context object in the kernel to distinguish tasks from different user. drm_sched gets task from each lima context in a fair way.
Given the requirement for open source userspace for new DRM kernel drivers, it would be nice to see the link to your open source userspace with the submission so we can see how the interfaces get used. (I know I found it once before, but I don't remember now)
The link is in the cover letter of this patch serial, I'll add it too next time: https://gitlab.freedesktop.org/lima/mesa
However, other than a concern about the _pad fields in the UABI, I'm ready to add a reviewed-b.
--- drivers/gpu/drm/Kconfig | 2 + drivers/gpu/drm/Makefile | 1 + drivers/gpu/drm/lima/Kconfig | 10 + drivers/gpu/drm/lima/Makefile | 21 ++ drivers/gpu/drm/lima/lima_bcast.c | 47 +++ drivers/gpu/drm/lima/lima_bcast.h | 14 + drivers/gpu/drm/lima/lima_ctx.c | 97 ++++++ drivers/gpu/drm/lima/lima_ctx.h | 30 ++ drivers/gpu/drm/lima/lima_device.c | 385 +++++++++++++++++++++++ drivers/gpu/drm/lima/lima_device.h | 131 ++++++++ drivers/gpu/drm/lima/lima_dlbu.c | 58 ++++ drivers/gpu/drm/lima/lima_dlbu.h | 18 ++ drivers/gpu/drm/lima/lima_drv.c | 366 ++++++++++++++++++++++ drivers/gpu/drm/lima/lima_drv.h | 45 +++ drivers/gpu/drm/lima/lima_gem.c | 381 +++++++++++++++++++++++ drivers/gpu/drm/lima/lima_gem.h | 25 ++ drivers/gpu/drm/lima/lima_gem_prime.c | 47 +++ drivers/gpu/drm/lima/lima_gem_prime.h | 13 + drivers/gpu/drm/lima/lima_gp.c | 283 +++++++++++++++++ drivers/gpu/drm/lima/lima_gp.h | 16 + drivers/gpu/drm/lima/lima_l2_cache.c | 80 +++++ drivers/gpu/drm/lima/lima_l2_cache.h | 14 + drivers/gpu/drm/lima/lima_mmu.c | 142 +++++++++ drivers/gpu/drm/lima/lima_mmu.h | 16 + drivers/gpu/drm/lima/lima_object.c | 122 ++++++++ drivers/gpu/drm/lima/lima_object.h | 36 +++ drivers/gpu/drm/lima/lima_pmu.c | 60 ++++ drivers/gpu/drm/lima/lima_pmu.h | 12 + drivers/gpu/drm/lima/lima_pp.c | 424 ++++++++++++++++++++++++++ drivers/gpu/drm/lima/lima_pp.h | 19 ++ drivers/gpu/drm/lima/lima_regs.h | 298 ++++++++++++++++++ drivers/gpu/drm/lima/lima_sched.c | 404 ++++++++++++++++++++++++ drivers/gpu/drm/lima/lima_sched.h | 104 +++++++ drivers/gpu/drm/lima/lima_vm.c | 282 +++++++++++++++++ drivers/gpu/drm/lima/lima_vm.h | 62 ++++ include/uapi/drm/lima_drm.h | 139 +++++++++ 36 files changed, 4204 insertions(+) create mode 100644 drivers/gpu/drm/lima/Kconfig create mode 100644 drivers/gpu/drm/lima/Makefile create mode 100644 drivers/gpu/drm/lima/lima_bcast.c create mode 100644 drivers/gpu/drm/lima/lima_bcast.h create mode 100644 drivers/gpu/drm/lima/lima_ctx.c create mode 100644 drivers/gpu/drm/lima/lima_ctx.h create mode 100644 drivers/gpu/drm/lima/lima_device.c create mode 100644 drivers/gpu/drm/lima/lima_device.h create mode 100644 drivers/gpu/drm/lima/lima_dlbu.c create mode 100644 drivers/gpu/drm/lima/lima_dlbu.h create mode 100644 drivers/gpu/drm/lima/lima_drv.c create mode 100644 drivers/gpu/drm/lima/lima_drv.h create mode 100644 drivers/gpu/drm/lima/lima_gem.c create mode 100644 drivers/gpu/drm/lima/lima_gem.h create mode 100644 drivers/gpu/drm/lima/lima_gem_prime.c create mode 100644 drivers/gpu/drm/lima/lima_gem_prime.h create mode 100644 drivers/gpu/drm/lima/lima_gp.c create mode 100644 drivers/gpu/drm/lima/lima_gp.h create mode 100644 drivers/gpu/drm/lima/lima_l2_cache.c create mode 100644 drivers/gpu/drm/lima/lima_l2_cache.h create mode 100644 drivers/gpu/drm/lima/lima_mmu.c create mode 100644 drivers/gpu/drm/lima/lima_mmu.h create mode 100644 drivers/gpu/drm/lima/lima_object.c create mode 100644 drivers/gpu/drm/lima/lima_object.h create mode 100644 drivers/gpu/drm/lima/lima_pmu.c create mode 100644 drivers/gpu/drm/lima/lima_pmu.h create mode 100644 drivers/gpu/drm/lima/lima_pp.c create mode 100644 drivers/gpu/drm/lima/lima_pp.h create mode 100644 drivers/gpu/drm/lima/lima_regs.h create mode 100644 drivers/gpu/drm/lima/lima_sched.c create mode 100644 drivers/gpu/drm/lima/lima_sched.h create mode 100644 drivers/gpu/drm/lima/lima_vm.c create mode 100644 drivers/gpu/drm/lima/lima_vm.h create mode 100644 include/uapi/drm/lima_drm.h
diff --git a/drivers/gpu/drm/lima/lima_sched.c b/drivers/gpu/drm/lima/lima_sched.c new file mode 100644 index 000000000000..606e8aad2a82 --- /dev/null +++ b/drivers/gpu/drm/lima/lima_sched.c @@ -0,0 +1,404 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT +/* Copyright 2017-2019 Qiang Yu yuq825@gmail.com */
+#include <linux/kthread.h> +#include <linux/slab.h>
+#include "lima_drv.h" +#include "lima_sched.h" +#include "lima_vm.h" +#include "lima_mmu.h" +#include "lima_l2_cache.h" +#include "lima_object.h"
+struct lima_fence {
struct dma_fence base;
struct lima_sched_pipe *pipe;
+};
+static struct kmem_cache *lima_fence_slab;
+int lima_sched_slab_init(void) +{
lima_fence_slab = kmem_cache_create(
"lima_fence", sizeof(struct lima_fence), 0,
SLAB_HWCACHE_ALIGN, NULL);
if (!lima_fence_slab)
return -ENOMEM;
return 0;
+}
+void lima_sched_slab_fini(void) +{
kmem_cache_destroy(lima_fence_slab);
+}
+static inline struct lima_fence *to_lima_fence(struct dma_fence *fence) +{
return container_of(fence, struct lima_fence, base);
+}
+static const char *lima_fence_get_driver_name(struct dma_fence *fence) +{
return "lima";
+}
+static const char *lima_fence_get_timeline_name(struct dma_fence *fence) +{
struct lima_fence *f = to_lima_fence(fence);
return f->pipe->base.name;
+}
+static bool lima_fence_enable_signaling(struct dma_fence *fence) +{
return true;
+}
+static void lima_fence_release_rcu(struct rcu_head *rcu) +{
struct dma_fence *f = container_of(rcu, struct dma_fence, rcu);
struct lima_fence *fence = to_lima_fence(f);
kmem_cache_free(lima_fence_slab, fence);
+}
+static void lima_fence_release(struct dma_fence *fence) +{
struct lima_fence *f = to_lima_fence(fence);
call_rcu(&f->base.rcu, lima_fence_release_rcu);
+}
+static const struct dma_fence_ops lima_fence_ops = {
.get_driver_name = lima_fence_get_driver_name,
.get_timeline_name = lima_fence_get_timeline_name,
.enable_signaling = lima_fence_enable_signaling,
.wait = dma_fence_default_wait,
.release = lima_fence_release,
+};
You can delete .enable_signaling and .wait now (See 418cc6ca0607 ("dma-fence: Make ->wait callback optional"))
diff --git a/include/uapi/drm/lima_drm.h b/include/uapi/drm/lima_drm.h new file mode 100644 index 000000000000..705723a64b5f --- /dev/null +++ b/include/uapi/drm/lima_drm.h @@ -0,0 +1,139 @@ +/* SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) OR MIT */ +/* Copyright 2017-2018 Qiang Yu yuq825@gmail.com */
+#ifndef __LIMA_DRM_H__ +#define __LIMA_DRM_H__
+#include "drm.h"
+#if defined(__cplusplus) +extern "C" { +#endif
+enum drm_lima_param_gpu_id {
DRM_LIMA_PARAM_GPU_ID_UNKNOWN,
DRM_LIMA_PARAM_GPU_ID_MALI400,
DRM_LIMA_PARAM_GPU_ID_MALI450,
+};
+enum drm_lima_param {
DRM_LIMA_PARAM_GPU_ID,
DRM_LIMA_PARAM_NUM_PP,
DRM_LIMA_PARAM_GP_VERSION,
DRM_LIMA_PARAM_PP_VERSION,
+};
+struct drm_lima_get_param {
__u32 param; /* in */
__u32 pad;
__u64 value; /* out */
+};
+struct drm_lima_gem_create {
__u32 size; /* in */
__u32 flags; /* in */
__u32 handle; /* out */
__u32 pad;
+};
Might be nice to pass the offset back out from create like I did in v3d. It's convenient to not need an immediate GEM_INFO. Totally optional suggestion, though.
I thought this way, but gem_info can't be removed even embedded into gem_create due to dmabuf import case, and better to keep info in a single place so didn't do this. But indeed immediate gem_info is a wast of syscall.
+struct drm_lima_gem_info {
__u32 handle; /* in */
__u32 va; /* out */
__u64 offset; /* out */
+};
+#define LIMA_SUBMIT_BO_READ 0x01 +#define LIMA_SUBMIT_BO_WRITE 0x02
+struct drm_lima_gem_submit_bo {
__u32 handle; /* in */
__u32 flags; /* in */
+};
+#define LIMA_GP_FRAME_REG_NUM 6
+struct drm_lima_gp_frame {
__u32 frame[LIMA_GP_FRAME_REG_NUM];
+};
+#define LIMA_PP_FRAME_REG_NUM 23 +#define LIMA_PP_WB_REG_NUM 12
+struct drm_lima_m400_pp_frame {
__u32 frame[LIMA_PP_FRAME_REG_NUM];
__u32 num_pp;
__u32 wb[3 * LIMA_PP_WB_REG_NUM];
__u32 plbu_array_address[4];
__u32 fragment_stack_address[4];
+};
+struct drm_lima_m450_pp_frame {
__u32 frame[LIMA_PP_FRAME_REG_NUM];
__u32 num_pp;
__u32 wb[3 * LIMA_PP_WB_REG_NUM];
__u32 use_dlbu;
__u32 _pad;
union {
__u32 plbu_array_address[8];
__u32 dlbu_regs[4];
};
__u32 fragment_stack_address[8];
+};
+#define LIMA_PIPE_GP 0x00 +#define LIMA_PIPE_PP 0x01
+#define LIMA_SUBMIT_FLAG_EXPLICIT_FENCE (1 << 0)
+struct drm_lima_gem_submit {
__u32 ctx; /* in */
__u32 pipe; /* in */
__u32 nr_bos; /* in */
__u32 frame_size; /* in */
__u64 bos; /* in */
__u64 frame; /* in */
__u32 flags; /* in */
__u32 out_sync; /* in */
__u32 in_sync[2]; /* in */
+};
+#define LIMA_GEM_WAIT_READ 0x01 +#define LIMA_GEM_WAIT_WRITE 0x02
+struct drm_lima_gem_wait {
__u32 handle; /* in */
__u32 op; /* in */
__s64 timeout_ns; /* in */
Add a comment that it's an absolute ns?
+};
+struct drm_lima_ctx_create {
__u32 id; /* out */
__u32 _pad;
+};
+struct drm_lima_ctx_free {
__u32 id; /* in */
__u32 _pad;
+};
I don't think you need the _pad fields here, and they're actually a bad idea because the lack of checking in your ioctls means you can't trust that userspace has initialized them to 0 when you want to redefine them as a flags field later.
Could I drop the _pad? I thought there is a rule that all drm ioctl arg size should be kept 64bit, is there?
Regards, Qiang