At long last, I've got loops passing the piglit tests. (4 tests fail due to register allocation, and one fails for reasons I haven't debugged yet, but that's still a lot of new tests passing).
The userspace side of loops can be found at http://github.com/anholt/mesa/tree/vc4-loops and I'll try to get an identity getparam patch sent out tomorrow.
Eric Anholt (7):
  drm/vc4: Add a getparam ioctl for getting the V3D identity regs.
  drm/vc4: Move validation's current/max ip into the validation struct.
  drm/vc4: Add a bitmap of branch targets during shader validation.
  drm/vc4: Add support for branching in shader validation.
  drm/vc4: Add a getparam to signal support for branches.
  drm/vc4: Fix definition of QPU_R_MS_REV_FLAGS
  drm/vc4: Fix a "the the" typo in a comment.
 drivers/gpu/drm/vc4/vc4_drv.c              |  28 ++
 drivers/gpu/drm/vc4/vc4_drv.h              |   3 +
 drivers/gpu/drm/vc4/vc4_qpu_defines.h      |  17 +-
 drivers/gpu/drm/vc4/vc4_validate.c         |  13 +-
 drivers/gpu/drm/vc4/vc4_validate_shaders.c | 449 ++++++++++++++++++++++++++---
 include/uapi/drm/vc4_drm.h                 |  12 +
 6 files changed, 478 insertions(+), 44 deletions(-)
As I extend the driver to support different V3D revisions, userspace needs to know what version it's targeting. This is most easily detected using the V3D identity registers.
Signed-off-by: Eric Anholt <eric@anholt.net>
---
 drivers/gpu/drm/vc4/vc4_drv.c | 25 +++++++++++++++++++++++++
 include/uapi/drm/vc4_drm.h    | 11 +++++++++++
 2 files changed, 36 insertions(+)
diff --git a/drivers/gpu/drm/vc4/vc4_drv.c b/drivers/gpu/drm/vc4/vc4_drv.c index 3446ece21b4a..8a2f2b5a8f32 100644 --- a/drivers/gpu/drm/vc4/vc4_drv.c +++ b/drivers/gpu/drm/vc4/vc4_drv.c @@ -43,6 +43,30 @@ void __iomem *vc4_ioremap_regs(struct platform_device *dev, int index) return map; }
+static int vc4_get_param_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct vc4_dev *vc4 = to_vc4_dev(dev); + struct drm_vc4_get_param *args = data; + + switch (args->param) { + case DRM_VC4_PARAM_V3D_IDENT0: + args->value = V3D_READ(V3D_IDENT0); + break; + case DRM_VC4_PARAM_V3D_IDENT1: + args->value = V3D_READ(V3D_IDENT1); + break; + case DRM_VC4_PARAM_V3D_IDENT2: + args->value = V3D_READ(V3D_IDENT2); + break; + default: + DRM_DEBUG("Unknown parameter %d\n", args->param); + return -EINVAL; + } + + return 0; +} + static void vc4_lastclose(struct drm_device *dev) { struct vc4_dev *vc4 = to_vc4_dev(dev); @@ -74,6 +98,7 @@ static const struct drm_ioctl_desc vc4_drm_ioctls[] = { DRM_IOCTL_DEF_DRV(VC4_CREATE_SHADER_BO, vc4_create_shader_bo_ioctl, 0), DRM_IOCTL_DEF_DRV(VC4_GET_HANG_STATE, vc4_get_hang_state_ioctl, DRM_ROOT_ONLY), + DRM_IOCTL_DEF_DRV(VC4_GET_PARAM, vc4_get_param_ioctl, DRM_RENDER_ALLOW), };
static struct drm_driver vc4_drm_driver = { diff --git a/include/uapi/drm/vc4_drm.h b/include/uapi/drm/vc4_drm.h index af12e8a184c8..c4427d31b346 100644 --- a/include/uapi/drm/vc4_drm.h +++ b/include/uapi/drm/vc4_drm.h @@ -37,6 +37,7 @@ extern "C" { #define DRM_VC4_MMAP_BO 0x04 #define DRM_VC4_CREATE_SHADER_BO 0x05 #define DRM_VC4_GET_HANG_STATE 0x06 +#define DRM_VC4_GET_PARAM 0x07
#define DRM_IOCTL_VC4_SUBMIT_CL DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_SUBMIT_CL, struct drm_vc4_submit_cl) #define DRM_IOCTL_VC4_WAIT_SEQNO DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_WAIT_SEQNO, struct drm_vc4_wait_seqno) @@ -45,6 +46,7 @@ extern "C" { #define DRM_IOCTL_VC4_MMAP_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_MMAP_BO, struct drm_vc4_mmap_bo) #define DRM_IOCTL_VC4_CREATE_SHADER_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_CREATE_SHADER_BO, struct drm_vc4_create_shader_bo) #define DRM_IOCTL_VC4_GET_HANG_STATE DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_GET_HANG_STATE, struct drm_vc4_get_hang_state) +#define DRM_IOCTL_VC4_GET_PARAM DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_GET_PARAM, struct drm_vc4_get_param)
struct drm_vc4_submit_rcl_surface { __u32 hindex; /* Handle index, or ~0 if not present. */ @@ -280,6 +282,15 @@ struct drm_vc4_get_hang_state { __u32 pad[16]; };
+#define DRM_VC4_PARAM_V3D_IDENT0 0 +#define DRM_VC4_PARAM_V3D_IDENT1 1 +#define DRM_VC4_PARAM_V3D_IDENT2 2 + +struct drm_vc4_get_param { + __u32 param; + __u32 value; +}; + #if defined(__cplusplus) } #endif
As I extend the driver to support different V3D revisions, userspace needs to know what version it's targeting. This is most easily detected using the V3D identity registers.
v2: Make sure V3D is runtime PM on when reading the registers.
Signed-off-by: Eric Anholt <eric@anholt.net>
---
 drivers/gpu/drm/vc4/vc4_drv.c | 39 +++++++++++++++++++++++++++++++++++++++
 include/uapi/drm/vc4_drm.h    | 11 +++++++++++
 2 files changed, 50 insertions(+)
diff --git a/drivers/gpu/drm/vc4/vc4_drv.c b/drivers/gpu/drm/vc4/vc4_drv.c index 2f30214ee810..56d43f5f38fb 100644 --- a/drivers/gpu/drm/vc4/vc4_drv.c +++ b/drivers/gpu/drm/vc4/vc4_drv.c @@ -14,6 +14,7 @@ #include <linux/module.h> #include <linux/of_platform.h> #include <linux/platform_device.h> +#include <linux/pm_runtime.h> #include "drm_fb_cma_helper.h"
#include "uapi/drm/vc4_drm.h" @@ -43,6 +44,43 @@ void __iomem *vc4_ioremap_regs(struct platform_device *dev, int index) return map; }
+static int vc4_get_param_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct vc4_dev *vc4 = to_vc4_dev(dev); + struct drm_vc4_get_param *args = data; + int ret; + + switch (args->param) { + case DRM_VC4_PARAM_V3D_IDENT0: + ret = pm_runtime_get_sync(&vc4->v3d->pdev->dev); + if (ret) + return ret; + args->value = V3D_READ(V3D_IDENT0); + pm_runtime_put(&vc4->v3d->pdev->dev); + break; + case DRM_VC4_PARAM_V3D_IDENT1: + ret = pm_runtime_get_sync(&vc4->v3d->pdev->dev); + if (ret) + return ret; + args->value = V3D_READ(V3D_IDENT1); + pm_runtime_put(&vc4->v3d->pdev->dev); + break; + case DRM_VC4_PARAM_V3D_IDENT2: + ret = pm_runtime_get_sync(&vc4->v3d->pdev->dev); + if (ret) + return ret; + args->value = V3D_READ(V3D_IDENT2); + pm_runtime_put(&vc4->v3d->pdev->dev); + break; + default: + DRM_DEBUG("Unknown parameter %d\n", args->param); + return -EINVAL; + } + + return 0; +} + static void vc4_lastclose(struct drm_device *dev) { struct vc4_dev *vc4 = to_vc4_dev(dev); @@ -74,6 +112,7 @@ static const struct drm_ioctl_desc vc4_drm_ioctls[] = { DRM_IOCTL_DEF_DRV(VC4_CREATE_SHADER_BO, vc4_create_shader_bo_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(VC4_GET_HANG_STATE, vc4_get_hang_state_ioctl, DRM_ROOT_ONLY), + DRM_IOCTL_DEF_DRV(VC4_GET_PARAM, vc4_get_param_ioctl, DRM_RENDER_ALLOW), };
static struct drm_driver vc4_drm_driver = { diff --git a/include/uapi/drm/vc4_drm.h b/include/uapi/drm/vc4_drm.h index af12e8a184c8..c4427d31b346 100644 --- a/include/uapi/drm/vc4_drm.h +++ b/include/uapi/drm/vc4_drm.h @@ -37,6 +37,7 @@ extern "C" { #define DRM_VC4_MMAP_BO 0x04 #define DRM_VC4_CREATE_SHADER_BO 0x05 #define DRM_VC4_GET_HANG_STATE 0x06 +#define DRM_VC4_GET_PARAM 0x07
#define DRM_IOCTL_VC4_SUBMIT_CL DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_SUBMIT_CL, struct drm_vc4_submit_cl) #define DRM_IOCTL_VC4_WAIT_SEQNO DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_WAIT_SEQNO, struct drm_vc4_wait_seqno) @@ -45,6 +46,7 @@ extern "C" { #define DRM_IOCTL_VC4_MMAP_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_MMAP_BO, struct drm_vc4_mmap_bo) #define DRM_IOCTL_VC4_CREATE_SHADER_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_CREATE_SHADER_BO, struct drm_vc4_create_shader_bo) #define DRM_IOCTL_VC4_GET_HANG_STATE DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_GET_HANG_STATE, struct drm_vc4_get_hang_state) +#define DRM_IOCTL_VC4_GET_PARAM DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_GET_PARAM, struct drm_vc4_get_param)
struct drm_vc4_submit_rcl_surface { __u32 hindex; /* Handle index, or ~0 if not present. */ @@ -280,6 +282,15 @@ struct drm_vc4_get_hang_state { __u32 pad[16]; };
+#define DRM_VC4_PARAM_V3D_IDENT0 0 +#define DRM_VC4_PARAM_V3D_IDENT1 1 +#define DRM_VC4_PARAM_V3D_IDENT2 2 + +struct drm_vc4_get_param { + __u32 param; + __u32 value; +}; + #if defined(__cplusplus) } #endif
As I extend the driver to support different V3D revisions, userspace needs to know what version it's targeting. This is most easily detected using the V3D identity registers.
v2: Make sure V3D is runtime PM on when reading the registers.
v3: Switch to a 64-bit param value (suggested by Rob Clark in review)
Signed-off-by: Eric Anholt <eric@anholt.net>
Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch> (v2)
Reviewed-by: Rob Clark <robdclark@gmail.com> (v3, over irc)
---
 drivers/gpu/drm/vc4/vc4_drv.c | 42 ++++++++++++++++++++++++++++++++++++++++++
 include/uapi/drm/vc4_drm.h    | 12 ++++++++++++
 2 files changed, 54 insertions(+)
diff --git a/drivers/gpu/drm/vc4/vc4_drv.c b/drivers/gpu/drm/vc4/vc4_drv.c index 2f30214ee810..047d7a265ceb 100644 --- a/drivers/gpu/drm/vc4/vc4_drv.c +++ b/drivers/gpu/drm/vc4/vc4_drv.c @@ -14,6 +14,7 @@ #include <linux/module.h> #include <linux/of_platform.h> #include <linux/platform_device.h> +#include <linux/pm_runtime.h> #include "drm_fb_cma_helper.h"
#include "uapi/drm/vc4_drm.h" @@ -43,6 +44,46 @@ void __iomem *vc4_ioremap_regs(struct platform_device *dev, int index) return map; }
+static int vc4_get_param_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct vc4_dev *vc4 = to_vc4_dev(dev); + struct drm_vc4_get_param *args = data; + int ret; + + if (args->pad != 0) + return -EINVAL; + + switch (args->param) { + case DRM_VC4_PARAM_V3D_IDENT0: + ret = pm_runtime_get_sync(&vc4->v3d->pdev->dev); + if (ret) + return ret; + args->value = V3D_READ(V3D_IDENT0); + pm_runtime_put(&vc4->v3d->pdev->dev); + break; + case DRM_VC4_PARAM_V3D_IDENT1: + ret = pm_runtime_get_sync(&vc4->v3d->pdev->dev); + if (ret) + return ret; + args->value = V3D_READ(V3D_IDENT1); + pm_runtime_put(&vc4->v3d->pdev->dev); + break; + case DRM_VC4_PARAM_V3D_IDENT2: + ret = pm_runtime_get_sync(&vc4->v3d->pdev->dev); + if (ret) + return ret; + args->value = V3D_READ(V3D_IDENT2); + pm_runtime_put(&vc4->v3d->pdev->dev); + break; + default: + DRM_DEBUG("Unknown parameter %d\n", args->param); + return -EINVAL; + } + + return 0; +} + static void vc4_lastclose(struct drm_device *dev) { struct vc4_dev *vc4 = to_vc4_dev(dev); @@ -74,6 +115,7 @@ static const struct drm_ioctl_desc vc4_drm_ioctls[] = { DRM_IOCTL_DEF_DRV(VC4_CREATE_SHADER_BO, vc4_create_shader_bo_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(VC4_GET_HANG_STATE, vc4_get_hang_state_ioctl, DRM_ROOT_ONLY), + DRM_IOCTL_DEF_DRV(VC4_GET_PARAM, vc4_get_param_ioctl, DRM_RENDER_ALLOW), };
static struct drm_driver vc4_drm_driver = { diff --git a/include/uapi/drm/vc4_drm.h b/include/uapi/drm/vc4_drm.h index af12e8a184c8..1143e954048d 100644 --- a/include/uapi/drm/vc4_drm.h +++ b/include/uapi/drm/vc4_drm.h @@ -37,6 +37,7 @@ extern "C" { #define DRM_VC4_MMAP_BO 0x04 #define DRM_VC4_CREATE_SHADER_BO 0x05 #define DRM_VC4_GET_HANG_STATE 0x06 +#define DRM_VC4_GET_PARAM 0x07
#define DRM_IOCTL_VC4_SUBMIT_CL DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_SUBMIT_CL, struct drm_vc4_submit_cl) #define DRM_IOCTL_VC4_WAIT_SEQNO DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_WAIT_SEQNO, struct drm_vc4_wait_seqno) @@ -45,6 +46,7 @@ extern "C" { #define DRM_IOCTL_VC4_MMAP_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_MMAP_BO, struct drm_vc4_mmap_bo) #define DRM_IOCTL_VC4_CREATE_SHADER_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_CREATE_SHADER_BO, struct drm_vc4_create_shader_bo) #define DRM_IOCTL_VC4_GET_HANG_STATE DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_GET_HANG_STATE, struct drm_vc4_get_hang_state) +#define DRM_IOCTL_VC4_GET_PARAM DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_GET_PARAM, struct drm_vc4_get_param)
struct drm_vc4_submit_rcl_surface { __u32 hindex; /* Handle index, or ~0 if not present. */ @@ -280,6 +282,16 @@ struct drm_vc4_get_hang_state { __u32 pad[16]; };
+#define DRM_VC4_PARAM_V3D_IDENT0 0 +#define DRM_VC4_PARAM_V3D_IDENT1 1 +#define DRM_VC4_PARAM_V3D_IDENT2 2 + +struct drm_vc4_get_param { + __u32 param; + __u32 pad; + __u64 value; +}; + #if defined(__cplusplus) } #endif
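For reference, exercising the final (v3) interface from userspace looks roughly like the sketch below. This is not part of the series: it assumes the uapi header above is available to the build as "vc4_drm.h", and /dev/dri/renderD128 is just a placeholder for whatever render node vc4 exposes.

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>

#include "vc4_drm.h"	/* the uapi header added by this patch */

/* Thin wrapper so callers just get the 64-bit value back. */
static int vc4_get_param(int fd, __u32 param, __u64 *value)
{
	struct drm_vc4_get_param args;

	memset(&args, 0, sizeof(args));	/* args.pad must be zero */
	args.param = param;

	if (ioctl(fd, DRM_IOCTL_VC4_GET_PARAM, &args) != 0)
		return -1;

	*value = args.value;
	return 0;
}

int main(void)
{
	int fd = open("/dev/dri/renderD128", O_RDWR);	/* placeholder node */
	__u64 ident0, ident1, ident2;

	if (fd < 0)
		return 1;

	if (vc4_get_param(fd, DRM_VC4_PARAM_V3D_IDENT0, &ident0) ||
	    vc4_get_param(fd, DRM_VC4_PARAM_V3D_IDENT1, &ident1) ||
	    vc4_get_param(fd, DRM_VC4_PARAM_V3D_IDENT2, &ident2))
		return 1;

	printf("V3D IDENT0/1/2: 0x%llx 0x%llx 0x%llx\n",
	       (unsigned long long)ident0,
	       (unsigned long long)ident1,
	       (unsigned long long)ident2);
	return 0;
}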
Reduces the argument count for some of the functions, and will be used more with the upcoming looping support.
Signed-off-by: Eric Anholt <eric@anholt.net>
---
 drivers/gpu/drm/vc4/vc4_validate_shaders.c | 54 +++++++++++++++++------------
 1 file changed, 30 insertions(+), 24 deletions(-)
diff --git a/drivers/gpu/drm/vc4/vc4_validate_shaders.c b/drivers/gpu/drm/vc4/vc4_validate_shaders.c index f67124b4c534..771d904653f2 100644 --- a/drivers/gpu/drm/vc4/vc4_validate_shaders.c +++ b/drivers/gpu/drm/vc4/vc4_validate_shaders.c @@ -40,6 +40,14 @@ #include "vc4_qpu_defines.h"
struct vc4_shader_validation_state { + /* Current IP being validated. */ + uint32_t ip; + + /* IP at the end of the BO, do not read shader[max_ip] */ + uint32_t max_ip; + + uint64_t *shader; + struct vc4_texture_sample_info tmu_setup[2]; int tmu_write_count[2];
@@ -129,11 +137,11 @@ record_texture_sample(struct vc4_validated_shader_info *validated_shader, }
static bool -check_tmu_write(uint64_t inst, - struct vc4_validated_shader_info *validated_shader, +check_tmu_write(struct vc4_validated_shader_info *validated_shader, struct vc4_shader_validation_state *validation_state, bool is_mul) { + uint64_t inst = validation_state->shader[validation_state->ip]; uint32_t waddr = (is_mul ? QPU_GET_FIELD(inst, QPU_WADDR_MUL) : QPU_GET_FIELD(inst, QPU_WADDR_ADD)); @@ -228,11 +236,11 @@ check_tmu_write(uint64_t inst, }
static bool -check_reg_write(uint64_t inst, - struct vc4_validated_shader_info *validated_shader, +check_reg_write(struct vc4_validated_shader_info *validated_shader, struct vc4_shader_validation_state *validation_state, bool is_mul) { + uint64_t inst = validation_state->shader[validation_state->ip]; uint32_t waddr = (is_mul ? QPU_GET_FIELD(inst, QPU_WADDR_MUL) : QPU_GET_FIELD(inst, QPU_WADDR_ADD)); @@ -261,7 +269,7 @@ check_reg_write(uint64_t inst, case QPU_W_TMU1_T: case QPU_W_TMU1_R: case QPU_W_TMU1_B: - return check_tmu_write(inst, validated_shader, validation_state, + return check_tmu_write(validated_shader, validation_state, is_mul);
case QPU_W_HOST_INT: @@ -294,10 +302,10 @@ check_reg_write(uint64_t inst, }
static void -track_live_clamps(uint64_t inst, - struct vc4_validated_shader_info *validated_shader, +track_live_clamps(struct vc4_validated_shader_info *validated_shader, struct vc4_shader_validation_state *validation_state) { + uint64_t inst = validation_state->shader[validation_state->ip]; uint32_t op_add = QPU_GET_FIELD(inst, QPU_OP_ADD); uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD); uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL); @@ -369,10 +377,10 @@ track_live_clamps(uint64_t inst, }
static bool -check_instruction_writes(uint64_t inst, - struct vc4_validated_shader_info *validated_shader, +check_instruction_writes(struct vc4_validated_shader_info *validated_shader, struct vc4_shader_validation_state *validation_state) { + uint64_t inst = validation_state->shader[validation_state->ip]; uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD); uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL); bool ok; @@ -382,12 +390,10 @@ check_instruction_writes(uint64_t inst, return false; }
- ok = (check_reg_write(inst, validated_shader, validation_state, - false) && - check_reg_write(inst, validated_shader, validation_state, - true)); + ok = (check_reg_write(validated_shader, validation_state, false) && + check_reg_write(validated_shader, validation_state, true));
- track_live_clamps(inst, validated_shader, validation_state); + track_live_clamps(validated_shader, validation_state);
return ok; } @@ -417,30 +423,30 @@ vc4_validate_shader(struct drm_gem_cma_object *shader_obj) { bool found_shader_end = false; int shader_end_ip = 0; - uint32_t ip, max_ip; - uint64_t *shader; + uint32_t ip; struct vc4_validated_shader_info *validated_shader; struct vc4_shader_validation_state validation_state; int i;
memset(&validation_state, 0, sizeof(validation_state)); + validation_state.shader = shader_obj->vaddr; + validation_state.max_ip = shader_obj->base.size / sizeof(uint64_t);
for (i = 0; i < 8; i++) validation_state.tmu_setup[i / 4].p_offset[i % 4] = ~0; for (i = 0; i < ARRAY_SIZE(validation_state.live_min_clamp_offsets); i++) validation_state.live_min_clamp_offsets[i] = ~0;
- shader = shader_obj->vaddr; - max_ip = shader_obj->base.size / sizeof(uint64_t); - validated_shader = kcalloc(1, sizeof(*validated_shader), GFP_KERNEL); if (!validated_shader) return NULL;
- for (ip = 0; ip < max_ip; ip++) { - uint64_t inst = shader[ip]; + for (ip = 0; ip < validation_state.max_ip; ip++) { + uint64_t inst = validation_state.shader[ip]; uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
+ validation_state.ip = ip; + switch (sig) { case QPU_SIG_NONE: case QPU_SIG_WAIT_FOR_SCOREBOARD: @@ -450,7 +456,7 @@ vc4_validate_shader(struct drm_gem_cma_object *shader_obj) case QPU_SIG_LOAD_TMU1: case QPU_SIG_PROG_END: case QPU_SIG_SMALL_IMM: - if (!check_instruction_writes(inst, validated_shader, + if (!check_instruction_writes(validated_shader, &validation_state)) { DRM_ERROR("Bad write at ip %d\n", ip); goto fail; @@ -467,7 +473,7 @@ vc4_validate_shader(struct drm_gem_cma_object *shader_obj) break;
case QPU_SIG_LOAD_IMM: - if (!check_instruction_writes(inst, validated_shader, + if (!check_instruction_writes(validated_shader, &validation_state)) { DRM_ERROR("Bad LOAD_IMM write at ip %d\n", ip); goto fail; @@ -487,7 +493,7 @@ vc4_validate_shader(struct drm_gem_cma_object *shader_obj) break; }
- if (ip == max_ip) { + if (ip == validation_state.max_ip) { DRM_ERROR("shader failed to terminate before " "shader BO end at %zd\n", shader_obj->base.size);
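The practical effect of the refactor is that helpers can derive the instruction being validated from the state struct instead of threading an inst argument through every call. A one-line sketch of the resulting access pattern (the helper name is hypothetical; the patch open-codes the array access):

/* Fetch the instruction currently being validated. */
static inline uint64_t
vc4_cur_inst(const struct vc4_shader_validation_state *validation_state)
{
	return validation_state->shader[validation_state->ip];
}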
This isn't used yet, it's just a first step toward loop validation. During the main parsing of instructions, we need to know when we hit a new basic block so that we can reset validated state.
Signed-off-by: Eric Anholt <eric@anholt.net>
---
 drivers/gpu/drm/vc4/vc4_qpu_defines.h      |  12 +++
 drivers/gpu/drm/vc4/vc4_validate_shaders.c | 114 ++++++++++++++++++++++++++++-
 2 files changed, 124 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/vc4/vc4_qpu_defines.h b/drivers/gpu/drm/vc4/vc4_qpu_defines.h index d5c2f3c85ebb..82ef0e525d55 100644 --- a/drivers/gpu/drm/vc4/vc4_qpu_defines.h +++ b/drivers/gpu/drm/vc4/vc4_qpu_defines.h @@ -230,6 +230,15 @@ enum qpu_unpack_r4 { #define QPU_COND_MUL_SHIFT 46 #define QPU_COND_MUL_MASK QPU_MASK(48, 46)
+#define QPU_BRANCH_COND_SHIFT 52 +#define QPU_BRANCH_COND_MASK QPU_MASK(55, 52) + +#define QPU_BRANCH_REL ((uint64_t)1 << 51) +#define QPU_BRANCH_REG ((uint64_t)1 << 50) + +#define QPU_BRANCH_RADDR_A_SHIFT 45 +#define QPU_BRANCH_RADDR_A_MASK QPU_MASK(49, 45) + #define QPU_SF ((uint64_t)1 << 45)
#define QPU_WADDR_ADD_SHIFT 38 @@ -261,4 +270,7 @@ enum qpu_unpack_r4 { #define QPU_OP_ADD_SHIFT 24 #define QPU_OP_ADD_MASK QPU_MASK(28, 24)
+#define QPU_BRANCH_TARGET_SHIFT 0 +#define QPU_BRANCH_TARGET_MASK QPU_MASK(31, 0) + #endif /* VC4_QPU_DEFINES_H */ diff --git a/drivers/gpu/drm/vc4/vc4_validate_shaders.c b/drivers/gpu/drm/vc4/vc4_validate_shaders.c index 771d904653f2..bcf3c31b2765 100644 --- a/drivers/gpu/drm/vc4/vc4_validate_shaders.c +++ b/drivers/gpu/drm/vc4/vc4_validate_shaders.c @@ -59,6 +59,13 @@ struct vc4_shader_validation_state { */ uint32_t live_min_clamp_offsets[32 + 32 + 4]; bool live_max_clamp_regs[32 + 32 + 4]; + + /* Bitfield of which IPs are used as branch targets. + * + * Used for validation that the uniform stream is updated at the right + * points and clearing the texturing/clamping state. + */ + unsigned long *branch_targets; };
static uint32_t @@ -418,13 +425,104 @@ check_instruction_reads(uint64_t inst, return true; }
+/* Make sure that all branches are absolute and point within the shader, and + * note their targets for later. + */ +static bool +vc4_validate_branches(struct vc4_shader_validation_state *validation_state) +{ + uint32_t max_branch_target = 0; + bool found_shader_end = false; + int ip; + int shader_end_ip = 0; + int last_branch = -2; + + for (ip = 0; ip < validation_state->max_ip; ip++) { + uint64_t inst = validation_state->shader[ip]; + int32_t branch_imm = QPU_GET_FIELD(inst, QPU_BRANCH_TARGET); + uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG); + uint32_t after_delay_ip = ip + 4; + uint32_t branch_target_ip; + + if (sig == QPU_SIG_PROG_END) { + shader_end_ip = ip; + found_shader_end = true; + continue; + } + + if (sig != QPU_SIG_BRANCH) + continue; + + if (ip - last_branch < 4) { + DRM_ERROR("Branch at %d during delay slots\n", ip); + return false; + } + last_branch = ip; + + if (inst & QPU_BRANCH_REG) { + DRM_ERROR("branching from register relative " + "not supported\n"); + return false; + } + + if (!(inst & QPU_BRANCH_REL)) { + DRM_ERROR("relative branching required\n"); + return false; + } + + /* The actual branch target is the instruction after the delay + * slots, plus whatever byte offset is in the low 32 bits of + * the instruction. Make sure we're not branching beyond the + * end of the shader object. + */ + if (branch_imm % sizeof(inst) != 0) { + DRM_ERROR("branch target not aligned\n"); + return false; + }; + + branch_target_ip = after_delay_ip + (branch_imm >> 3); + if (branch_target_ip >= validation_state->max_ip) { + DRM_ERROR("Branch at %d outside of shader (ip %d/%d)\n", + ip, branch_target_ip, + validation_state->max_ip); + return false; + } + set_bit(branch_target_ip, validation_state->branch_targets); + + /* Make sure that the non-branching path is also not outside + * the shader. + */ + if (after_delay_ip >= validation_state->max_ip) { + DRM_ERROR("Branch at %d continues past shader end " + "(%d/%d)\n", + ip, after_delay_ip, validation_state->max_ip); + return false; + } + set_bit(after_delay_ip, validation_state->branch_targets); + max_branch_target = max(max_branch_target, after_delay_ip); + + /* There are two delay slots after program end is signaled + * that are still executed, then we're finished. + */ + if (found_shader_end && ip == shader_end_ip + 2) + break; + } + + if (max_branch_target > shader_end_ip) { + DRM_ERROR("Branch landed after QPU_SIG_PROG_END"); + return false; + } + + return true; +} + struct vc4_validated_shader_info * vc4_validate_shader(struct drm_gem_cma_object *shader_obj) { bool found_shader_end = false; int shader_end_ip = 0; uint32_t ip; - struct vc4_validated_shader_info *validated_shader; + struct vc4_validated_shader_info *validated_shader = NULL; struct vc4_shader_validation_state validation_state; int i;
@@ -437,9 +535,18 @@ vc4_validate_shader(struct drm_gem_cma_object *shader_obj) for (i = 0; i < ARRAY_SIZE(validation_state.live_min_clamp_offsets); i++) validation_state.live_min_clamp_offsets[i] = ~0;
+ validation_state.branch_targets = + kcalloc(BITS_TO_LONGS(validation_state.max_ip), + sizeof(unsigned long), GFP_KERNEL); + if (!validation_state.branch_targets) + goto fail; + validated_shader = kcalloc(1, sizeof(*validated_shader), GFP_KERNEL); if (!validated_shader) - return NULL; + goto fail; + + if (!vc4_validate_branches(&validation_state)) + goto fail;
for (ip = 0; ip < validation_state.max_ip; ip++) { uint64_t inst = validation_state.shader[ip]; @@ -508,9 +615,12 @@ vc4_validate_shader(struct drm_gem_cma_object *shader_obj) (validated_shader->uniforms_size + 4 * validated_shader->num_texture_samples);
+ kfree(validation_state.branch_targets); + return validated_shader;
fail: + kfree(validation_state.branch_targets); if (validated_shader) { kfree(validated_shader->texture_samples); kfree(validated_shader);
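Distilled out of the loop above, the branch-target arithmetic is roughly the following sketch (not code from the patch; it assumes QPU_GET_FIELD() and the branch field definitions from vc4_qpu_defines.h, and that QPU branches have three delay slots, so a branch takes effect four instructions after it is issued):

#include <stdbool.h>
#include <stdint.h>

#include "vc4_qpu_defines.h"

/* Convert a relative branch at instruction index ip into the index it
 * lands on, returning false if the target is misaligned or outside the
 * shader.  Example: a branch at ip 10 with a -96 byte immediate lands
 * at (10 + 4) + (-96 / 8) = 2.
 */
static bool
qpu_branch_target(uint64_t inst, uint32_t ip, uint32_t max_ip,
		  uint32_t *target_ip)
{
	int32_t branch_imm = QPU_GET_FIELD(inst, QPU_BRANCH_TARGET);
	uint32_t after_delay_ip = ip + 4;	/* the branch plus 3 delay slots */

	if (branch_imm % sizeof(inst) != 0)	/* byte offset must be 64-bit aligned */
		return false;

	*target_ip = after_delay_ip + (branch_imm >> 3);
	return *target_ip < max_ip;
}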
Eric Anholt <eric@anholt.net> writes:
> This isn't used yet, it's just a first step toward loop validation.
> During the main parsing of instructions, we need to know when we hit a
> new basic block so that we can reset validated state.
>
> Signed-off-by: Eric Anholt <eric@anholt.net>
>
>  drivers/gpu/drm/vc4/vc4_qpu_defines.h      |  12 +++
>  drivers/gpu/drm/vc4/vc4_validate_shaders.c | 114 ++++++++++++++++++++++++++++-
>  2 files changed, 124 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/vc4/vc4_qpu_defines.h b/drivers/gpu/drm/vc4/vc4_qpu_defines.h
> index d5c2f3c85ebb..82ef0e525d55 100644
> --- a/drivers/gpu/drm/vc4/vc4_qpu_defines.h
> +++ b/drivers/gpu/drm/vc4/vc4_qpu_defines.h
>
> 		/* The actual branch target is the instruction after the delay
> 		 * slots, plus whatever byte offset is in the low 32 bits of
> 		 * the instruction. Make sure we're not branching beyond the
> 		 * end of the shader object.
> 		 */
> 		if (branch_imm % sizeof(inst) != 0) {
> 			DRM_ERROR("branch target not aligned\n");
> 			return false;
> 		};
Last change before pull request: I dropped the stray ';' that the kbuild test robot caught.
We're already checking that branch instructions are between the start of the shader and the proper PROG_END sequence. The other thing we need to make branching safe is to verify that the shader doesn't read past the end of the uniforms stream.
To do that, we require that any basic block that reads uniforms contain the following instructions before any of its uniform reads:
   load_imm temp, <next offset within uniform stream>
   add unif_addr, temp, unif
The instructions are generated by userspace, and the kernel verifies that the load_imm is of the expected offset, and that the add adds it to a uniform. We track which uniform in the stream that is, and at draw call time fix up the uniform stream to have the address of the start of the shader's uniforms at that location.
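Concretely, each uniform-reading basic block costs one extra slot in the uniform stream for the reset to consume, and those are the slots the kernel rewrites at draw-call time. A hypothetical layout and the corresponding fixup, sketched from the scheme described above (the helper is illustrative; the real fixup lives in validate_gl_shader_rec() below):

/*
 * Hypothetical uniform stream (32-bit slots) for a shader with two basic
 * blocks, each reading two uniforms.  Slots 0 and 3 are recorded in
 * uniform_addr_offsets[] and patched with the stream's GPU address:
 *
 *   slot 0: <uniforms_p>	consumed by block 0's "add unif_addr, temp, unif"
 *   slot 1: uniform a		(temp was load_imm'd with byte offset 4)
 *   slot 2: uniform b
 *   slot 3: <uniforms_p>	consumed by block 1's reset
 *   slot 4: uniform c		(temp was load_imm'd with byte offset 16)
 *   slot 5: uniform d
 */
static void
vc4_write_uniform_addrs(const struct vc4_validated_shader_info *shader,
			uint32_t *uniforms_v, uint32_t uniforms_p)
{
	uint32_t i;

	for (i = 0; i < shader->num_uniform_addr_offsets; i++)
		uniforms_v[shader->uniform_addr_offsets[i]] = uniforms_p;
}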
Signed-off-by: Eric Anholt <eric@anholt.net>
---
 drivers/gpu/drm/vc4/vc4_drv.h              |   3 +
 drivers/gpu/drm/vc4/vc4_qpu_defines.h      |   3 +
 drivers/gpu/drm/vc4/vc4_validate.c         |  13 +-
 drivers/gpu/drm/vc4/vc4_validate_shaders.c | 281 +++++++++++++++++++++++++++--
 4 files changed, 283 insertions(+), 17 deletions(-)
diff --git a/drivers/gpu/drm/vc4/vc4_drv.h b/drivers/gpu/drm/vc4/vc4_drv.h index 37cac59401d7..efc61a09162e 100644 --- a/drivers/gpu/drm/vc4/vc4_drv.h +++ b/drivers/gpu/drm/vc4/vc4_drv.h @@ -355,6 +355,9 @@ struct vc4_validated_shader_info { uint32_t uniforms_src_size; uint32_t num_texture_samples; struct vc4_texture_sample_info *texture_samples; + + uint32_t num_uniform_addr_offsets; + uint32_t *uniform_addr_offsets; };
/** diff --git a/drivers/gpu/drm/vc4/vc4_qpu_defines.h b/drivers/gpu/drm/vc4/vc4_qpu_defines.h index 82ef0e525d55..6d0745720a17 100644 --- a/drivers/gpu/drm/vc4/vc4_qpu_defines.h +++ b/drivers/gpu/drm/vc4/vc4_qpu_defines.h @@ -270,6 +270,9 @@ enum qpu_unpack_r4 { #define QPU_OP_ADD_SHIFT 24 #define QPU_OP_ADD_MASK QPU_MASK(28, 24)
+#define QPU_LOAD_IMM_SHIFT 0 +#define QPU_LOAD_IMM_MASK QPU_MASK(31, 0) + #define QPU_BRANCH_TARGET_SHIFT 0 #define QPU_BRANCH_TARGET_MASK QPU_MASK(31, 0)
diff --git a/drivers/gpu/drm/vc4/vc4_validate.c b/drivers/gpu/drm/vc4/vc4_validate.c index 24c2c746e8f3..9ce1d0adf882 100644 --- a/drivers/gpu/drm/vc4/vc4_validate.c +++ b/drivers/gpu/drm/vc4/vc4_validate.c @@ -802,7 +802,7 @@ validate_gl_shader_rec(struct drm_device *dev, uint32_t src_offset = *(uint32_t *)(pkt_u + o); uint32_t *texture_handles_u; void *uniform_data_u; - uint32_t tex; + uint32_t tex, uni;
*(uint32_t *)(pkt_v + o) = bo[i]->paddr + src_offset;
@@ -840,6 +840,17 @@ validate_gl_shader_rec(struct drm_device *dev, } }
+ /* Fill in the uniform slots that need this shader's + * start-of-uniforms address (used for resetting the uniform + * stream in the presence of control flow). + */ + for (uni = 0; + uni < validated_shader->num_uniform_addr_offsets; + uni++) { + uint32_t o = validated_shader->uniform_addr_offsets[uni]; + ((uint32_t *)exec->uniforms_v)[o] = exec->uniforms_p; + } + *(uint32_t *)(pkt_v + o + 4) = exec->uniforms_p;
exec->uniforms_u += validated_shader->uniforms_src_size; diff --git a/drivers/gpu/drm/vc4/vc4_validate_shaders.c b/drivers/gpu/drm/vc4/vc4_validate_shaders.c index bcf3c31b2765..302ce68e24cd 100644 --- a/drivers/gpu/drm/vc4/vc4_validate_shaders.c +++ b/drivers/gpu/drm/vc4/vc4_validate_shaders.c @@ -39,6 +39,8 @@ #include "vc4_drv.h" #include "vc4_qpu_defines.h"
+#define LIVE_REG_COUNT (32 + 32 + 4) + struct vc4_shader_validation_state { /* Current IP being validated. */ uint32_t ip; @@ -57,8 +59,9 @@ struct vc4_shader_validation_state { * * This is used for the validation of direct address memory reads. */ - uint32_t live_min_clamp_offsets[32 + 32 + 4]; - bool live_max_clamp_regs[32 + 32 + 4]; + uint32_t live_min_clamp_offsets[LIVE_REG_COUNT]; + bool live_max_clamp_regs[LIVE_REG_COUNT]; + uint32_t live_immediates[LIVE_REG_COUNT];
/* Bitfield of which IPs are used as branch targets. * @@ -66,6 +69,20 @@ struct vc4_shader_validation_state { * points and clearing the texturing/clamping state. */ unsigned long *branch_targets; + + /* Set when entering a basic block, and cleared when the uniform + * address update is found. This is used to make sure that we don't + * read uniforms when the address is undefined. + */ + bool needs_uniform_address_update; + + /* Set when we find a backwards branch. If the branch is backwards, + * the taraget is probably doing an address reset to read uniforms, + * and so we need to be sure that a uniforms address is present in the + * stream, even if the shader didn't need to read uniforms in later + * basic blocks. + */ + bool needs_uniform_address_for_loop; };
static uint32_t @@ -227,8 +244,14 @@ check_tmu_write(struct vc4_validated_shader_info *validated_shader, /* Since direct uses a RADDR uniform reference, it will get counted in * check_instruction_reads() */ - if (!is_direct) + if (!is_direct) { + if (validation_state->needs_uniform_address_update) { + DRM_ERROR("Texturing with undefined uniform address\n"); + return false; + } + validated_shader->uniforms_size += 4; + }
if (submit) { if (!record_texture_sample(validated_shader, @@ -242,6 +265,98 @@ check_tmu_write(struct vc4_validated_shader_info *validated_shader, return true; }
+static bool require_uniform_address_uniform(struct vc4_validated_shader_info *validated_shader) +{ + uint32_t o = validated_shader->num_uniform_addr_offsets; + uint32_t num_uniforms = validated_shader->uniforms_size / 4; + + validated_shader->uniform_addr_offsets = + krealloc(validated_shader->uniform_addr_offsets, + (o + 1) * + sizeof(*validated_shader->uniform_addr_offsets), + GFP_KERNEL); + if (!validated_shader->uniform_addr_offsets) + return false; + + validated_shader->uniform_addr_offsets[o] = num_uniforms; + validated_shader->num_uniform_addr_offsets++; + + return true; +} + +static bool +validate_uniform_address_write(struct vc4_validated_shader_info *validated_shader, + struct vc4_shader_validation_state *validation_state, + bool is_mul) +{ + uint64_t inst = validation_state->shader[validation_state->ip]; + u32 add_b = QPU_GET_FIELD(inst, QPU_ADD_B); + u32 raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A); + u32 raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B); + u32 add_lri = raddr_add_a_to_live_reg_index(inst); + /* We want our reset to be pointing at whatever uniform follows the + * uniforms base address. + */ + u32 expected_offset = validated_shader->uniforms_size + 4; + + /* We only support absolute uniform address changes, and we + * require that they be in the current basic block before any + * of its uniform reads. + * + * One could potentially emit more efficient QPU code, by + * noticing that (say) an if statement does uniform control + * flow for all threads and that the if reads the same number + * of uniforms on each side. However, this scheme is easy to + * validate so it's all we allow for now. + */ + + if (QPU_GET_FIELD(inst, QPU_SIG) != QPU_SIG_NONE) { + DRM_ERROR("uniforms address change must be " + "normal math\n"); + return false; + } + + if (is_mul || QPU_GET_FIELD(inst, QPU_OP_ADD) != QPU_A_ADD) { + DRM_ERROR("Uniform address reset must be an ADD.\n"); + return false; + } + + if (QPU_GET_FIELD(inst, QPU_COND_ADD) != QPU_COND_ALWAYS) { + DRM_ERROR("Uniform address reset must be unconditional.\n"); + return false; + } + + if (QPU_GET_FIELD(inst, QPU_PACK) != QPU_PACK_A_NOP && + !(inst & QPU_PM)) { + DRM_ERROR("No packing allowed on uniforms reset\n"); + return false; + } + + if (add_lri == -1) { + DRM_ERROR("First argument of uniform address write must be " + "an immediate value.\n"); + return false; + } + + if (validation_state->live_immediates[add_lri] != expected_offset) { + DRM_ERROR("Resetting uniforms with offset %db instead of %db\n", + validation_state->live_immediates[add_lri], + expected_offset); + return false; + } + + if (!(add_b == QPU_MUX_A && raddr_a == QPU_R_UNIF) && + !(add_b == QPU_MUX_B && raddr_b == QPU_R_UNIF)) { + DRM_ERROR("Second argument of uniform address write must be " + "a uniform.\n"); + return false; + } + + validation_state->needs_uniform_address_update = false; + validation_state->needs_uniform_address_for_loop = false; + return require_uniform_address_uniform(validated_shader); +} + static bool check_reg_write(struct vc4_validated_shader_info *validated_shader, struct vc4_shader_validation_state *validation_state, @@ -251,14 +366,37 @@ check_reg_write(struct vc4_validated_shader_info *validated_shader, uint32_t waddr = (is_mul ? 
QPU_GET_FIELD(inst, QPU_WADDR_MUL) : QPU_GET_FIELD(inst, QPU_WADDR_ADD)); + uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG); + bool ws = inst & QPU_WS; + bool is_b = is_mul ^ ws; + u32 lri = waddr_to_live_reg_index(waddr, is_b); + + if (lri != -1) { + uint32_t cond_add = QPU_GET_FIELD(inst, QPU_COND_ADD); + uint32_t cond_mul = QPU_GET_FIELD(inst, QPU_COND_MUL); + + if (sig == QPU_SIG_LOAD_IMM && + QPU_GET_FIELD(inst, QPU_PACK) == QPU_PACK_A_NOP && + ((is_mul && cond_mul == QPU_COND_ALWAYS) || + (!is_mul && cond_add == QPU_COND_ALWAYS))) { + validation_state->live_immediates[lri] = + QPU_GET_FIELD(inst, QPU_LOAD_IMM); + } else { + validation_state->live_immediates[lri] = ~0; + } + }
switch (waddr) { case QPU_W_UNIFORMS_ADDRESS: - /* XXX: We'll probably need to support this for reladdr, but - * it's definitely a security-related one. - */ - DRM_ERROR("uniforms address load unsupported\n"); - return false; + if (is_b) { + DRM_ERROR("relative uniforms address change " + "unsupported\n"); + return false; + } + + return validate_uniform_address_write(validated_shader, + validation_state, + is_mul);
case QPU_W_TLB_COLOR_MS: case QPU_W_TLB_COLOR_ALL: @@ -406,9 +544,35 @@ check_instruction_writes(struct vc4_validated_shader_info *validated_shader, }
static bool -check_instruction_reads(uint64_t inst, - struct vc4_validated_shader_info *validated_shader) +check_branch(uint64_t inst, + struct vc4_validated_shader_info *validated_shader, + struct vc4_shader_validation_state *validation_state, + int ip) +{ + int32_t branch_imm = QPU_GET_FIELD(inst, QPU_BRANCH_TARGET); + uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD); + uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL); + + if ((int)branch_imm < 0) + validation_state->needs_uniform_address_for_loop = true; + + /* We don't want to have to worry about validation of this, and + * there's no need for it. + */ + if (waddr_add != QPU_W_NOP || waddr_mul != QPU_W_NOP) { + DRM_ERROR("branch instruction at %d wrote a register.\n", + validation_state->ip); + return false; + } + + return true; +} + +static bool +check_instruction_reads(struct vc4_validated_shader_info *validated_shader, + struct vc4_shader_validation_state *validation_state) { + uint64_t inst = validation_state->shader[validation_state->ip]; uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A); uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B); uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG); @@ -420,6 +584,12 @@ check_instruction_reads(uint64_t inst, * already be OOM. */ validated_shader->uniforms_size += 4; + + if (validation_state->needs_uniform_address_update) { + DRM_ERROR("Uniform read with undefined uniform " + "address\n"); + return false; + } }
return true; @@ -516,6 +686,65 @@ vc4_validate_branches(struct vc4_shader_validation_state *validation_state) return true; }
+/* Resets any known state for the shader, used when we may be branched to from + * multiple locations in the program (or at shader start). + */ +static void +reset_validation_state(struct vc4_shader_validation_state *validation_state) +{ + int i; + + for (i = 0; i < 8; i++) + validation_state->tmu_setup[i / 4].p_offset[i % 4] = ~0; + + for (i = 0; i < LIVE_REG_COUNT; i++) { + validation_state->live_min_clamp_offsets[i] = ~0; + validation_state->live_max_clamp_regs[i] = false; + validation_state->live_immediates[i] = ~0; + } +} + +static bool +texturing_in_progress(struct vc4_shader_validation_state *validation_state) +{ + return (validation_state->tmu_write_count[0] != 0 || + validation_state->tmu_write_count[1] != 0); +} + +static bool +vc4_handle_branch_target(struct vc4_shader_validation_state *validation_state) +{ + uint32_t ip = validation_state->ip; + + if (!test_bit(ip, validation_state->branch_targets)) + return true; + + if (texturing_in_progress(validation_state)) { + DRM_ERROR("Branch target landed during TMU setup\n"); + return false; + } + + /* Reset our live values tracking, since this instruction may have + * multiple predecessors. + * + * One could potentially do analysis to determine that, for + * example, all predecessors have a live max clamp in the same + * register, but we don't bother with that. + */ + reset_validation_state(validation_state); + + /* Since we've entered a basic block from potentially multiple + * predecessors, we need the uniforms address to be updated before any + * unforms are read. We require that after any branch point, the next + * uniform to be loaded is a uniform address offset. That uniform's + * offset will be marked by the uniform address register write + * validation, or a one-off the end-of-program check. + */ + validation_state->needs_uniform_address_update = true; + + return true; +} + struct vc4_validated_shader_info * vc4_validate_shader(struct drm_gem_cma_object *shader_obj) { @@ -524,16 +753,12 @@ vc4_validate_shader(struct drm_gem_cma_object *shader_obj) uint32_t ip; struct vc4_validated_shader_info *validated_shader = NULL; struct vc4_shader_validation_state validation_state; - int i;
memset(&validation_state, 0, sizeof(validation_state)); validation_state.shader = shader_obj->vaddr; validation_state.max_ip = shader_obj->base.size / sizeof(uint64_t);
- for (i = 0; i < 8; i++) - validation_state.tmu_setup[i / 4].p_offset[i % 4] = ~0; - for (i = 0; i < ARRAY_SIZE(validation_state.live_min_clamp_offsets); i++) - validation_state.live_min_clamp_offsets[i] = ~0; + reset_validation_state(&validation_state);
validation_state.branch_targets = kcalloc(BITS_TO_LONGS(validation_state.max_ip), @@ -554,6 +779,9 @@ vc4_validate_shader(struct drm_gem_cma_object *shader_obj)
validation_state.ip = ip;
+ if (!vc4_handle_branch_target(&validation_state)) + goto fail; + switch (sig) { case QPU_SIG_NONE: case QPU_SIG_WAIT_FOR_SCOREBOARD: @@ -569,7 +797,8 @@ vc4_validate_shader(struct drm_gem_cma_object *shader_obj) goto fail; }
- if (!check_instruction_reads(inst, validated_shader)) + if (!check_instruction_reads(validated_shader, + &validation_state)) goto fail;
if (sig == QPU_SIG_PROG_END) { @@ -587,6 +816,11 @@ vc4_validate_shader(struct drm_gem_cma_object *shader_obj) } break;
+ case QPU_SIG_BRANCH: + if (!check_branch(inst, validated_shader, + &validation_state, ip)) + goto fail; + break; default: DRM_ERROR("Unsupported QPU signal %d at " "instruction %d\n", sig, ip); @@ -607,6 +841,21 @@ vc4_validate_shader(struct drm_gem_cma_object *shader_obj) goto fail; }
+ /* If we did a backwards branch and we haven't emitted a uniforms + * reset since then, we still need the uniforms stream to have the + * uniforms address available so that the backwards branch can do its + * uniforms reset. + * + * We could potentially prove that the backwards branch doesn't + * contain any uses of uniforms until program exit, but that doesn't + * seem to be worth the trouble. + */ + if (validation_state.needs_uniform_address_for_loop) { + if (!require_uniform_address_uniform(validated_shader)) + goto fail; + validated_shader->uniforms_size += 4; + } + /* Again, no chance of integer overflow here because the worst case * scenario is 8 bytes of uniforms plus handles per 8-byte * instruction.
Userspace needs to know whether it can create shaders that do branching. Otherwise, for backwards compatibility with old kernels, it needs to lower if statements to conditional assignments.
Signed-off-by: Eric Anholt <eric@anholt.net>
---
 drivers/gpu/drm/vc4/vc4_drv.c | 3 +++
 include/uapi/drm/vc4_drm.h    | 1 +
 2 files changed, 4 insertions(+)
diff --git a/drivers/gpu/drm/vc4/vc4_drv.c b/drivers/gpu/drm/vc4/vc4_drv.c
index 8a2f2b5a8f32..fd6e34600d50 100644
--- a/drivers/gpu/drm/vc4/vc4_drv.c
+++ b/drivers/gpu/drm/vc4/vc4_drv.c
@@ -59,6 +59,9 @@ static int vc4_get_param_ioctl(struct drm_device *dev, void *data,
 	case DRM_VC4_PARAM_V3D_IDENT2:
 		args->value = V3D_READ(V3D_IDENT2);
 		break;
+	case DRM_VC4_PARAM_SUPPORTS_BRANCHES:
+		args->value = true;
+		break;
 	default:
 		DRM_DEBUG("Unknown parameter %d\n", args->param);
 		return -EINVAL;
diff --git a/include/uapi/drm/vc4_drm.h b/include/uapi/drm/vc4_drm.h
index c4427d31b346..86cc0cd0d125 100644
--- a/include/uapi/drm/vc4_drm.h
+++ b/include/uapi/drm/vc4_drm.h
@@ -285,6 +285,7 @@ struct drm_vc4_get_hang_state {
 #define DRM_VC4_PARAM_V3D_IDENT0		0
 #define DRM_VC4_PARAM_V3D_IDENT1		1
 #define DRM_VC4_PARAM_V3D_IDENT2		2
+#define DRM_VC4_PARAM_SUPPORTS_BRANCHES		3
 
 struct drm_vc4_get_param {
 	__u32 param;
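On the userspace side, the intended use is roughly the sketch below, reusing the vc4_get_param() wrapper from the earlier getparam example (the function name here is made up); old kernels reject the unknown param with -EINVAL, which falls out as "no branch support":

#include <stdbool.h>

/* Returns true if the kernel's shader validator accepts branches, so the
 * compiler may emit real control flow instead of lowering if statements
 * to conditional assignments.
 */
static bool vc4_screen_has_branches(int fd)
{
	__u64 value = 0;

	if (vc4_get_param(fd, DRM_VC4_PARAM_SUPPORTS_BRANCHES, &value) != 0)
		return false;

	return value != 0;
}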
We don't currently use it in shader validation, so the wrong value had no effect, but it's best to fix it anyway in case we do some day.
Signed-off-by: Eric Anholt <eric@anholt.net>
---
 drivers/gpu/drm/vc4/vc4_qpu_defines.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/vc4/vc4_qpu_defines.h b/drivers/gpu/drm/vc4/vc4_qpu_defines.h
index 6d0745720a17..f4e795a0d3f6 100644
--- a/drivers/gpu/drm/vc4/vc4_qpu_defines.h
+++ b/drivers/gpu/drm/vc4/vc4_qpu_defines.h
@@ -70,7 +70,7 @@ enum qpu_raddr {
 	QPU_R_ELEM_QPU = 38,
 	QPU_R_NOP,
 	QPU_R_XY_PIXEL_COORD = 41,
-	QPU_R_MS_REV_FLAGS = 41,
+	QPU_R_MS_REV_FLAGS = 42,
 	QPU_R_VPM = 48,
 	QPU_R_VPM_LD_BUSY,
 	QPU_R_VPM_LD_WAIT,
Signed-off-by: Eric Anholt <eric@anholt.net>
---
 drivers/gpu/drm/vc4/vc4_validate_shaders.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/vc4/vc4_validate_shaders.c b/drivers/gpu/drm/vc4/vc4_validate_shaders.c
index 302ce68e24cd..d126145c60ca 100644
--- a/drivers/gpu/drm/vc4/vc4_validate_shaders.c
+++ b/drivers/gpu/drm/vc4/vc4_validate_shaders.c
@@ -194,7 +194,7 @@ check_tmu_write(struct vc4_validated_shader_info *validated_shader,
 			return false;
 		}
 
-		/* We assert that the the clamped address is the first
+		/* We assert that the clamped address is the first
 		 * argument, and the UBO base address is the second argument.
 		 * This is arbitrary, but simpler than supporting flipping the
 		 * two either way.