This patch series fixes two issues in the host1x driver: First, the command buffer validation routine had vulnerabilities that were not detected in earlier testing. Second, the return codes of some functions were misleading or completely missing. This caused the driver to give wrong return codes also to the userspace.
The series is based on top of 3.10rc1. I have tested the patch series on cardhu by running host1x and gr2d test cases (available at [0]). I would appreciate any help in testing/reviewing these patches.
[0] https://gitorious.org/linux-host1x/libdrm-host1x
Arto Merilainen (5): gpu: host1x: Fix syncpoint wait return value gpu: host1x: Fix memory access in syncpt request gpu: host1x: Fix client_managed type gpu: host1x: Rework CPU syncpoint increment drm/tegra: Fix syncpoint increment return code
Terje Bergstrom (1): gpu: host1x: Fixes to host1x firewall
drivers/gpu/host1x/dev.h | 8 +-- drivers/gpu/host1x/drm/drm.c | 3 +- drivers/gpu/host1x/drm/gr2d.c | 2 +- drivers/gpu/host1x/hw/cdma_hw.c | 2 +- drivers/gpu/host1x/hw/syncpt_hw.c | 12 ++-- drivers/gpu/host1x/job.c | 120 ++++++++++++++++--------------------- drivers/gpu/host1x/syncpt.c | 29 +++------ drivers/gpu/host1x/syncpt.h | 13 ++-- 8 files changed, 79 insertions(+), 110 deletions(-)
From: Terje Bergstrom tbergstrom@nvidia.com
This patch adds several fixes to host1x firewall: - Host1x firewall does not survive if it expects a reloc, but user space didn't pass any relocs. Also it reset the reloc table for each gather, whereas they should be reset only per submit. Also class does not need to be reset for each class - once per submit is enough. - For INCR opcode the check was not working properly at all. - The firewall verified gather buffers before copying them. This allowed a malicious application to rewrite the buffer content by timing the rewrite carefully. This patch makes the buffer validation occur after copying the buffers.
Signed-off-by: Terje Bergstrom tbergstrom@nvidia.com Signed-off-by: Arto Merilainen amerilainen@nvidia.com --- drivers/gpu/host1x/job.c | 120 ++++++++++++++++++++-------------------------- 1 file changed, 53 insertions(+), 67 deletions(-)
diff --git a/drivers/gpu/host1x/job.c b/drivers/gpu/host1x/job.c index f665d67..4f3c004 100644 --- a/drivers/gpu/host1x/job.c +++ b/drivers/gpu/host1x/job.c @@ -228,17 +228,15 @@ static unsigned int do_relocs(struct host1x_job *job, struct host1x_bo *cmdbuf) void *cmdbuf_page_addr = NULL;
/* pin & patch the relocs for one gather */ - while (i < job->num_relocs) { + for (i = 0; i < job->num_relocs; ++i) { struct host1x_reloc *reloc = &job->relocarray[i]; u32 reloc_addr = (job->reloc_addr_phys[i] + reloc->target_offset) >> reloc->shift; u32 *target;
/* skip all other gathers */ - if (!(reloc->cmdbuf && cmdbuf == reloc->cmdbuf)) { - i++; + if (cmdbuf != reloc->cmdbuf) continue; - }
if (last_page != reloc->cmdbuf_offset >> PAGE_SHIFT) { if (cmdbuf_page_addr) @@ -257,9 +255,6 @@ static unsigned int do_relocs(struct host1x_job *job, struct host1x_bo *cmdbuf)
target = cmdbuf_page_addr + (reloc->cmdbuf_offset & ~PAGE_MASK); *target = reloc_addr; - - /* mark this gather as handled */ - reloc->cmdbuf = 0; }
if (cmdbuf_page_addr) @@ -268,15 +263,15 @@ static unsigned int do_relocs(struct host1x_job *job, struct host1x_bo *cmdbuf) return 0; }
-static int check_reloc(struct host1x_reloc *reloc, struct host1x_bo *cmdbuf, - unsigned int offset) +static bool check_reloc(struct host1x_reloc *reloc, struct host1x_bo *cmdbuf, + unsigned int offset) { offset *= sizeof(u32);
- if (reloc->cmdbuf != cmdbuf || reloc->cmdbuf_offset != offset) - return -EINVAL; + if (!reloc || reloc->cmdbuf != cmdbuf || reloc->cmdbuf_offset != offset) + return true;
- return 0; + return false; }
struct host1x_firewall { @@ -330,7 +325,7 @@ static int check_incr(struct host1x_firewall *fw) u32 count = fw->count; u32 reg = fw->reg;
- while (fw) { + while (count) { if (fw->words == 0) return -EINVAL;
@@ -376,69 +371,58 @@ static int check_nonincr(struct host1x_firewall *fw) return 0; }
-static int validate(struct host1x_job *job, struct device *dev, - struct host1x_job_gather *g) +static int validate_gather(struct host1x_firewall *fw, + struct host1x_job_gather *g) { - u32 *cmdbuf_base; + u32 *cmdbuf_base = (u32 *)fw->job->gather_copy_mapped + (g->offset / 4); int err = 0; - struct host1x_firewall fw;
- fw.job = job; - fw.dev = dev; - fw.reloc = job->relocarray; - fw.num_relocs = job->num_relocs; - fw.cmdbuf_id = g->bo; - - fw.offset = 0; - fw.class = 0; - - if (!job->is_addr_reg) + if (!fw->job->is_addr_reg) return 0;
- cmdbuf_base = host1x_bo_mmap(g->bo); - if (!cmdbuf_base) - return -ENOMEM; + fw->words = g->words; + fw->cmdbuf_id = g->bo; + fw->offset = 0;
- fw.words = g->words; - while (fw.words && !err) { - u32 word = cmdbuf_base[fw.offset]; + while (fw->words && !err) { + u32 word = cmdbuf_base[fw->offset]; u32 opcode = (word & 0xf0000000) >> 28;
- fw.mask = 0; - fw.reg = 0; - fw.count = 0; - fw.words--; - fw.offset++; + fw->mask = 0; + fw->reg = 0; + fw->count = 0; + fw->words--; + fw->offset++;
switch (opcode) { case 0: - fw.class = word >> 6 & 0x3ff; - fw.mask = word & 0x3f; - fw.reg = word >> 16 & 0xfff; - err = check_mask(&fw); + fw->class = word >> 6 & 0x3ff; + fw->mask = word & 0x3f; + fw->reg = word >> 16 & 0xfff; + err = check_mask(fw); if (err) goto out; break; case 1: - fw.reg = word >> 16 & 0xfff; - fw.count = word & 0xffff; - err = check_incr(&fw); + fw->reg = word >> 16 & 0xfff; + fw->count = word & 0xffff; + err = check_incr(fw); if (err) goto out; break;
case 2: - fw.reg = word >> 16 & 0xfff; - fw.count = word & 0xffff; - err = check_nonincr(&fw); + fw->reg = word >> 16 & 0xfff; + fw->count = word & 0xffff; + err = check_nonincr(fw); if (err) goto out; break;
case 3: - fw.mask = word & 0xffff; - fw.reg = word >> 16 & 0xfff; - err = check_mask(&fw); + fw->mask = word & 0xffff; + fw->reg = word >> 16 & 0xfff; + err = check_mask(fw); if (err) goto out; break; @@ -453,21 +437,26 @@ static int validate(struct host1x_job *job, struct device *dev, }
/* No relocs should remain at this point */ - if (fw.num_relocs) + if (fw->num_relocs) err = -EINVAL;
out: - host1x_bo_munmap(g->bo, cmdbuf_base); - return err; }
static inline int copy_gathers(struct host1x_job *job, struct device *dev) { + struct host1x_firewall fw; size_t size = 0; size_t offset = 0; int i;
+ fw.job = job; + fw.dev = dev; + fw.reloc = job->relocarray; + fw.num_relocs = job->num_relocs; + fw.class = 0; + for (i = 0; i < job->num_gathers; i++) { struct host1x_job_gather *g = &job->gathers[i]; size += g->words * sizeof(u32); @@ -488,14 +477,19 @@ static inline int copy_gathers(struct host1x_job *job, struct device *dev) struct host1x_job_gather *g = &job->gathers[i]; void *gather;
+ /* Copy the gather */ gather = host1x_bo_mmap(g->bo); memcpy(job->gather_copy_mapped + offset, gather + g->offset, g->words * sizeof(u32)); host1x_bo_munmap(g->bo, gather);
+ /* Store the location in the buffer */ g->base = job->gather_copy; g->offset = offset; - g->bo = NULL; + + /* Validate the job */ + if (validate_gather(&fw, g)) + return -EINVAL;
offset += g->words * sizeof(u32); } @@ -508,6 +502,7 @@ int host1x_job_pin(struct host1x_job *job, struct device *dev) int err; unsigned int i, j; struct host1x *host = dev_get_drvdata(dev->parent); + DECLARE_BITMAP(waitchk_mask, host1x_syncpt_nb_pts(host));
bitmap_zero(waitchk_mask, host1x_syncpt_nb_pts(host)); @@ -540,20 +535,11 @@ int host1x_job_pin(struct host1x_job *job, struct device *dev) if (job->gathers[j].bo == g->bo) job->gathers[j].handled = true;
- err = 0; - - if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL)) - err = validate(job, dev, g); - + err = do_relocs(job, g->bo); if (err) - dev_err(dev, "Job invalid (err=%d)\n", err); - - if (!err) - err = do_relocs(job, g->bo); - - if (!err) - err = do_waitchks(job, host, g->bo); + break;
+ err = do_waitchks(job, host, g->bo); if (err) break; }
On Fri, May 17, 2013 at 02:49:43PM +0300, Arto Merilainen wrote:
From: Terje Bergstrom tbergstrom@nvidia.com
This patch adds several fixes to host1x firewall:
- Host1x firewall does not survive if it expects a reloc, but user space didn't pass any relocs. Also it reset the reloc table for each gather, whereas they should be reset only per submit. Also class does not need to be reset for each class - once per submit is enough.
- For INCR opcode the check was not working properly at all.
- The firewall verified gather buffers before copying them. This allowed a malicious application to rewrite the buffer content by timing the rewrite carefully. This patch makes the buffer validation occur after copying the buffers.
Can these be split into separate patches, please? It's not only always good to split logical changes into separate patches but it also makes reviewing a lot more pleasant. It's hard to tell from this combined patch which changes belong together.
I have a few additional comments inline.
diff --git a/drivers/gpu/host1x/job.c b/drivers/gpu/host1x/job.c index f665d67..4f3c004 100644 --- a/drivers/gpu/host1x/job.c +++ b/drivers/gpu/host1x/job.c @@ -228,17 +228,15 @@ static unsigned int do_relocs(struct host1x_job *job, struct host1x_bo *cmdbuf) void *cmdbuf_page_addr = NULL;
/* pin & patch the relocs for one gather */
- while (i < job->num_relocs) {
- for (i = 0; i < job->num_relocs; ++i) {
Nit: I prefer post-increment where possible. For consistency.
@@ -268,15 +263,15 @@ static unsigned int do_relocs(struct host1x_job *job, struct host1x_bo *cmdbuf) return 0; }
-static int check_reloc(struct host1x_reloc *reloc, struct host1x_bo *cmdbuf,
unsigned int offset)
+static bool check_reloc(struct host1x_reloc *reloc, struct host1x_bo *cmdbuf,
unsigned int offset)
{ offset *= sizeof(u32);
- if (reloc->cmdbuf != cmdbuf || reloc->cmdbuf_offset != offset)
return -EINVAL;
- if (!reloc || reloc->cmdbuf != cmdbuf || reloc->cmdbuf_offset != offset)
Is the additional !reloc check really necessary? Looking at the callers, they always pass in fw->relocarray, which in turn is only NULL if no buffers are to be relocated.
return true;
- return 0;
- return false;
}
I wonder whether we should be changing the logic here and have the check_reloc() function return true if the relocation is good. I find that to be more intuitive.
@@ -376,69 +371,58 @@ static int check_nonincr(struct host1x_firewall *fw) return 0; }
-static int validate(struct host1x_job *job, struct device *dev,
struct host1x_job_gather *g)
+static int validate_gather(struct host1x_firewall *fw,
struct host1x_job_gather *g)
I don't think we necessarily need to rename the function. However since you modify each line that the rename touches anyway it's okay.
@@ -508,6 +502,7 @@ int host1x_job_pin(struct host1x_job *job, struct device *dev) int err; unsigned int i, j; struct host1x *host = dev_get_drvdata(dev->parent);
- DECLARE_BITMAP(waitchk_mask, host1x_syncpt_nb_pts(host));
This is an unnecessary whitespace change.
Thierry
On 05/26/2013 01:02 PM, Thierry Reding wrote:
- PGP Signed by an unknown key
On Fri, May 17, 2013 at 02:49:43PM +0300, Arto Merilainen wrote:
From: Terje Bergstrom tbergstrom@nvidia.com
This patch adds several fixes to host1x firewall:
- Host1x firewall does not survive if it expects a reloc, but user space didn't pass any relocs. Also it reset the reloc table for each gather, whereas they should be reset only per submit. Also class does not need to be reset for each class - once per submit is enough.
- For INCR opcode the check was not working properly at all.
- The firewall verified gather buffers before copying them. This allowed a malicious application to rewrite the buffer content by timing the rewrite carefully. This patch makes the buffer validation occur after copying the buffers.
Can these be split into separate patches, please? It's not only always good to split logical changes into separate patches but it also makes reviewing a lot more pleasant. It's hard to tell from this combined patch which changes belong together.
Sure.
I have a few additional comments inline.
diff --git a/drivers/gpu/host1x/job.c b/drivers/gpu/host1x/job.c index f665d67..4f3c004 100644 --- a/drivers/gpu/host1x/job.c +++ b/drivers/gpu/host1x/job.c @@ -228,17 +228,15 @@ static unsigned int do_relocs(struct host1x_job *job, struct host1x_bo *cmdbuf) void *cmdbuf_page_addr = NULL;
/* pin & patch the relocs for one gather */
- while (i < job->num_relocs) {
- for (i = 0; i < job->num_relocs; ++i) {
Nit: I prefer post-increment where possible. For consistency.
Will do.
@@ -268,15 +263,15 @@ static unsigned int do_relocs(struct host1x_job *job, struct host1x_bo *cmdbuf) return 0; }
-static int check_reloc(struct host1x_reloc *reloc, struct host1x_bo *cmdbuf,
unsigned int offset)
+static bool check_reloc(struct host1x_reloc *reloc, struct host1x_bo *cmdbuf,
{ offset *= sizeof(u32);unsigned int offset)
- if (reloc->cmdbuf != cmdbuf || reloc->cmdbuf_offset != offset)
return -EINVAL;
- if (!reloc || reloc->cmdbuf != cmdbuf || reloc->cmdbuf_offset != offset)
Is the additional !reloc check really necessary? Looking at the callers, they always pass in fw->relocarray, which in turn is only NULL if no buffers are to be relocated.
Yes, the additional check is necessary exactly for that reason. The code will fail if the userspace does not deliver a relocation array and still pushes data to an address register.
However, the code *should* check that there are relocations left before even coming here so I probably just fix this differently.
return true;
- return 0;
- return false; }
I wonder whether we should be changing the logic here and have the check_reloc() function return true if the relocation is good. I find that to be more intuitive.
I was also thinking that earlier. Will do.
@@ -508,6 +502,7 @@ int host1x_job_pin(struct host1x_job *job, struct device *dev) int err; unsigned int i, j; struct host1x *host = dev_get_drvdata(dev->parent);
- DECLARE_BITMAP(waitchk_mask, host1x_syncpt_nb_pts(host));
This is an unnecessary whitespace change.
Ops. Will fix.
- Arto
Syncpoint wait returned EAGAIN if it was called with zero timeout. This patch modifies the function to return ETIMEDOUT.
Signed-off-by: Arto Merilainen amerilainen@nvidia.com --- drivers/gpu/host1x/syncpt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/host1x/syncpt.c b/drivers/gpu/host1x/syncpt.c index 4b49345..5bf5366 100644 --- a/drivers/gpu/host1x/syncpt.c +++ b/drivers/gpu/host1x/syncpt.c @@ -187,7 +187,7 @@ int host1x_syncpt_wait(struct host1x_syncpt *sp, u32 thresh, long timeout, }
if (!timeout) { - err = -EAGAIN; + err = -ETIMEDOUT; goto done; }
@@ -205,7 +205,7 @@ int host1x_syncpt_wait(struct host1x_syncpt *sp, u32 thresh, long timeout, if (err) goto done;
- err = -EAGAIN; + err = -ETIMEDOUT; /* Caller-specified timeout may be impractically low */ if (timeout < 0) timeout = LONG_MAX;
On Fri, May 17, 2013 at 02:49:44PM +0300, Arto Merilainen wrote:
Syncpoint wait returned EAGAIN if it was called with zero timeout. This patch modifies the function to return ETIMEDOUT.
This description is a bit redundant, because it repeats in prose what the code does. I'd rather see a description of why the change is necessary.
Thinking about it, maybe it would be good to have two separate error codes. Keeping -EAGAIN for the case where a zero timeout was passed doesn't sound too bad to differentiate it from the case where a non- zero timeout was passed and it actually timed out. What do you think?
Thierry
On 05/26/2013 01:12 PM, Thierry Reding wrote:
- PGP Signed by an unknown key
On Fri, May 17, 2013 at 02:49:44PM +0300, Arto Merilainen wrote:
Syncpoint wait returned EAGAIN if it was called with zero timeout. This patch modifies the function to return ETIMEDOUT.
This description is a bit redundant, because it repeats in prose what the code does. I'd rather see a description of why the change is necessary.
True. Will fix.
Thinking about it, maybe it would be good to have two separate error codes. Keeping -EAGAIN for the case where a zero timeout was passed doesn't sound too bad to differentiate it from the case where a non- zero timeout was passed and it actually timed out. What do you think?
I agree, in this case it would not look bad at all. However, user space libraries may loop until the ioctl return code is something else than -EAGAIN or -EINTR. Especially function drmIoctl() in libdrm does this which is why I noted this isssue in the first place.
If user space uses zero timeout to just check if a syncpoint value has already passed the library continues looping until the syncpoint value actually passes. Of course, we could just modify the ioctl interface to "cast" this return code to something else but that does not seem correct.
- Arto
On Mon, May 27, 2013 at 09:55:46AM +0300, Arto Merilainen wrote:
On 05/26/2013 01:12 PM, Thierry Reding wrote:
- PGP Signed by an unknown key
On Fri, May 17, 2013 at 02:49:44PM +0300, Arto Merilainen wrote:
[...]
Thinking about it, maybe it would be good to have two separate error codes. Keeping -EAGAIN for the case where a zero timeout was passed doesn't sound too bad to differentiate it from the case where a non- zero timeout was passed and it actually timed out. What do you think?
I agree, in this case it would not look bad at all. However, user space libraries may loop until the ioctl return code is something else than -EAGAIN or -EINTR. Especially function drmIoctl() in libdrm does this which is why I noted this isssue in the first place.
If user space uses zero timeout to just check if a syncpoint value has already passed the library continues looping until the syncpoint value actually passes. Of course, we could just modify the ioctl interface to "cast" this return code to something else but that does not seem correct.
That doesn't sound right. Maybe drmIoctl() needs fixing instead. Looking at the history, drmIoctl() was introduced to automatically loop if a signal was received (commit 8b9ab108ec1f2ba2b503f713769c4946849b3cb2). However the ioctl(3p) manpage doesn't mention that ioctl() returns EAGAIN in case it is interrupted by a signal.
I'm adding Keith as author of that commit and the xorg-devel mailing list on Cc to get some more eyes on this.
Thierry
Thierry Reding thierry.reding@gmail.com writes:
That doesn't sound right. Maybe drmIoctl() needs fixing instead. Looking at the history, drmIoctl() was introduced to automatically loop if a signal was received (commit 8b9ab108ec1f2ba2b503f713769c4946849b3cb2). However the ioctl(3p) manpage doesn't mention that ioctl() returns EAGAIN in case it is interrupted by a signal.
EAGAIN is being returned when the GPU is wedged to ask the application to re-submit the request, which will presumably be held until the GPU is un-wedged.
On Tue, May 28, 2013 at 01:12:12PM -0600, Keith Packard wrote:
Thierry Reding thierry.reding@gmail.com writes:
That doesn't sound right. Maybe drmIoctl() needs fixing instead. Looking at the history, drmIoctl() was introduced to automatically loop if a signal was received (commit 8b9ab108ec1f2ba2b503f713769c4946849b3cb2). However the ioctl(3p) manpage doesn't mention that ioctl() returns EAGAIN in case it is interrupted by a signal.
EAGAIN is being returned when the GPU is wedged to ask the application to re-submit the request, which will presumably be held until the GPU is un-wedged.
Isn't that a bit risky? What if something special needs to be done to unwedge the GPU other than re-submit the request, or if it just can't be reasonably unwedged. In that case drmIoctl() will keep looping indefinitely.
If the above is indeed the expected behaviour for drivers, then we need a different error code for the SYNCPT_WAIT ioctl. EAGAIN is the best fit and anything else doesn't quite match the use-case. A different option might be not to use drmIoctl() on Tegra.
Thierry
On Tue, Jun 11, 2013 at 12:48 PM, Thierry Reding thierry.reding@gmail.com wrote:
On Tue, May 28, 2013 at 01:12:12PM -0600, Keith Packard wrote:
Thierry Reding thierry.reding@gmail.com writes:
That doesn't sound right. Maybe drmIoctl() needs fixing instead. Looking at the history, drmIoctl() was introduced to automatically loop if a signal was received (commit 8b9ab108ec1f2ba2b503f713769c4946849b3cb2). However the ioctl(3p) manpage doesn't mention that ioctl() returns EAGAIN in case it is interrupted by a signal.
EAGAIN is being returned when the GPU is wedged to ask the application to re-submit the request, which will presumably be held until the GPU is un-wedged.
Isn't that a bit risky? What if something special needs to be done to unwedge the GPU other than re-submit the request, or if it just can't be reasonably unwedged. In that case drmIoctl() will keep looping indefinitely.
If the above is indeed the expected behaviour for drivers, then we need a different error code for the SYNCPT_WAIT ioctl. EAGAIN is the best fit and anything else doesn't quite match the use-case. A different option might be not to use drmIoctl() on Tegra.
We don't use the EAGAIN ioctl restarting to resubmit the batchbuffer which blew up the gpu (that one has been submitted already in a different ioctl call), but to be able to restart the ioctl after the reset has completed: We need to kick every thread which is potentially holding GEM locks and make sure that we block them (at a point where they don't hold any locks) until the reset handler completed. To avoid a validation nightmare we use the same codepaths as we use for signal interrupts, so ioctl restarting is a very natural fit for this.
Resubmitting victim workloads when a gpu crash happened is something the reset handler would do (kernel work item currently), not any userspace process doing an ioctl. But atm we don't resubmit victimized workloads. -Daniel -- Daniel Vetter Software Engineer, Intel Corporation +41 (0) 79 365 57 48 - http://blog.ffwll.ch
On 11.06.2013 14:00, Daniel Vetter wrote:
We don't use the EAGAIN ioctl restarting to resubmit the batchbuffer which blew up the gpu (that one has been submitted already in a different ioctl call), but to be able to restart the ioctl after the reset has completed: We need to kick every thread which is potentially holding GEM locks and make sure that we block them (at a point where they don't hold any locks) until the reset handler completed. To avoid a validation nightmare we use the same codepaths as we use for signal interrupts, so ioctl restarting is a very natural fit for this.
Resubmitting victim workloads when a gpu crash happened is something the reset handler would do (kernel work item currently), not any userspace process doing an ioctl. But atm we don't resubmit victimized workloads.
I don't understand the end-to-end of how resubmit is supposed to work. User space is not supposed to resubmit, but still EAGAIN is returned to user space, and drmIoctl() in user space just calls the came ioctl again. Sounds like drmIoctl() is completely wrong.
In Tegra, when a job blows up, we reset the involved units, and set the pushbuffer pointer of host1x to point to the next job, and re-enable units. There's no need for anybody to resubmit anything, as kernel already has them.
Terje
On Tue, Jun 11, 2013 at 1:43 PM, Terje Bergström tbergstrom@nvidia.com wrote:
On 11.06.2013 14:00, Daniel Vetter wrote:
We don't use the EAGAIN ioctl restarting to resubmit the batchbuffer which blew up the gpu (that one has been submitted already in a different ioctl call), but to be able to restart the ioctl after the reset has completed: We need to kick every thread which is potentially holding GEM locks and make sure that we block them (at a point where they don't hold any locks) until the reset handler completed. To avoid a validation nightmare we use the same codepaths as we use for signal interrupts, so ioctl restarting is a very natural fit for this.
Resubmitting victim workloads when a gpu crash happened is something the reset handler would do (kernel work item currently), not any userspace process doing an ioctl. But atm we don't resubmit victimized workloads.
I don't understand the end-to-end of how resubmit is supposed to work. User space is not supposed to resubmit, but still EAGAIN is returned to user space, and drmIoctl() in user space just calls the came ioctl again. Sounds like drmIoctl() is completely wrong.
Maybe it wasn't clear, but -EAGAIN does _not_ resubmit work. -EAGAIN is used to restart the ioctl if we had to kick a thread (to make sure it doesn't hold any locks), e.g. for a blocking wait on oustanding rendering. The codepaths taken work exactly as if the thread is interrupt with a signal.
In Tegra, when a job blows up, we reset the involved units, and set the pushbuffer pointer of host1x to point to the next job, and re-enable units. There's no need for anybody to resubmit anything, as kernel already has them.
Yeah, that's how it works in i915.ko, too. -Daniel -- Daniel Vetter Software Engineer, Intel Corporation +41 (0) 79 365 57 48 - http://blog.ffwll.ch
On Tue, Jun 11, 2013 at 02:09:31PM +0200, Daniel Vetter wrote:
On Tue, Jun 11, 2013 at 1:43 PM, Terje Bergström tbergstrom@nvidia.com wrote:
On 11.06.2013 14:00, Daniel Vetter wrote:
We don't use the EAGAIN ioctl restarting to resubmit the batchbuffer which blew up the gpu (that one has been submitted already in a different ioctl call), but to be able to restart the ioctl after the reset has completed: We need to kick every thread which is potentially holding GEM locks and make sure that we block them (at a point where they don't hold any locks) until the reset handler completed. To avoid a validation nightmare we use the same codepaths as we use for signal interrupts, so ioctl restarting is a very natural fit for this.
Resubmitting victim workloads when a gpu crash happened is something the reset handler would do (kernel work item currently), not any userspace process doing an ioctl. But atm we don't resubmit victimized workloads.
I don't understand the end-to-end of how resubmit is supposed to work. User space is not supposed to resubmit, but still EAGAIN is returned to user space, and drmIoctl() in user space just calls the came ioctl again. Sounds like drmIoctl() is completely wrong.
Maybe it wasn't clear, but -EAGAIN does _not_ resubmit work. -EAGAIN is used to restart the ioctl if we had to kick a thread (to make sure it doesn't hold any locks), e.g. for a blocking wait on oustanding rendering. The codepaths taken work exactly as if the thread is interrupt with a signal.
Okay. So if I understand correctly that reserves EAGAIN for a very specific purpose DRM-wide and therefore we can't really use it for something Tegra-specific. Even if we wanted to side-step the issue by not using drmIoctl(), it might be a bad idea (or even break in some other path) if we use EAGAIN. I guess we'll have to find some other error-code for the case where a zero timeout is given to the syncpoint wait ioctl.
Maybe it's best to use ETIMEDOUT after, just like for the case of a non-zero timeout and the syncpoint not being incremented in time. Userspace should be able to differentiate between the two because it knows what timeout it did pass to the ioctl.
Thierry
On 11.06.2013 15:09, Daniel Vetter wrote:
Maybe it wasn't clear, but -EAGAIN does _not_ resubmit work. -EAGAIN is used to restart the ioctl if we had to kick a thread (to make sure it doesn't hold any locks), e.g. for a blocking wait on oustanding rendering. The codepaths taken work exactly as if the thread is interrupt with a signal.
You did make it clear that there's no resubmission, but other parts confused me.
So this is used so that a legacy driver which does not do fine-grained locking can interrupt all waits for completion for a wedged submit. This way a driver-wide lock get unlocked, cleanup code acquires locks, does the magic to unwedge GPU, and unlocks. Then user space can re-submit the waits as it got -EAGAIN.
Fortunately the error code doesn't really matter as long as it's not EAGAIN, so we can just use ETIMEDOUT as Thierry suggested in his follow-up.
Terje
On Wed, Jun 12, 2013 at 12:28 PM, Terje Bergström tbergstrom@nvidia.com wrote:
On 11.06.2013 15:09, Daniel Vetter wrote:
Maybe it wasn't clear, but -EAGAIN does _not_ resubmit work. -EAGAIN is used to restart the ioctl if we had to kick a thread (to make sure it doesn't hold any locks), e.g. for a blocking wait on oustanding rendering. The codepaths taken work exactly as if the thread is interrupt with a signal.
You did make it clear that there's no resubmission, but other parts confused me.
So this is used so that a legacy driver which does not do fine-grained locking can interrupt all waits for completion for a wedged submit. This way a driver-wide lock get unlocked, cleanup code acquires locks, does the magic to unwedge GPU, and unlocks. Then user space can re-submit the waits as it got -EAGAIN.
I think this is not just for drivers without fine-grained locking, at least I expect that we'll keep the same mechanism when switching over to per-object locking - we simply have too many places where a thread could arbitrarily block while holding locks that the gpu reset handler also needs to grab. You could of course restructure the code massively and drop all locks while waiting, but that means adding tons of special-purpose code which is only really exercises when a gpu hang occurs. Our approach with the ioctl restart otoh reuses codepaths which are all heavily used by X (due to X constantly getting interrupt by timers and input events). -Daniel -- Daniel Vetter Software Engineer, Intel Corporation +41 (0) 79 365 57 48 - http://blog.ffwll.ch
This patch fixes a bad memory access in syncpoint request code. If no syncpoints were available, the code accessed unreserved memory area causing unexpected behaviour.
Signed-off-by: Arto Merilainen amerilainen@nvidia.com --- drivers/gpu/host1x/syncpt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/gpu/host1x/syncpt.c b/drivers/gpu/host1x/syncpt.c index 5bf5366..6b7ee88 100644 --- a/drivers/gpu/host1x/syncpt.c +++ b/drivers/gpu/host1x/syncpt.c @@ -40,7 +40,7 @@ static struct host1x_syncpt *_host1x_syncpt_alloc(struct host1x *host,
for (i = 0; i < host->info->nb_pts && sp->name; i++, sp++) ; - if (sp->dev) + if (i >= host->info->nb_pts) return NULL;
name = kasprintf(GFP_KERNEL, "%02d-%s", sp->id,
On Fri, May 17, 2013 at 02:49:45PM +0300, Arto Merilainen wrote:
This patch fixes a bad memory access in syncpoint request code. If no syncpoints were available, the code accessed unreserved memory area causing unexpected behaviour.
Signed-off-by: Arto Merilainen amerilainen@nvidia.com
drivers/gpu/host1x/syncpt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/gpu/host1x/syncpt.c b/drivers/gpu/host1x/syncpt.c index 5bf5366..6b7ee88 100644 --- a/drivers/gpu/host1x/syncpt.c +++ b/drivers/gpu/host1x/syncpt.c @@ -40,7 +40,7 @@ static struct host1x_syncpt *_host1x_syncpt_alloc(struct host1x *host,
for (i = 0; i < host->info->nb_pts && sp->name; i++, sp++) ;
- if (sp->dev)
- if (i >= host->info->nb_pts) return NULL;
While changing this, can you please add a blank line between the loop and the new 'if (...)'?
Thierry
On 05/26/2013 01:15 PM, Thierry Reding wrote:
- PGP Signed by an unknown key
On Fri, May 17, 2013 at 02:49:45PM +0300, Arto Merilainen wrote:
This patch fixes a bad memory access in syncpoint request code. If no syncpoints were available, the code accessed unreserved memory area causing unexpected behaviour.
Signed-off-by: Arto Merilainen amerilainen@nvidia.com
drivers/gpu/host1x/syncpt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/gpu/host1x/syncpt.c b/drivers/gpu/host1x/syncpt.c index 5bf5366..6b7ee88 100644 --- a/drivers/gpu/host1x/syncpt.c +++ b/drivers/gpu/host1x/syncpt.c @@ -40,7 +40,7 @@ static struct host1x_syncpt *_host1x_syncpt_alloc(struct host1x *host,
for (i = 0; i < host->info->nb_pts && sp->name; i++, sp++) ;
- if (sp->dev)
- if (i >= host->info->nb_pts) return NULL;
While changing this, can you please add a blank line between the loop and the new 'if (...)'?
Yep. Will do.
- Arto
client_managed field in syncpoint structure was defined as an integer. The field holds, however, only a boolean value. This patch modifies the type to boolean.
Signed-off-by: Arto Merilainen amerilainen@nvidia.com --- drivers/gpu/host1x/drm/gr2d.c | 2 +- drivers/gpu/host1x/syncpt.c | 8 ++++---- drivers/gpu/host1x/syncpt.h | 6 +++--- 3 files changed, 8 insertions(+), 8 deletions(-)
diff --git a/drivers/gpu/host1x/drm/gr2d.c b/drivers/gpu/host1x/drm/gr2d.c index 6a45ae0..6c3a27b 100644 --- a/drivers/gpu/host1x/drm/gr2d.c +++ b/drivers/gpu/host1x/drm/gr2d.c @@ -281,7 +281,7 @@ static int gr2d_probe(struct platform_device *pdev) if (!gr2d->channel) return -ENOMEM;
- *syncpts = host1x_syncpt_request(dev, 0); + *syncpts = host1x_syncpt_request(dev, false); if (!(*syncpts)) { host1x_channel_free(gr2d->channel); return -ENOMEM; diff --git a/drivers/gpu/host1x/syncpt.c b/drivers/gpu/host1x/syncpt.c index 6b7ee88..e560d67 100644 --- a/drivers/gpu/host1x/syncpt.c +++ b/drivers/gpu/host1x/syncpt.c @@ -32,7 +32,7 @@
static struct host1x_syncpt *_host1x_syncpt_alloc(struct host1x *host, struct device *dev, - int client_managed) + bool client_managed) { int i; struct host1x_syncpt *sp = host->syncpt; @@ -331,7 +331,7 @@ int host1x_syncpt_init(struct host1x *host) host1x_syncpt_restore(host);
/* Allocate sync point to use for clearing waits for expired fences */ - host->nop_sp = _host1x_syncpt_alloc(host, NULL, 0); + host->nop_sp = _host1x_syncpt_alloc(host, NULL, false); if (!host->nop_sp) return -ENOMEM;
@@ -339,7 +339,7 @@ int host1x_syncpt_init(struct host1x *host) }
struct host1x_syncpt *host1x_syncpt_request(struct device *dev, - int client_managed) + bool client_managed) { struct host1x *host = dev_get_drvdata(dev->parent); return _host1x_syncpt_alloc(host, dev, client_managed); @@ -353,7 +353,7 @@ void host1x_syncpt_free(struct host1x_syncpt *sp) kfree(sp->name); sp->dev = NULL; sp->name = NULL; - sp->client_managed = 0; + sp->client_managed = false; }
void host1x_syncpt_deinit(struct host1x *host) diff --git a/drivers/gpu/host1x/syncpt.h b/drivers/gpu/host1x/syncpt.h index c998061..d00e758 100644 --- a/drivers/gpu/host1x/syncpt.h +++ b/drivers/gpu/host1x/syncpt.h @@ -36,7 +36,7 @@ struct host1x_syncpt { atomic_t max_val; u32 base_val; const char *name; - int client_managed; + bool client_managed; struct host1x *host; struct device *dev;
@@ -94,7 +94,7 @@ static inline bool host1x_syncpt_check_max(struct host1x_syncpt *sp, u32 real) }
/* Return true if sync point is client managed. */ -static inline int host1x_syncpt_client_managed(struct host1x_syncpt *sp) +static inline bool host1x_syncpt_client_managed(struct host1x_syncpt *sp) { return sp->client_managed; } @@ -157,7 +157,7 @@ u32 host1x_syncpt_id(struct host1x_syncpt *sp);
/* Allocate a sync point for a device. */ struct host1x_syncpt *host1x_syncpt_request(struct device *dev, - int client_managed); + bool client_managed);
/* Free a sync point. */ void host1x_syncpt_free(struct host1x_syncpt *sp);
On Fri, May 17, 2013 at 02:49:46PM +0300, Arto Merilainen wrote:
client_managed field in syncpoint structure was defined as an integer. The field holds, however, only a boolean value. This patch modifies the type to boolean.
Signed-off-by: Arto Merilainen amerilainen@nvidia.com
drivers/gpu/host1x/drm/gr2d.c | 2 +- drivers/gpu/host1x/syncpt.c | 8 ++++---- drivers/gpu/host1x/syncpt.h | 6 +++--- 3 files changed, 8 insertions(+), 8 deletions(-)
Looks good. I'll wait for v2 of the series before applying this to make things easier.
Thierry
This patch merges host1x_syncpt_cpu_incr to host1x_syncpt_incr() as they are in practise doing the same thing. host1x_syncpt_incr() is also modified to return error codes.
Signed-off-by: Arto Merilainen amerilainen@nvidia.com --- drivers/gpu/host1x/dev.h | 8 ++++---- drivers/gpu/host1x/hw/cdma_hw.c | 2 +- drivers/gpu/host1x/hw/syncpt_hw.c | 12 +++++------- drivers/gpu/host1x/syncpt.c | 15 ++------------- drivers/gpu/host1x/syncpt.h | 7 ++----- 5 files changed, 14 insertions(+), 30 deletions(-)
diff --git a/drivers/gpu/host1x/dev.h b/drivers/gpu/host1x/dev.h index a1607d6..790ddf1 100644 --- a/drivers/gpu/host1x/dev.h +++ b/drivers/gpu/host1x/dev.h @@ -73,7 +73,7 @@ struct host1x_syncpt_ops { void (*restore_wait_base)(struct host1x_syncpt *syncpt); void (*load_wait_base)(struct host1x_syncpt *syncpt); u32 (*load)(struct host1x_syncpt *syncpt); - void (*cpu_incr)(struct host1x_syncpt *syncpt); + int (*cpu_incr)(struct host1x_syncpt *syncpt); int (*patch_wait)(struct host1x_syncpt *syncpt, void *patch_addr); };
@@ -157,10 +157,10 @@ static inline u32 host1x_hw_syncpt_load(struct host1x *host, return host->syncpt_op->load(sp); }
-static inline void host1x_hw_syncpt_cpu_incr(struct host1x *host, - struct host1x_syncpt *sp) +static inline int host1x_hw_syncpt_cpu_incr(struct host1x *host, + struct host1x_syncpt *sp) { - host->syncpt_op->cpu_incr(sp); + return host->syncpt_op->cpu_incr(sp); }
static inline int host1x_hw_syncpt_patch_wait(struct host1x *host, diff --git a/drivers/gpu/host1x/hw/cdma_hw.c b/drivers/gpu/host1x/hw/cdma_hw.c index 590b69d..2ee4ad5 100644 --- a/drivers/gpu/host1x/hw/cdma_hw.c +++ b/drivers/gpu/host1x/hw/cdma_hw.c @@ -44,7 +44,7 @@ static void cdma_timeout_cpu_incr(struct host1x_cdma *cdma, u32 getptr, u32 i;
for (i = 0; i < syncpt_incrs; i++) - host1x_syncpt_cpu_incr(cdma->timeout.syncpt); + host1x_syncpt_incr(cdma->timeout.syncpt);
/* after CPU incr, ensure shadow is up to date */ host1x_syncpt_load(cdma->timeout.syncpt); diff --git a/drivers/gpu/host1x/hw/syncpt_hw.c b/drivers/gpu/host1x/hw/syncpt_hw.c index 6117499..0cf6095 100644 --- a/drivers/gpu/host1x/hw/syncpt_hw.c +++ b/drivers/gpu/host1x/hw/syncpt_hw.c @@ -77,21 +77,19 @@ static u32 syncpt_load(struct host1x_syncpt *sp) * Write a cpu syncpoint increment to the hardware, without touching * the cache. */ -static void syncpt_cpu_incr(struct host1x_syncpt *sp) +static int syncpt_cpu_incr(struct host1x_syncpt *sp) { struct host1x *host = sp->host; u32 reg_offset = sp->id / 32;
if (!host1x_syncpt_client_managed(sp) && - host1x_syncpt_idle(sp)) { - dev_err(host->dev, "Trying to increment syncpoint id %d beyond max\n", - sp->id); - host1x_debug_dump(sp->host); - return; - } + host1x_syncpt_idle(sp)) + return -EINVAL; host1x_sync_writel(host, BIT_MASK(sp->id), HOST1X_SYNC_SYNCPT_CPU_INCR(reg_offset)); wmb(); + + return 0; }
/* remove a wait pointed to by patch_addr */ diff --git a/drivers/gpu/host1x/syncpt.c b/drivers/gpu/host1x/syncpt.c index e560d67..afb631a 100644 --- a/drivers/gpu/host1x/syncpt.c +++ b/drivers/gpu/host1x/syncpt.c @@ -128,22 +128,11 @@ u32 host1x_syncpt_load_wait_base(struct host1x_syncpt *sp) }
/* - * Write a cpu syncpoint increment to the hardware, without touching - * the cache. Caller is responsible for host being powered. - */ -void host1x_syncpt_cpu_incr(struct host1x_syncpt *sp) -{ - host1x_hw_syncpt_cpu_incr(sp->host, sp); -} - -/* * Increment syncpoint value from cpu, updating cache */ -void host1x_syncpt_incr(struct host1x_syncpt *sp) +int host1x_syncpt_incr(struct host1x_syncpt *sp) { - if (host1x_syncpt_client_managed(sp)) - host1x_syncpt_incr_max(sp, 1); - host1x_syncpt_cpu_incr(sp); + return host1x_hw_syncpt_cpu_incr(sp->host, sp); }
/* diff --git a/drivers/gpu/host1x/syncpt.h b/drivers/gpu/host1x/syncpt.h index d00e758..267c0b9 100644 --- a/drivers/gpu/host1x/syncpt.h +++ b/drivers/gpu/host1x/syncpt.h @@ -115,9 +115,6 @@ static inline bool host1x_syncpt_idle(struct host1x_syncpt *sp) /* Return pointer to struct denoting sync point id. */ struct host1x_syncpt *host1x_syncpt_get(struct host1x *host, u32 id);
-/* Request incrementing a sync point. */ -void host1x_syncpt_cpu_incr(struct host1x_syncpt *sp); - /* Load current value from hardware to the shadow register. */ u32 host1x_syncpt_load(struct host1x_syncpt *sp);
@@ -133,8 +130,8 @@ void host1x_syncpt_restore(struct host1x *host); /* Read current wait base value into shadow register and return it. */ u32 host1x_syncpt_load_wait_base(struct host1x_syncpt *sp);
-/* Increment sync point and its max. */ -void host1x_syncpt_incr(struct host1x_syncpt *sp); +/* Request incrementing a sync point. */ +int host1x_syncpt_incr(struct host1x_syncpt *sp);
/* Indicate future operations by incrementing the sync point max. */ u32 host1x_syncpt_incr_max(struct host1x_syncpt *sp, u32 incrs);
On Fri, May 17, 2013 at 02:49:47PM +0300, Arto Merilainen wrote:
This patch merges host1x_syncpt_cpu_incr to host1x_syncpt_incr() as they are in practise doing the same thing. host1x_syncpt_incr() is also modified to return error codes.
Signed-off-by: Arto Merilainen amerilainen@nvidia.com
drivers/gpu/host1x/dev.h | 8 ++++---- drivers/gpu/host1x/hw/cdma_hw.c | 2 +- drivers/gpu/host1x/hw/syncpt_hw.c | 12 +++++------- drivers/gpu/host1x/syncpt.c | 15 ++------------- drivers/gpu/host1x/syncpt.h | 7 ++----- 5 files changed, 14 insertions(+), 30 deletions(-)
Looks good. Can you carry this over to v2 as well so I can apply all of them in one go?
Thierry
Add syncpoint increment to return a proper return code based on the return value from host1x.
Signed-off-by: Arto Merilainen amerilainen@nvidia.com --- drivers/gpu/host1x/drm/drm.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/drivers/gpu/host1x/drm/drm.c b/drivers/gpu/host1x/drm/drm.c index 2b561c9..1dfd454 100644 --- a/drivers/gpu/host1x/drm/drm.c +++ b/drivers/gpu/host1x/drm/drm.c @@ -378,8 +378,7 @@ static int tegra_syncpt_incr(struct drm_device *drm, void *data, if (!sp) return -EINVAL;
- host1x_syncpt_incr(sp); - return 0; + return host1x_syncpt_incr(sp); }
static int tegra_syncpt_wait(struct drm_device *drm, void *data,
On Fri, May 17, 2013 at 02:49:48PM +0300, Arto Merilainen wrote:
Add syncpoint increment to return a proper return code based on the return value from host1x.
Signed-off-by: Arto Merilainen amerilainen@nvidia.com
drivers/gpu/host1x/drm/drm.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-)
Maybe this should be folded into patch 5 because it is related to the fact that host1x_syncpt_incr() now returns proper error codes?
Thierry
dri-devel@lists.freedesktop.org