When dealing with user interfaces that utilize explicit fences, it is convenient to sometimes create those fences after the fact, i.e. to query the dma-buf for the current set of implicit fences, encapsulate those into a sync_file and hand that fd back to userspace. Correspondingly, being able to add an explicit fence back into the mix of fences being tracked by the dma-buf allows that userspace fence to be included in any implicit tracking.
Signed-off-by: Chris Wilson chris@chris-wilson.co.uk Cc: Gustavo Padovan gustavo.padovan@collabora.co.uk Cc: Rob Clark robdclark@gmail.com Cc: Sumit Semwal sumit.semwal@linaro.org Cc: Daniel Vetter daniel.vetter@ffwll.ch Cc: dri-devel@lists.freedesktop.org ---
Gustavo, could you look at the sync-file/fence-array handling? There's definitely room for a cleaner sync_file_create() API.
I was thinking about this for the "why not sync-file" question Gustavo posed for the vgem_fences. I wanted to add an ioctl to vgem to allow exporting and creating fences from a sync-file, for testing the passing of explicit fences around between drivers, and realised that I was just writing a generic mechanism that only required dma-buf.
Whilst this interface could be used for lazily creating explicit fences, drivers will also likely want to support a specific ioctl to skip the dmabuf creation. I think this interface will be useful with the vgem fences for testing sync_file handling in drivers (on i915 at the moment, my tests for sync_file involve sleeping and a few white lies). So it fulfils a similar role in driver testing to debugfs/sw_sync? (sw_sync is still more apt for testing timelines etc; vgem feels more apt for ease of testing rendering.) -Chris
--- drivers/dma-buf/dma-buf.c | 100 +++++++++++++++++++++++++++++++++++++++++++ include/uapi/linux/dma-buf.h | 9 ++++ 2 files changed, 109 insertions(+)
diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c index 41fbce0c273a..6f066a8affd7 100644 --- a/drivers/dma-buf/dma-buf.c +++ b/drivers/dma-buf/dma-buf.c @@ -26,11 +26,13 @@ #include <linux/slab.h> #include <linux/dma-buf.h> #include <linux/fence.h> +#include <linux/fence-array.h> #include <linux/anon_inodes.h> #include <linux/export.h> #include <linux/debugfs.h> #include <linux/module.h> #include <linux/seq_file.h> +#include <linux/sync_file.h> #include <linux/poll.h> #include <linux/reservation.h> #include <linux/mm.h> @@ -254,6 +256,97 @@ out: return events; }
+static long dma_buf_import_fence_ioctl(struct dma_buf *dmabuf, + struct dma_buf_fence __user *arg) +{ + struct reservation_object *resv = dmabuf->resv; + struct dma_buf_fence cmd; + struct fence *fence; + int ret; + + if (copy_from_user(&cmd, arg, sizeof(cmd))) + return -EFAULT; + + fence = NULL; + //fence = sync_file_get_fence(cmd.fd); + if (!fence) + return -EINVAL; + + mutex_lock(&resv->lock.base); + if (cmd.flags & DMA_BUF_FENCE_WRITE) + reservation_object_add_excl_fence(resv, fence); + else if ((ret = reservation_object_reserve_shared(resv)) == 0) + reservation_object_add_shared_fence(resv, fence); + mutex_unlock(&resv->lock.base); + + fence_put(fence); + return ret; +} + +static long dma_buf_export_fence_ioctl(struct dma_buf *dmabuf, + struct dma_buf_fence __user *arg) +{ + struct reservation_object *resv = dmabuf->resv; + struct dma_buf_fence cmd; + struct fence *excl, **shared; + struct sync_file *sync = NULL; + unsigned int count, n; + int ret; + + if (get_user(cmd.flags, &arg->flags)) + return -EFAULT; + + ret = reservation_object_get_fences_rcu(resv, &excl, &count, &shared); + if (ret) + return ret; + + if (cmd.flags & DMA_BUF_FENCE_WRITE) { + if (excl) { + sync = sync_file_create(excl); + if (!sync) { + ret = -ENOMEM; + fence_put(excl); + } + } + for (n = 0; n < count; n++) + fence_put(shared[n]); + kfree(shared); + } else { + if (count) { + struct fence_array *array; + + array = fence_array_create(count, shared, 0, 0, false); + if (!array) { + for (n = 0; n < count; n++) + fence_put(shared[n]); + kfree(shared); + } else + sync = sync_file_create(&array->base); + if (!sync) { + ret = -ENOMEM; + fence_put(&array->base); + } + } + fence_put(excl); + } + if (ret) + return ret; + + cmd.fd = get_unused_fd_flags(O_CLOEXEC); + if (cmd.fd < 0) { + fput(sync->file); + return cmd.fd; + } + + if (put_user(cmd.fd, &arg->fd)) { + fput(sync->file); + return -EFAULT; + } + + fd_install(cmd.fd, sync->file); + return 0; +} + static long dma_buf_ioctl(struct file *file, 
unsigned int cmd, unsigned long arg) { @@ -292,6 +385,13 @@ static long dma_buf_ioctl(struct file *file, ret = dma_buf_begin_cpu_access(dmabuf, direction);
return ret; + + case DMA_BUF_IOCTL_IMPORT_FENCE: + return dma_buf_import_fence_ioctl(dmabuf, (void __user *)arg); + + case DMA_BUF_IOCTL_EXPORT_FENCE: + return dma_buf_export_fence_ioctl(dmabuf, (void __user *)arg); + default: return -ENOTTY; } diff --git a/include/uapi/linux/dma-buf.h b/include/uapi/linux/dma-buf.h index fb0dedb7c121..8d9a0d73ebaa 100644 --- a/include/uapi/linux/dma-buf.h +++ b/include/uapi/linux/dma-buf.h @@ -37,4 +37,13 @@ struct dma_buf_sync { #define DMA_BUF_BASE 'b' #define DMA_BUF_IOCTL_SYNC _IOW(DMA_BUF_BASE, 0, struct dma_buf_sync)
+struct dma_buf_fence { + __s32 fd; + __u32 flags; +#define DMA_BUF_FENCE_WRITE (1 << 0) +}; + +#define DMA_BUF_IOCTL_IMPORT_FENCE _IOW(DMA_BUF_BASE, 1, struct dma_buf_fence) +#define DMA_BUF_IOCTL_EXPORT_FENCE _IOWR(DMA_BUF_BASE, 2, struct dma_buf_fence) + #endif
Am 11.07.2016 um 23:59 schrieb Chris Wilson:
When dealing with user interfaces that utilize explicit fences, it is convenient to sometimes create those fences after the fact, i.e. to query the dma-buf for the current set of implicit fences, encapsulate those into a sync_file and hand that fd back to userspace. Correspondingly, being able to add an explicit fence back into the mix of fences being tracked by the dma-buf allows that userspace fence to be included in any implicit tracking.
Well I think that this is a rather questionable interface.
For example how do you deal with race conditions? E.g. between querying the fences from the reservation object and adding a new one we could gain new fences because of the kernel swapping things out or another thread making some submission with this buffer.
Additional to that IIRC reservation_object_add_excl_fence() currently replaces all shared fences with the exclusive one. A malicious application could use this to trick the kernel driver into thinking that this buffer object is idle while it is still accessed by some operation. At least with GPU operations you can easily take over the system when you manage to get access to a page table with this.
Regards, Christian.
Signed-off-by: Chris Wilson chris@chris-wilson.co.uk Cc: Gustavo Padovan gustavo.padovan@collabora.co.uk Cc: Rob Clark robdclark@gmail.com Cc: Sumit Semwal sumit.semwal@linaro.org Cc: Daniel Vetter daniel.vetter@ffwll.ch Cc: dri-devel@lists.freedesktop.org
Gustavo, could you look at the sync-file/fence-array handling? There's definitely room for a cleaner sync_file_create() API.
I was thinking about this for the "why not sync-file" question Gustavo posed for the vgem_fences. I wanted to add an ioctl to the vgem to allow exporting and creating fences from sync-file for testing passing explicit userspaces around between drivers, and realised that I was just writing a generic mechanism that only required dma-buf.
Whilst this interface could be used for lazily creating explicit fences, drivers will also likely want to support specific ioctl to skip the dmabuf creation, I think this interfaces will be useful with the vgem fences for testing sync_file handling in drivers (on i915 at the moment, my tests for sync_file involve sleeping and a few white lies). So fulfilling a similar role in driver testing to debugfs/sw_sync? (sw_sync is still more apt for testing timelines etc, vgem feels more apt for ease of testing rendering.) -Chris
drivers/dma-buf/dma-buf.c | 100 +++++++++++++++++++++++++++++++++++++++++++ include/uapi/linux/dma-buf.h | 9 ++++ 2 files changed, 109 insertions(+)
diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c index 41fbce0c273a..6f066a8affd7 100644 --- a/drivers/dma-buf/dma-buf.c +++ b/drivers/dma-buf/dma-buf.c @@ -26,11 +26,13 @@ #include <linux/slab.h> #include <linux/dma-buf.h> #include <linux/fence.h> +#include <linux/fence-array.h> #include <linux/anon_inodes.h> #include <linux/export.h> #include <linux/debugfs.h> #include <linux/module.h> #include <linux/seq_file.h> +#include <linux/sync_file.h> #include <linux/poll.h> #include <linux/reservation.h> #include <linux/mm.h> @@ -254,6 +256,97 @@ out: return events; }
+static long dma_buf_import_fence_ioctl(struct dma_buf *dmabuf,
struct dma_buf_fence __user *arg)
+{
- struct reservation_object *resv = dmabuf->resv;
- struct dma_buf_fence cmd;
- struct fence *fence;
- int ret;
- if (copy_from_user(&cmd, arg, sizeof(cmd)))
return -EFAULT;
- fence = NULL;
- //fence = sync_file_get_fence(cmd.fd);
- if (!fence)
return -EINVAL;
- mutex_lock(&resv->lock.base);
- if (cmd.flags & DMA_BUF_FENCE_WRITE)
reservation_object_add_excl_fence(resv, fence);
- else if ((ret = reservation_object_reserve_shared(resv)) == 0)
reservation_object_add_shared_fence(resv, fence);
- mutex_unlock(&resv->lock.base);
- fence_put(fence);
- return ret;
+}
+static long dma_buf_export_fence_ioctl(struct dma_buf *dmabuf,
struct dma_buf_fence __user *arg)
+{
- struct reservation_object *resv = dmabuf->resv;
- struct dma_buf_fence cmd;
- struct fence *excl, **shared;
- struct sync_file *sync = NULL;
- unsigned int count, n;
- int ret;
- if (get_user(cmd.flags, &arg->flags))
return -EFAULT;
- ret = reservation_object_get_fences_rcu(resv, &excl, &count, &shared);
- if (ret)
return ret;
- if (cmd.flags & DMA_BUF_FENCE_WRITE) {
if (excl) {
sync = sync_file_create(excl);
if (!sync) {
ret = -ENOMEM;
fence_put(excl);
}
}
for (n = 0; n < count; n++)
fence_put(shared[n]);
kfree(shared);
- } else {
if (count) {
struct fence_array *array;
array = fence_array_create(count, shared, 0, 0, false);
if (!array) {
for (n = 0; n < count; n++)
fence_put(shared[n]);
kfree(shared);
} else
sync = sync_file_create(&array->base);
if (!sync) {
ret = -ENOMEM;
fence_put(&array->base);
}
}
fence_put(excl);
- }
- if (ret)
return ret;
- cmd.fd = get_unused_fd_flags(O_CLOEXEC);
- if (cmd.fd < 0) {
fput(sync->file);
return cmd.fd;
- }
- if (put_user(cmd.fd, &arg->fd)) {
fput(sync->file);
return -EFAULT;
- }
- fd_install(cmd.fd, sync->file);
- return 0;
+}
- static long dma_buf_ioctl(struct file *file, unsigned int cmd, unsigned long arg) {
@@ -292,6 +385,13 @@ static long dma_buf_ioctl(struct file *file, ret = dma_buf_begin_cpu_access(dmabuf, direction);
return ret;
- case DMA_BUF_IOCTL_IMPORT_FENCE:
return dma_buf_import_fence_ioctl(dmabuf, (void __user *)arg);
- case DMA_BUF_IOCTL_EXPORT_FENCE:
return dma_buf_export_fence_ioctl(dmabuf, (void __user *)arg);
- default: return -ENOTTY; }
diff --git a/include/uapi/linux/dma-buf.h b/include/uapi/linux/dma-buf.h index fb0dedb7c121..8d9a0d73ebaa 100644 --- a/include/uapi/linux/dma-buf.h +++ b/include/uapi/linux/dma-buf.h @@ -37,4 +37,13 @@ struct dma_buf_sync { #define DMA_BUF_BASE 'b' #define DMA_BUF_IOCTL_SYNC _IOW(DMA_BUF_BASE, 0, struct dma_buf_sync)
+struct dma_buf_fence {
- __s32 fd;
- __u32 flags;
+#define DMA_BUF_FENCE_WRITE (1 << 0) +};
+#define DMA_BUF_IOCTL_IMPORT_FENCE _IOW(DMA_BUF_BASE, 1, struct dma_buf_fence) +#define DMA_BUF_IOCTL_EXPORT_FENCE _IOWR(DMA_BUF_BASE, 2, struct dma_buf_fence)
- #endif
On Tue, Jul 12, 2016 at 01:53:56PM +0200, Christian König wrote:
Am 11.07.2016 um 23:59 schrieb Chris Wilson:
When dealing with user interfaces that utilize explicit fences, it is convenient to sometimes create those fences after the fact, i.e. to query the dma-buf for the current set of implicit fences, encapsulate those into a sync_file and hand that fd back to userspace. Correspondingly, being able to add an explicit fence back into the mix of fences being tracked by the dma-buf allows that userspace fence to be included in any implicit tracking.
Well I think that this is a rather questionable interface.
For example how do you deal with race conditions? E.g. between querying the fences from the reservation object and adding a new one we could gain new fences because of the kernel swapping things out or another thread making some submission with this buffer.
Additional to that IIRC reservation_object_add_excl_fence() currently replaces all shared fences with the exclusive one. A malicious application could use this to trick the kernel driver into thinking that this buffer object is idle while it is still accessed by some operation. At least with GPU operations you can easily take over the system when you manage to get access to a page table with this.
The only difference here is that we believe the GPU drivers to enforce the ordering between each other. So we can either insert a wait before adding the exclusive fence, or we can just not expose an import ioctl. Extracting the set of fences isn't an issue? (That's the part that has a more legitimate usecase.) -Chris
On Tue, Jul 12, 2016 at 01:14:41PM +0100, Chris Wilson wrote:
On Tue, Jul 12, 2016 at 01:53:56PM +0200, Christian König wrote:
Am 11.07.2016 um 23:59 schrieb Chris Wilson:
When dealing with user interfaces that utilize explicit fences, it is convenient to sometimes create those fences after the fact, i.e. to query the dma-buf for the current set of implicit fences, encapsulate those into a sync_file and hand that fd back to userspace. Correspondingly, being able to add an explicit fence back into the mix of fences being tracked by the dma-buf allows that userspace fence to be included in any implicit tracking.
Well I think that this is a rather questionable interface.
For example how do you deal with race conditions? E.g. between querying the fences from the reservation object and adding a new one we could gain new fences because of the kernel swapping things out or another thread making some submission with this buffer.
Additional to that IIRC reservation_object_add_excl_fence() currently replaces all shared fences with the exclusive one. A malicious application could use this to trick the kernel driver into thinking that this buffer object is idle while it is still accessed by some operation. At least with GPU operations you can easily take over the system when you manage to get access to a page table with this.
The only difference here is that we believe the GPU drivers to enforce the ordering between each other. So we can either insert a wait before adding the exclusive fence, or we can just not expose an import ioctl. Extracting the set of fences isn't an issue? (That's the part that has a more legitimate usecase.)
If we change the kernel to just merge everything together when importing a new fence I think it should be perfectly safe. I.e. 1) grab reservation lock 2) assemble a fence_array with the current fences + the new one passed in through sync_file. 3) put that into the right slot 4) unlock
If we switch reservations over to fence_array it might even be somewhat pretty. -Daniel
Am 12.07.2016 um 16:31 schrieb Daniel Vetter:
On Tue, Jul 12, 2016 at 01:14:41PM +0100, Chris Wilson wrote:
On Tue, Jul 12, 2016 at 01:53:56PM +0200, Christian König wrote:
Am 11.07.2016 um 23:59 schrieb Chris Wilson:
When dealing with user interfaces that utilize explicit fences, it is convenient to sometimes create those fences after the fact, i.e. to query the dma-buf for the current set of implicit fences, encapsulate those into a sync_file and hand that fd back to userspace. Correspondingly, being able to add an explicit fence back into the mix of fences being tracked by the dma-buf allows that userspace fence to be included in any implicit tracking.
Well I think that this is a rather questionable interface.
For example how do you deal with race conditions? E.g. between querying the fences from the reservation object and adding a new one we could gain new fences because of the kernel swapping things out or another thread making some submission with this buffer.
Additional to that IIRC reservation_object_add_excl_fence() currently replaces all shared fences with the exclusive one. A malicious application could use this to trick the kernel driver into thinking that this buffer object is idle while it is still accessed by some operation. At least with GPU operations you can easily take over the system when you manage to get access to a page table with this.
The only difference here is that we believe the GPU drivers to enforce the ordering between each other. So we can either insert a wait before adding the exclusive fence, or we can just not expose an import ioctl. Extracting the set of fences isn't an issue? (That's the part that has a more legitimate usecase.)
If we change the kernel to just merge everything together when importing a new fence I think it should be perfectly save. I.e.
- grab reservation lock
- assemble a fence_array with the current fences + the new one passed in
through sync_file. 3) put that into the right slot 4) unlock
Yeah that sounds like this should work.
If we switch reservations over to fence_array it might even be somewhat pretty.
Actually I rather wanted to suggest that we use something like the amdgpu_sync object for the reservation object instead.
E.g. a collection like interface where fences can be added later on and only the newest one is kept around for each context.
The only problem with that approach is that it is a bit tricky to do without locking, e.g. only RCU.
Christian.
-Daniel
On Tue, Jul 12, 2016 at 05:36:37PM +0200, Christian König wrote:
Am 12.07.2016 um 16:31 schrieb Daniel Vetter:
On Tue, Jul 12, 2016 at 01:14:41PM +0100, Chris Wilson wrote:
On Tue, Jul 12, 2016 at 01:53:56PM +0200, Christian König wrote:
Am 11.07.2016 um 23:59 schrieb Chris Wilson:
When dealing with user interfaces that utilize explicit fences, it is convenient to sometimes create those fences after the fact, i.e. to query the dma-buf for the current set of implicit fences, encapsulate those into a sync_file and hand that fd back to userspace. Correspondingly, being able to add an explicit fence back into the mix of fences being tracked by the dma-buf allows that userspace fence to be included in any implicit tracking.
Well I think that this is a rather questionable interface.
For example how do you deal with race conditions? E.g. between querying the fences from the reservation object and adding a new one we could gain new fences because of the kernel swapping things out or another thread making some submission with this buffer.
Additional to that IIRC reservation_object_add_excl_fence() currently replaces all shared fences with the exclusive one. A malicious application could use this to trick the kernel driver into thinking that this buffer object is idle while it is still accessed by some operation. At least with GPU operations you can easily take over the system when you manage to get access to a page table with this.
The only difference here is that we believe the GPU drivers to enforce the ordering between each other. So we can either insert a wait before adding the exclusive fence, or we can just not expose an import ioctl. Extracting the set of fences isn't an issue? (That's the part that has a more legitimate usecase.)
If we change the kernel to just merge everything together when importing a new fence I think it should be perfectly save. I.e.
- grab reservation lock
- assemble a fence_array with the current fences + the new one passed in
through sync_file. 3) put that into the right slot 4) unlock
Yeah that sounds like this should work.
If we switch reservations over to fence_array it might even be somewhat pretty.
Actually I rather wanted to suggest that we use something like the amdgpu_sync object for the reservation object instead.
E.g. a collection like interface where fences can be added later on and only the newest one is kept around for each context.
Yeah, imo fence_array should automatically reduce the set of fences to the minimal set needed and merge all the ones on the same timeline. And we could rebase reservations on top of fence-array.
Or at least something along those lines.
The only problem with that approach is that it is a bit tricky to do without locking, e.g. only RCU.
I thought RCU is only for reading/waiting on fences, and that for changing them you always need to acquire the ww mutex behind the reservation?
Cheers, Daniel
2016-07-12 6:59 GMT+09:00 Chris Wilson chris@chris-wilson.co.uk:
When dealing with user interfaces that utilize explicit fences, it is convenient to sometimes create those fences after the fact, i.e. to query the dma-buf for the current set of implicit fences, encapsulate those into a sync_file and hand that fd back to userspace. Correspondingly, being able to add an explicit fence back into the mix of fences being tracked by the dma-buf allows that userspace fence to be included in any implicit tracking.
Regarding import fence, explicit fence can be corresponded to one of sub-allocated blocks of a DMA buffer. What if user-space requested import fence with a fence corresponding to a sub-allocated block? As you know, implicit fence is always corresponded to one DMA buffer not sub block of it. I guess there may be other things you should consider for import fence interface.
Signed-off-by: Chris Wilson chris@chris-wilson.co.uk Cc: Gustavo Padovan gustavo.padovan@collabora.co.uk Cc: Rob Clark robdclark@gmail.com Cc: Sumit Semwal sumit.semwal@linaro.org Cc: Daniel Vetter daniel.vetter@ffwll.ch Cc: dri-devel@lists.freedesktop.org
Gustavo, could you look at the sync-file/fence-array handling? There's definitely room for a cleaner sync_file_create() API.
I was thinking about this for the "why not sync-file" question Gustavo posed for the vgem_fences. I wanted to add an ioctl to the vgem to allow exporting and creating fences from sync-file for testing passing explicit userspaces around between drivers, and realised that I was just writing a generic mechanism that only required dma-buf.
Whilst this interface could be used for lazily creating explicit fences, drivers will also likely want to support specific ioctl to skip the dmabuf creation, I think this interfaces will be useful with the vgem fences for testing sync_file handling in drivers (on i915 at the moment, my tests for sync_file involve sleeping and a few white lies). So fulfilling a similar role in driver testing to debugfs/sw_sync? (sw_sync is still more apt for testing timelines etc, vgem feels more apt for ease of testing rendering.) -Chris
drivers/dma-buf/dma-buf.c | 100 +++++++++++++++++++++++++++++++++++++++++++ include/uapi/linux/dma-buf.h | 9 ++++ 2 files changed, 109 insertions(+)
diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c index 41fbce0c273a..6f066a8affd7 100644 --- a/drivers/dma-buf/dma-buf.c +++ b/drivers/dma-buf/dma-buf.c @@ -26,11 +26,13 @@ #include <linux/slab.h> #include <linux/dma-buf.h> #include <linux/fence.h> +#include <linux/fence-array.h> #include <linux/anon_inodes.h> #include <linux/export.h> #include <linux/debugfs.h> #include <linux/module.h> #include <linux/seq_file.h> +#include <linux/sync_file.h> #include <linux/poll.h> #include <linux/reservation.h> #include <linux/mm.h> @@ -254,6 +256,97 @@ out: return events; }
+static long dma_buf_import_fence_ioctl(struct dma_buf *dmabuf,
struct dma_buf_fence __user *arg)
+{
struct reservation_object *resv = dmabuf->resv;
struct dma_buf_fence cmd;
struct fence *fence;
int ret;
if (copy_from_user(&cmd, arg, sizeof(cmd)))
return -EFAULT;
fence = NULL;
//fence = sync_file_get_fence(cmd.fd);
Your mistake?
Thanks, Inki Dae
if (!fence)
return -EINVAL;
dri-devel@lists.freedesktop.org