Hi, Tiago.
On 08/26/2015 02:02 AM, Tiago Vignatti wrote:
From: Daniel Vetter <daniel.vetter@ffwll.ch>
The userspace might need some sort of cache coherency management, e.g. when CPU and GPU domains are being accessed through dma-buf at the same time. To circumvent this problem there are begin/end coherency markers that forward directly to the existing dma-buf device drivers' vfunc hooks. Userspace can make use of those markers through the DMA_BUF_IOCTL_SYNC ioctl. The sequence would be used as follows:
  - mmap dma-buf fd
  - for each drawing/upload cycle in CPU
    1. SYNC_START ioctl
    2. read/write to mmap area or a 2d sub-region of it
    3. SYNC_END ioctl
  - munmap once you don't need the buffer any more
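For reference, the sequence above would look roughly like this from userspace. This is only a sketch written against the uapi proposed further down in this patch; the cpu_write_rect() helper and the stride/bpp/rect values are invented for illustration:

#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <linux/dma-buf.h>

/* Hypothetical: write a 64x64 pixel rect of a mapped dma-buf from the CPU. */
static int cpu_write_rect(int dmabuf_fd, size_t size)
{
	struct dma_buf_sync *sync;
	void *map;
	int ret;

	/* Fixed header plus one entry of the flexible regions[] array. */
	sync = calloc(1, sizeof(*sync) + sizeof(struct dma_buf_sync_region));
	if (!sync)
		return -1;

	map = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
		   dmabuf_fd, 0);
	if (map == MAP_FAILED) {
		free(sync);
		return -1;
	}

	sync->flags = DMA_BUF_SYNC_START | DMA_BUF_SYNC_WRITE;
	sync->stride_bytes = 4096;	/* illustrative buffer geometry */
	sync->bytes_per_pixel = 4;
	sync->num_regions = 1;
	sync->regions[0].x = 0;
	sync->regions[0].y = 0;
	sync->regions[0].width = 64;	/* in pixels */
	sync->regions[0].height = 64;

	ret = ioctl(dmabuf_fd, DMA_BUF_IOCTL_SYNC, sync);
	if (ret == 0) {
		memset(map, 0xff, 64 * 4);	/* touch (part of) the rect */

		sync->flags = DMA_BUF_SYNC_END | DMA_BUF_SYNC_WRITE;
		ret = ioctl(dmabuf_fd, DMA_BUF_IOCTL_SYNC, sync);
	}

	munmap(map, size);
	free(sync);
	return ret;
}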
v2 (Tiago): Fix header file type names (u64 -> __u64)
v3 (Tiago): Add documentation. Use enum dma_buf_sync_flags to the begin/end dma-buf functions. Check for overflows in start/length.
v4 (Tiago): Use 2d regions for sync.
Daniel V had issues with the sync argument proposed by Daniel S. I'm fine with that argument or an argument containing only a single sync rect. I'm not sure whether Daniel V will find it easier to accept only a single sync rect...
Also please see below.
Cc: Sumit Semwal <sumit.semwal@linaro.org>
Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
Signed-off-by: Tiago Vignatti <tiago.vignatti@intel.com>
I'm unable to test the 2d sync properly, because begin/end access in i915 don't track the mapped range at all.
 Documentation/dma-buf-sharing.txt      | 13 ++++++
 drivers/dma-buf/dma-buf.c              | 77 ++++++++++++++++++++++++++++------
 drivers/gpu/drm/i915/i915_gem_dmabuf.c |  6 ++-
 include/linux/dma-buf.h                | 20 +++++----
 include/uapi/linux/dma-buf.h           | 57 +++++++++++++++++++++++++
 5 files changed, 150 insertions(+), 23 deletions(-)
 create mode 100644 include/uapi/linux/dma-buf.h
diff --git a/Documentation/dma-buf-sharing.txt b/Documentation/dma-buf-sharing.txt
index 480c8de..8061ac0 100644
--- a/Documentation/dma-buf-sharing.txt
+++ b/Documentation/dma-buf-sharing.txt
@@ -355,6 +355,19 @@ Being able to mmap an export dma-buf buffer object has 2 main use-cases:
No special interfaces, userspace simply calls mmap on the dma-buf fd.
+   Also, the userspace might need some sort of cache coherency management, e.g.
+   when CPU and GPU domains are being accessed through dma-buf at the same
+   time. To circumvent this problem there are begin/end coherency markers that
+   forward directly to existing dma-buf device drivers' vfunc hooks. Userspace
+   can make use of those markers through the DMA_BUF_IOCTL_SYNC ioctl. The
+   sequence would be used as follows:
+     - mmap dma-buf fd
+     - for each drawing/upload cycle in CPU
+       1. SYNC_START ioctl
+       2. read/write to mmap area or a 2d sub-region of it
+       3. SYNC_END ioctl
+     - munmap once you don't need the buffer any more
Supporting existing mmap interfaces in importers
Similar to the motivation for kernel cpu access it is again important that
diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
index 155c146..b6a4a06 100644
--- a/drivers/dma-buf/dma-buf.c
+++ b/drivers/dma-buf/dma-buf.c
@@ -251,11 +251,55 @@ out:
 	return events;
 }
+static long dma_buf_ioctl(struct file *file,
+			  unsigned int cmd, unsigned long arg)
+{
+	struct dma_buf *dmabuf;
+	struct dma_buf_sync sync;
+	enum dma_data_direction direction;
+
+	dmabuf = file->private_data;
+
+	if (!is_dma_buf_file(file))
+		return -EINVAL;
+
+	if (cmd != DMA_BUF_IOCTL_SYNC)
+		return -ENOTTY;
+
+	if (copy_from_user(&sync, (void __user *) arg, sizeof(sync)))
+		return -EFAULT;
Do we actually copy all sync regions here?
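It doesn't look like it: sizeof(sync) only covers the fixed header, and regions[] is a flexible array member, so the region payload is never read from userspace. Copying it would need a second copy_from_user plus a sanity bound on num_regions, along these lines (a rough sketch only; DMA_BUF_MAX_SYNC_REGIONS is an invented limit):

	struct dma_buf_sync header;
	struct dma_buf_sync_region *regions;

	/* Copy the fixed-size header first... */
	if (copy_from_user(&header, (void __user *)arg, sizeof(header)))
		return -EFAULT;

	if (header.num_regions > DMA_BUF_MAX_SYNC_REGIONS)
		return -EINVAL;

	/* ...then the trailing variable-length region array. */
	regions = kmalloc_array(header.num_regions, sizeof(*regions),
				GFP_KERNEL);
	if (!regions)
		return -ENOMEM;

	if (copy_from_user(regions,
			   (void __user *)(arg + sizeof(header)),
			   header.num_regions * sizeof(*regions))) {
		kfree(regions);
		return -EFAULT;
	}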
+	if (sync.flags & DMA_BUF_SYNC_RW)
+		direction = DMA_BIDIRECTIONAL;
+	else if (sync.flags & DMA_BUF_SYNC_READ)
+		direction = DMA_FROM_DEVICE;
+	else if (sync.flags & DMA_BUF_SYNC_WRITE)
+		direction = DMA_TO_DEVICE;
+	else
+		return -EINVAL;
+
+	if (sync.flags & ~DMA_BUF_SYNC_VALID_FLAGS_MASK)
+		return -EINVAL;
+
+	/* TODO: check for overflowing the buffer's size - how so, checking region by
+	 * region here? Maybe need to check for the other parameters as well. */
+
+	if (sync.flags & DMA_BUF_SYNC_END)
+		dma_buf_end_cpu_access(dmabuf, sync.stride_bytes, sync.bytes_per_pixel,
+				       sync.num_regions, sync.regions, direction);
+	else
+		dma_buf_begin_cpu_access(dmabuf, sync.stride_bytes, sync.bytes_per_pixel,
+					 sync.num_regions, sync.regions, direction);
+
+	return 0;
+}
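On the overflow TODO: region by region seems like the only sensible way, since each rect has to be checked against the stride and the buffer size independently. A rough sketch of what such a check could look like (dma_buf_check_region() is not part of the patch, just an illustration):

static int dma_buf_check_region(const struct dma_buf *dmabuf,
				const struct dma_buf_sync *sync,
				const struct dma_buf_sync_region *r)
{
	u64 end_x, end_y;

	/* x + width and y + height must not wrap around. */
	if (r->width > U64_MAX - r->x || r->height > U64_MAX - r->y)
		return -EINVAL;
	end_x = r->x + r->width;
	end_y = r->y + r->height;

	if (!sync->bytes_per_pixel || !sync->stride_bytes)
		return -EINVAL;

	/* The rightmost pixel of a row must stay inside the stride
	 * (division instead of multiplication to avoid overflow). */
	if (end_x > div_u64(sync->stride_bytes, sync->bytes_per_pixel))
		return -EINVAL;

	/* Conservatively require end_y full rows to fit in the buffer. */
	if (end_y > div64_u64(dmabuf->size, sync->stride_bytes))
		return -EINVAL;

	return 0;
}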
 static const struct file_operations dma_buf_fops = {
 	.release	= dma_buf_release,
 	.mmap		= dma_buf_mmap_internal,
 	.llseek		= dma_buf_llseek,
 	.poll		= dma_buf_poll,
+	.unlocked_ioctl	= dma_buf_ioctl,
 };
@@ -539,14 +583,17 @@ EXPORT_SYMBOL_GPL(dma_buf_unmap_attachment);
 /**
  * dma_buf_begin_cpu_access - Must be called before touching memory from the
  * cpu in the kernel context. Calls begin_cpu_access to allow exporter-specific
  * preparations. Coherency is only guaranteed in the specified range for the
  * specified access direction.
  * @dmabuf:	[in]	buffer to prepare cpu access for.
- * @start:	[in]	start of range for cpu access.
- * @len:	[in]	length of range for cpu access.
- * @direction:	[in]	length of range for cpu access.
+ * @stride_bytes:	[in]	stride in bytes for cpu access.
+ * @bytes_per_pixel:	[in]	bytes per pixel of the region for cpu access.
+ * @num_regions:	[in]	number of regions.
+ * @region:	[in]	vector of 2-dimensional regions for cpu access.
+ * @direction:	[in]	direction of range for cpu access.
  *
  * Can return negative error values, returns 0 on success.
  */
-int dma_buf_begin_cpu_access(struct dma_buf *dmabuf, size_t start, size_t len,
-			     enum dma_data_direction direction)
+int dma_buf_begin_cpu_access(struct dma_buf *dmabuf, size_t stride_bytes,
+		size_t bytes_per_pixel, size_t num_regions, struct dma_buf_sync_region regions[],
+		enum dma_data_direction direction)
 {
 	int ret = 0;
@@ -554,8 +601,8 @@ int dma_buf_begin_cpu_access(struct dma_buf *dmabuf, size_t start, size_t len,
 		return -EINVAL;

 	if (dmabuf->ops->begin_cpu_access)
-		ret = dmabuf->ops->begin_cpu_access(dmabuf, start,
-						    len, direction);
+		ret = dmabuf->ops->begin_cpu_access(dmabuf, stride_bytes, bytes_per_pixel,
+				num_regions, regions, direction);

 	return ret;
 }
@@ -567,19 +614,23 @@ EXPORT_SYMBOL_GPL(dma_buf_begin_cpu_access);
 /**
  * dma_buf_end_cpu_access - Must be called after touching memory from the
  * cpu in the kernel context. Calls end_cpu_access to allow exporter-specific
  * actions. Coherency is only guaranteed in the specified range for the
  * specified access direction.
  * @dmabuf:	[in]	buffer to complete cpu access for.
- * @start:	[in]	start of range for cpu access.
- * @len:	[in]	length of range for cpu access.
- * @direction:	[in]	length of range for cpu access.
+ * @stride_bytes:	[in]	stride in bytes for cpu access.
+ * @bytes_per_pixel:	[in]	bytes per pixel of the region for cpu access.
+ * @num_regions:	[in]	number of regions.
+ * @regions:	[in]	vector of 2-dimensional regions for cpu access.
+ * @direction:	[in]	direction of range for cpu access.
  *
  * This call must always succeed.
  */
-void dma_buf_end_cpu_access(struct dma_buf *dmabuf, size_t start, size_t len,
-			    enum dma_data_direction direction)
+void dma_buf_end_cpu_access(struct dma_buf *dmabuf, size_t stride_bytes,
+		size_t bytes_per_pixel, size_t num_regions, struct dma_buf_sync_region regions[],
+		enum dma_data_direction direction)
 {
 	WARN_ON(!dmabuf);
 	if (dmabuf->ops->end_cpu_access)
-		dmabuf->ops->end_cpu_access(dmabuf, start, len, direction);
+		dmabuf->ops->end_cpu_access(dmabuf, stride_bytes, bytes_per_pixel,
+				num_regions, regions, direction);
 }
 EXPORT_SYMBOL_GPL(dma_buf_end_cpu_access);
diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
index 95cbfff..e5bb7a3 100644
--- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
@@ -212,7 +212,8 @@ static int i915_gem_dmabuf_mmap(struct dma_buf *dma_buf, struct vm_area_struct *
 	return 0;
 }
-static int i915_gem_begin_cpu_access(struct dma_buf *dma_buf, size_t start, size_t length, enum dma_data_direction direction)
+static int i915_gem_begin_cpu_access(struct dma_buf *dma_buf, size_t stride_bytes, size_t bytes_per_pixel,
+		size_t num_regions, struct dma_buf_sync_region regions[], enum dma_data_direction direction)
 {
 	struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf);
 	struct drm_device *dev = obj->base.dev;
@@ -228,7 +229,8 @@ static int i915_gem_begin_cpu_access(struct dma_buf *dma_buf, size_t start, size
 	return ret;
 }
-static void i915_gem_end_cpu_access(struct dma_buf *dma_buf, size_t start, size_t length, enum dma_data_direction direction)
+static void i915_gem_end_cpu_access(struct dma_buf *dma_buf, size_t stride_bytes, size_t bytes_per_pixel,
+		size_t num_regions, struct dma_buf_sync_region regions[], enum dma_data_direction direction)
 {
 	struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf);
 	struct drm_device *dev = obj->base.dev;
diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h
index f98bd70..ed457cb 100644
--- a/include/linux/dma-buf.h
+++ b/include/linux/dma-buf.h
@@ -33,6 +33,8 @@
 #include <linux/fence.h>
 #include <linux/wait.h>
+#include <uapi/linux/dma-buf.h>
 struct device;
 struct dma_buf;
 struct dma_buf_attachment;
@@ -93,10 +95,10 @@ struct dma_buf_ops {
 	/* after final dma_buf_put() */
 	void (*release)(struct dma_buf *);
-	int (*begin_cpu_access)(struct dma_buf *, size_t, size_t,
-				enum dma_data_direction);
-	void (*end_cpu_access)(struct dma_buf *, size_t, size_t,
-			       enum dma_data_direction);
+	int (*begin_cpu_access)(struct dma_buf *, size_t, size_t, size_t,
+				struct dma_buf_sync_region [], enum dma_data_direction);
+	void (*end_cpu_access)(struct dma_buf *, size_t, size_t, size_t,
+			       struct dma_buf_sync_region [], enum dma_data_direction);

 	void *(*kmap_atomic)(struct dma_buf *, unsigned long);
 	void (*kunmap_atomic)(struct dma_buf *, unsigned long, void *);
 	void *(*kmap)(struct dma_buf *, unsigned long);
@@ -224,10 +226,12 @@ struct sg_table *dma_buf_map_attachment(struct dma_buf_attachment *,
 					enum dma_data_direction);
 void dma_buf_unmap_attachment(struct dma_buf_attachment *, struct sg_table *,
 				enum dma_data_direction);
-int dma_buf_begin_cpu_access(struct dma_buf *dma_buf, size_t start, size_t len,
-			     enum dma_data_direction dir);
-void dma_buf_end_cpu_access(struct dma_buf *dma_buf, size_t start, size_t len,
-			    enum dma_data_direction dir);
+int dma_buf_begin_cpu_access(struct dma_buf *dma_buf, size_t stride_bytes,
+			     size_t bytes_per_pixel, size_t num_regions,
+			     struct dma_buf_sync_region regions[], enum dma_data_direction dir);
+void dma_buf_end_cpu_access(struct dma_buf *dma_buf, size_t stride_bytes,
+			    size_t bytes_per_pixel, size_t num_regions,
+			    struct dma_buf_sync_region regions[], enum dma_data_direction dir);
 void *dma_buf_kmap_atomic(struct dma_buf *, unsigned long);
 void dma_buf_kunmap_atomic(struct dma_buf *, unsigned long, void *);
 void *dma_buf_kmap(struct dma_buf *, unsigned long);
diff --git a/include/uapi/linux/dma-buf.h b/include/uapi/linux/dma-buf.h
new file mode 100644
index 0000000..c63b578
--- /dev/null
+++ b/include/uapi/linux/dma-buf.h
@@ -0,0 +1,57 @@
+/*
+ * Framework for buffer objects that can be shared across devices/subsystems.
+ *
+ * Copyright(C) 2015 Intel Ltd
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef _DMA_BUF_UAPI_H_
+#define _DMA_BUF_UAPI_H_
+enum dma_buf_sync_flags {
+	DMA_BUF_SYNC_READ = (1 << 0),
+	DMA_BUF_SYNC_WRITE = (2 << 0),
+	DMA_BUF_SYNC_RW = (3 << 0),
+	DMA_BUF_SYNC_START = (0 << 2),
+	DMA_BUF_SYNC_END = (1 << 2),
+	DMA_BUF_SYNC_VALID_FLAGS_MASK = DMA_BUF_SYNC_RW |
+					DMA_BUF_SYNC_END
+};
+/* 2-dimensional region, used for multi-range flush. This can be used to
+ * synchronize the CPU by batching several sub-regions, smaller than the
+ * mapped dma-buf, all at once. */
+struct dma_buf_sync_region {
+	__u64 x;
+	__u64 y;
+	__u64 width;
+	__u64 height;
+};
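To make the region semantics concrete: a rect resolves to one byte range per row, at offset y * stride_bytes + x * bytes_per_pixel and of length width * bytes_per_pixel. A hypothetical importer-side walk over one region (not part of the patch) would be:

	__u64 row;

	for (row = r->y; row < r->y + r->height; row++) {
		__u64 start = row * stride_bytes + r->x * bytes_per_pixel;
		__u64 len = r->width * bytes_per_pixel;

		/* flush or invalidate [start, start + len) of the buffer */
	}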
+/* begin/end dma-buf functions used for userspace mmap. */
+struct dma_buf_sync {
+	enum dma_buf_sync_flags flags;
+	__u64 stride_bytes;
+	__u32 bytes_per_pixel;
+	__u32 num_regions;
+	struct dma_buf_sync_region regions[];
+};
+#define DMA_BUF_BASE		'b'
+#define DMA_BUF_IOCTL_SYNC	_IOWR(DMA_BUF_BASE, 0, struct dma_buf_sync)
Should be _IOW( ?
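Since the kernel only reads the struct and nothing is copied back to userspace through it, _IOW would match convention, i.e.:

#define DMA_BUF_IOCTL_SYNC	_IOW(DMA_BUF_BASE, 0, struct dma_buf_sync)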
+#endif
Thomas