With the addition of device memory (lmem) for the i915 driver, the dma-buf interface needs a little polishing.
Do some minor variable cleanup to make the upcoming patches a little easier.
Normalize struct sg_table variables to sgt and struct dma_buf_attachment variables to attach. Also address checkpatch issues: use sizeof(*ptr) instead of sizeof(struct type), and !ptr instead of ptr == NULL.
Signed-off-by: Michael J. Ruhl michael.j.ruhl@intel.com --- drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c | 58 +++++++++++----------- 1 file changed, 29 insertions(+), 29 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c index 7db5a793739d..0d9124ad549a 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c @@ -17,11 +17,11 @@ static struct drm_i915_gem_object *dma_buf_to_obj(struct dma_buf *buf) return to_intel_bo(buf->priv); }
-static struct sg_table *i915_gem_map_dma_buf(struct dma_buf_attachment *attachment, +static struct sg_table *i915_gem_map_dma_buf(struct dma_buf_attachment *attach, enum dma_data_direction dir) { - struct drm_i915_gem_object *obj = dma_buf_to_obj(attachment->dmabuf); - struct sg_table *st; + struct drm_i915_gem_object *obj = dma_buf_to_obj(attach->dmabuf); + struct sg_table *sgt; struct scatterlist *src, *dst; int ret, i;
@@ -30,54 +30,54 @@ static struct sg_table *i915_gem_map_dma_buf(struct dma_buf_attachment *attachme goto err;
/* Copy sg so that we make an independent mapping */ - st = kmalloc(sizeof(struct sg_table), GFP_KERNEL); - if (st == NULL) { + sgt = kmalloc(sizeof(*sgt), GFP_KERNEL); + if (!sgt) { ret = -ENOMEM; goto err_unpin_pages; }
- ret = sg_alloc_table(st, obj->mm.pages->nents, GFP_KERNEL); + ret = sg_alloc_table(sgt, obj->mm.pages->nents, GFP_KERNEL); if (ret) goto err_free;
src = obj->mm.pages->sgl; - dst = st->sgl; + dst = sgt->sgl; for (i = 0; i < obj->mm.pages->nents; i++) { sg_set_page(dst, sg_page(src), src->length, 0); dst = sg_next(dst); src = sg_next(src); }
- if (!dma_map_sg_attrs(attachment->dev, - st->sgl, st->nents, dir, + if (!dma_map_sg_attrs(attach->dev, + sgt->sgl, sgt->nents, dir, DMA_ATTR_SKIP_CPU_SYNC)) { ret = -ENOMEM; goto err_free_sg; }
- return st; + return sgt;
err_free_sg: - sg_free_table(st); + sg_free_table(sgt); err_free: - kfree(st); + kfree(sgt); err_unpin_pages: i915_gem_object_unpin_pages(obj); err: return ERR_PTR(ret); }
-static void i915_gem_unmap_dma_buf(struct dma_buf_attachment *attachment, - struct sg_table *sg, +static void i915_gem_unmap_dma_buf(struct dma_buf_attachment *attach, + struct sg_table *sgt, enum dma_data_direction dir) { - struct drm_i915_gem_object *obj = dma_buf_to_obj(attachment->dmabuf); + struct drm_i915_gem_object *obj = dma_buf_to_obj(attach->dmabuf);
- dma_unmap_sg_attrs(attachment->dev, - sg->sgl, sg->nents, dir, + dma_unmap_sg_attrs(attach->dev, + sgt->sgl, sgt->nents, dir, DMA_ATTR_SKIP_CPU_SYNC); - sg_free_table(sg); - kfree(sg); + sg_free_table(sgt); + kfree(sgt);
i915_gem_object_unpin_pages(obj); } @@ -194,25 +194,25 @@ struct dma_buf *i915_gem_prime_export(struct drm_gem_object *gem_obj, int flags)
static int i915_gem_object_get_pages_dmabuf(struct drm_i915_gem_object *obj) { - struct sg_table *pages; + struct sg_table *sgt; unsigned int sg_page_sizes;
- pages = dma_buf_map_attachment(obj->base.import_attach, - DMA_BIDIRECTIONAL); - if (IS_ERR(pages)) - return PTR_ERR(pages); + sgt = dma_buf_map_attachment(obj->base.import_attach, + DMA_BIDIRECTIONAL); + if (IS_ERR(sgt)) + return PTR_ERR(sgt);
- sg_page_sizes = i915_sg_page_sizes(pages->sgl); + sg_page_sizes = i915_sg_page_sizes(sgt->sgl);
- __i915_gem_object_set_pages(obj, pages, sg_page_sizes); + __i915_gem_object_set_pages(obj, sgt, sg_page_sizes);
return 0; }
static void i915_gem_object_put_pages_dmabuf(struct drm_i915_gem_object *obj, - struct sg_table *pages) + struct sg_table *sgt) { - dma_buf_unmap_attachment(obj->base.import_attach, pages, + dma_buf_unmap_attachment(obj->base.import_attach, sgt, DMA_BIDIRECTIONAL); }
@@ -250,7 +250,7 @@ struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev, get_dma_buf(dma_buf);
obj = i915_gem_object_alloc(); - if (obj == NULL) { + if (!obj) { ret = -ENOMEM; goto fail_detach; }
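As an aside, here is a minimal sketch of the checkpatch-preferred style the patch converts to (alloc_sgt_example() is an illustrative helper, not part of the patch):

#include <linux/slab.h>
#include <linux/scatterlist.h>

static struct sg_table *alloc_sgt_example(void)
{
	struct sg_table *sgt;

	/* checkpatch prefers sizeof(*sgt) over sizeof(struct sg_table),
	 * and !sgt over sgt == NULL.
	 */
	sgt = kmalloc(sizeof(*sgt), GFP_KERNEL);
	if (!sgt)
		return NULL;

	return sgt;
}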
Update open coded for loop to use the standard scatterlist for_each_sg API.
Signed-off-by: Michael J. Ruhl michael.j.ruhl@intel.com --- drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c index 0d9124ad549a..7ea4abb6a896 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c @@ -7,6 +7,7 @@ #include <linux/dma-buf.h> #include <linux/highmem.h> #include <linux/dma-resv.h> +#include <linux/scatterlist.h>
#include "i915_drv.h" #include "i915_gem_object.h" @@ -40,12 +41,10 @@ static struct sg_table *i915_gem_map_dma_buf(struct dma_buf_attachment *attach, if (ret) goto err_free;
- src = obj->mm.pages->sgl; dst = sgt->sgl; - for (i = 0; i < obj->mm.pages->nents; i++) { + for_each_sg(obj->mm.pages->sgl, src, obj->mm.pages->nents, i) { sg_set_page(dst, sg_page(src), src->length, 0); dst = sg_next(dst); - src = sg_next(src); }
if (!dma_map_sg_attrs(attach->dev,
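For context, a minimal sketch of the for_each_sg() pattern the patch switches to (walk_sgt() and its pre-populated table argument are illustrative only):

#include <linux/printk.h>
#include <linux/scatterlist.h>

static void walk_sgt(struct sg_table *table)
{
	struct scatterlist *sg;
	int i;

	/* for_each_sg() advances sg with sg_next() internally, so the
	 * open-coded src = sg_next(src) bookkeeping goes away.
	 */
	for_each_sg(table->sgl, sg, table->nents, i)
		pr_debug("entry %d: length %u\n", i, sg->length);
}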
LMEM backed buffer objects do not have struct page information. Instead the dma_address of the struct sg is used to store the LMEM address information (relative to the device, this is not the CPU physical address).
The dmabuf map handler requires pages to do a dma_map_xx.
Add new mapping/unmapping functions, based on the LMEM usage of the dma_address to allow LMEM backed buffer objects to be mapped.
Before mapping check the peer2peer distance to verify that P2P dma can occur.
Signed-off-by: Michael J. Ruhl michael.j.ruhl@intel.com --- drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c | 82 ++++++++++++++++++++-- 1 file changed, 76 insertions(+), 6 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c index 7ea4abb6a896..402c989cc23d 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c @@ -7,6 +7,7 @@ #include <linux/dma-buf.h> #include <linux/highmem.h> #include <linux/dma-resv.h> +#include <linux/pci-p2pdma.h> #include <linux/scatterlist.h>
#include "i915_drv.h" @@ -18,6 +19,67 @@ static struct drm_i915_gem_object *dma_buf_to_obj(struct dma_buf *buf) return to_intel_bo(buf->priv); }
+static void dmabuf_unmap_addr(struct device *dev, struct scatterlist *sgl, + int nents, enum dma_data_direction dir) +{ + struct scatterlist *sg; + int i; + + for_each_sg(sgl, sg, nents, i) + dma_unmap_resource(dev, sg_dma_address(sg), sg_dma_len(sg), + dir, 0); +} + +/** + * dmabuf_map_addr - Update LMEM address to a physical address and map the + * resource. + * @dev: valid device + * @obj: valid i915 GEM object + * @sgl: scatter list to apply mapping to + * @nents: number of entries in the scatter list + * @dir: DMA direction + * + * The dma_address of the scatter list is the LMEM "address". From this the + * actual physical address can be determined. + * + * Return of 0 means error. + * + */ +static int dmabuf_map_addr(struct device *dev, struct drm_i915_gem_object *obj, + struct scatterlist *sgl, int nents, + enum dma_data_direction dir) +{ + struct scatterlist *sg; + phys_addr_t addr; + int distance; + int i; + + distance = pci_p2pdma_distance_many(obj->base.dev->pdev, &dev, 1, + true); + if (distance < 0) { + pr_info("%s: from: %s to: %s distance: %d\n", __func__, + pci_name(obj->base.dev->pdev), dev_name(dev), + distance); + return 0; + } + + for_each_sg(sgl, sg, nents, i) { + addr = sg_dma_address(sg) + obj->mm.region->io_start; + + sg->dma_address = dma_map_resource(dev, addr, sg->length, dir, + 0); + if (dma_mapping_error(dev, sg->dma_address)) + goto unmap; + sg->dma_length = sg->length; + } + + return nents; + +unmap: + dmabuf_unmap_addr(dev, sgl, i, dir); + return 0; +} + static struct sg_table *i915_gem_map_dma_buf(struct dma_buf_attachment *attach, enum dma_data_direction dir) { @@ -44,12 +106,17 @@ static struct sg_table *i915_gem_map_dma_buf(struct dma_buf_attachment *attach, dst = sgt->sgl; for_each_sg(obj->mm.pages->sgl, src, obj->mm.pages->nents, i) { sg_set_page(dst, sg_page(src), src->length, 0); + sg_dma_address(dst) = sg_dma_address(src); dst = sg_next(dst); }
- if (!dma_map_sg_attrs(attach->dev, - sgt->sgl, sgt->nents, dir, - DMA_ATTR_SKIP_CPU_SYNC)) { + if (i915_gem_object_has_struct_page(obj)) + ret = dma_map_sg_attrs(attach->dev, sgt->sgl, sgt->nents, dir, + DMA_ATTR_SKIP_CPU_SYNC); + else + ret = dmabuf_map_addr(attach->dev, obj, sgt->sgl, sgt->nents, + dir); + if (!ret) { ret = -ENOMEM; goto err_free_sg; } @@ -72,9 +139,12 @@ static void i915_gem_unmap_dma_buf(struct dma_buf_attachment *attach, { struct drm_i915_gem_object *obj = dma_buf_to_obj(attach->dmabuf);
- dma_unmap_sg_attrs(attach->dev, - sgt->sgl, sgt->nents, dir, - DMA_ATTR_SKIP_CPU_SYNC); + if (i915_gem_object_has_struct_page(obj)) + dma_unmap_sg_attrs(attach->dev, sgt->sgl, sgt->nents, dir, + DMA_ATTR_SKIP_CPU_SYNC); + else + dmabuf_unmap_addr(attach->dev, sgt->sgl, sgt->nents, dir); + sg_free_table(sgt); kfree(sgt);
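To restate the core idea of the patch as a simplified sketch (io_start is passed in explicitly here just to keep the example self-contained; in the patch it comes from obj->mm.region->io_start):

#include <linux/dma-mapping.h>
#include <linux/scatterlist.h>

/* For an LMEM-backed object, sg_dma_address() initially holds the
 * device-relative LMEM offset.  Adding the memory region's io_start
 * yields an address that dma_map_resource() can map for the importing
 * device, since there is no struct page to hand to dma_map_sg().
 */
static int map_one_lmem_entry(struct device *importer, struct scatterlist *sg,
			      resource_size_t io_start,
			      enum dma_data_direction dir)
{
	phys_addr_t addr = sg_dma_address(sg) + io_start;

	sg->dma_address = dma_map_resource(importer, addr, sg->length, dir, 0);
	if (dma_mapping_error(importer, sg->dma_address))
		return -ENOMEM;

	sg->dma_length = sg->length;
	return 0;
}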
On Wed, Apr 22, 2020 at 05:25:17PM -0400, Michael J. Ruhl wrote:
LMEM backed buffer objects do not have struct page information. Instead the dma_address of the struct sg is used to store the LMEM address information (relative to the device, this is not the CPU physical address).
The dmabuf map handler requires pages to do a dma_map_xx.
Add new mapping/unmapping functions, based on the LMEM usage of the dma_address to allow LMEM backed buffer objects to be mapped.
Before mapping check the peer2peer distance to verify that P2P dma can occur.
So this is supposed to check the importer's allow_peer2peer flag, and that one is supposed to require the implementation of ->move_notify. Which requires a pile of locking changes to align with dma_resv.
By not doing all that you avoid the lockdep splats, but you're also breaking the peer2peer dma-buf contract big time :-)
I think this needs more work, or I need better glasses in case I'm not spotting where this is all done. -Daniel
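(For reference, a rough sketch of the exporter-side check being described, based on the dynamic dma-buf importer API where struct dma_buf_attach_ops carries allow_peer2peer and move_notify; exact field names depend on the kernel version:)

#include <linux/dma-buf.h>

static bool attachment_allows_p2p(struct dma_buf_attachment *attach)
{
	/* attach->peer2peer is only set when the importer registered
	 * importer_ops with allow_peer2peer = true, which in turn requires
	 * a move_notify() callback so the exporter may relocate the buffer.
	 */
	return attach->peer2peer;
}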
-----Original Message----- From: Daniel Vetter daniel@ffwll.ch Sent: Tuesday, April 28, 2020 11:02 AM To: Ruhl, Michael J michael.j.ruhl@intel.com Cc: dri-devel@lists.freedesktop.org; daniel@ffwll.ch; Xiong, Jianxin jianxin.xiong@intel.com Subject: Re: [PATCH 3/5] drm/i915/dmabuf: Add LMEM knowledge to dmabuf map handler
On Wed, Apr 22, 2020 at 05:25:17PM -0400, Michael J. Ruhl wrote:
LMEM backed buffer objects do not have struct page information. Instead the dma_address of the struct sg is used to store the LMEM address information (relative to the device, this is not the CPU physical address).
The dmabuf map handler requires pages to do a dma_map_xx.
Add new mapping/unmapping functions, based on the LMEM usage of the dma_address to allow LMEM backed buffer objects to be mapped.
Before mapping check the peer2peer distance to verify that P2P dma can occur.
So this is supposed to check the importer's allow_peer2peer flag, and that one is supposed to require the implementation of ->move_notify. Which requires a pile of locking changes to align with dma_resv.
Yeah, I was avoiding that step for the moment. I am not completely comfortable with how and why move_notify is supposed to work, so I left the current mechanism "as is", and am planning on adding the move_notify part as a next step.
By not doing all that you avoid the lockdep splats, but you're also breaking the peer2peer dma-buf contract big time :-)
OK. I have some concerns because of the differences between the AMD and i915 implementations. It is not clear to me how compatible they currently are, or, if I "matched" the implementations, how that would affect the i915 driver.
I think this needs more work, or I need better glasses in case I'm not spotting where this is all done.
Nope, your glasses are good.
I will take a close look at how to incorporate the peer2peer stuff.
Mike
On Tue, Apr 28, 2020 at 05:36:03PM +0000, Ruhl, Michael J wrote:
-----Original Message----- From: Daniel Vetter daniel@ffwll.ch Sent: Tuesday, April 28, 2020 11:02 AM To: Ruhl, Michael J michael.j.ruhl@intel.com Cc: dri-devel@lists.freedesktop.org; daniel@ffwll.ch; Xiong, Jianxin jianxin.xiong@intel.com Subject: Re: [PATCH 3/5] drm/i915/dmabuf: Add LMEM knowledge to dmabuf map handler
On Wed, Apr 22, 2020 at 05:25:17PM -0400, Michael J. Ruhl wrote:
LMEM backed buffer objects do not have struct page information. Instead the dma_address of the struct sg is used to store the LMEM address information (relative to the device, this is not the CPU physical address).
The dmabuf map handler requires pages to do a dma_map_xx.
Add new mapping/unmapping functions, based on the LMEM usage of the dma_address to allow LMEM backed buffer objects to be mapped.
Before mapping check the peer2peer distance to verify that P2P dma can occur.
So this is supposed to check the importer's allow_peer2peer flag, and that one is supposed to require the implementation of ->move_notify. Which requires a pile of locking changes to align with dma_resv.
Yeah, I was avoiding that step for the moment. I am not completely comfortable with the how and why of how the move_notify is supposed to work, so I left the current mechanism "as is", and am planning on adding the move_notify part as a next step.
By not doing all that you avoid the lockdep splats, but you're also breaking the peer2peer dma-buf contract big time :-)
OK. I have some concerns because of the differences between the AMD and i915 implementations. It is not clear to me how compatible they currently are, and if "matched" the implementation how that would affect the i915 driver.
That's kinda the problem. They're not compatible :-/
I'm also pretty sure that we'll discover a bunch of places where the current debug checks and lockdep annotations we have are insufficient, and we'll need to add more to lock down this cross driver interface. -Daniel
I think this needs more work, or I need better glasses in case I'm not spotting where this is all done.
Nope, your glasses are good.
I will take a close look at how to incorporate the peer2peer stuff.
Mike
-Daniel
LMEM backed buffer objects do not have struct page information, and are not WB compatible. Currently the cpu access and vmap interfaces only support struct page backed objects.
Update the dma-buf interfaces begin/end_cpu_access and vmap/vunmap to be LMEM aware.
Signed-off-by: Michael J. Ruhl michael.j.ruhl@intel.com --- drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c index 402c989cc23d..988778cc8539 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c @@ -155,7 +155,10 @@ static void *i915_gem_dmabuf_vmap(struct dma_buf *dma_buf) { struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf);
- return i915_gem_object_pin_map(obj, I915_MAP_WB); + if (i915_gem_object_has_struct_page(obj)) + return i915_gem_object_pin_map(obj, I915_MAP_WB); + else + return i915_gem_object_pin_map(obj, I915_MAP_WC); }
static void i915_gem_dmabuf_vunmap(struct dma_buf *dma_buf, void *vaddr) @@ -201,7 +204,11 @@ static int i915_gem_begin_cpu_access(struct dma_buf *dma_buf, enum dma_data_dire if (err) goto out;
- err = i915_gem_object_set_to_cpu_domain(obj, write); + if (i915_gem_object_has_struct_page(obj)) + err = i915_gem_object_set_to_cpu_domain(obj, write); + else + err = i915_gem_object_set_to_wc_domain(obj, write); + i915_gem_object_unlock(obj);
out: @@ -222,7 +229,9 @@ static int i915_gem_end_cpu_access(struct dma_buf *dma_buf, enum dma_data_direct if (err) goto out;
- err = i915_gem_object_set_to_gtt_domain(obj, false); + if (i915_gem_object_has_struct_page(obj)) + err = i915_gem_object_set_to_gtt_domain(obj, false); + i915_gem_object_unlock(obj);
out:
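For context, a sketch of how an importer would exercise the callbacks this patch touches; dma_buf_begin_cpu_access()/dma_buf_vmap() are the dma-buf core entry points that land in the i915 handlers above (the surrounding function is illustrative only):

#include <linux/dma-buf.h>

static int cpu_read_dmabuf(struct dma_buf *dmabuf)
{
	void *vaddr;
	int ret;

	ret = dma_buf_begin_cpu_access(dmabuf, DMA_FROM_DEVICE);
	if (ret)
		return ret;

	/* With this patch the mapping is WB for shmem objects and WC for
	 * LMEM objects.
	 */
	vaddr = dma_buf_vmap(dmabuf);
	if (vaddr) {
		/* ... read the buffer contents through vaddr ... */
		dma_buf_vunmap(dmabuf, vaddr);
	}

	return dma_buf_end_cpu_access(dmabuf, DMA_FROM_DEVICE);
}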
The i915 GEM dmabuf mmap interface assumes all BOs are SHMEM backed. When a BO is backed by LMEM, this assumption doesn't work so well.
Introduce the dmabuf mmap interface to LMEM by adding the appropriate VMA faulting mechanism.
Update dmabuf to allow for LMEM backed BOs by leveraging the gem_mman path.
Signed-off-by: Michael J. Ruhl michael.j.ruhl@intel.com CC: Brian Welty brian.welty@intel.com --- drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c | 58 ++++++++-- drivers/gpu/drm/i915/gem/i915_gem_mman.c | 103 ++++++++++-------- drivers/gpu/drm/i915/gem/i915_gem_mman.h | 8 ++ .../drm/i915/gem/selftests/i915_gem_mman.c | 10 +- 4 files changed, 118 insertions(+), 61 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c index 988778cc8539..72d312d04421 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c @@ -11,6 +11,8 @@ #include <linux/scatterlist.h>
#include "i915_drv.h" +#include "i915_gem_lmem.h" +#include "i915_gem_mman.h" #include "i915_gem_object.h" #include "i915_scatterlist.h"
@@ -169,7 +171,41 @@ static void i915_gem_dmabuf_vunmap(struct dma_buf *dma_buf, void *vaddr) i915_gem_object_unpin_map(obj); }
-static int i915_gem_dmabuf_mmap(struct dma_buf *dma_buf, struct vm_area_struct *vma) +/** + * i915_gem_dmabuf_update_vma - Setup VMA information for exported LMEM + * objects + * @obj: valid LMEM object + * @vma: valid vma + * + * NOTE: on success, the final _object_put() will be done by the VMA + * vm_close() callback. + */ +static int i915_gem_dmabuf_update_vma(struct drm_i915_gem_object *obj, + struct vm_area_struct *vma) +{ + struct i915_mmap_offset *mmo; + int err; + + i915_gem_object_get(obj); + mmo = i915_gem_mmap_offset_attach(obj, I915_MMAP_TYPE_WC, NULL); + if (IS_ERR(mmo)) { + err = PTR_ERR(mmo); + goto out; + } + + err = i915_gem_update_vma_info(mmo, vma); + if (err) + goto out; + + return 0; + +out: + i915_gem_object_put(obj); + return err; +} + +static int i915_gem_dmabuf_mmap(struct dma_buf *dma_buf, + struct vm_area_struct *vma) { struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf); int ret; @@ -177,17 +213,21 @@ static int i915_gem_dmabuf_mmap(struct dma_buf *dma_buf, struct vm_area_struct * if (obj->base.size < vma->vm_end - vma->vm_start) return -EINVAL;
- if (!obj->base.filp) - return -ENODEV; + /* shmem */ + if (obj->base.filp) { + ret = call_mmap(obj->base.filp, vma); + if (ret) + return ret; + fput(vma->vm_file); + vma->vm_file = get_file(obj->base.filp);
- ret = call_mmap(obj->base.filp, vma); - if (ret) - return ret; + return 0; + }
- fput(vma->vm_file); - vma->vm_file = get_file(obj->base.filp); + if (i915_gem_object_is_lmem(obj)) + return i915_gem_dmabuf_update_vma(obj, vma);
- return 0; + return -ENODEV; }
static int i915_gem_begin_cpu_access(struct dma_buf *dma_buf, enum dma_data_direction direction) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c index b39c24dae64e..70b00c41f6d3 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c @@ -549,10 +549,10 @@ insert_mmo(struct drm_i915_gem_object *obj, struct i915_mmap_offset *mmo) return mmo; }
-static struct i915_mmap_offset * -mmap_offset_attach(struct drm_i915_gem_object *obj, - enum i915_mmap_type mmap_type, - struct drm_file *file) +struct i915_mmap_offset * +i915_gem_mmap_offset_attach(struct drm_i915_gem_object *obj, + enum i915_mmap_type mmap_type, + struct drm_file *file) { struct drm_i915_private *i915 = to_i915(obj->base.dev); struct i915_mmap_offset *mmo; @@ -626,7 +626,7 @@ __assign_mmap_offset(struct drm_file *file, goto out; }
- mmo = mmap_offset_attach(obj, mmap_type, file); + mmo = i915_gem_mmap_offset_attach(obj, mmap_type, file); if (IS_ERR(mmo)) { err = PTR_ERR(mmo); goto out; @@ -795,56 +795,21 @@ static struct file *mmap_singleton(struct drm_i915_private *i915) return file; }
-/* - * This overcomes the limitation in drm_gem_mmap's assignment of a - * drm_gem_object as the vma->vm_private_data. Since we need to - * be able to resolve multiple mmap offsets which could be tied - * to a single gem object. - */ -int i915_gem_mmap(struct file *filp, struct vm_area_struct *vma) +int i915_gem_update_vma_info(struct i915_mmap_offset *mmo, + struct vm_area_struct *vma) { - struct drm_vma_offset_node *node; - struct drm_file *priv = filp->private_data; - struct drm_device *dev = priv->minor->dev; - struct drm_i915_gem_object *obj = NULL; - struct i915_mmap_offset *mmo = NULL; struct file *anon;
- if (drm_dev_is_unplugged(dev)) - return -ENODEV; - - rcu_read_lock(); - drm_vma_offset_lock_lookup(dev->vma_offset_manager); - node = drm_vma_offset_exact_lookup_locked(dev->vma_offset_manager, - vma->vm_pgoff, - vma_pages(vma)); - if (node && drm_vma_node_is_allowed(node, priv)) { - /* - * Skip 0-refcnted objects as it is in the process of being - * destroyed and will be invalid when the vma manager lock - * is released. - */ - mmo = container_of(node, struct i915_mmap_offset, vma_node); - obj = i915_gem_object_get_rcu(mmo->obj); - } - drm_vma_offset_unlock_lookup(dev->vma_offset_manager); - rcu_read_unlock(); - if (!obj) - return node ? -EACCES : -EINVAL; - - if (i915_gem_object_is_readonly(obj)) { - if (vma->vm_flags & VM_WRITE) { - i915_gem_object_put(obj); + if (i915_gem_object_is_readonly(mmo->obj)) { + if (vma->vm_flags & VM_WRITE) return -EINVAL; - } + vma->vm_flags &= ~VM_MAYWRITE; }
- anon = mmap_singleton(to_i915(dev)); - if (IS_ERR(anon)) { - i915_gem_object_put(obj); + anon = mmap_singleton(to_i915(mmo->obj->base.dev)); + if (IS_ERR(anon)) return PTR_ERR(anon); - }
vma->vm_flags |= VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP; vma->vm_private_data = mmo; @@ -889,6 +854,50 @@ int i915_gem_mmap(struct file *filp, struct vm_area_struct *vma) return 0; }
+/* + * This overcomes the limitation in drm_gem_mmap's assignment of a + * drm_gem_object as the vma->vm_private_data. Since we need to + * be able to resolve multiple mmap offsets which could be tied + * to a single gem object. + */ +int i915_gem_mmap(struct file *filp, struct vm_area_struct *vma) +{ + struct drm_vma_offset_node *node; + struct drm_file *priv = filp->private_data; + struct drm_device *dev = priv->minor->dev; + struct drm_i915_gem_object *obj = NULL; + struct i915_mmap_offset *mmo = NULL; + int err; + + if (drm_dev_is_unplugged(dev)) + return -ENODEV; + + rcu_read_lock(); + drm_vma_offset_lock_lookup(dev->vma_offset_manager); + node = drm_vma_offset_exact_lookup_locked(dev->vma_offset_manager, + vma->vm_pgoff, + vma_pages(vma)); + if (node && drm_vma_node_is_allowed(node, priv)) { + /* + * Skip 0-refcnted objects as it is in the process of being + * destroyed and will be invalid when the vma manager lock + * is released. + */ + mmo = container_of(node, struct i915_mmap_offset, vma_node); + obj = i915_gem_object_get_rcu(mmo->obj); + } + drm_vma_offset_unlock_lookup(dev->vma_offset_manager); + rcu_read_unlock(); + if (!obj) + return node ? -EACCES : -EINVAL; + + err = i915_gem_update_vma_info(mmo, vma); + if (err) + i915_gem_object_put(obj); + + return err; +} + #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftests/i915_gem_mman.c" #endif diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.h b/drivers/gpu/drm/i915/gem/i915_gem_mman.h index 862e01b7cb69..c9721b968d3c 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.h @@ -10,6 +10,8 @@ #include <linux/mm_types.h> #include <linux/types.h>
+#include "gem/i915_gem_object_types.h" + struct drm_device; struct drm_file; struct drm_i915_gem_object; @@ -28,4 +30,10 @@ void __i915_gem_object_release_mmap_gtt(struct drm_i915_gem_object *obj); void i915_gem_object_release_mmap(struct drm_i915_gem_object *obj); void i915_gem_object_release_mmap_offset(struct drm_i915_gem_object *obj);
+struct i915_mmap_offset * +i915_gem_mmap_offset_attach(struct drm_i915_gem_object *obj, + enum i915_mmap_type mmap_type, + struct drm_file *file); +int i915_gem_update_vma_info(struct i915_mmap_offset *mmo, + struct vm_area_struct *vma); #endif diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c index ef7abcb3f4ee..777f82f60079 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c @@ -572,7 +572,7 @@ static bool assert_mmap_offset(struct drm_i915_private *i915, if (IS_ERR(obj)) return false;
- mmo = mmap_offset_attach(obj, I915_MMAP_OFFSET_GTT, NULL); + mmo = i915_gem_mmap_offset_attach(obj, I915_MMAP_OFFSET_GTT, NULL); i915_gem_object_put(obj);
return PTR_ERR_OR_ZERO(mmo) == expected; @@ -675,7 +675,7 @@ static int igt_mmap_offset_exhaustion(void *arg) goto out; }
- mmo = mmap_offset_attach(obj, I915_MMAP_OFFSET_GTT, NULL); + mmo = i915_gem_mmap_offset_attach(obj, I915_MMAP_OFFSET_GTT, NULL); if (IS_ERR(mmo)) { pr_err("Unable to insert object into reclaimed hole\n"); err = PTR_ERR(mmo); @@ -850,7 +850,7 @@ static int __igt_mmap(struct drm_i915_private *i915, if (err) return err;
- mmo = mmap_offset_attach(obj, type, NULL); + mmo = i915_gem_mmap_offset_attach(obj, type, NULL); if (IS_ERR(mmo)) return PTR_ERR(mmo);
@@ -978,7 +978,7 @@ static int __igt_mmap_gpu(struct drm_i915_private *i915, if (err) return err;
- mmo = mmap_offset_attach(obj, type, NULL); + mmo = i915_gem_mmap_offset_attach(obj, type, NULL); if (IS_ERR(mmo)) return PTR_ERR(mmo);
@@ -1144,7 +1144,7 @@ static int __igt_mmap_revoke(struct drm_i915_private *i915, if (!can_mmap(obj, type)) return 0;
- mmo = mmap_offset_attach(obj, type, NULL); + mmo = i915_gem_mmap_offset_attach(obj, type, NULL); if (IS_ERR(mmo)) return PTR_ERR(mmo);
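From the userspace side, what this patch enables for LMEM objects is mapping the exported dma-buf fd directly; a minimal sketch (dmabuf_fd is assumed to come from DRM_IOCTL_PRIME_HANDLE_TO_FD with DRM_RDWR set):

#include <stddef.h>
#include <sys/mman.h>

static void *map_dmabuf(int dmabuf_fd, size_t size)
{
	/* The fault handling behind this mmap() is what the new
	 * i915_gem_dmabuf_update_vma()/gem_mman path provides.
	 */
	void *ptr = mmap(NULL, size, PROT_READ | PROT_WRITE,
			 MAP_SHARED, dmabuf_fd, 0);

	return ptr == MAP_FAILED ? NULL : ptr;
}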