This series adds support for handling compact dma scatter lists to the i915 driver. This is a dependency for the related upcoming change in the PRIME interface:
http://www.spinics.net/lists/dri-devel/msg33917.html
Imre Deak (4):
  drm: add helper to walk a dma scatter list a page at a time
  drm: handle compact dma scatter lists in drm_clflush_sg()
  drm/i915: handle walking compact dma scatter lists
  drm/i915: create compact dma scatter lists for gem objects
 drivers/gpu/drm/drm_cache.c            |  7 ++---
 drivers/gpu/drm/i915/i915_drv.h        | 17 ++++------
 drivers/gpu/drm/i915/i915_gem.c        | 53 +++++++++++++++++---------------
 drivers/gpu/drm/i915/i915_gem_dmabuf.c | 13 ++++----
 drivers/gpu/drm/i915/i915_gem_tiling.c | 18 ++++++-----
 include/drm/drmP.h                     | 44 ++++++++++++++++++++++++++
 6 files changed, 98 insertions(+), 54 deletions(-)
Add a helper to walk through a scatter list a page at a time. Needed by upcoming patches fixing the scatter list walking logic in the i915 driver.
Signed-off-by: Imre Deak <imre.deak@intel.com>
---
 include/drm/drmP.h | 44 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 44 insertions(+)
diff --git a/include/drm/drmP.h b/include/drm/drmP.h
index fad21c9..0c0c213 100644
--- a/include/drm/drmP.h
+++ b/include/drm/drmP.h
@@ -1578,6 +1578,50 @@ extern int drm_sg_alloc_ioctl(struct drm_device *dev, void *data,
 extern int drm_sg_alloc(struct drm_device *dev, struct drm_scatter_gather * request);
 extern int drm_sg_free(struct drm_device *dev, void *data,
 		       struct drm_file *file_priv);
+struct drm_sg_iter {
+	struct scatterlist *sg;
+	int sg_offset;
+	struct page *page;
+};
+
+static inline int
+__drm_sg_iter_seek(struct drm_sg_iter *iter)
+{
+	while (iter->sg && iter->sg_offset >= iter->sg->length) {
+		iter->sg_offset -= iter->sg->length;
+		iter->sg = sg_next(iter->sg);
+	}
+
+	return iter->sg ? 0 : -1;
+}
+
+static inline struct page *
+drm_sg_iter_next(struct drm_sg_iter *iter)
+{
+	struct page *page;
+
+	if (__drm_sg_iter_seek(iter))
+		return NULL;
+
+	page = nth_page(sg_page(iter->sg), iter->sg_offset >> PAGE_SHIFT);
+	iter->sg_offset = (iter->sg_offset + PAGE_SIZE) & PAGE_MASK;
+
+	return page;
+}
+
+static inline struct page *
+drm_sg_iter_start(struct drm_sg_iter *iter, struct scatterlist *sg,
+		  unsigned long offset)
+{
+	iter->sg = sg;
+	iter->sg_offset = offset;
+
+	return drm_sg_iter_next(iter);
+}
+
+#define drm_for_each_sg_page(iter, sg, pgoffset)			\
+	for ((iter)->page = drm_sg_iter_start((iter), (sg), (pgoffset));\
+	     (iter)->page; (iter)->page = drm_sg_iter_next(iter))
 
 /* ATI PCIGART support (ati_pcigart.h) */
 extern int drm_ati_pcigart_init(struct drm_device *dev,
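For context, a minimal usage sketch of the iterator above; the helper
count_sg_pages and its body are hypothetical, and it assumes a
page-aligned sg table as the patch intends:

	static int count_sg_pages(struct sg_table *st)
	{
		struct drm_sg_iter sg_iter;
		int n = 0;

		/* visits one page per iteration, regardless of how many
		 * pages each sg entry covers */
		drm_for_each_sg_page(&sg_iter, st->sgl, 0)
			n++;

		return n;
	}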
Hi Imre!
On Sat, Feb 09, 2013 at 05:27:33PM +0200, Imre Deak wrote:
> Add a helper to walk through a scatter list a page at a time. Needed
> by upcoming patches fixing the scatter list walking logic in the i915
> driver.
Nice patch, but I think this would make a rather nice addition to the common scatterlist api in scatterlist.h, maybe called sg_page_iter. There's already another helper which does cpu mappings, but it has a different use-case (it gives you the page mapped, which we don't need, and it can cope with non-page-aligned sg tables). With dma-buf using sg tables I expect more users of such a sg page iterator to pop up. Most possible users of this will hang around on linaro-mm-sig, so please also cc that besides the usual suspects.
Cheers, Daniel
> Signed-off-by: Imre Deak <imre.deak@intel.com>
> ---
>  include/drm/drmP.h | 44 ++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 44 insertions(+)
> 
> diff --git a/include/drm/drmP.h b/include/drm/drmP.h
> index fad21c9..0c0c213 100644
> --- a/include/drm/drmP.h
> +++ b/include/drm/drmP.h
> @@ -1578,6 +1578,50 @@ extern int drm_sg_alloc_ioctl(struct drm_device *dev, void *data,
>  extern int drm_sg_free(struct drm_device *dev, void *data,
>  		       struct drm_file *file_priv);
> +struct drm_sg_iter {
> +	struct scatterlist *sg;
> +	int sg_offset;
Imo using sg_pfn_offset (i.e. sg_offset >> PAGE_SHIFT) would make it clearer that this is all about iterating page-aligned sg tables.
> +	struct page *page;
> +};
> +
> +static inline int
> +__drm_sg_iter_seek(struct drm_sg_iter *iter)
> +{
> +	while (iter->sg && iter->sg_offset >= iter->sg->length) {
> +		iter->sg_offset -= iter->sg->length;
> +		iter->sg = sg_next(iter->sg);
And adding a WARN_ON(sg->length & ~PAGE_MASK); here would enforce that.
> +	}
> +
> +	return iter->sg ? 0 : -1;
> +}
> +
> +static inline struct page *
> +drm_sg_iter_next(struct drm_sg_iter *iter)
> +{
> +	struct page *page;
> +
> +	if (__drm_sg_iter_seek(iter))
> +		return NULL;
> +
> +	page = nth_page(sg_page(iter->sg), iter->sg_offset >> PAGE_SHIFT);
> +	iter->sg_offset = (iter->sg_offset + PAGE_SIZE) & PAGE_MASK;
> +
> +	return page;
> +}
> +
> +static inline struct page *
> +drm_sg_iter_start(struct drm_sg_iter *iter, struct scatterlist *sg,
> +		  unsigned long offset)
> +{
> +	iter->sg = sg;
> +	iter->sg_offset = offset;
> +
> +	return drm_sg_iter_next(iter);
> +}
> +
> +#define drm_for_each_sg_page(iter, sg, pgoffset)			\
> +	for ((iter)->page = drm_sg_iter_start((iter), (sg), (pgoffset));\
> +	     (iter)->page; (iter)->page = drm_sg_iter_next(iter))
Again, for the initialization I'd go with page numbers, not an offset in bytes.
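For illustration, a sketch of the iterator reworked along the lines of
the three comments above: a pfn-based offset, the WARN_ON, and
page-number initialization. Field and function names here are
hypothetical, not the posted code:

	struct drm_sg_iter {
		struct scatterlist *sg;
		unsigned int sg_pfn_offset;	/* offset in pages, not bytes */
		struct page *page;
	};

	static inline int
	__drm_sg_iter_seek(struct drm_sg_iter *iter)
	{
		while (iter->sg &&
		       iter->sg_pfn_offset >= (iter->sg->length >> PAGE_SHIFT)) {
			/* enforce that every entry is page-aligned */
			WARN_ON(iter->sg->length & ~PAGE_MASK);
			iter->sg_pfn_offset -= iter->sg->length >> PAGE_SHIFT;
			iter->sg = sg_next(iter->sg);
		}

		return iter->sg ? 0 : -1;
	}

	static inline struct page *
	drm_sg_iter_next(struct drm_sg_iter *iter)
	{
		if (__drm_sg_iter_seek(iter))
			return NULL;

		/* return the current page and step one page forward */
		return nth_page(sg_page(iter->sg), iter->sg_pfn_offset++);
	}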
On Sat, 2013-02-09 at 19:56 +0100, Daniel Vetter wrote:
> Hi Imre!
> 
> On Sat, Feb 09, 2013 at 05:27:33PM +0200, Imre Deak wrote:
> > Add a helper to walk through a scatter list a page at a time. Needed
> > by upcoming patches fixing the scatter list walking logic in the
> > i915 driver.
> 
> Nice patch, but I think this would make a rather nice addition to the
> common scatterlist api in scatterlist.h, maybe called sg_page_iter.
> There's already another helper which does cpu mappings, but it has a
> different use-case (it gives you the page mapped, which we don't need,
> and it can cope with non-page-aligned sg tables). With dma-buf using
> sg tables I expect more users of such a sg page iterator to pop up.
> Most possible users of this will hang around on linaro-mm-sig, so
> please also cc that besides the usual suspects.
Ok, I wanted to sneak this into drm (and win some time), thinking there isn't any other user of it atm. But I agree the ideal place for it is in scatterlist.h, so I will send it there.
> [...]
> > +#define drm_for_each_sg_page(iter, sg, pgoffset)			\
> > +	for ((iter)->page = drm_sg_iter_start((iter), (sg), (pgoffset));\
> > +	     (iter)->page; (iter)->page = drm_sg_iter_next(iter))
> 
> Again, for the initialization I'd go with page numbers, not an offset
> in bytes.
Ok, can change it to use page numbers instead.
--Imre
So far the assumption was that each scatter list entry contains a single page. This might not hold in the future, when we'll introduce compact scatter lists, so prepare for this here.
Reference: http://www.spinics.net/lists/dri-devel/msg33917.html
Signed-off-by: Imre Deak <imre.deak@intel.com>
---
 drivers/gpu/drm/drm_cache.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/drm_cache.c b/drivers/gpu/drm/drm_cache.c
index a575cb2..40f380a 100644
--- a/drivers/gpu/drm/drm_cache.c
+++ b/drivers/gpu/drm/drm_cache.c
@@ -105,12 +105,11 @@ drm_clflush_sg(struct sg_table *st)
 {
 #if defined(CONFIG_X86)
 	if (cpu_has_clflush) {
-		struct scatterlist *sg;
-		int i;
+		struct drm_sg_iter sg_iter;
 
 		mb();
-		for_each_sg(st->sgl, sg, st->nents, i)
-			drm_clflush_page(sg_page(sg));
+		drm_for_each_sg_page(&sg_iter, st->sgl, 0)
+			drm_clflush_page(sg_iter.page);
 		mb();
 
 		return;
So far the assumption was that each dma scatter list entry contains only a single page. This might not hold in the future, when we'll introduce compact scatter lists, so prepare for this everywhere in the i915 code where we walk such a list.
We'll fix the place _creating_ these lists separately in the next patch to help review and bisectability.
Reference: http://www.spinics.net/lists/dri-devel/msg33917.html
Signed-off-by: Imre Deak <imre.deak@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h        | 17 ++++++-----------
 drivers/gpu/drm/i915/i915_gem.c        | 22 ++++++----------------
 drivers/gpu/drm/i915/i915_gem_dmabuf.c | 13 +++++++------
 drivers/gpu/drm/i915/i915_gem_tiling.c | 18 ++++++++++--------
 4 files changed, 29 insertions(+), 41 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 08c5def..0462428 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1531,17 +1531,12 @@ void i915_gem_lastclose(struct drm_device *dev);
 int __must_check i915_gem_object_get_pages(struct drm_i915_gem_object *obj);
 static inline struct page *i915_gem_object_get_page(struct drm_i915_gem_object *obj, int n)
 {
-	struct scatterlist *sg = obj->pages->sgl;
-	int nents = obj->pages->nents;
-	while (nents > SG_MAX_SINGLE_ALLOC) {
-		if (n < SG_MAX_SINGLE_ALLOC - 1)
-			break;
-
-		sg = sg_chain_ptr(sg + SG_MAX_SINGLE_ALLOC - 1);
-		n -= SG_MAX_SINGLE_ALLOC - 1;
-		nents -= SG_MAX_SINGLE_ALLOC - 1;
-	}
-	return sg_page(sg+n);
+	struct drm_sg_iter sg_iter;
+
+	drm_for_each_sg_page(&sg_iter, obj->pages->sgl, n << PAGE_SHIFT)
+		return sg_iter.page;
+
+	return NULL;
 }
 static inline void i915_gem_object_pin_pages(struct drm_i915_gem_object *obj)
 {
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index d746177..4a199e0 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -411,8 +411,7 @@ i915_gem_shmem_pread(struct drm_device *dev,
 	int obj_do_bit17_swizzling, page_do_bit17_swizzling;
 	int prefaulted = 0;
 	int needs_clflush = 0;
-	struct scatterlist *sg;
-	int i;
+	struct drm_sg_iter sg_iter;
 
 	user_data = (char __user *) (uintptr_t) args->data_ptr;
 	remain = args->size;
@@ -441,11 +440,8 @@ i915_gem_shmem_pread(struct drm_device *dev,
 
 	offset = args->offset;
 
-	for_each_sg(obj->pages->sgl, sg, obj->pages->nents, i) {
-		struct page *page;
-
-		if (i < offset >> PAGE_SHIFT)
-			continue;
+	drm_for_each_sg_page(&sg_iter, obj->pages->sgl, offset) {
+		struct page *page = sg_iter.page;
 
 		if (remain <= 0)
 			break;
@@ -460,7 +456,6 @@ i915_gem_shmem_pread(struct drm_device *dev,
 		if ((shmem_page_offset + page_length) > PAGE_SIZE)
 			page_length = PAGE_SIZE - shmem_page_offset;
 
-		page = sg_page(sg);
 		page_do_bit17_swizzling = obj_do_bit17_swizzling &&
 			(page_to_phys(page) & (1 << 17)) != 0;
 
@@ -732,8 +727,7 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
 	int hit_slowpath = 0;
 	int needs_clflush_after = 0;
 	int needs_clflush_before = 0;
-	int i;
-	struct scatterlist *sg;
+	struct drm_sg_iter sg_iter;
 
 	user_data = (char __user *) (uintptr_t) args->data_ptr;
 	remain = args->size;
@@ -768,13 +762,10 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
 	offset = args->offset;
 	obj->dirty = 1;
 
-	for_each_sg(obj->pages->sgl, sg, obj->pages->nents, i) {
-		struct page *page;
+	drm_for_each_sg_page(&sg_iter, obj->pages->sgl, offset) {
+		struct page *page = sg_iter.page;
 		int partial_cacheline_write;
 
-		if (i < offset >> PAGE_SHIFT)
-			continue;
-
 		if (remain <= 0)
 			break;
 
@@ -796,7 +787,6 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
 			((shmem_page_offset | page_length)
 				& (boot_cpu_data.x86_clflush_size - 1));
 
-		page = sg_page(sg);
 		page_do_bit17_swizzling = obj_do_bit17_swizzling &&
 			(page_to_phys(page) & (1 << 17)) != 0;
 
diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
index 6a5af68..ac98792 100644
--- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
@@ -62,7 +62,7 @@ static struct sg_table *i915_gem_map_dma_buf(struct dma_buf_attachment *attachme
 	src = obj->pages->sgl;
 	dst = st->sgl;
 	for (i = 0; i < obj->pages->nents; i++) {
-		sg_set_page(dst, sg_page(src), PAGE_SIZE, 0);
+		sg_set_page(dst, sg_page(src), src->length, 0);
 		dst = sg_next(dst);
 		src = sg_next(src);
 	}
@@ -105,7 +105,7 @@ static void *i915_gem_dmabuf_vmap(struct dma_buf *dma_buf)
 {
 	struct drm_i915_gem_object *obj = dma_buf->priv;
 	struct drm_device *dev = obj->base.dev;
-	struct scatterlist *sg;
+	struct drm_sg_iter sg_iter;
 	struct page **pages;
 	int ret, i;
 
@@ -124,14 +124,15 @@ static void *i915_gem_dmabuf_vmap(struct dma_buf *dma_buf)
 
 	ret = -ENOMEM;
 
-	pages = drm_malloc_ab(obj->pages->nents, sizeof(struct page *));
+	pages = drm_malloc_ab(obj->base.size >> PAGE_SHIFT, sizeof(*pages));
 	if (pages == NULL)
 		goto error;
 
-	for_each_sg(obj->pages->sgl, sg, obj->pages->nents, i)
-		pages[i] = sg_page(sg);
+	i = 0;
+	drm_for_each_sg_page(&sg_iter, obj->pages->sgl, 0)
+		pages[i++] = sg_iter.page;
 
-	obj->dma_buf_vmapping = vmap(pages, obj->pages->nents, 0, PAGE_KERNEL);
+	obj->dma_buf_vmapping = vmap(pages, i, 0, PAGE_KERNEL);
 	drm_free_large(pages);
 
 	if (!obj->dma_buf_vmapping)
diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c
index abcba2f..834ed70 100644
--- a/drivers/gpu/drm/i915/i915_gem_tiling.c
+++ b/drivers/gpu/drm/i915/i915_gem_tiling.c
@@ -473,28 +473,29 @@ i915_gem_swizzle_page(struct page *page)
 void
 i915_gem_object_do_bit_17_swizzle(struct drm_i915_gem_object *obj)
 {
-	struct scatterlist *sg;
-	int page_count = obj->base.size >> PAGE_SHIFT;
+	struct drm_sg_iter sg_iter;
 	int i;
 
 	if (obj->bit_17 == NULL)
 		return;
 
-	for_each_sg(obj->pages->sgl, sg, page_count, i) {
-		struct page *page = sg_page(sg);
+	i = 0;
+	drm_for_each_sg_page(&sg_iter, obj->pages->sgl, 0) {
+		struct page *page = sg_iter.page;
 		char new_bit_17 = page_to_phys(page) >> 17;
 		if ((new_bit_17 & 0x1) !=
 		    (test_bit(i, obj->bit_17) != 0)) {
 			i915_gem_swizzle_page(page);
 			set_page_dirty(page);
 		}
+		i++;
 	}
 }
 
 void
 i915_gem_object_save_bit_17_swizzle(struct drm_i915_gem_object *obj)
 {
-	struct scatterlist *sg;
+	struct drm_sg_iter sg_iter;
 	int page_count = obj->base.size >> PAGE_SHIFT;
 	int i;
 
@@ -508,11 +509,12 @@ i915_gem_object_save_bit_17_swizzle(struct drm_i915_gem_object *obj)
 		}
 	}
 
-	for_each_sg(obj->pages->sgl, sg, page_count, i) {
-		struct page *page = sg_page(sg);
-		if (page_to_phys(page) & (1 << 17))
+	i = 0;
+	drm_for_each_sg_page(&sg_iter, obj->pages->sgl, 0) {
+		if (page_to_phys(sg_iter.page) & (1 << 17))
 			__set_bit(i, obj->bit_17);
 		else
 			__clear_bit(i, obj->bit_17);
+		i++;
 	}
 }
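As a sanity check of the new walking logic, one could build a
deliberately compact table and count the pages the iterator visits.
This is a hypothetical self-test, not part of the series; it assumes
"first" starts a run of four physically contiguous pages:

	static int check_compact_walk(struct page *first)
	{
		struct sg_table st;
		struct drm_sg_iter sg_iter;
		int n = 0;

		if (sg_alloc_table(&st, 1, GFP_KERNEL))
			return -ENOMEM;

		/* one compact entry covering all four pages */
		sg_set_page(st.sgl, first, 4 * PAGE_SIZE, 0);

		drm_for_each_sg_page(&sg_iter, st.sgl, 0)
			n++;

		sg_free_table(&st);
		return n == 4 ? 0 : -EINVAL;
	}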
On Sat, Feb 09, 2013 at 05:27:35PM +0200, Imre Deak wrote:
> So far the assumption was that each dma scatter list entry contains
> only a single page. This might not hold in the future, when we'll
> introduce compact scatter lists, so prepare for this everywhere in the
> i915 code where we walk such a list.
> 
> We'll fix the place _creating_ these lists separately in the next
> patch to help review and bisectability.
> 
> Reference: http://www.spinics.net/lists/dri-devel/msg33917.html
> Signed-off-by: Imre Deak <imre.deak@intel.com>
Can you please reassure me that i-g-t/tests/gem_exec_lut_handle is not too adversely affected (on a wide range of cpus)? -Chris
On Sat, Feb 09, 2013 at 05:27:35PM +0200, Imre Deak wrote:
> So far the assumption was that each dma scatter list entry contains
> only a single page. This might not hold in the future, when we'll
> introduce compact scatter lists, so prepare for this everywhere in the
> i915 code where we walk such a list.
> 
> We'll fix the place _creating_ these lists separately in the next
> patch to help review and bisectability.
> 
> Reference: http://www.spinics.net/lists/dri-devel/msg33917.html
> Signed-off-by: Imre Deak <imre.deak@intel.com>
Since we now have such a nice macro to loop over sg pages ... Care to also convert the two existing (correct) loops in i915_gem_gtt.c and intel-gtt.c in a follow-up patch? -Daniel
On Sat, 2013-02-09 at 23:51 +0100, Daniel Vetter wrote:
> On Sat, Feb 09, 2013 at 05:27:35PM +0200, Imre Deak wrote:
> > So far the assumption was that each dma scatter list entry contains
> > only a single page. This might not hold in the future, when we'll
> > introduce compact scatter lists, so prepare for this everywhere in
> > the i915 code where we walk such a list.
> >
> > We'll fix the place _creating_ these lists separately in the next
> > patch to help review and bisectability.
> >
> > Reference: http://www.spinics.net/lists/dri-devel/msg33917.html
> > Signed-off-by: Imre Deak <imre.deak@intel.com>
> 
> Since we now have such a nice macro to loop over sg pages ... Care to
> also convert the two existing (correct) loops in i915_gem_gtt.c and
> intel-gtt.c in a follow-up patch?
> -Daniel
Ok, will do.
--Imre
So far we created a sparse dma scatter list for gem objects, where each scatter list entry represented only a single page. In the future we'll have to handle compact scatter lists too where each entry can consist of multiple pages, for example for objects imported through PRIME.
The previous patches have already fixed up all other places where the i915 driver _walked_ these lists. Here we have the corresponding fix to _create_ compact lists. It's not a performance or memory footprint improvement, but it helps to better exercise the new logic.
Reference: http://www.spinics.net/lists/dri-devel/msg33917.html
Signed-off-by: Imre Deak <imre.deak@intel.com>
---
 drivers/gpu/drm/i915/i915_gem.c | 31 ++++++++++++++++++++++---------
 1 file changed, 22 insertions(+), 9 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 4a199e0..1028dd5 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1623,9 +1623,8 @@ i915_gem_object_is_purgeable(struct drm_i915_gem_object *obj)
 static void
 i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
 {
-	int page_count = obj->base.size / PAGE_SIZE;
-	struct scatterlist *sg;
-	int ret, i;
+	struct drm_sg_iter sg_iter;
+	int ret;
 
 	BUG_ON(obj->madv == __I915_MADV_PURGED);
 
@@ -1645,8 +1644,8 @@ i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
 	if (obj->madv == I915_MADV_DONTNEED)
 		obj->dirty = 0;
 
-	for_each_sg(obj->pages->sgl, sg, page_count, i) {
-		struct page *page = sg_page(sg);
+	drm_for_each_sg_page(&sg_iter, obj->pages->sgl, 0) {
+		struct page *page = sg_iter.page;
 
 		if (obj->dirty)
 			set_page_dirty(page);
@@ -1740,7 +1739,9 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
 	struct address_space *mapping;
 	struct sg_table *st;
 	struct scatterlist *sg;
+	struct drm_sg_iter sg_iter;
 	struct page *page;
+	unsigned long last_pfn = 0;	/* suppress gcc warning */
 	gfp_t gfp;
 
 	/* Assert that the object is not currently in any GPU domain. As it
@@ -1770,7 +1771,9 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
 	gfp = mapping_gfp_mask(mapping);
 	gfp |= __GFP_NORETRY | __GFP_NOWARN | __GFP_NO_KSWAPD;
 	gfp &= ~(__GFP_IO | __GFP_WAIT);
-	for_each_sg(st->sgl, sg, page_count, i) {
+	sg = st->sgl;
+	st->nents = 0;
+	for (i = 0; i < page_count; i++) {
 		page = shmem_read_mapping_page_gfp(mapping, i, gfp);
 		if (IS_ERR(page)) {
 			i915_gem_purge(dev_priv, page_count);
@@ -1793,9 +1796,18 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
 			gfp &= ~(__GFP_IO | __GFP_WAIT);
 		}
 
-		sg_set_page(sg, page, PAGE_SIZE, 0);
+		if (!i || page_to_pfn(page) != last_pfn + 1) {
+			if (i)
+				sg = sg_next(sg);
+			st->nents++;
+			sg_set_page(sg, page, PAGE_SIZE, 0);
+		} else {
+			sg->length += PAGE_SIZE;
+		}
+		last_pfn = page_to_pfn(page);
 	}
 
+	sg_mark_end(sg);
 	obj->pages = st;
 
 	if (i915_gem_object_needs_bit17_swizzle(obj))
@@ -1804,8 +1816,9 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
 	return 0;
 
 err_pages:
-	for_each_sg(st->sgl, sg, i, page_count)
-		page_cache_release(sg_page(sg));
+	sg_mark_end(sg);
+	drm_for_each_sg_page(&sg_iter, st->sgl, 0)
+		page_cache_release(sg_iter.page);
 	sg_free_table(st);
 	kfree(st);
 	return PTR_ERR(page);
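The coalescing rule above boils down to a pfn-contiguity check. Here
is that loop condensed in isolation; build_compact_sg and the inputs
pages/page_count are hypothetical, with st assumed preallocated for
page_count entries (the worst, fully sparse case) and page_count > 0:

	static void build_compact_sg(struct sg_table *st, struct page **pages,
				     int page_count)
	{
		struct scatterlist *sg = st->sgl;
		unsigned long last_pfn = 0;
		int i;

		st->nents = 0;
		for (i = 0; i < page_count; i++) {
			if (!i || page_to_pfn(pages[i]) != last_pfn + 1) {
				/* not contiguous: start a new entry */
				if (i)
					sg = sg_next(sg);
				st->nents++;
				sg_set_page(sg, pages[i], PAGE_SIZE, 0);
			} else {
				/* contiguous: grow the entry by one page */
				sg->length += PAGE_SIZE;
			}
			last_pfn = page_to_pfn(pages[i]);
		}
		sg_mark_end(sg);
	}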
On Sat, Feb 09, 2013 at 05:27:36PM +0200, Imre Deak wrote:
> So far we created a sparse dma scatter list for gem objects, where
> each scatter list entry represented only a single page. In the future
> we'll have to handle compact scatter lists too where each entry can
> consist of multiple pages, for example for objects imported through
> PRIME.
> 
> The previous patches have already fixed up all other places where the
> i915 driver _walked_ these lists. Here we have the corresponding fix
> to _create_ compact lists. It's not a performance or memory footprint
> improvement, but it helps to better exercise the new logic.
> 
> Reference: http://www.spinics.net/lists/dri-devel/msg33917.html
> Signed-off-by: Imre Deak <imre.deak@intel.com>
Just a quick question: Have you checked with printks or so that we indeed create such coalesced sg entries every once in a while? -Daniel
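One hypothetical way to check, right after the table is built in
i915_gem_object_get_pages_gtt(): fewer entries than pages means
coalescing kicked in (the printk is illustrative, not part of the
series):

	if (st->nents < page_count)
		printk(KERN_DEBUG "i915: coalesced %d pages into %u entries\n",
		       page_count, st->nents);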
On Sat, 2013-02-09 at 19:59 +0100, Daniel Vetter wrote:
> On Sat, Feb 09, 2013 at 05:27:36PM +0200, Imre Deak wrote:
> > So far we created a sparse dma scatter list for gem objects, where
> > each scatter list entry represented only a single page. [...]
> 
> Just a quick question: Have you checked with printks or so that we
> indeed create such coalesced sg entries every once in a while?
Yes, quite often that was the case, so we at least are really testing the thing.
This series adds support for handling compact dma scatter lists to the i915 driver. This is a dependency for the related upcoming change in the PRIME interface:
http://www.spinics.net/lists/dri-devel/msg33917.html
v2:
- Add the for_each_sg_page macro to lib/scatterlist instead of
  drivers/drm. This is a separate patchset being merged through the
  -mm tree.
- Use the for_each_sg_page macro in the gtt pte setup code too.
Imre Deak (4):
  drm: handle compact dma scatter lists in drm_clflush_sg()
  drm/i915: handle walking compact dma scatter lists
  drm/i915: create compact dma scatter lists for gem objects
  drm/i915: use for_each_sg_page for setting up the gtt ptes
 drivers/gpu/drm/drm_cache.c            |  7 ++--
 drivers/gpu/drm/i915/i915_drv.h        | 17 +++-----
 drivers/gpu/drm/i915/i915_gem.c        | 55 ++++++++++++++------------
 drivers/gpu/drm/i915/i915_gem_dmabuf.c | 13 ++++---
 drivers/gpu/drm/i915/i915_gem_gtt.c    | 67 ++++++++++++--------------------
 drivers/gpu/drm/i915/i915_gem_tiling.c | 18 +++++----
 6 files changed, 80 insertions(+), 97 deletions(-)
So far the assumption was that each scatter list entry contains a single page. This might not hold in the future, when we'll introduce compact scatter lists, so prepare for this here.
Reference: http://www.spinics.net/lists/dri-devel/msg33917.html
Signed-off-by: Imre Deak <imre.deak@intel.com>
---
 drivers/gpu/drm/drm_cache.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/drm_cache.c b/drivers/gpu/drm/drm_cache.c
index a575cb2..bc8edbe 100644
--- a/drivers/gpu/drm/drm_cache.c
+++ b/drivers/gpu/drm/drm_cache.c
@@ -105,12 +105,11 @@ drm_clflush_sg(struct sg_table *st)
 {
 #if defined(CONFIG_X86)
 	if (cpu_has_clflush) {
-		struct scatterlist *sg;
-		int i;
+		struct sg_page_iter sg_iter;
 
 		mb();
-		for_each_sg(st->sgl, sg, st->nents, i)
-			drm_clflush_page(sg_page(sg));
+		for_each_sg_page(st->sgl, &sg_iter, st->nents, 0)
+			drm_clflush_page(sg_iter.page);
 		mb();
 
 		return;
So far the assumption was that each dma scatter list entry contains only a single page. This might not hold in the future, when we'll introduce compact scatter lists, so prepare for this everywhere in the i915 code where we walk such a list.
We'll fix the place _creating_ these lists separately in the next patch to help review and bisectability.
Reference: http://www.spinics.net/lists/dri-devel/msg33917.html
Signed-off-by: Imre Deak <imre.deak@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h        | 17 ++++++-----------
 drivers/gpu/drm/i915/i915_gem.c        | 24 ++++++++----------------
 drivers/gpu/drm/i915/i915_gem_dmabuf.c | 13 +++++++------
 drivers/gpu/drm/i915/i915_gem_tiling.c | 18 ++++++++++--------
 4 files changed, 31 insertions(+), 41 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index e95337c..2c03636 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1528,17 +1528,12 @@ void i915_gem_lastclose(struct drm_device *dev);
 int __must_check i915_gem_object_get_pages(struct drm_i915_gem_object *obj);
 static inline struct page *i915_gem_object_get_page(struct drm_i915_gem_object *obj, int n)
 {
-	struct scatterlist *sg = obj->pages->sgl;
-	int nents = obj->pages->nents;
-	while (nents > SG_MAX_SINGLE_ALLOC) {
-		if (n < SG_MAX_SINGLE_ALLOC - 1)
-			break;
-
-		sg = sg_chain_ptr(sg + SG_MAX_SINGLE_ALLOC - 1);
-		n -= SG_MAX_SINGLE_ALLOC - 1;
-		nents -= SG_MAX_SINGLE_ALLOC - 1;
-	}
-	return sg_page(sg+n);
+	struct sg_page_iter sg_iter;
+
+	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, n)
+		return sg_iter.page;
+
+	return NULL;
 }
 static inline void i915_gem_object_pin_pages(struct drm_i915_gem_object *obj)
 {
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 8413ffc..03037cf 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -411,8 +411,7 @@ i915_gem_shmem_pread(struct drm_device *dev,
 	int obj_do_bit17_swizzling, page_do_bit17_swizzling;
 	int prefaulted = 0;
 	int needs_clflush = 0;
-	struct scatterlist *sg;
-	int i;
+	struct sg_page_iter sg_iter;
 
 	user_data = (char __user *) (uintptr_t) args->data_ptr;
 	remain = args->size;
@@ -441,11 +440,9 @@ i915_gem_shmem_pread(struct drm_device *dev,
 
 	offset = args->offset;
 
-	for_each_sg(obj->pages->sgl, sg, obj->pages->nents, i) {
-		struct page *page;
-
-		if (i < offset >> PAGE_SHIFT)
-			continue;
+	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents,
+			 offset >> PAGE_SHIFT) {
+		struct page *page = sg_iter.page;
 
 		if (remain <= 0)
 			break;
@@ -460,7 +457,6 @@ i915_gem_shmem_pread(struct drm_device *dev,
 		if ((shmem_page_offset + page_length) > PAGE_SIZE)
 			page_length = PAGE_SIZE - shmem_page_offset;
 
-		page = sg_page(sg);
 		page_do_bit17_swizzling = obj_do_bit17_swizzling &&
 			(page_to_phys(page) & (1 << 17)) != 0;
 
@@ -732,8 +728,7 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
 	int hit_slowpath = 0;
 	int needs_clflush_after = 0;
 	int needs_clflush_before = 0;
-	int i;
-	struct scatterlist *sg;
+	struct sg_page_iter sg_iter;
 
 	user_data = (char __user *) (uintptr_t) args->data_ptr;
 	remain = args->size;
@@ -768,13 +763,11 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
 	offset = args->offset;
 	obj->dirty = 1;
 
-	for_each_sg(obj->pages->sgl, sg, obj->pages->nents, i) {
-		struct page *page;
+	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents,
+			 offset >> PAGE_SHIFT) {
+		struct page *page = sg_iter.page;
 		int partial_cacheline_write;
 
-		if (i < offset >> PAGE_SHIFT)
-			continue;
-
 		if (remain <= 0)
 			break;
 
@@ -796,7 +789,6 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
 			((shmem_page_offset | page_length)
 				& (boot_cpu_data.x86_clflush_size - 1));
 
-		page = sg_page(sg);
 		page_do_bit17_swizzling = obj_do_bit17_swizzling &&
 			(page_to_phys(page) & (1 << 17)) != 0;
 
diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
index 6a5af68..898615d 100644
--- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
@@ -62,7 +62,7 @@ static struct sg_table *i915_gem_map_dma_buf(struct dma_buf_attachment *attachme
 	src = obj->pages->sgl;
 	dst = st->sgl;
 	for (i = 0; i < obj->pages->nents; i++) {
-		sg_set_page(dst, sg_page(src), PAGE_SIZE, 0);
+		sg_set_page(dst, sg_page(src), src->length, 0);
 		dst = sg_next(dst);
 		src = sg_next(src);
 	}
@@ -105,7 +105,7 @@ static void *i915_gem_dmabuf_vmap(struct dma_buf *dma_buf)
 {
 	struct drm_i915_gem_object *obj = dma_buf->priv;
 	struct drm_device *dev = obj->base.dev;
-	struct scatterlist *sg;
+	struct sg_page_iter sg_iter;
 	struct page **pages;
 	int ret, i;
 
@@ -124,14 +124,15 @@ static void *i915_gem_dmabuf_vmap(struct dma_buf *dma_buf)
 
 	ret = -ENOMEM;
 
-	pages = drm_malloc_ab(obj->pages->nents, sizeof(struct page *));
+	pages = drm_malloc_ab(obj->base.size >> PAGE_SHIFT, sizeof(*pages));
 	if (pages == NULL)
 		goto error;
 
-	for_each_sg(obj->pages->sgl, sg, obj->pages->nents, i)
-		pages[i] = sg_page(sg);
+	i = 0;
+	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0)
+		pages[i++] = sg_iter.page;
 
-	obj->dma_buf_vmapping = vmap(pages, obj->pages->nents, 0, PAGE_KERNEL);
+	obj->dma_buf_vmapping = vmap(pages, i, 0, PAGE_KERNEL);
 	drm_free_large(pages);
 
 	if (!obj->dma_buf_vmapping)
diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c
index abcba2f..f799708 100644
--- a/drivers/gpu/drm/i915/i915_gem_tiling.c
+++ b/drivers/gpu/drm/i915/i915_gem_tiling.c
@@ -473,28 +473,29 @@ i915_gem_swizzle_page(struct page *page)
 void
 i915_gem_object_do_bit_17_swizzle(struct drm_i915_gem_object *obj)
 {
-	struct scatterlist *sg;
-	int page_count = obj->base.size >> PAGE_SHIFT;
+	struct sg_page_iter sg_iter;
 	int i;
 
	if (obj->bit_17 == NULL)
		return;
 
-	for_each_sg(obj->pages->sgl, sg, page_count, i) {
-		struct page *page = sg_page(sg);
+	i = 0;
+	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0) {
+		struct page *page = sg_iter.page;
 		char new_bit_17 = page_to_phys(page) >> 17;
 		if ((new_bit_17 & 0x1) !=
 		    (test_bit(i, obj->bit_17) != 0)) {
 			i915_gem_swizzle_page(page);
 			set_page_dirty(page);
 		}
+		i++;
 	}
 }
 
 void
 i915_gem_object_save_bit_17_swizzle(struct drm_i915_gem_object *obj)
 {
-	struct scatterlist *sg;
+	struct sg_page_iter sg_iter;
 	int page_count = obj->base.size >> PAGE_SHIFT;
 	int i;
 
@@ -508,11 +509,12 @@ i915_gem_object_save_bit_17_swizzle(struct drm_i915_gem_object *obj)
 		}
 	}
 
-	for_each_sg(obj->pages->sgl, sg, page_count, i) {
-		struct page *page = sg_page(sg);
-		if (page_to_phys(page) & (1 << 17))
+	i = 0;
+	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0) {
+		if (page_to_phys(sg_iter.page) & (1 << 17))
 			__set_bit(i, obj->bit_17);
 		else
 			__clear_bit(i, obj->bit_17);
+		i++;
 	}
 }
So far we created a sparse dma scatter list for gem objects, where each scatter list entry represented only a single page. In the future we'll have to handle compact scatter lists too where each entry can consist of multiple pages, for example for objects imported through PRIME.
The previous patches have already fixed up all other places where the i915 driver _walked_ these lists. Here we have the corresponding fix to _create_ compact lists. It's not a performance or memory footprint improvement, but it helps to better exercise the new logic.
Reference: http://www.spinics.net/lists/dri-devel/msg33917.html
Signed-off-by: Imre Deak <imre.deak@intel.com>
---
 drivers/gpu/drm/i915/i915_gem.c | 31 ++++++++++++++++++++++---------
 1 file changed, 22 insertions(+), 9 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 03037cf..ec2f11f 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1625,9 +1625,8 @@ i915_gem_object_is_purgeable(struct drm_i915_gem_object *obj)
 static void
 i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
 {
-	int page_count = obj->base.size / PAGE_SIZE;
-	struct scatterlist *sg;
-	int ret, i;
+	struct sg_page_iter sg_iter;
+	int ret;
 
 	BUG_ON(obj->madv == __I915_MADV_PURGED);
 
@@ -1647,8 +1646,8 @@ i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
 	if (obj->madv == I915_MADV_DONTNEED)
 		obj->dirty = 0;
 
-	for_each_sg(obj->pages->sgl, sg, page_count, i) {
-		struct page *page = sg_page(sg);
+	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0) {
+		struct page *page = sg_iter.page;
 
 		if (obj->dirty)
 			set_page_dirty(page);
@@ -1749,7 +1748,9 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
 	struct address_space *mapping;
 	struct sg_table *st;
 	struct scatterlist *sg;
+	struct sg_page_iter sg_iter;
 	struct page *page;
+	unsigned long last_pfn = 0;	/* suppress gcc warning */
 	gfp_t gfp;
 
 	/* Assert that the object is not currently in any GPU domain. As it
@@ -1779,7 +1780,9 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
 	gfp = mapping_gfp_mask(mapping);
 	gfp |= __GFP_NORETRY | __GFP_NOWARN | __GFP_NO_KSWAPD;
 	gfp &= ~(__GFP_IO | __GFP_WAIT);
-	for_each_sg(st->sgl, sg, page_count, i) {
+	sg = st->sgl;
+	st->nents = 0;
+	for (i = 0; i < page_count; i++) {
 		page = shmem_read_mapping_page_gfp(mapping, i, gfp);
 		if (IS_ERR(page)) {
 			i915_gem_purge(dev_priv, page_count);
@@ -1802,9 +1805,18 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
 			gfp &= ~(__GFP_IO | __GFP_WAIT);
 		}
 
-		sg_set_page(sg, page, PAGE_SIZE, 0);
+		if (!i || page_to_pfn(page) != last_pfn + 1) {
+			if (i)
+				sg = sg_next(sg);
+			st->nents++;
+			sg_set_page(sg, page, PAGE_SIZE, 0);
+		} else {
+			sg->length += PAGE_SIZE;
+		}
+		last_pfn = page_to_pfn(page);
 	}
 
+	sg_mark_end(sg);
 	obj->pages = st;
 
 	if (i915_gem_object_needs_bit17_swizzle(obj))
@@ -1813,8 +1825,9 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
 	return 0;
 
 err_pages:
-	for_each_sg(st->sgl, sg, i, page_count)
-		page_cache_release(sg_page(sg));
+	sg_mark_end(sg);
+	for_each_sg_page(st->sgl, &sg_iter, st->nents, 0)
+		page_cache_release(sg_iter.page);
 	sg_free_table(st);
 	kfree(st);
 	return PTR_ERR(page);
The existing gtt setup code is correct, and so doesn't need to be fixed to handle compact dma scatter lists the way the previous patches did. Still, switch to the for_each_sg_page macro to get somewhat simpler code.
Signed-off-by: Imre Deak <imre.deak@intel.com>
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 67 +++++++++++++----------------------
 1 file changed, 24 insertions(+), 43 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 926a1e2..0c32054 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -116,41 +116,26 @@ static void gen6_ppgtt_insert_entries(struct i915_hw_ppgtt *ppgtt,
 {
 	gtt_pte_t *pt_vaddr;
 	unsigned act_pd = first_entry / I915_PPGTT_PT_ENTRIES;
-	unsigned first_pte = first_entry % I915_PPGTT_PT_ENTRIES;
-	unsigned i, j, m, segment_len;
-	dma_addr_t page_addr;
-	struct scatterlist *sg;
-
-	/* init sg walking */
-	sg = pages->sgl;
-	i = 0;
-	segment_len = sg_dma_len(sg) >> PAGE_SHIFT;
-	m = 0;
-
-	while (i < pages->nents) {
-		pt_vaddr = kmap_atomic(ppgtt->pt_pages[act_pd]);
-
-		for (j = first_pte; j < I915_PPGTT_PT_ENTRIES; j++) {
-			page_addr = sg_dma_address(sg) + (m << PAGE_SHIFT);
-			pt_vaddr[j] = gen6_pte_encode(ppgtt->dev, page_addr,
-						      cache_level);
-
-			/* grab the next page */
-			if (++m == segment_len) {
-				if (++i == pages->nents)
-					break;
+	unsigned act_pte = first_entry % I915_PPGTT_PT_ENTRIES;
+	struct sg_page_iter sg_iter;
+
+	pt_vaddr = kmap_atomic(ppgtt->pt_pages[act_pd]);
+	for_each_sg_page(pages->sgl, &sg_iter, pages->nents, 0) {
+		dma_addr_t page_addr;
+
+		page_addr = sg_dma_address(sg_iter.sg) +
+			(sg_iter.sg_pgoffset << PAGE_SHIFT);
+		pt_vaddr[act_pte] = gen6_pte_encode(ppgtt->dev, page_addr,
+						    cache_level);
+		if (++act_pte == I915_PPGTT_PT_ENTRIES) {
+			kunmap_atomic(pt_vaddr);
+			act_pd++;
+			pt_vaddr = kmap_atomic(ppgtt->pt_pages[act_pd]);
+			act_pte = 0;
 
-				sg = sg_next(sg);
-				segment_len = sg_dma_len(sg) >> PAGE_SHIFT;
-				m = 0;
-			}
 		}
-
-		kunmap_atomic(pt_vaddr);
-
-		first_pte = 0;
-		act_pd++;
 	}
+	kunmap_atomic(pt_vaddr);
 }
 
 static void gen6_ppgtt_cleanup(struct i915_hw_ppgtt *ppgtt)
@@ -432,21 +417,17 @@ static void gen6_ggtt_insert_entries(struct drm_device *dev,
 				     enum i915_cache_level level)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct scatterlist *sg = st->sgl;
 	gtt_pte_t __iomem *gtt_entries =
 		(gtt_pte_t __iomem *)dev_priv->gtt.gsm + first_entry;
-	int unused, i = 0;
-	unsigned int len, m = 0;
+	int i = 0;
+	struct sg_page_iter sg_iter;
 	dma_addr_t addr;
 
-	for_each_sg(st->sgl, sg, st->nents, unused) {
-		len = sg_dma_len(sg) >> PAGE_SHIFT;
-		for (m = 0; m < len; m++) {
-			addr = sg_dma_address(sg) + (m << PAGE_SHIFT);
-			iowrite32(gen6_pte_encode(dev, addr, level),
-				  &gtt_entries[i]);
-			i++;
-		}
+	for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) {
+		addr = sg_dma_address(sg_iter.sg) +
+			(sg_iter.sg_pgoffset << PAGE_SHIFT);
+		iowrite32(gen6_pte_encode(dev, addr, level), &gtt_entries[i]);
+		i++;
 	}
 
 	/* XXX: This serves as a posting read to make sure that the PTE has
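Both hunks share one pattern: recovering a per-page DMA address from a
possibly multi-page entry. In isolation, a sketch assuming the
sg_page_iter fields used above (walk_dma_pages is a hypothetical name):

	static void walk_dma_pages(struct sg_table *st)
	{
		struct sg_page_iter sg_iter;

		for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) {
			/* entry base plus the page offset within the entry */
			dma_addr_t addr = sg_dma_address(sg_iter.sg) +
				(sg_iter.sg_pgoffset << PAGE_SHIFT);

			/* program one pte for "addr" here */
			(void)addr;
		}
	}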
On Mon, Feb 18, 2013 at 07:28:04PM +0200, Imre Deak wrote:
> The existing gtt setup code is correct, and so doesn't need to be
> fixed to handle compact dma scatter lists the way the previous patches
> did. Still, switch to the for_each_sg_page macro to get somewhat
> simpler code.
> 
> Signed-off-by: Imre Deak <imre.deak@intel.com>
Series slurped into dinq, thanks. -Daniel