add this for correctly updating global mem count in ttm_mem_zone. before that when ttm_mem_global_alloc_page fails, we would update all dma_page's global mem count in ttm_dma->pages_list. but actually here we should not update for the last dma_page.
v2: only the update of last dma_page is not right v3: use lower bits of dma_page vaddr
Signed-off-by: Roger He Hongbo.He@amd.com --- drivers/gpu/drm/ttm/ttm_page_alloc_dma.c | 55 +++++++++++++++++--------------- 1 file changed, 29 insertions(+), 26 deletions(-)
diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c b/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c index c7f01a4..a880515 100644 --- a/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c +++ b/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c @@ -61,6 +61,7 @@ #define SMALL_ALLOCATION 4 #define FREE_ALL_PAGES (~0U) #define VADDR_FLAG_HUGE_POOL 1UL +#define VADDR_FLAG_UPDATED_COUNT 2UL
enum pool_type { IS_UNDEFINED = 0, @@ -874,18 +875,18 @@ static int ttm_dma_page_pool_fill_locked(struct dma_pool *pool, }
/* - * @return count of pages still required to fulfill the request. * The populate list is actually a stack (not that is matters as TTM * allocates one page at a time. + * return dma_page pointer if success, otherwise NULL. */ -static int ttm_dma_pool_get_pages(struct dma_pool *pool, +static struct dma_page *ttm_dma_pool_get_pages(struct dma_pool *pool, struct ttm_dma_tt *ttm_dma, unsigned index) { - struct dma_page *d_page; + struct dma_page *d_page = NULL; struct ttm_tt *ttm = &ttm_dma->ttm; unsigned long irq_flags; - int count, r = -ENOMEM; + int count;
spin_lock_irqsave(&pool->lock, irq_flags); count = ttm_dma_page_pool_fill_locked(pool, &irq_flags); @@ -894,12 +895,11 @@ static int ttm_dma_pool_get_pages(struct dma_pool *pool, ttm->pages[index] = d_page->p; ttm_dma->dma_address[index] = d_page->dma; list_move_tail(&d_page->page_list, &ttm_dma->pages_list); - r = 0; pool->npages_in_use += 1; pool->npages_free -= 1; } spin_unlock_irqrestore(&pool->lock, irq_flags); - return r; + return d_page; }
static gfp_t ttm_dma_pool_gfp_flags(struct ttm_dma_tt *ttm_dma, bool huge) @@ -934,6 +934,7 @@ int ttm_dma_populate(struct ttm_dma_tt *ttm_dma, struct device *dev, struct ttm_mem_global *mem_glob = ttm->glob->mem_glob; unsigned long num_pages = ttm->num_pages; struct dma_pool *pool; + struct dma_page *d_page; enum pool_type type; unsigned i; int ret; @@ -962,8 +963,8 @@ int ttm_dma_populate(struct ttm_dma_tt *ttm_dma, struct device *dev, while (num_pages >= HPAGE_PMD_NR) { unsigned j;
- ret = ttm_dma_pool_get_pages(pool, ttm_dma, i); - if (ret != 0) + d_page = ttm_dma_pool_get_pages(pool, ttm_dma, i); + if (!d_page) break;
ret = ttm_mem_global_alloc_page(mem_glob, ttm->pages[i], @@ -973,6 +974,7 @@ int ttm_dma_populate(struct ttm_dma_tt *ttm_dma, struct device *dev, return -ENOMEM; }
+ d_page->vaddr |= VADDR_FLAG_UPDATED_COUNT; for (j = i + 1; j < (i + HPAGE_PMD_NR); ++j) { ttm->pages[j] = ttm->pages[j - 1] + 1; ttm_dma->dma_address[j] = ttm_dma->dma_address[j - 1] + @@ -996,8 +998,8 @@ int ttm_dma_populate(struct ttm_dma_tt *ttm_dma, struct device *dev, }
while (num_pages) { - ret = ttm_dma_pool_get_pages(pool, ttm_dma, i); - if (ret != 0) { + d_page = ttm_dma_pool_get_pages(pool, ttm_dma, i); + if (!d_page) { ttm_dma_unpopulate(ttm_dma, dev); return -ENOMEM; } @@ -1009,6 +1011,7 @@ int ttm_dma_populate(struct ttm_dma_tt *ttm_dma, struct device *dev, return -ENOMEM; }
+ d_page->vaddr |= VADDR_FLAG_UPDATED_COUNT; ++i; --num_pages; } @@ -1049,8 +1052,11 @@ void ttm_dma_unpopulate(struct ttm_dma_tt *ttm_dma, struct device *dev) continue;
count++; - ttm_mem_global_free_page(ttm->glob->mem_glob, - d_page->p, pool->size); + if (d_page->vaddr & VADDR_FLAG_UPDATED_COUNT) { + ttm_mem_global_free_page(ttm->glob->mem_glob, + d_page->p, pool->size); + d_page->vaddr &= ~VADDR_FLAG_UPDATED_COUNT; + } ttm_dma_page_put(pool, d_page); }
@@ -1070,9 +1076,19 @@ void ttm_dma_unpopulate(struct ttm_dma_tt *ttm_dma, struct device *dev)
/* make sure pages array match list and count number of pages */ count = 0; - list_for_each_entry(d_page, &ttm_dma->pages_list, page_list) { + list_for_each_entry_safe(d_page, next, &ttm_dma->pages_list, + page_list) { ttm->pages[count] = d_page->p; count++; + + if (d_page->vaddr & VADDR_FLAG_UPDATED_COUNT) { + ttm_mem_global_free_page(ttm->glob->mem_glob, + d_page->p, pool->size); + d_page->vaddr &= ~VADDR_FLAG_UPDATED_COUNT; + } + + if (is_cached) + ttm_dma_page_put(pool, d_page); }
spin_lock_irqsave(&pool->lock, irq_flags); @@ -1092,19 +1108,6 @@ void ttm_dma_unpopulate(struct ttm_dma_tt *ttm_dma, struct device *dev) } spin_unlock_irqrestore(&pool->lock, irq_flags);
- if (is_cached) { - list_for_each_entry_safe(d_page, next, &ttm_dma->pages_list, page_list) { - ttm_mem_global_free_page(ttm->glob->mem_glob, - d_page->p, pool->size); - ttm_dma_page_put(pool, d_page); - } - } else { - for (i = 0; i < count; i++) { - ttm_mem_global_free_page(ttm->glob->mem_glob, - ttm->pages[i], pool->size); - } - } - INIT_LIST_HEAD(&ttm_dma->pages_list); for (i = 0; i < ttm->num_pages; i++) { ttm->pages[i] = NULL;
Am 17.01.2018 um 09:59 schrieb Roger He:
add this for correctly updating global mem count in ttm_mem_zone. before that when ttm_mem_global_alloc_page fails, we would update all dma_page's global mem count in ttm_dma->pages_list. but actually here we should not update for the last dma_page.
v2: only the update of last dma_page is not right v3: use lower bits of dma_page vaddr
Signed-off-by: Roger He Hongbo.He@amd.com
Reviewed-by: Christian König christian.koenig@amd.com
drivers/gpu/drm/ttm/ttm_page_alloc_dma.c | 55 +++++++++++++++++--------------- 1 file changed, 29 insertions(+), 26 deletions(-)
diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c b/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c index c7f01a4..a880515 100644 --- a/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c +++ b/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c @@ -61,6 +61,7 @@ #define SMALL_ALLOCATION 4 #define FREE_ALL_PAGES (~0U) #define VADDR_FLAG_HUGE_POOL 1UL +#define VADDR_FLAG_UPDATED_COUNT 2UL
enum pool_type { IS_UNDEFINED = 0, @@ -874,18 +875,18 @@ static int ttm_dma_page_pool_fill_locked(struct dma_pool *pool, }
/*
- @return count of pages still required to fulfill the request.
- The populate list is actually a stack (not that is matters as TTM
- allocates one page at a time.
*/
- return dma_page pointer if success, otherwise NULL.
-static int ttm_dma_pool_get_pages(struct dma_pool *pool, +static struct dma_page *ttm_dma_pool_get_pages(struct dma_pool *pool, struct ttm_dma_tt *ttm_dma, unsigned index) {
- struct dma_page *d_page;
- struct dma_page *d_page = NULL; struct ttm_tt *ttm = &ttm_dma->ttm; unsigned long irq_flags;
- int count, r = -ENOMEM;
int count;
spin_lock_irqsave(&pool->lock, irq_flags); count = ttm_dma_page_pool_fill_locked(pool, &irq_flags);
@@ -894,12 +895,11 @@ static int ttm_dma_pool_get_pages(struct dma_pool *pool, ttm->pages[index] = d_page->p; ttm_dma->dma_address[index] = d_page->dma; list_move_tail(&d_page->page_list, &ttm_dma->pages_list);
pool->npages_in_use += 1; pool->npages_free -= 1; } spin_unlock_irqrestore(&pool->lock, irq_flags);r = 0;
- return r;
return d_page; }
static gfp_t ttm_dma_pool_gfp_flags(struct ttm_dma_tt *ttm_dma, bool huge)
@@ -934,6 +934,7 @@ int ttm_dma_populate(struct ttm_dma_tt *ttm_dma, struct device *dev, struct ttm_mem_global *mem_glob = ttm->glob->mem_glob; unsigned long num_pages = ttm->num_pages; struct dma_pool *pool;
- struct dma_page *d_page; enum pool_type type; unsigned i; int ret;
@@ -962,8 +963,8 @@ int ttm_dma_populate(struct ttm_dma_tt *ttm_dma, struct device *dev, while (num_pages >= HPAGE_PMD_NR) { unsigned j;
ret = ttm_dma_pool_get_pages(pool, ttm_dma, i);
if (ret != 0)
d_page = ttm_dma_pool_get_pages(pool, ttm_dma, i);
if (!d_page) break;
ret = ttm_mem_global_alloc_page(mem_glob, ttm->pages[i],
@@ -973,6 +974,7 @@ int ttm_dma_populate(struct ttm_dma_tt *ttm_dma, struct device *dev, return -ENOMEM; }
for (j = i + 1; j < (i + HPAGE_PMD_NR); ++j) { ttm->pages[j] = ttm->pages[j - 1] + 1; ttm_dma->dma_address[j] = ttm_dma->dma_address[j - 1] +d_page->vaddr |= VADDR_FLAG_UPDATED_COUNT;
@@ -996,8 +998,8 @@ int ttm_dma_populate(struct ttm_dma_tt *ttm_dma, struct device *dev, }
while (num_pages) {
ret = ttm_dma_pool_get_pages(pool, ttm_dma, i);
if (ret != 0) {
d_page = ttm_dma_pool_get_pages(pool, ttm_dma, i);
}if (!d_page) { ttm_dma_unpopulate(ttm_dma, dev); return -ENOMEM;
@@ -1009,6 +1011,7 @@ int ttm_dma_populate(struct ttm_dma_tt *ttm_dma, struct device *dev, return -ENOMEM; }
++i; --num_pages; }d_page->vaddr |= VADDR_FLAG_UPDATED_COUNT;
@@ -1049,8 +1052,11 @@ void ttm_dma_unpopulate(struct ttm_dma_tt *ttm_dma, struct device *dev) continue;
count++;
ttm_mem_global_free_page(ttm->glob->mem_glob,
d_page->p, pool->size);
if (d_page->vaddr & VADDR_FLAG_UPDATED_COUNT) {
ttm_mem_global_free_page(ttm->glob->mem_glob,
d_page->p, pool->size);
d_page->vaddr &= ~VADDR_FLAG_UPDATED_COUNT;
}} ttm_dma_page_put(pool, d_page);
@@ -1070,9 +1076,19 @@ void ttm_dma_unpopulate(struct ttm_dma_tt *ttm_dma, struct device *dev)
/* make sure pages array match list and count number of pages */ count = 0;
- list_for_each_entry(d_page, &ttm_dma->pages_list, page_list) {
list_for_each_entry_safe(d_page, next, &ttm_dma->pages_list,
page_list) {
ttm->pages[count] = d_page->p; count++;
if (d_page->vaddr & VADDR_FLAG_UPDATED_COUNT) {
ttm_mem_global_free_page(ttm->glob->mem_glob,
d_page->p, pool->size);
d_page->vaddr &= ~VADDR_FLAG_UPDATED_COUNT;
}
if (is_cached)
ttm_dma_page_put(pool, d_page);
}
spin_lock_irqsave(&pool->lock, irq_flags);
@@ -1092,19 +1108,6 @@ void ttm_dma_unpopulate(struct ttm_dma_tt *ttm_dma, struct device *dev) } spin_unlock_irqrestore(&pool->lock, irq_flags);
- if (is_cached) {
list_for_each_entry_safe(d_page, next, &ttm_dma->pages_list, page_list) {
ttm_mem_global_free_page(ttm->glob->mem_glob,
d_page->p, pool->size);
ttm_dma_page_put(pool, d_page);
}
- } else {
for (i = 0; i < count; i++) {
ttm_mem_global_free_page(ttm->glob->mem_glob,
ttm->pages[i], pool->size);
}
- }
- INIT_LIST_HEAD(&ttm_dma->pages_list); for (i = 0; i < ttm->num_pages; i++) { ttm->pages[i] = NULL;
dri-devel@lists.freedesktop.org