From: Felix Kuehling Felix.Kuehling@amd.com
TTM BO accounting is out of sync with how memory is really allocated in ttm[_dma]_tt_alloc_page_directory. This resulted in excessive estimated overhead with many small allocations.
ttm_dma_tt_alloc_page_directory makes a single allocation for three arrays: pages, DMA and CPU addresses. It uses drm_calloc_large, which uses kmalloc internally for allocations smaller than PAGE_SIZE. ttm_round_pot should be a good approximation of its memory usage both above and below PAGE_SIZE.
Signed-off-by: Felix Kuehling Felix.Kuehling@amd.com Reviewed-by: Monk Liu monk.liu@amd.com Reviewed-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/ttm/ttm_bo.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 4cbf265..870a87a 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -1215,7 +1215,7 @@ size_t ttm_bo_acc_size(struct ttm_bo_device *bdev, size_t size = 0;
size += ttm_round_pot(struct_size); - size += PAGE_ALIGN(npages * sizeof(void *)); + size += ttm_round_pot(npages * sizeof(void *)); size += ttm_round_pot(sizeof(struct ttm_tt)); return size; } @@ -1229,8 +1229,7 @@ size_t ttm_bo_dma_acc_size(struct ttm_bo_device *bdev, size_t size = 0;
size += ttm_round_pot(struct_size); - size += PAGE_ALIGN(npages * sizeof(void *)); - size += PAGE_ALIGN(npages * sizeof(dma_addr_t)); + size += ttm_round_pot(npages * (2*sizeof(void *) + sizeof(dma_addr_t))); size += ttm_round_pot(sizeof(struct ttm_dma_tt)); return size; }
On 04/12/2016 05:57 PM, Alex Deucher wrote:
From: Felix Kuehling Felix.Kuehling@amd.com
TTM BO accounting is out of sync with how memory is really allocated in ttm[_dma]_tt_alloc_page_directory. This resulted in excessive estimated overhead with many small allocations.
ttm_dma_tt_alloc_page_directory makes a single allocation for three arrays: pages, DMA and CPU addresses. It uses drm_calloc_large, which uses kmalloc internally for allocations smaller than PAGE_SIZE. ttm_round_pot should be a good approximation of its memory usage both above and below PAGE_SIZE.
I think for allocations larger than PAGE_SIZE, ttm_round_pot() will overestimate. You should probably use the smaller of the two.
/Thomas
Signed-off-by: Felix Kuehling Felix.Kuehling@amd.com Reviewed-by: Monk Liu monk.liu@amd.com Reviewed-by: Christian König christian.koenig@amd.com
drivers/gpu/drm/ttm/ttm_bo.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 4cbf265..870a87a 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -1215,7 +1215,7 @@ size_t ttm_bo_acc_size(struct ttm_bo_device *bdev, size_t size = 0;
size += ttm_round_pot(struct_size);
- size += PAGE_ALIGN(npages * sizeof(void *));
- size += ttm_round_pot(npages * sizeof(void *)); size += ttm_round_pot(sizeof(struct ttm_tt)); return size;
} @@ -1229,8 +1229,7 @@ size_t ttm_bo_dma_acc_size(struct ttm_bo_device *bdev, size_t size = 0;
size += ttm_round_pot(struct_size);
- size += PAGE_ALIGN(npages * sizeof(void *));
- size += PAGE_ALIGN(npages * sizeof(dma_addr_t));
- size += ttm_round_pot(npages * (2*sizeof(void *) + sizeof(dma_addr_t))); size += ttm_round_pot(sizeof(struct ttm_dma_tt)); return size;
}
This is the implementation of ttm_round_pot:
size_t ttm_round_pot(size_t size) { if ((size & (size - 1)) == 0) return size; else if (size > PAGE_SIZE) return PAGE_ALIGN(size); else { size_t tmp_size = 4;
while (tmp_size < size) tmp_size <<= 1;
return tmp_size; } return 0; }
As you can see, for size > PAGE_SIZE it just returns PAGE_ALIGN(size). It does not actually round to a power of two in this case.
Regards, Felix
On 16-04-12 12:21 PM, Thomas Hellstrom wrote:
On 04/12/2016 05:57 PM, Alex Deucher wrote:
From: Felix Kuehling Felix.Kuehling@amd.com
TTM BO accounting is out of sync with how memory is really allocated in ttm[_dma]_tt_alloc_page_directory. This resulted in excessive estimated overhead with many small allocations.
ttm_dma_tt_alloc_page_directory makes a single allocation for three arrays: pages, DMA and CPU addresses. It uses drm_calloc_large, which uses kmalloc internally for allocations smaller than PAGE_SIZE. ttm_round_pot should be a good approximation of its memory usage both above and below PAGE_SIZE.
I think for allocations larger than PAGE_SIZE, ttm_round_pot() will overestimate. You should probably use the smaller of the two.
/Thomas
Signed-off-by: Felix Kuehling Felix.Kuehling@amd.com Reviewed-by: Monk Liu monk.liu@amd.com Reviewed-by: Christian König christian.koenig@amd.com
drivers/gpu/drm/ttm/ttm_bo.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 4cbf265..870a87a 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -1215,7 +1215,7 @@ size_t ttm_bo_acc_size(struct ttm_bo_device *bdev, size_t size = 0;
size += ttm_round_pot(struct_size);
- size += PAGE_ALIGN(npages * sizeof(void *));
- size += ttm_round_pot(npages * sizeof(void *)); size += ttm_round_pot(sizeof(struct ttm_tt)); return size;
} @@ -1229,8 +1229,7 @@ size_t ttm_bo_dma_acc_size(struct ttm_bo_device *bdev, size_t size = 0;
size += ttm_round_pot(struct_size);
- size += PAGE_ALIGN(npages * sizeof(void *));
- size += PAGE_ALIGN(npages * sizeof(dma_addr_t));
- size += ttm_round_pot(npages * (2*sizeof(void *) + sizeof(dma_addr_t))); size += ttm_round_pot(sizeof(struct ttm_dma_tt)); return size;
}
On 04/12/2016 06:39 PM, Felix Kuehling wrote:
This is the implementation of ttm_round_pot:
size_t ttm_round_pot(size_t size) { if ((size & (size - 1)) == 0) return size; else if (size > PAGE_SIZE) return PAGE_ALIGN(size); else { size_t tmp_size = 4;
while (tmp_size < size) tmp_size <<= 1; return tmp_size; } return 0;
}
As you can see, for size > PAGE_SIZE it just returns PAGE_ALIGN(size). It does not actually round to a power of two in this case.
Regards, Felix
Ah yes. I should've refreshed my memory.
Reviewed-by: Thomas Hellstrom thellstrom@vmware.com
On 16-04-12 12:21 PM, Thomas Hellstrom wrote:
On 04/12/2016 05:57 PM, Alex Deucher wrote:
From: Felix Kuehling Felix.Kuehling@amd.com
TTM BO accounting is out of sync with how memory is really allocated in ttm[_dma]_tt_alloc_page_directory. This resulted in excessive estimated overhead with many small allocations.
ttm_dma_tt_alloc_page_directory makes a single allocation for three arrays: pages, DMA and CPU addresses. It uses drm_calloc_large, which uses kmalloc internally for allocations smaller than PAGE_SIZE. ttm_round_pot should be a good approximation of its memory usage both above and below PAGE_SIZE.
I think for allocations larger than PAGE_SIZE, ttm_round_pot() will overestimate. You should probably use the smaller of the two.
/Thomas
Signed-off-by: Felix Kuehling Felix.Kuehling@amd.com Reviewed-by: Monk Liu monk.liu@amd.com Reviewed-by: Christian König christian.koenig@amd.com
drivers/gpu/drm/ttm/ttm_bo.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 4cbf265..870a87a 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -1215,7 +1215,7 @@ size_t ttm_bo_acc_size(struct ttm_bo_device *bdev, size_t size = 0;
size += ttm_round_pot(struct_size);
- size += PAGE_ALIGN(npages * sizeof(void *));
- size += ttm_round_pot(npages * sizeof(void *)); size += ttm_round_pot(sizeof(struct ttm_tt)); return size;
} @@ -1229,8 +1229,7 @@ size_t ttm_bo_dma_acc_size(struct ttm_bo_device *bdev, size_t size = 0;
size += ttm_round_pot(struct_size);
- size += PAGE_ALIGN(npages * sizeof(void *));
- size += PAGE_ALIGN(npages * sizeof(dma_addr_t));
- size += ttm_round_pot(npages * (2*sizeof(void *) + sizeof(dma_addr_t))); size += ttm_round_pot(sizeof(struct ttm_dma_tt)); return size;
}
dri-devel@lists.freedesktop.org