Allows gdb to access contents of user mode mapped BOs. VRAM access requires the driver to implement a new callback in ttm_bo_driver.
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
---
 drivers/gpu/drm/ttm/ttm_bo_vm.c | 66 ++++++++++++++++++++++++++++++++++++++++-
 include/drm/ttm/ttm_bo_driver.h | 12 ++++++++
 2 files changed, 77 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c
index 9f53df9..0ef2eb9 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
@@ -294,10 +294,74 @@ static void ttm_bo_vm_close(struct vm_area_struct *vma)
 	vma->vm_private_data = NULL;
 }
+static int ttm_bo_vm_access_kmap(struct ttm_buffer_object *bo,
+				 unsigned long offset,
+				 void *buf, int len, int write)
+{
+	unsigned long first_page = offset >> PAGE_SHIFT;
+	unsigned long last_page = (offset + len - 1) >> PAGE_SHIFT;
+	unsigned long num_pages = last_page - first_page + 1;
+	struct ttm_bo_kmap_obj map;
+	void *ptr;
+	bool is_iomem;
+	int ret;
+
+	ret = ttm_bo_kmap(bo, first_page, num_pages, &map);
+	if (ret)
+		return ret;
+
+	offset -= first_page << PAGE_SHIFT;
+	ptr = (uint8_t *)ttm_kmap_obj_virtual(&map, &is_iomem) + offset;
+	WARN_ON(is_iomem);
+	if (write)
+		memcpy(ptr, buf, len);
+	else
+		memcpy(buf, ptr, len);
+	ttm_bo_kunmap(&map);
+
+	return len;
+}
+
+static int ttm_bo_vm_access(struct vm_area_struct *vma, unsigned long addr,
+			    void *buf, int len, int write)
+{
+	unsigned long offset = (addr) - vma->vm_start;
+	struct ttm_buffer_object *bo = vma->vm_private_data;
+	int ret;
+
+	if (len < 1 || (offset + len) >> PAGE_SHIFT > bo->num_pages)
+		return -EIO;
+
+	ret = ttm_bo_reserve(bo, true, false, NULL);
+	if (ret)
+		return ret;
+
+	switch(bo->mem.mem_type) {
+	case TTM_PL_SYSTEM:
+	case TTM_PL_TT:
+		ret = ttm_bo_vm_access_kmap(bo, offset, buf, len, write);
+		break;
+	case TTM_PL_VRAM:
+		if (bo->bdev->driver->access_vram)
+			ret = bo->bdev->driver->access_vram(
+				bo, offset, buf, len, write);
+		else
+			ret = -EIO;
+		break;
+	default:
+		ret = -EIO;
+	}
+
+	ttm_bo_unreserve(bo);
+
+	return ret;
+}
+
 static const struct vm_operations_struct ttm_bo_vm_ops = {
 	.fault = ttm_bo_vm_fault,
 	.open = ttm_bo_vm_open,
-	.close = ttm_bo_vm_close
+	.close = ttm_bo_vm_close,
+	.access = ttm_bo_vm_access
 };
 static struct ttm_buffer_object *ttm_bo_vm_lookup(struct ttm_bo_device *bdev,
diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
index 6bbd34d..6ce5094 100644
--- a/include/drm/ttm/ttm_bo_driver.h
+++ b/include/drm/ttm/ttm_bo_driver.h
@@ -471,6 +471,18 @@ struct ttm_bo_driver {
 	 */
 	unsigned long (*io_mem_pfn)(struct ttm_buffer_object *bo,
 				    unsigned long page_offset);
+
+	/**
+	 * Read/write VRAM buffers for ptrace access
+	 *
+	 * @bo: the BO to access
+	 * @offset: the offset from the start of the BO
+	 * @buf: pointer to source/destination buffer
+	 * @len: number of bytes to copy
+	 * @write: whether to read (0) from or write (non-0) to BO
+	 */
+	int (*access_vram)(struct ttm_buffer_object *bo, unsigned long offset,
+			   void *buf, int len, int write);
 };
/**
Allows gdb to access contents of user mode mapped VRAM BOs.
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 59 +++++++++++++++++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 2 ++
 2 files changed, 61 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index ff5614b..d65551d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1115,6 +1115,64 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
 	return ttm_bo_eviction_valuable(bo, place);
 }
+static int amdgpu_ttm_access_vram(struct ttm_buffer_object *bo,
+				  unsigned long offset,
+				  void *buf, int len, int write)
+{
+	struct amdgpu_bo *abo = container_of(bo, struct amdgpu_bo, tbo);
+	struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
+	struct drm_mm_node *nodes = abo->tbo.mem.mm_node;
+	uint32_t value = 0;
+	int result = 0;
+	uint64_t pos;
+	unsigned long flags;
+
+	while (offset >= (nodes->size << PAGE_SHIFT)) {
+		offset -= nodes->size << PAGE_SHIFT;
+		++nodes;
+	}
+	pos = (nodes->start << PAGE_SHIFT) + offset;
+
+	while (len && pos < adev->mc.mc_vram_size) {
+		uint64_t aligned_pos = pos & ~(uint64_t)3;
+		uint32_t bytes = 4 - (pos & 3);
+		uint32_t shift = (pos & 3) * 8;
+		uint32_t mask = 0xffffffff << shift;
+
+		if (len < bytes) {
+			mask &= 0xffffffff >> (bytes - len) * 8;
+			bytes = len;
+		}
+
+		spin_lock_irqsave(&adev->mmio_idx_lock, flags);
+		WREG32(mmMM_INDEX, ((uint32_t)aligned_pos) | 0x80000000);
+		WREG32(mmMM_INDEX_HI, aligned_pos >> 31);
+		if (!write || mask != 0xffffffff)
+			value = RREG32(mmMM_DATA);
+		if (write) {
+			value &= ~mask;
+			value |= (*(uint32_t *)buf << shift) & mask;
+			WREG32(mmMM_DATA, value);
+		}
+		spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
+		if (!write) {
+			value = (value & mask) >> shift;
+			memcpy(buf, &value, bytes);
+		}
+
+		result += bytes;
+		buf = (uint8_t *)buf + bytes;
+		pos += bytes;
+		len -= bytes;
+		if (pos >= (nodes->start + nodes->size) << PAGE_SHIFT) {
+			++nodes;
+			pos = (nodes->start << PAGE_SHIFT);
+		}
+	}
+
+	return result;
+}
+
 static struct ttm_bo_driver amdgpu_bo_driver = {
 	.ttm_tt_create = &amdgpu_ttm_tt_create,
 	.ttm_tt_populate = &amdgpu_ttm_tt_populate,
@@ -1130,6 +1188,7 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
 	.io_mem_reserve = &amdgpu_ttm_io_mem_reserve,
 	.io_mem_free = &amdgpu_ttm_io_mem_free,
 	.io_mem_pfn = amdgpu_ttm_io_mem_pfn,
+	.access_vram = &amdgpu_ttm_access_vram
 };
 int amdgpu_ttm_init(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
index f137c24..a22e430 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
@@ -78,6 +78,8 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo,
 		       struct dma_fence **fence);
 int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma);
+int amdgpu_bo_mmap(struct file *filp, struct vm_area_struct *vma,
+		   struct ttm_bo_device *bdev);
 bool amdgpu_ttm_is_bound(struct ttm_tt *ttm);
 int amdgpu_ttm_bind(struct ttm_buffer_object *bo, struct ttm_mem_reg *bo_mem);
 int amdgpu_ttm_recover_gart(struct amdgpu_device *adev);
On 17-07-13 05:08 PM, Felix Kuehling wrote:
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
@@ -78,6 +78,8 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo,
 		       struct dma_fence **fence);

 int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma);
+int amdgpu_bo_mmap(struct file *filp, struct vm_area_struct *vma,
+		   struct ttm_bo_device *bdev);
 bool amdgpu_ttm_is_bound(struct ttm_tt *ttm);
 int amdgpu_ttm_bind(struct ttm_buffer_object *bo, struct ttm_mem_reg *bo_mem);
 int amdgpu_ttm_recover_gart(struct amdgpu_device *adev);
Oops, this is a remnant from the old version that hacked the ttm_vm_ops in amdgpu. I'll remove this before I submit.
On 14/07/17 06:23 AM, Felix Kuehling wrote:
On 17-07-13 05:08 PM, Felix Kuehling wrote:
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
@@ -78,6 +78,8 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo,
 		       struct dma_fence **fence);

 int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma);
+int amdgpu_bo_mmap(struct file *filp, struct vm_area_struct *vma,
+		   struct ttm_bo_device *bdev);
 bool amdgpu_ttm_is_bound(struct ttm_tt *ttm);
 int amdgpu_ttm_bind(struct ttm_buffer_object *bo, struct ttm_mem_reg *bo_mem);
 int amdgpu_ttm_recover_gart(struct amdgpu_device *adev);
Oops, this is a remnant from the old version that hacked the ttm_vm_ops in amdgpu. I'll remove this before I submit.
With that fixed (and possibly excluding driver-private memory types if necessary, per discussion of patch 1),
Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
On 13.07.2017 at 23:08, Felix Kuehling wrote:
Allows gdb to access contents of user mode mapped VRAM BOs.
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 59 +++++++++++++++++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 2 ++
 2 files changed, 61 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index ff5614b..d65551d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1115,6 +1115,64 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
 	return ttm_bo_eviction_valuable(bo, place);
 }
+static int amdgpu_ttm_access_vram(struct ttm_buffer_object *bo,
+				  unsigned long offset,
+				  void *buf, int len, int write)
+{
+	struct amdgpu_bo *abo = container_of(bo, struct amdgpu_bo, tbo);
+	struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
+	struct drm_mm_node *nodes = abo->tbo.mem.mm_node;
+	uint32_t value = 0;
+	int result = 0;
+	uint64_t pos;
+	unsigned long flags;
+
+	while (offset >= (nodes->size << PAGE_SHIFT)) {
+		offset -= nodes->size << PAGE_SHIFT;
+		++nodes;
+	}
+	pos = (nodes->start << PAGE_SHIFT) + offset;
This silently assumes that a read would never cross a node boundary, doesn't it?
Christian.
+	while (len && pos < adev->mc.mc_vram_size) {
+		uint64_t aligned_pos = pos & ~(uint64_t)3;
+		uint32_t bytes = 4 - (pos & 3);
+		uint32_t shift = (pos & 3) * 8;
+		uint32_t mask = 0xffffffff << shift;
+
+		if (len < bytes) {
+			mask &= 0xffffffff >> (bytes - len) * 8;
+			bytes = len;
+		}
+
+		spin_lock_irqsave(&adev->mmio_idx_lock, flags);
+		WREG32(mmMM_INDEX, ((uint32_t)aligned_pos) | 0x80000000);
+		WREG32(mmMM_INDEX_HI, aligned_pos >> 31);
+		if (!write || mask != 0xffffffff)
+			value = RREG32(mmMM_DATA);
+		if (write) {
+			value &= ~mask;
+			value |= (*(uint32_t *)buf << shift) & mask;
+			WREG32(mmMM_DATA, value);
+		}
+		spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
+		if (!write) {
+			value = (value & mask) >> shift;
+			memcpy(buf, &value, bytes);
+		}
+
+		result += bytes;
+		buf = (uint8_t *)buf + bytes;
+		pos += bytes;
+		len -= bytes;
+		if (pos >= (nodes->start + nodes->size) << PAGE_SHIFT) {
+			++nodes;
+			pos = (nodes->start << PAGE_SHIFT);
+		}
+	}
+
+	return result;
+}
+
 static struct ttm_bo_driver amdgpu_bo_driver = {
 	.ttm_tt_create = &amdgpu_ttm_tt_create,
 	.ttm_tt_populate = &amdgpu_ttm_tt_populate,
@@ -1130,6 +1188,7 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
 	.io_mem_reserve = &amdgpu_ttm_io_mem_reserve,
 	.io_mem_free = &amdgpu_ttm_io_mem_free,
 	.io_mem_pfn = amdgpu_ttm_io_mem_pfn,
+	.access_vram = &amdgpu_ttm_access_vram
 };
int amdgpu_ttm_init(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
index f137c24..a22e430 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
@@ -78,6 +78,8 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo,
 		       struct dma_fence **fence);

 int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma);
+int amdgpu_bo_mmap(struct file *filp, struct vm_area_struct *vma,
+		   struct ttm_bo_device *bdev);
 bool amdgpu_ttm_is_bound(struct ttm_tt *ttm);
 int amdgpu_ttm_bind(struct ttm_buffer_object *bo, struct ttm_mem_reg *bo_mem);
 int amdgpu_ttm_recover_gart(struct amdgpu_device *adev);
On 17-07-14 06:08 AM, Christian König wrote:
On 13.07.2017 at 23:08, Felix Kuehling wrote:
Allows gdb to access contents of user mode mapped VRAM BOs.
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 59 +++++++++++++++++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 2 ++
 2 files changed, 61 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index ff5614b..d65551d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1115,6 +1115,64 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
 	return ttm_bo_eviction_valuable(bo, place);
 }

+static int amdgpu_ttm_access_vram(struct ttm_buffer_object *bo,
+				  unsigned long offset,
+				  void *buf, int len, int write)
+{
+	struct amdgpu_bo *abo = container_of(bo, struct amdgpu_bo, tbo);
+	struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
+	struct drm_mm_node *nodes = abo->tbo.mem.mm_node;
+	uint32_t value = 0;
+	int result = 0;
+	uint64_t pos;
+	unsigned long flags;
+
+	while (offset >= (nodes->size << PAGE_SHIFT)) {
+		offset -= nodes->size << PAGE_SHIFT;
+		++nodes;
+	}
+	pos = (nodes->start << PAGE_SHIFT) + offset;
This silently assumes that a read would never cross a node boundary, doesn't it?
It doesn't. See below ...
Christian.
+	while (len && pos < adev->mc.mc_vram_size) {
+		uint64_t aligned_pos = pos & ~(uint64_t)3;
+		uint32_t bytes = 4 - (pos & 3);
+		uint32_t shift = (pos & 3) * 8;
+		uint32_t mask = 0xffffffff << shift;
+
+		if (len < bytes) {
+			mask &= 0xffffffff >> (bytes - len) * 8;
+			bytes = len;
+		}
+
+		spin_lock_irqsave(&adev->mmio_idx_lock, flags);
+		WREG32(mmMM_INDEX, ((uint32_t)aligned_pos) | 0x80000000);
+		WREG32(mmMM_INDEX_HI, aligned_pos >> 31);
+		if (!write || mask != 0xffffffff)
+			value = RREG32(mmMM_DATA);
+		if (write) {
+			value &= ~mask;
+			value |= (*(uint32_t *)buf << shift) & mask;
+			WREG32(mmMM_DATA, value);
+		}
+		spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
+		if (!write) {
+			value = (value & mask) >> shift;
+			memcpy(buf, &value, bytes);
+		}
+
+		result += bytes;
+		buf = (uint8_t *)buf + bytes;
+		pos += bytes;
+		len -= bytes;
+		if (pos >= (nodes->start + nodes->size) << PAGE_SHIFT) {
+			++nodes;
+			pos = (nodes->start << PAGE_SHIFT);
... Here I handle crossing a node boundary. Yes, I actually added this case to my kfdtest unit test and made sure it works, along with all odd alignments that the code above handles.
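To make that concrete, here is the boundary hop in isolation, with a worked example in the comment (illustration only, not part of the patch; assumes 4K pages and node addresses I made up):

	/* drm_mm_node start/size are in pages. Say the BO has two nodes,
	 * node[0] = { .start = 0x100, .size = 1 },  i.e. VRAM 0x100000..0x100fff
	 * node[1] = { .start = 0x300, .size = 1 },  i.e. VRAM 0x300000..0x300fff
	 *
	 * A 4-byte access at BO offset 0xffe starts at pos = 0x100ffe. The
	 * first chunk copies the 2 bytes up to 0x100fff; pos then reaches the
	 * node end, so the check below advances to node[1], and the remaining
	 * 2 bytes are copied starting at 0x300000.
	 */
	if (pos >= (nodes->start + nodes->size) << PAGE_SHIFT) {
		++nodes;
		pos = (nodes->start << PAGE_SHIFT);
	}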
Regards, Felix
+		}
+	}
+
+	return result;
+}
+
 static struct ttm_bo_driver amdgpu_bo_driver = {
 	.ttm_tt_create = &amdgpu_ttm_tt_create,
 	.ttm_tt_populate = &amdgpu_ttm_tt_populate,
@@ -1130,6 +1188,7 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
 	.io_mem_reserve = &amdgpu_ttm_io_mem_reserve,
 	.io_mem_free = &amdgpu_ttm_io_mem_free,
 	.io_mem_pfn = amdgpu_ttm_io_mem_pfn,
+	.access_vram = &amdgpu_ttm_access_vram
 };

 int amdgpu_ttm_init(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
index f137c24..a22e430 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
@@ -78,6 +78,8 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo,
 		       struct dma_fence **fence);

 int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma);
+int amdgpu_bo_mmap(struct file *filp, struct vm_area_struct *vma,
+		   struct ttm_bo_device *bdev);
 bool amdgpu_ttm_is_bound(struct ttm_tt *ttm);
 int amdgpu_ttm_bind(struct ttm_buffer_object *bo, struct ttm_mem_reg *bo_mem);
 int amdgpu_ttm_recover_gart(struct amdgpu_device *adev);
On 14.07.2017 at 21:44, Felix Kuehling wrote:
On 17-07-14 06:08 AM, Christian König wrote:
On 13.07.2017 at 23:08, Felix Kuehling wrote: [SNIP]
+		result += bytes;
+		buf = (uint8_t *)buf + bytes;
+		pos += bytes;
+		len -= bytes;
+		if (pos >= (nodes->start + nodes->size) << PAGE_SHIFT) {
+			++nodes;
+			pos = (nodes->start << PAGE_SHIFT);
... Here I handle crossing a node boundary. Yes, I actually added this case to my kfdtest unit test and made sure it works, along with all odd alignments that the code above handles.
Ah, I see. Sorry, totally missed that chunk. In this case the patch is
Acked-by: Christian König <christian.koenig@amd.com>
Regards, Christian.
Regards, Felix
On 14/07/17 06:08 AM, Felix Kuehling wrote:
Allows gdb to access contents of user mode mapped BOs. VRAM access requires the driver to implement a new callback in ttm_bo_driver.
Thanks for doing this. Looks mostly good, but I still have some comments.
The shortlog prefix should be "drm/ttm:".
diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c
index 9f53df9..0ef2eb9 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
@@ -294,10 +294,74 @@ static void ttm_bo_vm_close(struct vm_area_struct *vma)
 	vma->vm_private_data = NULL;
 }
+static int ttm_bo_vm_access_kmap(struct ttm_buffer_object *bo,
+				 unsigned long offset,
+				 void *buf, int len, int write)
+{
+	unsigned long first_page = offset >> PAGE_SHIFT;
+	unsigned long last_page = (offset + len - 1) >> PAGE_SHIFT;
+	unsigned long num_pages = last_page - first_page + 1;
+	struct ttm_bo_kmap_obj map;
+	void *ptr;
+	bool is_iomem;
+	int ret;
+
+	ret = ttm_bo_kmap(bo, first_page, num_pages, &map);
+	if (ret)
+		return ret;
Could there be cases (e.g. on 32-bit) where mapping all pages at once fails, but mapping one page at a time would work?
+	offset -= first_page << PAGE_SHIFT;
+	ptr = (uint8_t *)ttm_kmap_obj_virtual(&map, &is_iomem) + offset;
+	WARN_ON(is_iomem);
I suggest making this WARN_ON_ONCE, to avoid flooding dmesg if it ever triggers in practice.
 static struct ttm_buffer_object *ttm_bo_vm_lookup(struct ttm_bo_device *bdev,
diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
index 6bbd34d..6ce5094 100644
--- a/include/drm/ttm/ttm_bo_driver.h
+++ b/include/drm/ttm/ttm_bo_driver.h
@@ -471,6 +471,18 @@ struct ttm_bo_driver {
 	 */
 	unsigned long (*io_mem_pfn)(struct ttm_buffer_object *bo,
 				    unsigned long page_offset);
+
+	/**
+	 * Read/write VRAM buffers for ptrace access
+	 *
Is there any reason for making this specific to VRAM? ttm_bo_vm_access could just call this for anything except TTM_PL_SYSTEM / TTM_PL_TT, and let the driver return an error if it can't handle the memory type.
+	 * @bo: the BO to access
+	 * @offset: the offset from the start of the BO
+	 * @buf: pointer to source/destination buffer
+	 * @len: number of bytes to copy
+	 * @write: whether to read (0) from or write (non-0) to BO
+	 */
The meaning of the return value should also be documented here.
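For example, something along these lines (wording is just a suggestion):

	 * Returns the number of bytes read/written on success,
	 * or a negative error code on failure.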
+	int (*access_vram)(struct ttm_buffer_object *bo, unsigned long offset,
+			   void *buf, int len, int write);
 };
I suggest making the write parameter a bool.
On 17-07-13 11:26 PM, Michel Dänzer wrote:
On 14/07/17 06:08 AM, Felix Kuehling wrote:
Allows gdb to access contents of user mode mapped BOs. VRAM access requires the driver to implement a new callback in ttm_bo_driver.
Thanks for doing this. Looks mostly good, but I still have some comments.
The shortlog prefix should be "drm/ttm:".
diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c
index 9f53df9..0ef2eb9 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
@@ -294,10 +294,74 @@ static void ttm_bo_vm_close(struct vm_area_struct *vma)
 	vma->vm_private_data = NULL;
 }
+static int ttm_bo_vm_access_kmap(struct ttm_buffer_object *bo,
+				 unsigned long offset,
+				 void *buf, int len, int write)
+{
+	unsigned long first_page = offset >> PAGE_SHIFT;
+	unsigned long last_page = (offset + len - 1) >> PAGE_SHIFT;
+	unsigned long num_pages = last_page - first_page + 1;
+	struct ttm_bo_kmap_obj map;
+	void *ptr;
+	bool is_iomem;
+	int ret;
+
+	ret = ttm_bo_kmap(bo, first_page, num_pages, &map);
+	if (ret)
+		return ret;
Could there be cases (e.g. on 32-bit) where mapping all pages at once fails, but mapping one page at a time would work?
Maybe. I'm not really familiar with ttm_bo_kmap limitations on 32-bit. I guess the access would have to be really big. I think in practice GDB doesn't do very big accesses, so I'm not sure it's worth the trouble.
+	offset -= first_page << PAGE_SHIFT;
+	ptr = (uint8_t *)ttm_kmap_obj_virtual(&map, &is_iomem) + offset;
+	WARN_ON(is_iomem);
I suggest making this WARN_ON_ONCE, to avoid flooding dmesg if it ever triggers in practice.
 static struct ttm_buffer_object *ttm_bo_vm_lookup(struct ttm_bo_device *bdev,
diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
index 6bbd34d..6ce5094 100644
--- a/include/drm/ttm/ttm_bo_driver.h
+++ b/include/drm/ttm/ttm_bo_driver.h
@@ -471,6 +471,18 @@ struct ttm_bo_driver {
 	 */
 	unsigned long (*io_mem_pfn)(struct ttm_buffer_object *bo,
 				    unsigned long page_offset);
+
+	/**
+	 * Read/write VRAM buffers for ptrace access
+	 *
Is there any reason for making this specific to VRAM? ttm_bo_vm_access could just call this for anything except TTM_PL_SYSTEM / TTM_PL_TT, and let the driver return an error if it can't handle the memory type.
Good point. I'll change that.
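Something like this, I suppose (untested; access_memory is just a placeholder name, not an existing callback):

	switch (bo->mem.mem_type) {
	case TTM_PL_SYSTEM:
	case TTM_PL_TT:
		ret = ttm_bo_vm_access_kmap(bo, offset, buf, len, write);
		break;
	default:
		/* Let the driver handle VRAM and any driver-private
		 * memory types; it returns an error if it can't.
		 */
		if (bo->bdev->driver->access_memory)
			ret = bo->bdev->driver->access_memory(
				bo, offset, buf, len, write);
		else
			ret = -EIO;
	}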
+	 * @bo: the BO to access
+	 * @offset: the offset from the start of the BO
+	 * @buf: pointer to source/destination buffer
+	 * @len: number of bytes to copy
+	 * @write: whether to read (0) from or write (non-0) to BO
+	 */
The meaning of the return value should also be documented here.
+	int (*access_vram)(struct ttm_buffer_object *bo, unsigned long offset,
+			   void *buf, int len, int write);
 };
I suggest making the write parameter a bool.
I made this as similar as possible to the vm_ops->access API, where write is also an integer.
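For reference, the corresponding hook in vm_operations_struct is:

	int (*access)(struct vm_area_struct *vma, unsigned long addr,
		      void *buf, int len, int write);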
Regards, Felix
On 15/07/17 04:47 AM, Felix Kuehling wrote:
On 17-07-13 11:26 PM, Michel Dänzer wrote:
On 14/07/17 06:08 AM, Felix Kuehling wrote:
Allows gdb to access contents of user mode mapped BOs. VRAM access requires the driver to implement a new callback in ttm_bo_driver.
Thanks for doing this. Looks mostly good, but I still have some comments.
The shortlog prefix should be "drm/ttm:".
diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c
index 9f53df9..0ef2eb9 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
@@ -294,10 +294,74 @@ static void ttm_bo_vm_close(struct vm_area_struct *vma)
 	vma->vm_private_data = NULL;
 }
+static int ttm_bo_vm_access_kmap(struct ttm_buffer_object *bo,
+				 unsigned long offset,
+				 void *buf, int len, int write)
+{
+	unsigned long first_page = offset >> PAGE_SHIFT;
+	unsigned long last_page = (offset + len - 1) >> PAGE_SHIFT;
+	unsigned long num_pages = last_page - first_page + 1;
+	struct ttm_bo_kmap_obj map;
+	void *ptr;
+	bool is_iomem;
+	int ret;
+
+	ret = ttm_bo_kmap(bo, first_page, num_pages, &map);
+	if (ret)
+		return ret;
Could there be cases (e.g. on 32-bit) where mapping all pages at once fails, but mapping one page at a time would work?
Maybe. I'm not really familiar with ttm_bo_kmap limitations on 32-bit. I guess the access would have to be really big. I think in practice GDB doesn't do very big accesses, so I'm not sure it's worth the trouble.
Okay, I guess it can always be changed later if necessary.
+	int (*access_vram)(struct ttm_buffer_object *bo, unsigned long offset,
+			   void *buf, int len, int write);
 };
I suggest making the write parameter a bool.
I made this as similar as possible to the vm_ops->access API, where write is also an integer.
I see, makes sense.
On 15.07.2017 at 05:32, Michel Dänzer wrote:
On 15/07/17 04:47 AM, Felix Kuehling wrote:
On 17-07-13 11:26 PM, Michel Dänzer wrote:
On 14/07/17 06:08 AM, Felix Kuehling wrote:
Allows gdb to access contents of user mode mapped BOs. VRAM access requires the driver to implement a new callback in ttm_bo_driver.
Thanks for doing this. Looks mostly good, but I still have some comments.
The shortlog prefix should be "drm/ttm:".
diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c
index 9f53df9..0ef2eb9 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
@@ -294,10 +294,74 @@ static void ttm_bo_vm_close(struct vm_area_struct *vma)
 	vma->vm_private_data = NULL;
 }
+static int ttm_bo_vm_access_kmap(struct ttm_buffer_object *bo,
+				 unsigned long offset,
+				 void *buf, int len, int write)
+{
+	unsigned long first_page = offset >> PAGE_SHIFT;
+	unsigned long last_page = (offset + len - 1) >> PAGE_SHIFT;
+	unsigned long num_pages = last_page - first_page + 1;
+	struct ttm_bo_kmap_obj map;
+	void *ptr;
+	bool is_iomem;
+	int ret;
+
+	ret = ttm_bo_kmap(bo, first_page, num_pages, &map);
+	if (ret)
+		return ret;
Could there be cases (e.g. on 32-bit) where mapping all pages at once fails, but mapping one page at a time would work?
Maybe. I'm not really familiar with ttm_bo_kmap limitations on 32-bit. I guess the access would have to be really big. I think in practice GDB doesn't do very big accesses, so I'm not sure it's worth the trouble.
Okay, I guess it can always be changed later if necessary.
It would still be better to do this page by page.
See, the issue is that when you map only one page, ttm_bo_kmap() is clever and returns a pointer directly to that page.

But when you map at least two pages, ttm_bo_kmap() needs to allocate virtual address space, map the pages and flush the TLBs (which is a very heavy operation) just to make those two pages look contiguous to the CPU.
Not that I expect that this function is performance critical, but that is complexity I would try to avoid.
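Something like this completely untested sketch is what I mean, mapping a single page per iteration:

	static int ttm_bo_vm_access_kmap(struct ttm_buffer_object *bo,
					 unsigned long offset,
					 void *buf, int len, int write)
	{
		struct ttm_bo_kmap_obj map;
		bool is_iomem;
		int ret = 0;

		while (len) {
			unsigned long page = offset >> PAGE_SHIFT;
			unsigned long page_off = offset & ~PAGE_MASK;
			int bytes = min_t(int, len, PAGE_SIZE - page_off);
			uint8_t *ptr;
			int r;

			/* Mapping a single page avoids the vmap + TLB-flush
			 * path that multi-page ttm_bo_kmap() has to take.
			 */
			r = ttm_bo_kmap(bo, page, 1, &map);
			if (r)
				return r;

			ptr = (uint8_t *)ttm_kmap_obj_virtual(&map, &is_iomem) + page_off;
			WARN_ON_ONCE(is_iomem);
			if (write)
				memcpy(ptr, buf, bytes);
			else
				memcpy(buf, ptr, bytes);
			ttm_bo_kunmap(&map);

			offset += bytes;
			buf = (uint8_t *)buf + bytes;
			len -= bytes;
			ret += bytes;
		}

		return ret;
	}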
+	int (*access_vram)(struct ttm_buffer_object *bo, unsigned long offset,
+			   void *buf, int len, int write);
 };
I suggest making the write parameter a bool.
I made this as similar as possible to the vm_ops->access API, where write is also an integer.
I see, makes sense.
Yeah, agree as well. Please keep the style of the upstream interface here.
Christian.
On 13.07.2017 at 23:08, Felix Kuehling wrote:
Allows gdb to access contents of user mode mapped BOs. VRAM access requires the driver to implement a new callback in ttm_bo_driver.
One more comment in addition to what Michel already wrote below; apart from that it looks good to me.
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
---
 drivers/gpu/drm/ttm/ttm_bo_vm.c | 66 ++++++++++++++++++++++++++++++++++++++++-
 include/drm/ttm/ttm_bo_driver.h | 12 ++++++++
 2 files changed, 77 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c
index 9f53df9..0ef2eb9 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
@@ -294,10 +294,74 @@ static void ttm_bo_vm_close(struct vm_area_struct *vma)
 	vma->vm_private_data = NULL;
 }
+static int ttm_bo_vm_access_kmap(struct ttm_buffer_object *bo,
+				 unsigned long offset,
+				 void *buf, int len, int write)
+{
+	unsigned long first_page = offset >> PAGE_SHIFT;
+	unsigned long last_page = (offset + len - 1) >> PAGE_SHIFT;
+	unsigned long num_pages = last_page - first_page + 1;
+	struct ttm_bo_kmap_obj map;
+	void *ptr;
+	bool is_iomem;
+	int ret;
+
+	ret = ttm_bo_kmap(bo, first_page, num_pages, &map);
+	if (ret)
+		return ret;
+
+	offset -= first_page << PAGE_SHIFT;
+	ptr = (uint8_t *)ttm_kmap_obj_virtual(&map, &is_iomem) + offset;
+	WARN_ON(is_iomem);
+	if (write)
+		memcpy(ptr, buf, len);
+	else
+		memcpy(buf, ptr, len);
+	ttm_bo_kunmap(&map);
+
+	return len;
+}
+static int ttm_bo_vm_access(struct vm_area_struct *vma, unsigned long addr,
+			    void *buf, int len, int write)
+{
+	unsigned long offset = (addr) - vma->vm_start;
+	struct ttm_buffer_object *bo = vma->vm_private_data;
+	int ret;
+
+	if (len < 1 || (offset + len) >> PAGE_SHIFT > bo->num_pages)
+		return -EIO;
+
+	ret = ttm_bo_reserve(bo, true, false, NULL);
+	if (ret)
+		return ret;
+
+	switch(bo->mem.mem_type) {
+	case TTM_PL_SYSTEM:
When the BO is in the system domain you need to add this as well I think:
	if (unlikely(bo->ttm->page_flags & TTM_PAGE_FLAG_SWAPPED)) {
		ret = ttm_tt_swapin(bo->ttm);
		if (unlikely(ret != 0))
			return ret;
	}
Regards, Christian.
+	case TTM_PL_TT:
+		ret = ttm_bo_vm_access_kmap(bo, offset, buf, len, write);
+		break;
+	case TTM_PL_VRAM:
+		if (bo->bdev->driver->access_vram)
+			ret = bo->bdev->driver->access_vram(
+				bo, offset, buf, len, write);
+		else
+			ret = -EIO;
+		break;
+	default:
+		ret = -EIO;
+	}
+
+	ttm_bo_unreserve(bo);
+
+	return ret;
+}
+
 static const struct vm_operations_struct ttm_bo_vm_ops = {
 	.fault = ttm_bo_vm_fault,
 	.open = ttm_bo_vm_open,
-	.close = ttm_bo_vm_close
+	.close = ttm_bo_vm_close,
+	.access = ttm_bo_vm_access
 };
static struct ttm_buffer_object *ttm_bo_vm_lookup(struct ttm_bo_device *bdev,
diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
index 6bbd34d..6ce5094 100644
--- a/include/drm/ttm/ttm_bo_driver.h
+++ b/include/drm/ttm/ttm_bo_driver.h
@@ -471,6 +471,18 @@ struct ttm_bo_driver {
 	 */
 	unsigned long (*io_mem_pfn)(struct ttm_buffer_object *bo,
 				    unsigned long page_offset);
+
+	/**
+	 * Read/write VRAM buffers for ptrace access
+	 *
+	 * @bo: the BO to access
+	 * @offset: the offset from the start of the BO
+	 * @buf: pointer to source/destination buffer
+	 * @len: number of bytes to copy
+	 * @write: whether to read (0) from or write (non-0) to BO
+	 */
+	int (*access_vram)(struct ttm_buffer_object *bo, unsigned long offset,
+			   void *buf, int len, int write);
 };
/**
On 17-07-14 06:06 AM, Christian König wrote:
On 13.07.2017 at 23:08, Felix Kuehling wrote:
Allows gdb to access contents of user mode mapped BOs. VRAM access requires the driver to implement a new callback in ttm_bo_driver.
One more comment additionally to what Michel already wrote below, apart from that it looks good to me.
+	switch(bo->mem.mem_type) {
+	case TTM_PL_SYSTEM:
When the BO is in the system domain you need to add this as well I think:
	if (unlikely(bo->ttm->page_flags & TTM_PAGE_FLAG_SWAPPED)) {
		ret = ttm_tt_swapin(bo->ttm);
		if (unlikely(ret != 0))
			return ret;
	}
OK, thanks for pointing that out.
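I'll fold it in roughly like this (untested), using a break instead of a return so the BO still gets unreserved on the error path:

	switch(bo->mem.mem_type) {
	case TTM_PL_SYSTEM:
		if (unlikely(bo->ttm->page_flags & TTM_PAGE_FLAG_SWAPPED)) {
			ret = ttm_tt_swapin(bo->ttm);
			if (unlikely(ret != 0))
				break;	/* fall out to ttm_bo_unreserve() */
		}
		/* fall through */
	case TTM_PL_TT:
		ret = ttm_bo_vm_access_kmap(bo, offset, buf, len, write);
		break;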
Regards, Felix
Regards, Christian.