Hi, Konrad,
Is passing a struct device to the DMA api really *strictly* necessary?
I'd like to avoid that at all cost, since we don't want pages that are backing buffer objects (coherent pages) to be associated with a specific device.
The reason for this is that we probably soon will want to move ttm buffer objects between devices, and that should ideally be a simple operation: If the memory type the buffer object currently resides in is not shared between two devices, then move it out to system memory and change its struct bo_device pointer.
If pages are associated with a specific device, this will become much harder. Basically we need to change backing pages and copy all the data.
/Thomas
On 03/08/2011 04:39 PM, Konrad Rzeszutek Wilk wrote:
These two patches are not strictly required but they do a bit of cleanup and also fix a false reporting issue when the kernel is compiled with CONFIG_DEBUG_DMA_API.
Essentially the patchset modifies 'struct ttm_tt' and 'struct ttm_bo_device' to contain a pointer to 'struct device'. Passing of the 'struct device' is done via the 'ttm_bo_device_init'. The 'struct device' is then used during the allocation/deallocation of pages when TTM_PAGE_FLAG_DMA32 flag is set.
At first it seems that we would need to keep a copy of 'struct device' in the struct ttm_bo_device only and use that. However, when 'ttm_tt_destroy' is called it sets ttm->be (which contains the 'struct ttm_bo_device') to NULL so we cannot use it. Hence we copy the 'struct device' pointer to the 'struct ttm_tt' and keep it there.
I've put the patches on:
git://git.kernel.org/pub/scm/linux/kernel/git/konrad/xen.git devel/ttm.pci-api.v5
The testing was a bit difficult b/c I could not persuade the machine to unload either the nouveau or radeon driver (I had a serial console hooked so I figured it would fallback to that?). Ended up calling 'unbind' on the /sys/pci/.../$BDF/driver/[radeon,nouveau] to trigger the de-allocation path.
Thomas,
What is the best way to test vmw-gfx*? Does the Workstation (5?) support this particular frontend?
Konrad Rzeszutek Wilk (2): ttm: Include the 'struct dev' when using the DMA API. ttm: Pass in 'struct device' to TTM so it can do DMA API on behalf of device.
drivers/gpu/drm/nouveau/nouveau_mem.c | 3 ++- drivers/gpu/drm/radeon/radeon_ttm.c | 3 ++- drivers/gpu/drm/ttm/ttm_bo.c | 4 +++- drivers/gpu/drm/ttm/ttm_page_alloc.c | 11 ++++++----- drivers/gpu/drm/ttm/ttm_tt.c | 5 +++-- drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 4 ++-- include/drm/ttm/ttm_bo_driver.h | 7 ++++++- include/drm/ttm/ttm_page_alloc.h | 8 ++++++-- 8 files changed, 30 insertions(+), 15 deletions(-)
The full diff:
diff --git a/drivers/gpu/drm/nouveau/nouveau_mem.c b/drivers/gpu/drm/nouveau/nouveau_mem.c index 26347b7..3706156 100644 --- a/drivers/gpu/drm/nouveau/nouveau_mem.c +++ b/drivers/gpu/drm/nouveau/nouveau_mem.c @@ -412,7 +412,8 @@ nouveau_mem_vram_init(struct drm_device *dev) ret = ttm_bo_device_init(&dev_priv->ttm.bdev, dev_priv->ttm.bo_global_ref.ref.object, &nouveau_bo_driver, DRM_FILE_PAGE_OFFSET,
dma_bits<= 32 ? true : false);
dma_bits<= 32 ? true : false,
if (ret) { NV_ERROR(dev, "Error initialising bo driver: %d\n", ret); return ret;dev->dev);
diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c index 9ead11f..371890c 100644 --- a/drivers/gpu/drm/radeon/radeon_ttm.c +++ b/drivers/gpu/drm/radeon/radeon_ttm.c @@ -517,7 +517,8 @@ int radeon_ttm_init(struct radeon_device *rdev) r = ttm_bo_device_init(&rdev->mman.bdev, rdev->mman.bo_global_ref.ref.object, &radeon_bo_driver, DRM_FILE_PAGE_OFFSET,
rdev->need_dma32);
rdev->need_dma32,
if (r) { DRM_ERROR("failed initializing buffer object driver(%d).\n", r); return r;rdev->dev);
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index af61fc2..278a2d3 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -1526,12 +1526,14 @@ int ttm_bo_device_init(struct ttm_bo_device *bdev, struct ttm_bo_global *glob, struct ttm_bo_driver *driver, uint64_t file_page_offset,
bool need_dma32)
bool need_dma32,
struct device *dev)
{ int ret = -EINVAL;
rwlock_init(&bdev->vm_lock); bdev->driver = driver;
bdev->dev = dev;
memset(bdev->man, 0, sizeof(bdev->man));
diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc.c b/drivers/gpu/drm/ttm/ttm_page_alloc.c index 737a2a2..35849db 100644 --- a/drivers/gpu/drm/ttm/ttm_page_alloc.c +++ b/drivers/gpu/drm/ttm/ttm_page_alloc.c @@ -664,7 +664,7 @@ out: */ int ttm_get_pages(struct list_head *pages, int flags, enum ttm_caching_state cstate, unsigned count,
dma_addr_t *dma_address)
{ struct ttm_page_pool *pool = ttm_get_pool(flags, cstate); struct page *p = NULL;dma_addr_t *dma_address, struct device *dev)
@@ -685,7 +685,7 @@ int ttm_get_pages(struct list_head *pages, int flags, for (r = 0; r< count; ++r) { if ((flags& TTM_PAGE_FLAG_DMA32)&& dma_address) { void *addr;
addr = dma_alloc_coherent(NULL, PAGE_SIZE,
addr = dma_alloc_coherent(dev, PAGE_SIZE, &dma_address[r], gfp_flags); if (addr == NULL)
@@ -730,7 +730,7 @@ int ttm_get_pages(struct list_head *pages, int flags, printk(KERN_ERR TTM_PFX "Failed to allocate extra pages " "for large request.");
ttm_put_pages(pages, 0, flags, cstate, NULL);
} }ttm_put_pages(pages, 0, flags, cstate, NULL, NULL); return r;
@@ -741,7 +741,8 @@ int ttm_get_pages(struct list_head *pages, int flags,
/* Put all pages in pages list to correct pool to wait for reuse */ void ttm_put_pages(struct list_head *pages, unsigned page_count, int flags,
enum ttm_caching_state cstate, dma_addr_t *dma_address)
enum ttm_caching_state cstate, dma_addr_t *dma_address,
{ unsigned long irq_flags; struct ttm_page_pool *pool = ttm_get_pool(flags, cstate);struct device *dev)
@@ -757,7 +758,7 @@ void ttm_put_pages(struct list_head *pages, unsigned page_count, int flags, void *addr = page_address(p); WARN_ON(!addr || !dma_address[r]); if (addr)
dma_free_coherent(NULL, PAGE_SIZE,
dma_free_coherent(dev, PAGE_SIZE, addr, dma_address[r]); dma_address[r] = 0;
diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c index 86d5b17..354f9d9 100644 --- a/drivers/gpu/drm/ttm/ttm_tt.c +++ b/drivers/gpu/drm/ttm/ttm_tt.c @@ -110,7 +110,7 @@ static struct page *__ttm_tt_get_page(struct ttm_tt *ttm, int index) INIT_LIST_HEAD(&h);
ret = ttm_get_pages(&h, ttm->page_flags, ttm->caching_state, 1,
&ttm->dma_address[index]);
&ttm->dma_address[index], ttm->dev);
if (ret != 0) return NULL;
@@ -304,7 +304,7 @@ static void ttm_tt_free_alloced_pages(struct ttm_tt *ttm) } } ttm_put_pages(&h, count, ttm->page_flags, ttm->caching_state,
ttm->dma_address);
ttm->state = tt_unpopulated; ttm->first_himem_page = ttm->num_pages; ttm->last_lomem_page = -1;ttm->dma_address, ttm->dev);
@@ -397,6 +397,7 @@ struct ttm_tt *ttm_tt_create(struct ttm_bo_device *bdev, unsigned long size, ttm->last_lomem_page = -1; ttm->caching_state = tt_cached; ttm->page_flags = page_flags;
ttm->dev = bdev->dev;
ttm->dummy_read_page = dummy_read_page;
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index 10ca97e..803d979 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -322,11 +322,11 @@ static int vmw_driver_load(struct drm_device *dev, unsigned long chipset) ttm_lock_set_kill(&dev_priv->fbdev_master.lock, false, SIGTERM); dev_priv->active_master =&dev_priv->fbdev_master;
- ret = ttm_bo_device_init(&dev_priv->bdev, dev_priv->bo_global_ref.ref.object, &vmw_bo_driver, VMWGFX_FILE_PAGE_OFFSET,
false);
false,
if (unlikely(ret != 0)) { DRM_ERROR("Failed initializing TTM buffer object driver.\n"); goto out_err1;dev->dev);
diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h index efed082..2024a74 100644 --- a/include/drm/ttm/ttm_bo_driver.h +++ b/include/drm/ttm/ttm_bo_driver.h @@ -177,6 +177,7 @@ struct ttm_tt { tt_unpopulated, } state; dma_addr_t *dma_address;
struct device *dev; };
#define TTM_MEMTYPE_FLAG_FIXED (1<< 0) /* Fixed (on-card) PCI memory */
@@ -551,6 +552,7 @@ struct ttm_bo_device { struct list_head device_list; struct ttm_bo_global *glob; struct ttm_bo_driver *driver;
- struct device *dev; rwlock_t vm_lock; struct ttm_mem_type_manager man[TTM_NUM_MEM_TYPES]; spinlock_t fence_lock;
@@ -791,6 +793,8 @@ extern int ttm_bo_device_release(struct ttm_bo_device *bdev);
- @file_page_offset: Offset into the device address space that is available
- for buffer data. This ensures compatibility with other users of the
- address space.
- @need_dma32: Allocate pages under 4GB
- @dev: 'struct device' of the PCI device.
- Initializes a struct ttm_bo_device:
- Returns:
@@ -799,7 +803,8 @@ extern int ttm_bo_device_release(struct ttm_bo_device *bdev); extern int ttm_bo_device_init(struct ttm_bo_device *bdev, struct ttm_bo_global *glob, struct ttm_bo_driver *driver,
uint64_t file_page_offset, bool need_dma32);
uint64_t file_page_offset, bool need_dma32,
struct device *dev);
/**
- ttm_bo_unmap_virtual
diff --git a/include/drm/ttm/ttm_page_alloc.h b/include/drm/ttm/ttm_page_alloc.h index 8062890..ccb6b7a 100644 --- a/include/drm/ttm/ttm_page_alloc.h +++ b/include/drm/ttm/ttm_page_alloc.h @@ -37,12 +37,14 @@
- @cstate: ttm caching state for the page.
- @count: number of pages to allocate.
- @dma_address: The DMA (bus) address of pages (if TTM_PAGE_FLAG_DMA32 set).
*/ int ttm_get_pages(struct list_head *pages, int flags, enum ttm_caching_state cstate, unsigned count,
- @dev: struct device for appropriate DMA accounting.
dma_addr_t *dma_address);
dma_addr_t *dma_address,
/**struct device *dev);
- Put linked list of pages to pool.
@@ -52,12 +54,14 @@ int ttm_get_pages(struct list_head *pages,
- @flags: ttm flags for page allocation.
- @cstate: ttm caching state.
- @dma_address: The DMA (bus) address of pages (if TTM_PAGE_FLAG_DMA32 set).
*/ void ttm_put_pages(struct list_head *pages, unsigned page_count, int flags, enum ttm_caching_state cstate,
- @dev: struct device for appropriate DMA accounting.
dma_addr_t *dma_address);
dma_addr_t *dma_address,
/**struct device *dev);
*/
- Initialize pool allocator.