This is useful for the next patch. Also, should we only unmap the number of entries that we actually mapped with the dma-api?
Signed-off-by: Gurchetan Singh gurchetansingh@chromium.org --- drivers/gpu/drm/drm_gem_shmem_helper.c | 16 +++++++++++----- include/drm/drm_gem_shmem_helper.h | 10 ++++++++++ 2 files changed, 21 insertions(+), 5 deletions(-)
diff --git a/drivers/gpu/drm/drm_gem_shmem_helper.c b/drivers/gpu/drm/drm_gem_shmem_helper.c index 0a7e3b664bc2..d439074ad7b5 100644 --- a/drivers/gpu/drm/drm_gem_shmem_helper.c +++ b/drivers/gpu/drm/drm_gem_shmem_helper.c @@ -124,8 +124,10 @@ void drm_gem_shmem_free_object(struct drm_gem_object *obj) drm_prime_gem_destroy(obj, shmem->sgt); } else { if (shmem->sgt) { - dma_unmap_sg(obj->dev->dev, shmem->sgt->sgl, - shmem->sgt->nents, DMA_BIDIRECTIONAL); + if (!shmem->skip_dma_api) + dma_unmap_sg(obj->dev->dev, shmem->sgt->sgl, + shmem->dma_map_count, + DMA_BIDIRECTIONAL); sg_free_table(shmem->sgt); kfree(shmem->sgt); } @@ -422,8 +424,9 @@ void drm_gem_shmem_purge_locked(struct drm_gem_object *obj)
WARN_ON(!drm_gem_shmem_is_purgeable(shmem));
- dma_unmap_sg(obj->dev->dev, shmem->sgt->sgl, - shmem->sgt->nents, DMA_BIDIRECTIONAL); + if (!shmem->skip_dma_api) + dma_unmap_sg(obj->dev->dev, shmem->sgt->sgl, + shmem->dma_map_count, DMA_BIDIRECTIONAL); sg_free_table(shmem->sgt); kfree(shmem->sgt); shmem->sgt = NULL; @@ -695,7 +698,10 @@ struct sg_table *drm_gem_shmem_get_pages_sgt(struct drm_gem_object *obj) goto err_put_pages; } /* Map the pages for use by the h/w. */ - dma_map_sg(obj->dev->dev, sgt->sgl, sgt->nents, DMA_BIDIRECTIONAL); + if (!shmem->skip_dma_api) + shmem->dma_map_count = dma_map_sg(obj->dev->dev, sgt->sgl, + sgt->nents, + DMA_BIDIRECTIONAL);
shmem->sgt = sgt;
diff --git a/include/drm/drm_gem_shmem_helper.h b/include/drm/drm_gem_shmem_helper.h index 5381f0c8cf6f..2669d87cbfdd 100644 --- a/include/drm/drm_gem_shmem_helper.h +++ b/include/drm/drm_gem_shmem_helper.h @@ -101,6 +101,16 @@ struct drm_gem_shmem_object { * @map_cached: map object cached (instead of using writecombine). */ bool map_cached; + + /** + * @skip_dma_api: skip using dma api in certain places. + */ + bool skip_dma_api; + + /** + * @skip_dma_api: number of pages mapped by dma-api. + */ + bool dma_map_count; };
#define to_drm_gem_shmem_obj(obj) \
Fixes a double-free regression:
[ 4.357928] drm_gem_shmem_free_object+0xb4/0x100 [ 4.358983] virtio_gpu_dequeue_ctrl_func+0xd9/0x290 [ 4.360343] process_one_work+0x1d2/0x3a0 [ 4.361581] worker_thread+0x45/0x3c0 [ 4.362645] kthread+0xf6/0x130 [ 4.363543] ? process_one_work+0x3a0/0x3a0 [ 4.364770] ? kthread_park+0x80/0x80 [ 4.365799] ret_from_fork+0x35/0x40 [ 4.367103] Modules linked in: [ 4.367958] CR2: 0000000000000018 [ 4.368857] ---[ end trace db84f7a2974d5c79 ]--- [ 4.370118] RIP: 0010:dma_direct_unmap_sg+0x1f/0x60
We can also go back to the prior state as well.
Fixes: d323bb44e4d2 ("drm/virtio: Call the right shmem helpers") Signed-off-by: Gurchetan Singh gurchetansingh@chromium.org --- drivers/gpu/drm/virtio/virtgpu_drv.h | 1 - drivers/gpu/drm/virtio/virtgpu_object.c | 25 ++++++------------------- drivers/gpu/drm/virtio/virtgpu_vq.c | 12 ++++++------ 3 files changed, 12 insertions(+), 26 deletions(-)
diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.h b/drivers/gpu/drm/virtio/virtgpu_drv.h index 49bebdee6d91..66af5ea1304b 100644 --- a/drivers/gpu/drm/virtio/virtgpu_drv.h +++ b/drivers/gpu/drm/virtio/virtgpu_drv.h @@ -78,7 +78,6 @@ struct virtio_gpu_object { struct virtio_gpu_object_shmem { struct virtio_gpu_object base; struct sg_table *pages; - uint32_t mapped; };
#define to_virtio_gpu_shmem(virtio_gpu_object) \ diff --git a/drivers/gpu/drm/virtio/virtgpu_object.c b/drivers/gpu/drm/virtio/virtgpu_object.c index 346cef5ce251..ec42c5532910 100644 --- a/drivers/gpu/drm/virtio/virtgpu_object.c +++ b/drivers/gpu/drm/virtio/virtgpu_object.c @@ -69,16 +69,7 @@ void virtio_gpu_cleanup_object(struct virtio_gpu_object *bo) virtio_gpu_resource_id_put(vgdev, bo->hw_res_handle); if (virtio_gpu_is_shmem(bo)) { struct virtio_gpu_object_shmem *shmem = to_virtio_gpu_shmem(bo); - if (shmem->pages) { - if (shmem->mapped) { - dma_unmap_sg(vgdev->vdev->dev.parent, - shmem->pages->sgl, shmem->mapped, - DMA_TO_DEVICE); - shmem->mapped = 0; - } - - sg_free_table(shmem->pages); shmem->pages = NULL; drm_gem_shmem_unpin(&bo->base.base); } @@ -123,6 +114,7 @@ bool virtio_gpu_is_shmem(struct virtio_gpu_object *bo) struct drm_gem_object *virtio_gpu_create_object(struct drm_device *dev, size_t size) { + struct virtio_gpu_device *vgdev = dev->dev_private; struct virtio_gpu_object_shmem *shmem; struct drm_gem_shmem_object *dshmem;
@@ -133,6 +125,7 @@ struct drm_gem_object *virtio_gpu_create_object(struct drm_device *dev, dshmem = &shmem->base.base; dshmem->base.funcs = &virtio_gpu_shmem_funcs; dshmem->map_cached = true; + dshmem->skip_dma_api = virtio_has_iommu_quirk(vgdev->vdev); return &dshmem->base; }
@@ -141,7 +134,6 @@ static int virtio_gpu_object_shmem_init(struct virtio_gpu_device *vgdev, struct virtio_gpu_mem_entry **ents, unsigned int *nents) { - bool use_dma_api = !virtio_has_iommu_quirk(vgdev->vdev); struct virtio_gpu_object_shmem *shmem = to_virtio_gpu_shmem(bo); struct scatterlist *sg; int si, ret; @@ -156,15 +148,10 @@ static int virtio_gpu_object_shmem_init(struct virtio_gpu_device *vgdev, return -EINVAL; }
- if (use_dma_api) { - shmem->mapped = dma_map_sg(vgdev->vdev->dev.parent, - shmem->pages->sgl, - shmem->pages->nents, - DMA_TO_DEVICE); - *nents = shmem->mapped; - } else { + if (!bo->base.skip_dma_api) + *nents = bo->base.dma_map_count; + else *nents = shmem->pages->nents; - }
*ents = kmalloc_array(*nents, sizeof(struct virtio_gpu_mem_entry), GFP_KERNEL); @@ -174,7 +161,7 @@ static int virtio_gpu_object_shmem_init(struct virtio_gpu_device *vgdev, }
for_each_sg(shmem->pages->sgl, sg, *nents, si) { - (*ents)[si].addr = cpu_to_le64(use_dma_api + (*ents)[si].addr = cpu_to_le64(!bo->base.skip_dma_api ? sg_dma_address(sg) : sg_phys(sg)); (*ents)[si].length = cpu_to_le32(sg->length); diff --git a/drivers/gpu/drm/virtio/virtgpu_vq.c b/drivers/gpu/drm/virtio/virtgpu_vq.c index 9e663a5d9952..117e4aa69ae5 100644 --- a/drivers/gpu/drm/virtio/virtgpu_vq.c +++ b/drivers/gpu/drm/virtio/virtgpu_vq.c @@ -599,12 +599,12 @@ void virtio_gpu_cmd_transfer_to_host_2d(struct virtio_gpu_device *vgdev, struct virtio_gpu_object *bo = gem_to_virtio_gpu_obj(objs->objs[0]); struct virtio_gpu_transfer_to_host_2d *cmd_p; struct virtio_gpu_vbuffer *vbuf; - bool use_dma_api = !virtio_has_iommu_quirk(vgdev->vdev); struct virtio_gpu_object_shmem *shmem = to_virtio_gpu_shmem(bo);
- if (use_dma_api) + if (!bo->base.skip_dma_api) dma_sync_sg_for_device(vgdev->vdev->dev.parent, - shmem->pages->sgl, shmem->pages->nents, + shmem->pages->sgl, + bo->base.dma_map_count, DMA_TO_DEVICE);
cmd_p = virtio_gpu_alloc_cmd(vgdev, &vbuf, sizeof(*cmd_p)); @@ -1015,12 +1015,12 @@ void virtio_gpu_cmd_transfer_to_host_3d(struct virtio_gpu_device *vgdev, struct virtio_gpu_object *bo = gem_to_virtio_gpu_obj(objs->objs[0]); struct virtio_gpu_transfer_host_3d *cmd_p; struct virtio_gpu_vbuffer *vbuf; - bool use_dma_api = !virtio_has_iommu_quirk(vgdev->vdev); struct virtio_gpu_object_shmem *shmem = to_virtio_gpu_shmem(bo);
- if (use_dma_api) + if (!bo->base.skip_dma_api) dma_sync_sg_for_device(vgdev->vdev->dev.parent, - shmem->pages->sgl, shmem->pages->nents, + shmem->pages->sgl, + bo->base.dma_map_count, DMA_TO_DEVICE);
cmd_p = virtio_gpu_alloc_cmd(vgdev, &vbuf, sizeof(*cmd_p));
Hi
Am 12.06.20 um 03:36 schrieb Gurchetan Singh:
This is useful for the next patch. Also, should we only unmap the amount entries that we mapped with the dma-api?
It looks like you're moving virtio code into shmem. It would be nice to have a cover letter explaining the series.
Signed-off-by: Gurchetan Singh gurchetansingh@chromium.org
drivers/gpu/drm/drm_gem_shmem_helper.c | 16 +++++++++++----- include/drm/drm_gem_shmem_helper.h | 10 ++++++++++ 2 files changed, 21 insertions(+), 5 deletions(-)
diff --git a/drivers/gpu/drm/drm_gem_shmem_helper.c b/drivers/gpu/drm/drm_gem_shmem_helper.c index 0a7e3b664bc2..d439074ad7b5 100644 --- a/drivers/gpu/drm/drm_gem_shmem_helper.c +++ b/drivers/gpu/drm/drm_gem_shmem_helper.c @@ -124,8 +124,10 @@ void drm_gem_shmem_free_object(struct drm_gem_object *obj) drm_prime_gem_destroy(obj, shmem->sgt); } else { if (shmem->sgt) {
dma_unmap_sg(obj->dev->dev, shmem->sgt->sgl,
shmem->sgt->nents, DMA_BIDIRECTIONAL);
if (!shmem->skip_dma_api)
dma_unmap_sg(obj->dev->dev, shmem->sgt->sgl,
shmem->dma_map_count,
DMA_BIDIRECTIONAL); sg_free_table(shmem->sgt); kfree(shmem->sgt); }
@@ -422,8 +424,9 @@ void drm_gem_shmem_purge_locked(struct drm_gem_object *obj)
WARN_ON(!drm_gem_shmem_is_purgeable(shmem));
- dma_unmap_sg(obj->dev->dev, shmem->sgt->sgl,
shmem->sgt->nents, DMA_BIDIRECTIONAL);
- if (!shmem->skip_dma_api)
dma_unmap_sg(obj->dev->dev, shmem->sgt->sgl, shmem->dma_map_count, DMA_BIDIRECTIONAL);
sg_free_table(shmem->sgt); kfree(shmem->sgt); shmem->sgt = NULL;
@@ -695,7 +698,10 @@ struct sg_table *drm_gem_shmem_get_pages_sgt(struct drm_gem_object *obj) goto err_put_pages; } /* Map the pages for use by the h/w. */
- dma_map_sg(obj->dev->dev, sgt->sgl, sgt->nents, DMA_BIDIRECTIONAL);
if (!shmem->skip_dma_api)
shmem->dma_map_count = dma_map_sg(obj->dev->dev, sgt->sgl,
sgt->nents,
DMA_BIDIRECTIONAL);
shmem->sgt = sgt;
diff --git a/include/drm/drm_gem_shmem_helper.h b/include/drm/drm_gem_shmem_helper.h index 5381f0c8cf6f..2669d87cbfdd 100644 --- a/include/drm/drm_gem_shmem_helper.h +++ b/include/drm/drm_gem_shmem_helper.h @@ -101,6 +101,16 @@ struct drm_gem_shmem_object { * @map_cached: map object cached (instead of using writecombine). */ bool map_cached;
- /**
* @skip_dma_api: skip using dma api in certain places.
*/
- bool skip_dma_api;
This looks like an under-documented workaround for something.
- /**
* @skip_dma_api: number of pages mapped by dma-api.
*/
- bool dma_map_count;
The documentation comment doesn't match the field name.
Best regards Thomas
};
#define to_drm_gem_shmem_obj(obj) \
On Fri, Jun 12, 2020 at 11:47:55AM +0200, Thomas Zimmermann wrote:
Hi
Am 12.06.20 um 03:36 schrieb Gurchetan Singh:
This is useful for the next patch. Also, should we only unmap the amount entries that we mapped with the dma-api?
It looks like you're moving virtio code into shmem.
Well, not really.
virtio has -- for historical reasons -- the oddity that it may or may not need to dma_map resources, depending on device configuration. Initially virtio went with "it's just a vm, lets simply operate on physical ram addresses". That shortcut turned out to be a bad idea later on, especially with the arrival of iommu emulation support in qemu. But we couldn't just scratch it for backward compatibility reasons. See virtio_has_iommu_quirk().
This just allows to enable/disable dma_map, I guess to fix some fallout from recent shmem helper changes (Gurchetan, that kind of stuff should be mentioned in cover letter and commit messages).
I'm not sure virtio actually needs that patch though. I *think* doing the dma_map+dma_unmap unconditionally, but then ignore the result in case we don't need it should work. And it shouldn't be a horrible performance hit either, in case we don't have a (virtual) iommu in the VM dma mapping is essentially a nop ...
take care, Gerd
On Fri, Jun 12, 2020 at 12:16 PM Gerd Hoffmann kraxel@redhat.com wrote:
On Fri, Jun 12, 2020 at 11:47:55AM +0200, Thomas Zimmermann wrote:
Hi
Am 12.06.20 um 03:36 schrieb Gurchetan Singh:
This is useful for the next patch. Also, should we only unmap the amount entries that we mapped with the dma-api?
It looks like you're moving virtio code into shmem.
Well, not really.
virtio has -- for historical reasons -- the oddity that it may or may not need to dma_map resources, depending on device configuration. Initially virtio went with "it's just a vm, lets simply operate on physical ram addresses". That shortcut turned out to be a bad idea later on, especially with the arrival of iommu emulation support in qemu. But we couldn't just scratch it for backward compatibility reasons. See virtio_has_iommu_quirk().
This just allows to enable/disable dma_map, I guess to fix some fallout from recent shmem helper changes (Gurchetan, that kind of stuff should be mentioned in cover letter and commit messages).
I'm not sure virtio actually needs that patch though. I *think* doing the dma_map+dma_unmap unconditionally, but then ignore the result in case we don't need it should work. And it shouldn't be a horrible performance hit either, in case we don't have a (virtual) iommu in the VM dma mapping is essentially a nop ...
Yeah that sounds a lot more like a clean solution, instead of adding random flags and stuff all over helpers for each edge-case. The sgtable still has the struct pages, so just picking the right one in virtio code seems a lot cleaner separation of concerns. -Daniel
take care, Gerd
On Fri, Jun 12, 2020 at 3:17 AM Gerd Hoffmann kraxel@redhat.com wrote:
On Fri, Jun 12, 2020 at 11:47:55AM +0200, Thomas Zimmermann wrote:
Hi
Am 12.06.20 um 03:36 schrieb Gurchetan Singh:
This is useful for the next patch. Also, should we only unmap the amount entries that we mapped with the dma-api?
It looks like you're moving virtio code into shmem.
Well, not really.
virtio has -- for historical reasons -- the oddity that it may or may not need to dma_map resources, depending on device configuration. Initially virtio went with "it's just a vm, lets simply operate on physical ram addresses". That shortcut turned out to be a bad idea later on, especially with the arrival of iommu emulation support in qemu. But we couldn't just scratch it for backward compatibility reasons. See virtio_has_iommu_quirk().
This just allows to enable/disable dma_map, I guess to fix some fallout from recent shmem helper changes
Yes, the main goal is to fix the double free.
diff --git a/drivers/gpu/drm/virtio/virtgpu_object.c b/drivers/gpu/drm/virtio/virtgpu_object.c index 346cef5ce251..2f7b6cd25a4b 100644 --- a/drivers/gpu/drm/virtio/virtgpu_object.c +++ b/drivers/gpu/drm/virtio/virtgpu_object.c @@ -78,7 +78,6 @@ void virtio_gpu_cleanup_object(struct virtio_gpu_object *bo) shmem->mapped = 0; }
- sg_free_table(shmem->pages); shmem->pages = NULL; drm_gem_shmem_unpin(&bo->base.base); }
Doing only that on it's own causes log spam though
[ 10.368385] virtio_gpu virtio5: swiotlb buffer is full (sz: 4096 bytes), total 0 (slots), used 0 (slots) [ 10.384957] virtio_gpu virtio5: swiotlb buffer is full (sz: 4096 bytes), total 0 (slots), used 0 (slots) [ 10.389920] virtio_gpu virtio5: swiotlb buffer is full (sz: 4096 bytes), total 0 (slots), used 0 (slots) [ 10.396859] virtio_gpu virtio5: swiotlb buffer is full (sz: 4096 bytes), total 0 (slots), used 0 (slots) [ 10.401954] virtio_gpu virtio5: swiotlb buffer is full (sz: 4096 bytes), total 0 (slots), used 0 (slots) [ 10.406694] virtio_gpu virtio5: swiotlb buffer is full (sz: 8192 bytes), total 0 (slots), used 0 (slots) [ 10.495744] virtio_gpu virtio5: swiotlb buffer is full (sz: 4096 bytes), total 0 (slots), used 0 (slots)
Plus, I just realized the virtio dma ops and the ops used by drm shmem are different, so virtio would unconditionally have to skip the shmem path. Perhaps the best policy is to revert d323bb44e4d2?
(Gurchetan, that kind of stuff should be mentioned in cover letter and commit messages).
Good tip.
I'm not sure virtio actually needs that patch though. I *think* doing the dma_map+dma_unmap unconditionally, but then ignore the result in case we don't need it should work. And it shouldn't be a horrible performance hit either, in case we don't have a (virtual) iommu in the VM dma mapping is essentially a nop ...
take care, Gerd
Hi
Am 12.06.20 um 20:54 schrieb Gurchetan Singh:
On Fri, Jun 12, 2020 at 3:17 AM Gerd Hoffmann kraxel@redhat.com wrote:
On Fri, Jun 12, 2020 at 11:47:55AM +0200, Thomas Zimmermann wrote:
Hi
Am 12.06.20 um 03:36 schrieb Gurchetan Singh:
This is useful for the next patch. Also, should we only unmap the amount entries that we mapped with the dma-api?
It looks like you're moving virtio code into shmem.
Well, not really.
virtio has -- for historical reasons -- the oddity that it may or may not need to dma_map resources, depending on device configuration. Initially virtio went with "it's just a vm, lets simply operate on physical ram addresses". That shortcut turned out to be a bad idea later on, especially with the arrival of iommu emulation support in qemu. But we couldn't just scratch it for backward compatibility reasons. See virtio_has_iommu_quirk().
This just allows to enable/disable dma_map, I guess to fix some fallout from recent shmem helper changes
Yes, the main goal is to fix the double free.
diff --git a/drivers/gpu/drm/virtio/virtgpu_object.c b/drivers/gpu/drm/virtio/virtgpu_object.c index 346cef5ce251..2f7b6cd25a4b 100644 --- a/drivers/gpu/drm/virtio/virtgpu_object.c +++ b/drivers/gpu/drm/virtio/virtgpu_object.c @@ -78,7 +78,6 @@ void virtio_gpu_cleanup_object(struct virtio_gpu_object *bo) shmem->mapped = 0; }
sg_free_table(shmem->pages); shmem->pages = NULL; drm_gem_shmem_unpin(&bo->base.base); }
Doing only that on it's own causes log spam though
[ 10.368385] virtio_gpu virtio5: swiotlb buffer is full (sz: 4096 bytes), total 0 (slots), used 0 (slots) [ 10.384957] virtio_gpu virtio5: swiotlb buffer is full (sz: 4096 bytes), total 0 (slots), used 0 (slots) [ 10.389920] virtio_gpu virtio5: swiotlb buffer is full (sz: 4096 bytes), total 0 (slots), used 0 (slots) [ 10.396859] virtio_gpu virtio5: swiotlb buffer is full (sz: 4096 bytes), total 0 (slots), used 0 (slots) [ 10.401954] virtio_gpu virtio5: swiotlb buffer is full (sz: 4096 bytes), total 0 (slots), used 0 (slots) [ 10.406694] virtio_gpu virtio5: swiotlb buffer is full (sz: 8192 bytes), total 0 (slots), used 0 (slots) [ 10.495744] virtio_gpu virtio5: swiotlb buffer is full (sz: 4096 bytes), total 0 (slots), used 0 (slots)
Plus, I just realized the virtio dma ops and ops used by drm shmem are different, so virtio would have to unconditionally have to skip the shmem path. Perhaps the best policy is to revert d323bb44e4d2?
Can you fork the shmem code into virtio and modify it according to your needs? I know that code splitting is unpopular, but at least it's a clean solution. For some GEM object functions, you might still be able to share code with shmem helpers.
Best regards Thomas
(Gurchetan, that kind of stuff should be mentioned in cover letter and commit messages).
Good tip.
I'm not sure virtio actually needs that patch though. I *think* doing the dma_map+dma_unmap unconditionally, but then ignore the result in case we don't need it should work. And it shouldn't be a horrible performance hit either, in case we don't have a (virtual) iommu in the VM dma mapping is essentially a nop ...
take care, Gerd
On Fri, Jun 12, 2020 at 11:54:54AM -0700, Gurchetan Singh wrote:
Plus, I just realized the virtio dma ops and ops used by drm shmem are different, so virtio would have to unconditionally have to skip the shmem path. Perhaps the best policy is to revert d323bb44e4d2?
Reverting d323bb44e4d2 should work given that virtio-gpu doesn't support dma-buf imports, but when doing so please add a comment to the code explaining why virtio-gpu handles this different than everybody else.
thanks, Gerd
On Mon, Jun 15, 2020 at 12:21 AM Gerd Hoffmann kraxel@redhat.com wrote:
On Fri, Jun 12, 2020 at 11:54:54AM -0700, Gurchetan Singh wrote:
Plus, I just realized the virtio dma ops and ops used by drm shmem are different, so virtio would have to unconditionally have to skip the shmem path. Perhaps the best policy is to revert d323bb44e4d2?
Reverting d323bb44e4d2 should work given that virtio-gpu doesn't support dma-buf imports, but when doing so please add a comment to the code explaining why virtio-gpu handles this different than everybody else.
Done -- sent out "drm/virtio: Revert "drm/virtio: Call the right shmem helpers" ".
thanks, Gerd
dri-devel@lists.freedesktop.org