v5: rebase, add tags, add cc stable for 1+2, no code changes. v4: back to v2-ish design, but simplified a bit. v3: switch to drm_gem_object_funcs callback. v2: make shmem helper caching configurable.
Gerd Hoffmann (3): drm/shmem: add support for per object caching flags. drm/virtio: fix mmap page attributes drm/udl: simplify gem object mapping.
include/drm/drm_gem_shmem_helper.h | 5 ++ drivers/gpu/drm/drm_gem_shmem_helper.c | 15 ++++-- drivers/gpu/drm/udl/udl_gem.c | 62 ++----------------------- drivers/gpu/drm/virtio/virtgpu_object.c | 1 + 4 files changed, 20 insertions(+), 63 deletions(-)
Add map_cached bool to drm_gem_shmem_object, to request cached mappings on a per-object base. Check the flag before adding writecombine to pgprot bits.
Cc: stable@vger.kernel.org Signed-off-by: Gerd Hoffmann kraxel@redhat.com --- include/drm/drm_gem_shmem_helper.h | 5 +++++ drivers/gpu/drm/drm_gem_shmem_helper.c | 15 +++++++++++---- 2 files changed, 16 insertions(+), 4 deletions(-)
diff --git a/include/drm/drm_gem_shmem_helper.h b/include/drm/drm_gem_shmem_helper.h index e34a7b7f848a..294b2931c4cc 100644 --- a/include/drm/drm_gem_shmem_helper.h +++ b/include/drm/drm_gem_shmem_helper.h @@ -96,6 +96,11 @@ struct drm_gem_shmem_object { * The address are un-mapped when the count reaches zero. */ unsigned int vmap_use_count; + + /** + * @map_cached: map object cached (instead of using writecombine). + */ + bool map_cached; };
#define to_drm_gem_shmem_obj(obj) \ diff --git a/drivers/gpu/drm/drm_gem_shmem_helper.c b/drivers/gpu/drm/drm_gem_shmem_helper.c index a421a2eed48a..aad9324dcf4f 100644 --- a/drivers/gpu/drm/drm_gem_shmem_helper.c +++ b/drivers/gpu/drm/drm_gem_shmem_helper.c @@ -254,11 +254,16 @@ static void *drm_gem_shmem_vmap_locked(struct drm_gem_shmem_object *shmem) if (ret) goto err_zero_use;
- if (obj->import_attach) + if (obj->import_attach) { shmem->vaddr = dma_buf_vmap(obj->import_attach->dmabuf); - else + } else { + pgprot_t prot = PAGE_KERNEL; + + if (!shmem->map_cached) + prot = pgprot_writecombine(prot); shmem->vaddr = vmap(shmem->pages, obj->size >> PAGE_SHIFT, - VM_MAP, pgprot_writecombine(PAGE_KERNEL)); + VM_MAP, prot); + }
if (!shmem->vaddr) { DRM_DEBUG_KMS("Failed to vmap pages\n"); @@ -540,7 +545,9 @@ int drm_gem_shmem_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma) }
vma->vm_flags |= VM_MIXEDMAP | VM_DONTEXPAND; - vma->vm_page_prot = pgprot_writecombine(vm_get_page_prot(vma->vm_flags)); + vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); + if (!shmem->map_cached) + vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); vma->vm_page_prot = pgprot_decrypted(vma->vm_page_prot); vma->vm_ops = &drm_gem_shmem_vm_ops;
-----Original Message----- From: Gerd Hoffmann kraxel@redhat.com Sent: 26 February 2020 16:48 To: dri-devel@lists.freedesktop.org Cc: tzimmermann@suse.de; gurchetansingh@chromium.org; olvaffe@gmail.com; Guillaume Gardet Guillaume.Gardet@arm.com; Gerd Hoffmann kraxel@redhat.com; stable@vger.kernel.org; Maarten Lankhorst maarten.lankhorst@linux.intel.com; Maxime Ripard mripard@kernel.org; David Airlie airlied@linux.ie; Daniel Vetter daniel@ffwll.ch; open list <linux- kernel@vger.kernel.org> Subject: [PATCH v5 1/3] drm/shmem: add support for per object caching flags.
Add map_cached bool to drm_gem_shmem_object, to request cached mappings on a per-object base. Check the flag before adding writecombine to pgprot bits.
Cc: stable@vger.kernel.org Signed-off-by: Gerd Hoffmann kraxel@redhat.com
Tested-by: Guillaume Gardet Guillaume.Gardet@arm.com
include/drm/drm_gem_shmem_helper.h | 5 +++++ drivers/gpu/drm/drm_gem_shmem_helper.c | 15 +++++++++++---- 2 files changed, 16 insertions(+), 4 deletions(-)
diff --git a/include/drm/drm_gem_shmem_helper.h b/include/drm/drm_gem_shmem_helper.h index e34a7b7f848a..294b2931c4cc 100644 --- a/include/drm/drm_gem_shmem_helper.h +++ b/include/drm/drm_gem_shmem_helper.h @@ -96,6 +96,11 @@ struct drm_gem_shmem_object {
- The address are un-mapped when the count reaches zero.
*/ unsigned int vmap_use_count;
+/**
- @map_cached: map object cached (instead of using writecombine).
- */
+bool map_cached; };
#define to_drm_gem_shmem_obj(obj) \ diff --git a/drivers/gpu/drm/drm_gem_shmem_helper.c b/drivers/gpu/drm/drm_gem_shmem_helper.c index a421a2eed48a..aad9324dcf4f 100644 --- a/drivers/gpu/drm/drm_gem_shmem_helper.c +++ b/drivers/gpu/drm/drm_gem_shmem_helper.c @@ -254,11 +254,16 @@ static void *drm_gem_shmem_vmap_locked(struct drm_gem_shmem_object *shmem) if (ret) goto err_zero_use;
-if (obj->import_attach) +if (obj->import_attach) { shmem->vaddr = dma_buf_vmap(obj->import_attach->dmabuf); -else +} else { +pgprot_t prot = PAGE_KERNEL;
+if (!shmem->map_cached) +prot = pgprot_writecombine(prot); shmem->vaddr = vmap(shmem->pages, obj->size >> PAGE_SHIFT,
-				    VM_MAP, pgprot_writecombine(PAGE_KERNEL));
+				    VM_MAP, prot);
+}
if (!shmem->vaddr) { DRM_DEBUG_KMS("Failed to vmap pages\n"); @@ -540,7 +545,9 @@ int drm_gem_shmem_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma) }
	vma->vm_flags |= VM_MIXEDMAP | VM_DONTEXPAND;
-	vma->vm_page_prot = pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
+	vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
+	if (!shmem->map_cached)
+		vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
vma->vm_page_prot = pgprot_decrypted(vma->vm_page_prot); vma->vm_ops = &drm_gem_shmem_vm_ops;
-- 2.18.2
IMPORTANT NOTICE: The contents of this email and any attachments are confidential and may also be privileged. If you are not the intended recipient, please notify the sender immediately and do not disclose the contents to any other person, use it for any purpose, or store or copy the information in any medium. Thank you.
Hi, Gerd,
While looking at this patchset I came across some stuff that seems strange but that was merged in a previous patchset.
(please refer to https://lists.freedesktop.org/archives/dri-devel/2018-September/190001.html. Forgive me if I've missed any discussion leading up to this).
On 2/26/20 4:47 PM, Gerd Hoffmann wrote:
Add map_cached bool to drm_gem_shmem_object, to request cached mappings on a per-object base. Check the flag before adding writecombine to pgprot bits.
Cc: stable@vger.kernel.org Signed-off-by: Gerd Hoffmann kraxel@redhat.com
include/drm/drm_gem_shmem_helper.h | 5 +++++ drivers/gpu/drm/drm_gem_shmem_helper.c | 15 +++++++++++---- 2 files changed, 16 insertions(+), 4 deletions(-)
diff --git a/include/drm/drm_gem_shmem_helper.h b/include/drm/drm_gem_shmem_helper.h index e34a7b7f848a..294b2931c4cc 100644 --- a/include/drm/drm_gem_shmem_helper.h +++ b/include/drm/drm_gem_shmem_helper.h @@ -96,6 +96,11 @@ struct drm_gem_shmem_object { * The address are un-mapped when the count reaches zero. */ unsigned int vmap_use_count;
/**
* @map_cached: map object cached (instead of using writecombine).
*/
bool map_cached; };
#define to_drm_gem_shmem_obj(obj) \
diff --git a/drivers/gpu/drm/drm_gem_shmem_helper.c b/drivers/gpu/drm/drm_gem_shmem_helper.c index a421a2eed48a..aad9324dcf4f 100644 --- a/drivers/gpu/drm/drm_gem_shmem_helper.c +++ b/drivers/gpu/drm/drm_gem_shmem_helper.c @@ -254,11 +254,16 @@ static void *drm_gem_shmem_vmap_locked(struct drm_gem_shmem_object *shmem) if (ret) goto err_zero_use;
- if (obj->import_attach)
- if (obj->import_attach) { shmem->vaddr = dma_buf_vmap(obj->import_attach->dmabuf);
- else
- } else {
pgprot_t prot = PAGE_KERNEL;
		if (!shmem->map_cached)
			prot = pgprot_writecombine(prot);
		shmem->vaddr = vmap(shmem->pages, obj->size >> PAGE_SHIFT,
-				    VM_MAP, pgprot_writecombine(PAGE_KERNEL));
+				    VM_MAP, prot);
Wouldn't a vmap with pgprot_writecombine() create conflicting mappings with the linear kernel map which is not write-combined? Or do you change the linear kernel map of the shmem pages somewhere? vmap bypasses at least the x86 PAT core mapping consistency check and this could potentially cause spuriously overwritten memory.
}
if (!shmem->vaddr) { DRM_DEBUG_KMS("Failed to vmap pages\n");
@@ -540,7 +545,9 @@ int drm_gem_shmem_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma) }
vma->vm_flags |= VM_MIXEDMAP | VM_DONTEXPAND;
- vma->vm_page_prot = pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
- vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
- if (!shmem->map_cached)
vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
Same thing here. Note that vmf_insert_page() which is used by the fault handler also bypasses the x86 PAT consistency check, whereas vmf_insert_mixed() doesn't.
vma->vm_page_prot = pgprot_decrypted(vma->vm_page_prot);
At least with SME or SEV encryption, where shmem memory has its kernel map set to encrypted, creating conflicting mappings is explicitly disallowed. BTW, why is mmap mapping decrypted while vmap isn't?
vma->vm_ops = &drm_gem_shmem_vm_ops;
Thanks, Thomas
On Wed, Feb 26, 2020 at 10:25 AM Thomas Hellström (VMware) thomas_os@shipmail.org wrote:
Hi, Gerd,
While looking at this patchset I came across some stuff that seems strange but that was merged in a previous patchset.
(please refer to https://lists.freedesktop.org/archives/dri-devel/2018-September/190001.html. Forgive me if I've missed any discussion leading up to this).
On 2/26/20 4:47 PM, Gerd Hoffmann wrote:
Add map_cached bool to drm_gem_shmem_object, to request cached mappings on a per-object base. Check the flag before adding writecombine to pgprot bits.
Cc: stable@vger.kernel.org Signed-off-by: Gerd Hoffmann kraxel@redhat.com
include/drm/drm_gem_shmem_helper.h | 5 +++++ drivers/gpu/drm/drm_gem_shmem_helper.c | 15 +++++++++++---- 2 files changed, 16 insertions(+), 4 deletions(-)
diff --git a/include/drm/drm_gem_shmem_helper.h b/include/drm/drm_gem_shmem_helper.h index e34a7b7f848a..294b2931c4cc 100644 --- a/include/drm/drm_gem_shmem_helper.h +++ b/include/drm/drm_gem_shmem_helper.h @@ -96,6 +96,11 @@ struct drm_gem_shmem_object { * The address are un-mapped when the count reaches zero. */ unsigned int vmap_use_count;
/**
* @map_cached: map object cached (instead of using writecombine).
*/
bool map_cached;
};
#define to_drm_gem_shmem_obj(obj) \
diff --git a/drivers/gpu/drm/drm_gem_shmem_helper.c b/drivers/gpu/drm/drm_gem_shmem_helper.c index a421a2eed48a..aad9324dcf4f 100644 --- a/drivers/gpu/drm/drm_gem_shmem_helper.c +++ b/drivers/gpu/drm/drm_gem_shmem_helper.c @@ -254,11 +254,16 @@ static void *drm_gem_shmem_vmap_locked(struct drm_gem_shmem_object *shmem) if (ret) goto err_zero_use;
if (obj->import_attach)
if (obj->import_attach) { shmem->vaddr = dma_buf_vmap(obj->import_attach->dmabuf);
else
} else {
pgprot_t prot = PAGE_KERNEL;
if (!shmem->map_cached)
prot = pgprot_writecombine(prot); shmem->vaddr = vmap(shmem->pages, obj->size >> PAGE_SHIFT,
VM_MAP, pgprot_writecombine(PAGE_KERNEL));
VM_MAP, prot);
Wouldn't a vmap with pgprot_writecombine() create conflicting mappings with the linear kernel map which is not write-combined? Or do you change the linear kernel map of the shmem pages somewhere? vmap bypasses at least the x86 PAT core mapping consistency check and this could potentially cause spuriously overwritten memory.
Yeah, I think this creates a conflicting alias. It seems a call to set_pages_array_wc here or changes elsewhere is needed..
But this is a pre-existing issue in the shmem helper. There is also no universal fix (e.g., set_pages_array_wc is x86 only)? I would hope this series can be merged sooner to fix the regression first.
} if (!shmem->vaddr) { DRM_DEBUG_KMS("Failed to vmap pages\n");
@@ -540,7 +545,9 @@ int drm_gem_shmem_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma) }
vma->vm_flags |= VM_MIXEDMAP | VM_DONTEXPAND;
vma->vm_page_prot = pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
if (!shmem->map_cached)
vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
Same thing here. Note that vmf_insert_page() which is used by the fault handler also bypasses the x86 PAT consistency check, whereas vmf_insert_mixed() doesn't.
vma->vm_page_prot = pgprot_decrypted(vma->vm_page_prot);
At least with SME or SEV encryption, where shmem memory has its kernel map set to encrypted, creating conflicting mappings is explicitly disallowed. BTW, why is mmap mapping decrypted while vmap isn't?
vma->vm_ops = &drm_gem_shmem_vm_ops;
Thanks, Thomas
dri-devel mailing list dri-devel@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/dri-devel
On 2/27/20 1:02 AM, Chia-I Wu wrote:
On Wed, Feb 26, 2020 at 10:25 AM Thomas Hellström (VMware) thomas_os@shipmail.org wrote:
Hi, Gerd,
#define to_drm_gem_shmem_obj(obj) \ diff --git a/drivers/gpu/drm/drm_gem_shmem_helper.c b/drivers/gpu/drm/drm_gem_shmem_helper.c index a421a2eed48a..aad9324dcf4f 100644 --- a/drivers/gpu/drm/drm_gem_shmem_helper.c +++ b/drivers/gpu/drm/drm_gem_shmem_helper.c @@ -254,11 +254,16 @@ static void *drm_gem_shmem_vmap_locked(struct drm_gem_shmem_object *shmem) if (ret) goto err_zero_use;
if (obj->import_attach)
if (obj->import_attach) { shmem->vaddr = dma_buf_vmap(obj->import_attach->dmabuf);
else
} else {
pgprot_t prot = PAGE_KERNEL;
if (!shmem->map_cached)
prot = pgprot_writecombine(prot); shmem->vaddr = vmap(shmem->pages, obj->size >> PAGE_SHIFT,
VM_MAP, pgprot_writecombine(PAGE_KERNEL));
VM_MAP, prot);
Wouldn't a vmap with pgprot_writecombine() create conflicting mappings with the linear kernel map which is not write-combined? Or do you change the linear kernel map of the shmem pages somewhere? vmap bypasses at least the x86 PAT core mapping consistency check and this could potentially cause spuriously overwritten memory.
Yeah, I think this creates a conflicting alias. It seems a call to set_pages_array_wc here or changes elsewhere is needed..
But this is a pre-existing issue in the shmem helper. There is also no universal fix (e.g., set_pages_array_wc is x86 only)? I would hope this series can be merged sooner to fix the regression first.
The problem is this isn't doable with shmem, since that would create interesting problems when pages are swapped out.
So I would argue that the correct fix is to revert commit 0be895893607f ("drm/shmem: switch shmem helper to &drm_gem_object_funcs.mmap")
If some drivers can argue that in their particular environment it's safe to use writecombine in this way, then modifying the page protection should be moved out to those drivers documenting that assumption. Using pgprot_decrypted() in this way could never be safe.
But IMO this should never be part of generic helpers.
/Thomas
Hi,
		if (!shmem->map_cached)
			prot = pgprot_writecombine(prot);
		shmem->vaddr = vmap(shmem->pages, obj->size >> PAGE_SHIFT,
-				    VM_MAP, pgprot_writecombine(PAGE_KERNEL));
+				    VM_MAP, prot);
Wouldn't a vmap with pgprot_writecombine() create conflicting mappings with the linear kernel map which is not write-combined?
I think so, yes.
Or do you change the linear kernel map of the shmem pages somewhere?
Haven't seen anything doing so while browsing the code.
vmap bypasses at least the x86 PAT core mapping consistency check and this could potentially cause spuriously overwritten memory.
Well, I don't think the linear kernel map is ever used to access the shmem gem objects. So while this isn't exactly clean it shouldn't cause problems in practice.
Suggestions how to fix that?
The reason I need cachable gem object mappings for virtio-gpu is because we have a inconsistency between host (cached) and guest (wc) otherwise.
- } if (!shmem->vaddr) { DRM_DEBUG_KMS("Failed to vmap pages\n");
@@ -540,7 +545,9 @@ int drm_gem_shmem_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma) } vma->vm_flags |= VM_MIXEDMAP | VM_DONTEXPAND;
- vma->vm_page_prot = pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
- vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
- if (!shmem->map_cached)
vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
Same thing here. Note that vmf_insert_page() which is used by the fault handler also bypasses the x86 PAT consistency check, whereas vmf_insert_mixed() doesn't.
vmap + mmap are consistent though, so this likewise shouldn't cause issues in practice.
vma->vm_page_prot = pgprot_decrypted(vma->vm_page_prot);
At least with SME or SEV encryption, where shmem memory has its kernel map set to encrypted, creating conflicting mappings is explicitly disallowed. BTW, why is mmap mapping decrypted while vmap isn't?
Ok, that sounds like a real problem. Have to check.
cheers, Gerd
PS: Given we are discussing pre-existing issues in the code I think the series can be merged nevertheless.
On 2/27/20 8:53 AM, Gerd Hoffmann wrote:
Hi,
if (!shmem->map_cached)
prot = pgprot_writecombine(prot); shmem->vaddr = vmap(shmem->pages, obj->size >> PAGE_SHIFT,
VM_MAP, pgprot_writecombine(PAGE_KERNEL));
VM_MAP, prot);
Wouldn't a vmap with pgprot_writecombine() create conflicting mappings with the linear kernel map which is not write-combined?
I think so, yes.
Or do you change the linear kernel map of the shmem pages somewhere?
Haven't seen anything doing so while browsing the code.
vmap bypasses at least the x86 PAT core mapping consistency check and this could potentially cause spuriously overwritten memory.
Well, I don't think the linear kernel map is ever used to access the shmem gem objects. So while this isn't exactly clean it shouldn't cause problems in practice.
Suggestions how to fix that?
So this has historically caused problems since the linear kernel map has been accessed while prefetching, even if it's never used. Some processors like AMD athlon actually even wrote back the prefetched contents without ever using it.
Also the linear kernel map could be cached somewhere because of the page's previous usage. (hibernation for example?)
I think it might be safe for some integrated graphics where the driver maintainers can guarantee that it's safe on all particular processors used with that driver, but then IMO it should be moved out to those drivers.
Other drivers needing write-combine shouldn't really use shmem.
So again, to fix the regression, could we revert 0be895893607f ("drm/shmem: switch shmem helper to &drm_gem_object_funcs.mmap") or does that have other implications?
/Thomas
Hi,
I think it might be safe for some integrated graphics where the driver maintainers can guarantee that it's safe on all particular processors used with that driver, but then IMO it should be moved out to those drivers.
Other drivers needing write-combine shouldn't really use shmem.
So again, to fix the regression, could we revert 0be895893607f ("drm/shmem: switch shmem helper to &drm_gem_object_funcs.mmap") or does that have other implications?
This patch isn't a regression. The old code path has the pgprot_writecombine() call in drm_gem_mmap_obj(), so the behavior is the same before and afterwards.
But with the patch in place we can easily have shmem helpers do their own thing instead of depending on whatever drm_gem_mmap_obj() is doing. Just using cached mappings unconditionally would be perfectly fine for virtio-gpu.
Not sure about the other users though. I'd like to fix the virtio-gpu regression (coming from ttm -> shmem switch) asap, and I don't feel like changing the behavior for other drivers in 5.6-rc is a good idea.
So I'd like to push patches 1+2 to -fixes and sort everything else later in -next. OK?
cheers, Gerd
On 2/27/20 11:56 AM, Gerd Hoffmann wrote:
Hi,
I think it might be safe for some integrated graphics where the driver maintainers can guarantee that it's safe on all particular processors used with that driver, but then IMO it should be moved out to those drivers.
Other drivers needing write-combine shouldn't really use shmem.
So again, to fix the regression, could we revert 0be895893607f ("drm/shmem: switch shmem helper to &drm_gem_object_funcs.mmap") or does that have other implications?
This patch isn't a regression. The old code path has the pgprot_writecombine() call in drm_gem_mmap_obj(), so the behavior is the same before and afterwards.
OK. I wasn't checking where this all came from from the start...
But with the patch in place we can easily have shmem helpers do their own thing instead of depending on whatever drm_gem_mmap_obj() is doing. Just using cached mappings unconditionally would be perfectly fine for virtio-gpu.
Not sure about the other users though. I'd like to fix the virtio-gpu regression (coming from ttm -> shmem switch) asap, and I don't feel like changing the behavior for other drivers in 5.6-rc is a good idea.
So I'd like to push patches 1+2 to -fixes and sort everything else later in -next. OK?
OK with me. Do we have any idea what drivers are actually using write-combine and decrypted?
/Thomas
cheers, Gerd
Hi,
So I'd like to push patches 1+2 to -fixes and sort everything else later in -next. OK?
OK with me.
Done.
[ context: why shmem helpers use pgprot_writecombine + pgprot_decrypted? we get conflicting mappings because of that, linear kernel map vs. gem object vmap/mmap ]
Do we have any idea what drivers are actually using write-combine and decrypted?
drivers/gpu/drm# find -name Kconfig* -print | xargs grep -l DRM_GEM_SHMEM_HELPER ./lima/Kconfig ./tiny/Kconfig ./cirrus/Kconfig ./Kconfig ./panfrost/Kconfig ./udl/Kconfig ./v3d/Kconfig ./virtio/Kconfig
virtio needs cached. cirrus+udl should be ok with cached too.
No clue about the others (lima, tiny, panfrost, v3d). Maybe they use write-combine just because this is what they got by default from drm_gem_mmap_obj(). Maybe they actually need that. Trying to Cc: maintainers (and drop stable@).
On decrypted: I guess that is only relevant for virtual machines, i.e. virtio-gpu and cirrus?
virtio-gpu needs it, otherwise the host can't show the virtual display. cirrus bounces everything via blits to vram, so it should be ok without decrypted. I guess that implies we should make decrypted configurable.
cheers, Gerd
Hi,
On 2/27/20 2:21 PM, Gerd Hoffmann wrote:
Hi,
So I'd like to push patches 1+2 to -fixes and sort everything else later in -next. OK?
OK with me.
Done.
[ context: why shmem helpers use pgprot_writecombine + pgprot_decrypted? we get conflicting mappings because of that, linear kernel map vs. gem object vmap/mmap ]
Do we have any idea what drivers are actually using write-combine and decrypted?
drivers/gpu/drm# find -name Kconfig* -print | xargs grep -l DRM_GEM_SHMEM_HELPER ./lima/Kconfig ./tiny/Kconfig ./cirrus/Kconfig ./Kconfig ./panfrost/Kconfig ./udl/Kconfig ./v3d/Kconfig ./virtio/Kconfig
virtio needs cached. cirrus+udl should be ok with cached too.
No clue about the others (lima, tiny, panfrost, v3d). Maybe they use write-combine just because this is what they got by default from drm_gem_mmap_obj(). Maybe they actually need that. Trying to Cc: maintainers (and drop stable@).
On decrypted: I guess that is only relevant for virtual machines, i.e. virtio-gpu and cirrus?
virtio-gpu needs it, otherwise the host can't show the virtual display. cirrus bounces everything via blits to vram, so it should be ok without decrypted. I guess that implies we should make decrypted configurable.
Decrypted here is clearly incorrect and violates the SEV spec, regardless of a config option. The only correct way is currently to use dma_alloc_coherent() and mmap_coherent() to allocate decrypted memory and then use the pgprot_decrypted flag.
Since the same page is aliased with two physical addresses (one encrypted and one decrypted) switching memory from one to the other needs extensive handling even to flush stale vmaps...
So if virtio-gpu needs it for some bos, it should move away from shmem for those bos.
/Thomas
cheers, Gerd
On 2/27/20 2:44 PM, Thomas Hellström (VMware) wrote:
Hi,
On 2/27/20 2:21 PM, Gerd Hoffmann wrote:
Hi,
So I'd like to push patches 1+2 to -fixes and sort everything else later in -next. OK?
OK with me.
Done.
[ context: why shmem helpers use pgprot_writecombine + pgprot_decrypted? we get conflicting mappings because of that, linear kernel map vs. gem object vmap/mmap ]
Do we have any idea what drivers are actually using write-combine and decrypted?
drivers/gpu/drm# find -name Kconfig* -print | xargs grep -l DRM_GEM_SHMEM_HELPER ./lima/Kconfig ./tiny/Kconfig ./cirrus/Kconfig ./Kconfig ./panfrost/Kconfig ./udl/Kconfig ./v3d/Kconfig ./virtio/Kconfig
virtio needs cached. cirrus+udl should be ok with cached too.
No clue about the others (lima, tiny, panfrost, v3d). Maybe they use write-combine just because this is what they got by default from drm_gem_mmap_obj(). Maybe they actually need that. Trying to Cc: maintainers (and drop stable@).
On decrypted: I guess that is only relevant for virtual machines, i.e. virtio-gpu and cirrus?
virtio-gpu needs it, otherwise the host can't show the virtual display. cirrus bounces everything via blits to vram, so it should be ok without decrypted. I guess that implies we should make decrypted configurable.
Decrypted here is clearly incorrect and violates the SEV spec, regardless of a config option. The only correct way is currently to use dma_alloc_coherent() and mmap_coherent() to allocate decrypted memory and then use the pgprot_decrypted flag.
Since the same page is aliased with two physical addresses (one encrypted and one decrypted) switching memory from one to the other needs extensive handling even to flush stale vmaps...
So if virtio-gpu needs it for some bos, it should move away from shmem for those bos.
(But this is of course up to the virtio-gpu and drm maintainers), but IMO, again, pgprot_decrypted() shouldn't be part of generic helpers.
/Thomas
Hi,
No clue about the others (lima, tiny, panfrost, v3d). Maybe they use write-combine just because this is what they got by default from drm_gem_mmap_obj(). Maybe they actually need that. Trying to Cc: maintainers (and drop stable@).
virtio-gpu needs it, otherwise the host can't show the virtual display. cirrus bounces everything via blits to vram, so it should be ok without decrypted. I guess that implies we should make decrypted configurable.
Decrypted here is clearly incorrect and violates the SEV spec, regardless of a config option.
The only correct way is currently to use dma_alloc_coherent() and mmap_coherent() to allocate decrypted memory and then use the pgprot_decrypted flag.
Hmm, virtio-gpu uses the dma api to allow the host access the gem object. So I think I have to correct the statement above, if I understand things correctly the dma api will use (properly allocated) decrypted bounce buffers and the virtio-gpu shmem objects don't need pgprot_decrypted mappings.
That leaves the question what to do about pgprot_writecombine(). Any comments from the driver maintainers (see first paragraph)?
cheers, Gerd
On 2/28/20 10:49 AM, Gerd Hoffmann wrote:
Hi,
No clue about the others (lima, tiny, panfrost, v3d). Maybe they use write-combine just because this is what they got by default from drm_gem_mmap_obj(). Maybe they actually need that. Trying to Cc: maintainers (and drop stable@). virtio-gpu needs it, otherwise the host can't show the virtual display. cirrus bounces everything via blits to vram, so it should be ok without decrypted. I guess that implies we should make decrypted configurable.
Decrypted here is clearly incorrect and violates the SEV spec, regardless of a config option.
The only correct way is currently to use dma_alloc_coherent() and mmap_coherent() to allocate decrypted memory and then use the pgprot_decrypted flag.
Hmm, virtio-gpu uses the dma api to allow the host access the gem object. So I think I have to correct the statement above, if I understand things correctly the dma api will use (properly allocated) decrypted bounce buffers and the virtio-gpu shmem objects don't need pgprot_decrypted mappings.
Yes, that sounds more correct. I wonder whether the "pgprot_decrypted()" perhaps remains from mapping VRAM gem buffers...
/Thomas
That leaves the question what to do about pgprot_writecombine(). Any comments from the driver maintainers (see first paragraph)?
cheers, Gerd
On Fri, Feb 28, 2020 at 10:54:54AM +0100, Thomas Hellström (VMware) wrote:
On 2/28/20 10:49 AM, Gerd Hoffmann wrote:
Hi,
No clue about the others (lima, tiny, panfrost, v3d). Maybe they use write-combine just because this is what they got by default from drm_gem_mmap_obj(). Maybe they actually need that. Trying to Cc: maintainers (and drop stable@). virtio-gpu needs it, otherwise the host can't show the virtual display. cirrus bounces everything via blits to vram, so it should be ok without decrypted. I guess that implies we should make decrypted configurable.
Decrypted here is clearly incorrect and violates the SEV spec, regardless of a config option.
The only correct way is currently to use dma_alloc_coherent() and mmap_coherent() to allocate decrypted memory and then use the pgprot_decrypted flag.
Hmm, virtio-gpu uses the dma api to allow the host access the gem object. So I think I have to correct the statement above, if I understands things correctly the dma api will use (properly allocated) decrypted bounce buffers and the virtio-gpu shmem objects don't need pgprot_decrypted mappings.
Yes, that sounds more correct. I wonder whether the "pgprot_decrypted()" perhaps remains from mapping VRAM gem buffers...
Commit 95cf9264d5f3 ("x86, drm, fbdev: Do not specify encrypted memory for video mappings") added it, then it was kept through various changes.
cheers, Gerd
On Thu, Feb 27, 2020 at 9:21 PM Gerd Hoffmann kraxel@redhat.com wrote:
Hi,
So I'd like to push patches 1+2 to -fixes and sort everything else later in -next. OK?
OK with me.
Done.
[ context: why shmem helpers use pgprot_writecombine + pgprot_decrypted? we get conflicting mappings because of that, linear kernel map vs. gem object vmap/mmap ]
Do we have any idea what drivers are actually using write-combine and decrypted?
drivers/gpu/drm# find -name Kconfig* -print | xargs grep -l DRM_GEM_SHMEM_HELPER ./lima/Kconfig ./tiny/Kconfig ./cirrus/Kconfig ./Kconfig ./panfrost/Kconfig ./udl/Kconfig ./v3d/Kconfig ./virtio/Kconfig
virtio needs cached. cirrus+udl should be ok with cached too.
No clue about the others (lima, tiny, panfrost, v3d). Maybe they use write-combine just because this is what they got by default from drm_gem_mmap_obj(). Maybe they actually need that. Trying to Cc: maintainers (and drop stable@).
lima driver needs writecombine mapped buffers for GPU hardware to work properly since the CPU and GPU are not cache coherent. Although we haven't met problems caused by kernel/user map conflict, I do admit it's a problem, as I encountered it with amdgpu+arm before.
With TTM we can control page allocated and create a pool for writecombine pages, but seems shmem is not friendly with writecombine pages.
Thanks, Qiang
virtio-gpu uses cached mappings, set drm_gem_shmem_object.map_cached accordingly.
Cc: stable@vger.kernel.org Fixes: c66df701e783 ("drm/virtio: switch from ttm to gem shmem helpers") Reported-by: Gurchetan Singh gurchetansingh@chromium.org Reported-by: Guillaume Gardet Guillaume.Gardet@arm.com Signed-off-by: Gerd Hoffmann kraxel@redhat.com --- drivers/gpu/drm/virtio/virtgpu_object.c | 1 + 1 file changed, 1 insertion(+)
diff --git a/drivers/gpu/drm/virtio/virtgpu_object.c b/drivers/gpu/drm/virtio/virtgpu_object.c index 3d2a6d489bfc..59319435218f 100644 --- a/drivers/gpu/drm/virtio/virtgpu_object.c +++ b/drivers/gpu/drm/virtio/virtgpu_object.c @@ -119,6 +119,7 @@ struct drm_gem_object *virtio_gpu_create_object(struct drm_device *dev, return NULL;
bo->base.base.funcs = &virtio_gpu_gem_funcs; + bo->base.map_cached = true; return &bo->base.base; }
-----Original Message----- From: Gerd Hoffmann kraxel@redhat.com Sent: 26 February 2020 16:48 To: dri-devel@lists.freedesktop.org Cc: tzimmermann@suse.de; gurchetansingh@chromium.org; olvaffe@gmail.com; Guillaume Gardet Guillaume.Gardet@arm.com; Gerd Hoffmann kraxel@redhat.com; stable@vger.kernel.org; David Airlie airlied@linux.ie; Daniel Vetter daniel.vetter@ffwll.ch; open list:VIRTIO GPU DRIVER virtualization@lists.linux-foundation.org; open list <linux- kernel@vger.kernel.org> Subject: [PATCH v5 2/3] drm/virtio: fix mmap page attributes
virtio-gpu uses cached mappings, set drm_gem_shmem_object.map_cached accordingly.
Cc: stable@vger.kernel.org Fixes: c66df701e783 ("drm/virtio: switch from ttm to gem shmem helpers") Reported-by: Gurchetan Singh gurchetansingh@chromium.org Reported-by: Guillaume Gardet Guillaume.Gardet@arm.com Signed-off-by: Gerd Hoffmann kraxel@redhat.com
Tested-by: Guillaume Gardet Guillaume.Gardet@arm.com
drivers/gpu/drm/virtio/virtgpu_object.c | 1 + 1 file changed, 1 insertion(+)
diff --git a/drivers/gpu/drm/virtio/virtgpu_object.c b/drivers/gpu/drm/virtio/virtgpu_object.c index 3d2a6d489bfc..59319435218f 100644 --- a/drivers/gpu/drm/virtio/virtgpu_object.c +++ b/drivers/gpu/drm/virtio/virtgpu_object.c @@ -119,6 +119,7 @@ struct drm_gem_object *virtio_gpu_create_object(struct drm_device *dev, return NULL;
bo->base.base.funcs = &virtio_gpu_gem_funcs; +bo->base.map_cached = true; return &bo->base.base; }
-- 2.18.2
IMPORTANT NOTICE: The contents of this email and any attachments are confidential and may also be privileged. If you are not the intended recipient, please notify the sender immediately and do not disclose the contents to any other person, use it for any purpose, or store or copy the information in any medium. Thank you.
With shmem helpers allowing to update pgprot caching flags via drm_gem_shmem_object.map_cached we can just use that and ditch our own implementations of mmap() and vmap().
We also don't need a special case for imported objects, any map requests are handled by the exporter not udl.
Signed-off-by: Gerd Hoffmann kraxel@redhat.com --- drivers/gpu/drm/udl/udl_gem.c | 62 ++--------------------------------- 1 file changed, 3 insertions(+), 59 deletions(-)
diff --git a/drivers/gpu/drm/udl/udl_gem.c b/drivers/gpu/drm/udl/udl_gem.c index b6e26f98aa0a..7e3a88b25b6b 100644 --- a/drivers/gpu/drm/udl/udl_gem.c +++ b/drivers/gpu/drm/udl/udl_gem.c @@ -17,72 +17,15 @@ * GEM object funcs */
-static int udl_gem_object_mmap(struct drm_gem_object *obj, - struct vm_area_struct *vma) -{ - int ret; - - ret = drm_gem_shmem_mmap(obj, vma); - if (ret) - return ret; - - vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); - if (obj->import_attach) - vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); - vma->vm_page_prot = pgprot_decrypted(vma->vm_page_prot); - - return 0; -} - -static void *udl_gem_object_vmap(struct drm_gem_object *obj) -{ - struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj); - int ret; - - ret = mutex_lock_interruptible(&shmem->vmap_lock); - if (ret) - return ERR_PTR(ret); - - if (shmem->vmap_use_count++ > 0) - goto out; - - ret = drm_gem_shmem_get_pages(shmem); - if (ret) - goto err_zero_use; - - if (obj->import_attach) - shmem->vaddr = dma_buf_vmap(obj->import_attach->dmabuf); - else - shmem->vaddr = vmap(shmem->pages, obj->size >> PAGE_SHIFT, - VM_MAP, PAGE_KERNEL); - - if (!shmem->vaddr) { - DRM_DEBUG_KMS("Failed to vmap pages\n"); - ret = -ENOMEM; - goto err_put_pages; - } - -out: - mutex_unlock(&shmem->vmap_lock); - return shmem->vaddr; - -err_put_pages: - drm_gem_shmem_put_pages(shmem); -err_zero_use: - shmem->vmap_use_count = 0; - mutex_unlock(&shmem->vmap_lock); - return ERR_PTR(ret); -} - static const struct drm_gem_object_funcs udl_gem_object_funcs = { .free = drm_gem_shmem_free_object, .print_info = drm_gem_shmem_print_info, .pin = drm_gem_shmem_pin, .unpin = drm_gem_shmem_unpin, .get_sg_table = drm_gem_shmem_get_sg_table, - .vmap = udl_gem_object_vmap, + .vmap = drm_gem_shmem_vmap, .vunmap = drm_gem_shmem_vunmap, - .mmap = udl_gem_object_mmap, + .mmap = drm_gem_shmem_mmap, };
/* @@ -101,6 +44,7 @@ struct drm_gem_object *udl_driver_gem_create_object(struct drm_device *dev,
obj = &shmem->base; obj->funcs = &udl_gem_object_funcs; + shmem->map_cached = true;
return obj; }
On Wed, Feb 26, 2020 at 7:48 AM Gerd Hoffmann kraxel@redhat.com wrote:
v5: rebase, add tags, add cc stable for 1+2, no code changes. v4: back to v2-ish design, but simplified a bit. v3: switch to drm_gem_object_funcs callback. v2: make shmem helper caching configurable.
Gerd Hoffmann (3): drm/shmem: add support for per object caching flags. drm/virtio: fix mmap page attributes drm/udl: simplify gem object mapping.
Series is:
Reviewed-by: Gurchetan Singh gurchetansingh@chromium.org
include/drm/drm_gem_shmem_helper.h | 5 ++ drivers/gpu/drm/drm_gem_shmem_helper.c | 15 ++++-- drivers/gpu/drm/udl/udl_gem.c | 62 ++----------------------- drivers/gpu/drm/virtio/virtgpu_object.c | 1 + 4 files changed, 20 insertions(+), 63 deletions(-)
-- 2.18.2
dri-devel@lists.freedesktop.org