From: Zack Rusin zackr@vmware.com
This is a set of various smaller fixes. The 5th, drm/ttm one needs to be looked at by Christian, especially because it's a kernel oops. The others are largely trivial vmwgfx fixes.
Zack Rusin (5): drm/vmwgfx: Fix an invalid read drm/vmwgfx: Fix mob cursor allocation race drm/vmwgfx: validate the screen formats drm/vmwgfx: Disable command buffers on svga3 without gbobjects drm/ttm: Fix a kernel oops due to an invalid read
drivers/gpu/drm/ttm/ttm_range_manager.c | 2 +- drivers/gpu/drm/vmwgfx/vmwgfx_cmd.c | 11 ++++--- drivers/gpu/drm/vmwgfx/vmwgfx_kms.c | 38 +++++++++++++++--------- drivers/gpu/drm/vmwgfx/vmwgfx_kms.h | 1 - drivers/gpu/drm/vmwgfx/vmwgfx_resource.c | 14 ++++----- 5 files changed, 38 insertions(+), 28 deletions(-)
From: Zack Rusin zackr@vmware.com
vmw_move assumed that buffers to be moved would always be vmw_buffer_object's but after introduction of new placement for mob pages that's no longer the case. The resulting invalid read didn't have any practical consequences because the memory isn't used unless the object actually is a vmw_buffer_object. Fix it by moving the cast to the spot where the results are used.
Signed-off-by: Zack Rusin zackr@vmware.com Fixes: f6be23264bba ("drm/vmwgfx: Introduce a new placement for MOB page tables") Reported-by: Chuck Lever III chuck.lever@oracle.com Reviewed-by: Martin Krastev krastevm@vmware.com --- drivers/gpu/drm/vmwgfx/vmwgfx_resource.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-)
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c index 708899ba2102..6542f1498651 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c @@ -859,22 +859,21 @@ void vmw_query_move_notify(struct ttm_buffer_object *bo, struct ttm_device *bdev = bo->bdev; struct vmw_private *dev_priv;
- dev_priv = container_of(bdev, struct vmw_private, bdev);
mutex_lock(&dev_priv->binding_mutex);
- dx_query_mob = container_of(bo, struct vmw_buffer_object, base); - if (!dx_query_mob || !dx_query_mob->dx_query_ctx) { - mutex_unlock(&dev_priv->binding_mutex); - return; - } - /* If BO is being moved from MOB to system memory */ if (new_mem->mem_type == TTM_PL_SYSTEM && old_mem->mem_type == VMW_PL_MOB) { struct vmw_fence_obj *fence;
+ dx_query_mob = container_of(bo, struct vmw_buffer_object, base); + if (!dx_query_mob || !dx_query_mob->dx_query_ctx) { + mutex_unlock(&dev_priv->binding_mutex); + return; + } + (void) vmw_query_readback_all(dx_query_mob); mutex_unlock(&dev_priv->binding_mutex);
@@ -888,7 +887,6 @@ void vmw_query_move_notify(struct ttm_buffer_object *bo, (void) ttm_bo_wait(bo, false, false); } else mutex_unlock(&dev_priv->binding_mutex); - }
/**
Hi Zack-
On Mar 18, 2022, at 1:43 PM, Zack Rusin zack@kde.org wrote:
From: Zack Rusin zackr@vmware.com
vmw_move assumed that buffers to be moved would always be vmw_buffer_object's but after introduction of new placement for mob pages that's no longer the case. The resulting invalid read didn't have any practical consequences because the memory isn't used unless the object actually is a vmw_buffer_object. Fix it by moving the cast to the spot where the results are used.
Signed-off-by: Zack Rusin zackr@vmware.com Fixes: f6be23264bba ("drm/vmwgfx: Introduce a new placement for MOB page tables") Reported-by: Chuck Lever III chuck.lever@oracle.com Reviewed-by: Martin Krastev krastevm@vmware.com
After applying this patch, I am not able to reproduce the KASAN splat I reported earlier this week.
Tested-by: Chuck Lever chuck.lever@oracle.com
drivers/gpu/drm/vmwgfx/vmwgfx_resource.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-)
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c index 708899ba2102..6542f1498651 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c @@ -859,22 +859,21 @@ void vmw_query_move_notify(struct ttm_buffer_object *bo, struct ttm_device *bdev = bo->bdev; struct vmw_private *dev_priv;
dev_priv = container_of(bdev, struct vmw_private, bdev);
mutex_lock(&dev_priv->binding_mutex);
dx_query_mob = container_of(bo, struct vmw_buffer_object, base);
if (!dx_query_mob || !dx_query_mob->dx_query_ctx) {
mutex_unlock(&dev_priv->binding_mutex);
return;
}
/* If BO is being moved from MOB to system memory */ if (new_mem->mem_type == TTM_PL_SYSTEM && old_mem->mem_type == VMW_PL_MOB) { struct vmw_fence_obj *fence;
dx_query_mob = container_of(bo, struct vmw_buffer_object, base);
if (!dx_query_mob || !dx_query_mob->dx_query_ctx) {
mutex_unlock(&dev_priv->binding_mutex);
return;
}
- (void) vmw_query_readback_all(dx_query_mob); mutex_unlock(&dev_priv->binding_mutex);
@@ -888,7 +887,6 @@ void vmw_query_move_notify(struct ttm_buffer_object *bo, (void) ttm_bo_wait(bo, false, false); } else mutex_unlock(&dev_priv->binding_mutex);
}
/**
2.32.0
-- Chuck Lever
From: Zack Rusin zackr@vmware.com
Writes to SVGA_REG_CURSOR_MOBID did not wait for the buffers to be fully populated. This sometimes results in the device not being aware of the buffer when the cursor mob register was written. Properly wait for the buffer to be fully populated before setting it as a cursor mob.
Signed-off-by: Zack Rusin zackr@vmware.com Fixes: 485d98d472d5 ("drm/vmwgfx: Add support for CursorMob and CursorBypass 4") Reviewed-by: Martin Krastev krastevm@vmware.com --- drivers/gpu/drm/vmwgfx/vmwgfx_kms.c | 8 ++++++++ 1 file changed, 8 insertions(+)
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c index af252210ef84..7a23f252d212 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c @@ -602,6 +602,14 @@ vmw_du_cursor_plane_prepare_fb(struct drm_plane *plane,
ret = ttm_bo_kmap(cm_bo, 0, PFN_UP(size), &vps->cm_map);
+ /* + * We just want to try to get mob bind to finish + * so that the first write to SVGA_REG_CURSOR_MOBID + * is done with a buffer that the device has already + * seen + */ + (void) ttm_bo_wait(cm_bo, false, false); + ttm_bo_unreserve(cm_bo);
if (unlikely(ret != 0)) {
From: Zack Rusin zackr@vmware.com
The kms code wasn't validating the modifiers and was letting through unsupported formats. rgb8 was never properly supported and has no matching svga screen target format so remove it. This fixes format/modifier failures in kms_addfb_basic from IGT.
Signed-off-by: Zack Rusin zackr@vmware.com Reviewed-by: Martin Krastev krastevm@vmware.com --- drivers/gpu/drm/vmwgfx/vmwgfx_kms.c | 30 +++++++++++++++-------------- drivers/gpu/drm/vmwgfx/vmwgfx_kms.h | 1 - 2 files changed, 16 insertions(+), 15 deletions(-)
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c index 7a23f252d212..693028c31b6b 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c @@ -1171,6 +1171,15 @@ static int vmw_kms_new_framebuffer_surface(struct vmw_private *dev_priv, * Sanity checks. */
+ if (!drm_any_plane_has_format(&dev_priv->drm, + mode_cmd->pixel_format, + mode_cmd->modifier[0])) { + drm_dbg(&dev_priv->drm, + "unsupported pixel format %p4cc / modifier 0x%llx\n", + &mode_cmd->pixel_format, mode_cmd->modifier[0]); + return -EINVAL; + } + /* Surface must be marked as a scanout. */ if (unlikely(!surface->metadata.scanout)) return -EINVAL; @@ -1493,20 +1502,13 @@ static int vmw_kms_new_framebuffer_bo(struct vmw_private *dev_priv, return -EINVAL; }
- /* Limited framebuffer color depth support for screen objects */ - if (dev_priv->active_display_unit == vmw_du_screen_object) { - switch (mode_cmd->pixel_format) { - case DRM_FORMAT_XRGB8888: - case DRM_FORMAT_ARGB8888: - break; - case DRM_FORMAT_XRGB1555: - case DRM_FORMAT_RGB565: - break; - default: - DRM_ERROR("Invalid pixel format: %p4cc\n", - &mode_cmd->pixel_format); - return -EINVAL; - } + if (!drm_any_plane_has_format(&dev_priv->drm, + mode_cmd->pixel_format, + mode_cmd->modifier[0])) { + drm_dbg(&dev_priv->drm, + "unsupported pixel format %p4cc / modifier 0x%llx\n", + &mode_cmd->pixel_format, mode_cmd->modifier[0]); + return -EINVAL; }
vfbd = kzalloc(sizeof(*vfbd), GFP_KERNEL); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h index c95be95deb8d..1d1c8b82c898 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h @@ -247,7 +247,6 @@ struct vmw_framebuffer_bo { static const uint32_t __maybe_unused vmw_primary_plane_formats[] = { DRM_FORMAT_XRGB1555, DRM_FORMAT_RGB565, - DRM_FORMAT_RGB888, DRM_FORMAT_XRGB8888, DRM_FORMAT_ARGB8888, };
From: Zack Rusin zackr@vmware.com
With very limited vram on svga3 it's difficult to handle all the surface migrations. Without gbobjects, i.e. the ability to store surfaces in guest mobs, there's no reason to support intermediate svga2 features, especially because we can fall back to fb traces and svga3 will never support those in-between features.
On svga3 we wither want to use fb traces or screen targets (i.e. gbobjects), nothing in between. This fixes presentation on a lot of fusion/esxi tech previews where the exposed svga3 caps haven't been finalized yet.
Signed-off-by: Zack Rusin zackr@vmware.com Fixes: 2cd80dbd3551 ("drm/vmwgfx: Add basic support for SVGA3") Cc: stable@vger.kernel.org # v5.14+ Reviewed-by: Martin Krastev krastevm@vmware.com --- drivers/gpu/drm/vmwgfx/vmwgfx_cmd.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_cmd.c b/drivers/gpu/drm/vmwgfx/vmwgfx_cmd.c index bf1b394753da..162dfeb1cc5a 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_cmd.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_cmd.c @@ -675,11 +675,14 @@ int vmw_cmd_emit_dummy_query(struct vmw_private *dev_priv, */ bool vmw_cmd_supported(struct vmw_private *vmw) { - if ((vmw->capabilities & (SVGA_CAP_COMMAND_BUFFERS | - SVGA_CAP_CMD_BUFFERS_2)) != 0) - return true; + bool has_cmdbufs = + (vmw->capabilities & (SVGA_CAP_COMMAND_BUFFERS | + SVGA_CAP_CMD_BUFFERS_2)) != 0; + if (vmw_is_svga_v3(vmw)) + return (has_cmdbufs && + (vmw->capabilities & SVGA_CAP_GBOBJECTS) != 0); /* * We have FIFO cmd's */ - return vmw->fifo_mem != NULL; + return has_cmdbufs || vmw->fifo_mem != NULL; }
From: Zack Rusin zackr@vmware.com
The res is initialized here only if there's no errors so passing it to ttm_resource_fini in the error paths results in a kernel oops. In the error paths, instead of the unitialized res, we have to use to use node->base on which ttm_resource_init was called.
Sample affected backtrace: Unable to handle kernel NULL pointer dereference at virtual address 00000000000000d8 Mem abort info: ESR = 0x96000004 EC = 0x25: DABT (current EL), IL = 32 bits SET = 0, FnV = 0 EA = 0, S1PTW = 0 FSC = 0x04: level 0 translation fault Data abort info: ISV = 0, ISS = 0x00000004 CM = 0, WnR = 0 user pgtable: 4k pages, 48-bit VAs, pgdp=0000000106ac0000 [00000000000000d8] pgd=0000000000000000, p4d=0000000000000000 Internal error: Oops: 96000004 [#1] SMP Modules linked in: bnep vsock_loopback vmw_vsock_virtio_transport_common vsock snd_hda_codec_generic snd_hda_intel snd_intel_dspcfg snd_hda_codec snd_hwdep > CPU: 0 PID: 1197 Comm: gnome-shell Tainted: G U 5.17.0-rc2-vmwgfx #2 Hardware name: VMware, Inc. VBSA/VBSA, BIOS VEFI 12/31/2020 pstate: 60400005 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : ttm_resource_fini+0x5c/0xac [ttm] lr : ttm_range_man_alloc+0x128/0x1e0 [ttm] sp : ffff80000d783510 x29: ffff80000d783510 x28: 0000000000000000 x27: ffff000086514400 x26: 0000000000000300 x25: ffff0000809f9e78 x24: 0000000000000000 x23: ffff80000d783680 x22: ffff000086514400 x21: 00000000ffffffe4 x20: ffff80000d7836a0 x19: ffff0000809f9e00 x18: 0000000000000000 x17: 0000000000000000 x16: 0000000000000000 x15: 0000000000000000 x14: 0000000000000000 x13: 0000000000000800 x12: ffff0000f2600a00 x11: 000000000000fc96 x10: 0000000000000000 x9 : ffff800001295c18 x8 : 0000000000000000 x7 : 0000000000000300 x6 : 0000000000000000 x5 : 0000000000000000 x4 : ffff0000f1034e20 x3 : ffff0000f1034600 x2 : 0000000000000000 x1 : 0000000000000000 x0 : 0000000000600000 Call trace: ttm_resource_fini+0x5c/0xac [ttm] ttm_range_man_alloc+0x128/0x1e0 [ttm] ttm_resource_alloc+0x58/0x90 [ttm] ttm_bo_mem_space+0xc8/0x3e4 [ttm] ttm_bo_validate+0xb4/0x134 [ttm] vmw_bo_pin_in_start_of_vram+0xbc/0x200 [vmwgfx] vmw_framebuffer_pin+0xc0/0x154 [vmwgfx] vmw_ldu_primary_plane_atomic_update+0x8c/0x6e0 [vmwgfx] drm_atomic_helper_commit_planes+0x11c/0x2e0 drm_atomic_helper_commit_tail+0x60/0xb0 commit_tail+0x1b0/0x210 drm_atomic_helper_commit+0x168/0x400 drm_atomic_commit+0x64/0x74 drm_atomic_helper_set_config+0xdc/0x11c drm_mode_setcrtc+0x1c4/0x780 drm_ioctl_kernel+0xd0/0x1a0 drm_ioctl+0x2c4/0x690 vmw_generic_ioctl+0xe0/0x174 [vmwgfx] vmw_unlocked_ioctl+0x24/0x30 [vmwgfx] __arm64_sys_ioctl+0xb4/0x100 invoke_syscall+0x78/0x100 el0_svc_common.constprop.0+0x54/0x184 do_el0_svc+0x34/0x9c el0_svc+0x48/0x1b0 el0t_64_sync_handler+0xa4/0x130 el0t_64_sync+0x1a4/0x1a8 Code: 35000260 f9401a81 52800002 f9403a60 (f9406c23) ---[ end trace 0000000000000000 ]---
Signed-off-by: Zack Rusin zackr@vmware.com Fixes: de3688e469b0 ("drm/ttm: add ttm_resource_fini v2") Cc: Christian König christian.koenig@amd.com Cc: Daniel Vetter daniel.vetter@ffwll.ch Reviewed-by: Martin Krastev krastevm@vmware.com --- drivers/gpu/drm/ttm/ttm_range_manager.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/ttm/ttm_range_manager.c b/drivers/gpu/drm/ttm/ttm_range_manager.c index 8cd4f3fb9f79..d91666721dc6 100644 --- a/drivers/gpu/drm/ttm/ttm_range_manager.c +++ b/drivers/gpu/drm/ttm/ttm_range_manager.c @@ -89,7 +89,7 @@ static int ttm_range_man_alloc(struct ttm_resource_manager *man, spin_unlock(&rman->lock);
if (unlikely(ret)) { - ttm_resource_fini(man, *res); + ttm_resource_fini(man, &node->base); kfree(node); return ret; }
Am 18.03.22 um 18:43 schrieb Zack Rusin:
From: Zack Rusin zackr@vmware.com
The res is initialized here only if there's no errors so passing it to ttm_resource_fini in the error paths results in a kernel oops. In the error paths, instead of the unitialized res, we have to use to use node->base on which ttm_resource_init was called.
Sample affected backtrace: Unable to handle kernel NULL pointer dereference at virtual address 00000000000000d8 Mem abort info: ESR = 0x96000004 EC = 0x25: DABT (current EL), IL = 32 bits SET = 0, FnV = 0 EA = 0, S1PTW = 0 FSC = 0x04: level 0 translation fault Data abort info: ISV = 0, ISS = 0x00000004 CM = 0, WnR = 0 user pgtable: 4k pages, 48-bit VAs, pgdp=0000000106ac0000 [00000000000000d8] pgd=0000000000000000, p4d=0000000000000000 Internal error: Oops: 96000004 [#1] SMP Modules linked in: bnep vsock_loopback vmw_vsock_virtio_transport_common vsock snd_hda_codec_generic snd_hda_intel snd_intel_dspcfg snd_hda_codec snd_hwdep > CPU: 0 PID: 1197 Comm: gnome-shell Tainted: G U 5.17.0-rc2-vmwgfx #2 Hardware name: VMware, Inc. VBSA/VBSA, BIOS VEFI 12/31/2020 pstate: 60400005 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : ttm_resource_fini+0x5c/0xac [ttm] lr : ttm_range_man_alloc+0x128/0x1e0 [ttm] sp : ffff80000d783510 x29: ffff80000d783510 x28: 0000000000000000 x27: ffff000086514400 x26: 0000000000000300 x25: ffff0000809f9e78 x24: 0000000000000000 x23: ffff80000d783680 x22: ffff000086514400 x21: 00000000ffffffe4 x20: ffff80000d7836a0 x19: ffff0000809f9e00 x18: 0000000000000000 x17: 0000000000000000 x16: 0000000000000000 x15: 0000000000000000 x14: 0000000000000000 x13: 0000000000000800 x12: ffff0000f2600a00 x11: 000000000000fc96 x10: 0000000000000000 x9 : ffff800001295c18 x8 : 0000000000000000 x7 : 0000000000000300 x6 : 0000000000000000 x5 : 0000000000000000 x4 : ffff0000f1034e20 x3 : ffff0000f1034600 x2 : 0000000000000000 x1 : 0000000000000000 x0 : 0000000000600000 Call trace: ttm_resource_fini+0x5c/0xac [ttm] ttm_range_man_alloc+0x128/0x1e0 [ttm] ttm_resource_alloc+0x58/0x90 [ttm] ttm_bo_mem_space+0xc8/0x3e4 [ttm] ttm_bo_validate+0xb4/0x134 [ttm] vmw_bo_pin_in_start_of_vram+0xbc/0x200 [vmwgfx] vmw_framebuffer_pin+0xc0/0x154 [vmwgfx] vmw_ldu_primary_plane_atomic_update+0x8c/0x6e0 [vmwgfx] drm_atomic_helper_commit_planes+0x11c/0x2e0 drm_atomic_helper_commit_tail+0x60/0xb0 commit_tail+0x1b0/0x210 drm_atomic_helper_commit+0x168/0x400 drm_atomic_commit+0x64/0x74 drm_atomic_helper_set_config+0xdc/0x11c drm_mode_setcrtc+0x1c4/0x780 drm_ioctl_kernel+0xd0/0x1a0 drm_ioctl+0x2c4/0x690 vmw_generic_ioctl+0xe0/0x174 [vmwgfx] vmw_unlocked_ioctl+0x24/0x30 [vmwgfx] __arm64_sys_ioctl+0xb4/0x100 invoke_syscall+0x78/0x100 el0_svc_common.constprop.0+0x54/0x184 do_el0_svc+0x34/0x9c el0_svc+0x48/0x1b0 el0t_64_sync_handler+0xa4/0x130 el0t_64_sync+0x1a4/0x1a8 Code: 35000260 f9401a81 52800002 f9403a60 (f9406c23) ---[ end trace 0000000000000000 ]---
Signed-off-by: Zack Rusin zackr@vmware.com Fixes: de3688e469b0 ("drm/ttm: add ttm_resource_fini v2") Cc: Christian König christian.koenig@amd.com Cc: Daniel Vetter daniel.vetter@ffwll.ch Reviewed-by: Martin Krastev krastevm@vmware.com
Reviewed-by: Christian König christian.koenig@amd.com and pushed to drm-misc-next-fixes.
drivers/gpu/drm/ttm/ttm_range_manager.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/ttm/ttm_range_manager.c b/drivers/gpu/drm/ttm/ttm_range_manager.c index 8cd4f3fb9f79..d91666721dc6 100644 --- a/drivers/gpu/drm/ttm/ttm_range_manager.c +++ b/drivers/gpu/drm/ttm/ttm_range_manager.c @@ -89,7 +89,7 @@ static int ttm_range_man_alloc(struct ttm_resource_manager *man, spin_unlock(&rman->lock);
if (unlikely(ret)) {
ttm_resource_fini(man, *res);
kfree(node); return ret; }ttm_resource_fini(man, &node->base);
dri-devel@lists.freedesktop.org