From: Tian Tao tiantao6@hisilicon.com
[ Upstream commit 7d614ab2f20503ed8766363d41f8607337571adf ]
Fix the below warning: drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c:84:2-8: WARNING: NULL check before some freeing functions is not needed.
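For context: like kfree(), kvfree() is a no-op when passed a NULL pointer, so the guard adds nothing. A minimal before/after sketch of the pattern the Coccinelle check flags (struct example_obj is a placeholder, not a driver type):

#include <linux/mm.h>   /* kvfree() */

struct example_obj {
        struct page **pages;
};

/* Before: redundant NULL check flagged by the Coccinelle script. */
static void release_pages_checked(struct example_obj *obj)
{
        if (obj->pages)
                kvfree(obj->pages);
}

/* After: kvfree(NULL) is safe, so the check can simply be dropped. */
static void release_pages_plain(struct example_obj *obj)
{
        kvfree(obj->pages);
}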
Signed-off-by: Tian Tao tiantao6@hisilicon.com Acked-by: Christian König christian.koenig@amd.com Signed-off-by: Lucas Stach l.stach@pengutronix.de Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c b/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c index b390dd4d60b7..d741b1d735f7 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c @@ -80,8 +80,7 @@ static void etnaviv_gem_prime_release(struct etnaviv_gem_object *etnaviv_obj) /* Don't drop the pages for imported dmabuf, as they are not * ours, just free the array we allocated: */ - if (etnaviv_obj->pages) - kvfree(etnaviv_obj->pages); + kvfree(etnaviv_obj->pages);
drm_prime_gem_destroy(&etnaviv_obj->base, etnaviv_obj->sgt); }
From: Thomas Zimmermann tzimmermann@suse.de
[ Upstream commit 13b29cc3a722c2c0bc9ab9f72f9047d55d08a2f9 ]
Selecting DRM_FBDEV_EMULATION will include the correct settings for fbdev emulation. Drivers should not override this.
Signed-off-by: Thomas Zimmermann tzimmermann@suse.de Acked-by: Stefan Agner stefan@agner.ch Acked-by: Daniel Vetter daniel.vetter@ffwll.ch Link: https://patchwork.freedesktop.org/patch/msgid/20210415110040.23525-3-tzimmer... Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/gpu/drm/mxsfb/Kconfig | 1 - 1 file changed, 1 deletion(-)
diff --git a/drivers/gpu/drm/mxsfb/Kconfig b/drivers/gpu/drm/mxsfb/Kconfig index 0143d539f8f8..ee22cd25d3e3 100644 --- a/drivers/gpu/drm/mxsfb/Kconfig +++ b/drivers/gpu/drm/mxsfb/Kconfig @@ -10,7 +10,6 @@ config DRM_MXSFB depends on COMMON_CLK select DRM_MXS select DRM_KMS_HELPER - select DRM_KMS_FB_HELPER select DRM_KMS_CMA_HELPER select DRM_PANEL select DRM_PANEL_BRIDGE
From: Thomas Zimmermann tzimmermann@suse.de
[ Upstream commit a50e74bec1d17e95275909660c6b43ffe11ebcf0 ]
Selecting DRM_FBDEV_EMULATION will include the correct settings for fbdev emulation. Drivers should not override this.
Signed-off-by: Thomas Zimmermann tzimmermann@suse.de Acked-by: Daniel Vetter daniel.vetter@ffwll.ch Link: https://patchwork.freedesktop.org/patch/msgid/20210415110040.23525-4-tzimmer... Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/gpu/drm/zte/Kconfig | 1 - 1 file changed, 1 deletion(-)
diff --git a/drivers/gpu/drm/zte/Kconfig b/drivers/gpu/drm/zte/Kconfig index 90ebaedc11fd..aa8594190b50 100644 --- a/drivers/gpu/drm/zte/Kconfig +++ b/drivers/gpu/drm/zte/Kconfig @@ -3,7 +3,6 @@ config DRM_ZTE tristate "DRM Support for ZTE SoCs" depends on DRM && ARCH_ZX select DRM_KMS_CMA_HELPER - select DRM_KMS_FB_HELPER select DRM_KMS_HELPER select SND_SOC_HDMI_CODEC if SND_SOC select VIDEOMODE_HELPERS
From: KuoHsiang Chou kuohsiang_chou@aspeedtech.com
[ Upstream commit ba4e0339a6a33e2ba341703ce14ae8ca203cb2f1 ]
[Bug][DP501] If the ASPEED P2A (PCI to AHB) bridge is disabled and disallowed to mitigate CVE-2019-6260 item 3, the monitor's EDID cannot be read through the Parade DP501. The reason is that the DP501 firmware is mapped into the BMC addressing space rather than the Host addressing space. The resolution is to use pci_iomap_range() to map the DP501 firmware stored at the end of the framebuffer. In this case, the framebuffer reserves its last 2MB for the DP501 image.
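For illustration only (the function name, firmware size and BAR index below are assumptions, not the driver's exact code): pci_iomap_range() lets the driver map just the tail of BAR 0 where the DP501 firmware image is expected to sit, instead of the whole framebuffer BAR:

#include <linux/pci.h>

/*
 * Sketch: map the last 2 MiB of BAR 0, assuming the DP501 firmware image
 * is placed at the end of the framebuffer.
 */
static void __iomem *map_dp501_fw(struct pci_dev *pdev)
{
        const unsigned long fw_size = 2 * 1024 * 1024;
        unsigned long bar_len = pci_resource_len(pdev, 0);

        if (bar_len < fw_size)
                return NULL;

        /* Map only [bar_len - fw_size, bar_len) instead of the whole BAR. */
        return pci_iomap_range(pdev, 0, bar_len - fw_size, fw_size);
}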
Signed-off-by: KuoHsiang Chou kuohsiang_chou@aspeedtech.com Reported-by: kernel test robot lkp@intel.com Signed-off-by: Thomas Zimmermann tzimmermann@suse.de Link: https://patchwork.freedesktop.org/patch/msgid/20210421085859.17761-1-kuohsia... Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/gpu/drm/ast/ast_dp501.c | 139 +++++++++++++++++++++++--------- drivers/gpu/drm/ast/ast_drv.h | 12 +++ drivers/gpu/drm/ast/ast_main.c | 11 ++- 3 files changed, 125 insertions(+), 37 deletions(-)
diff --git a/drivers/gpu/drm/ast/ast_dp501.c b/drivers/gpu/drm/ast/ast_dp501.c index 88121c0e0d05..cd93c44f2662 100644 --- a/drivers/gpu/drm/ast/ast_dp501.c +++ b/drivers/gpu/drm/ast/ast_dp501.c @@ -189,6 +189,9 @@ bool ast_backup_fw(struct drm_device *dev, u8 *addr, u32 size) u32 i, data; u32 boot_address;
+ if (ast->config_mode != ast_use_p2a) + return false; + data = ast_mindwm(ast, 0x1e6e2100) & 0x01; if (data) { boot_address = get_fw_base(ast); @@ -207,6 +210,9 @@ static bool ast_launch_m68k(struct drm_device *dev) u8 *fw_addr = NULL; u8 jreg;
+ if (ast->config_mode != ast_use_p2a) + return false; + data = ast_mindwm(ast, 0x1e6e2100) & 0x01; if (!data) {
@@ -271,25 +277,55 @@ u8 ast_get_dp501_max_clk(struct drm_device *dev) struct ast_private *ast = to_ast_private(dev); u32 boot_address, offset, data; u8 linkcap[4], linkrate, linklanes, maxclk = 0xff; + u32 *plinkcap;
- boot_address = get_fw_base(ast); - - /* validate FW version */ - offset = 0xf000; - data = ast_mindwm(ast, boot_address + offset); - if ((data & 0xf0) != 0x10) /* version: 1x */ - return maxclk; - - /* Read Link Capability */ - offset = 0xf014; - *(u32 *)linkcap = ast_mindwm(ast, boot_address + offset); - if (linkcap[2] == 0) { - linkrate = linkcap[0]; - linklanes = linkcap[1]; - data = (linkrate == 0x0a) ? (90 * linklanes) : (54 * linklanes); - if (data > 0xff) - data = 0xff; - maxclk = (u8)data; + if (ast->config_mode == ast_use_p2a) { + boot_address = get_fw_base(ast); + + /* validate FW version */ + offset = AST_DP501_GBL_VERSION; + data = ast_mindwm(ast, boot_address + offset); + if ((data & AST_DP501_FW_VERSION_MASK) != AST_DP501_FW_VERSION_1) /* version: 1x */ + return maxclk; + + /* Read Link Capability */ + offset = AST_DP501_LINKRATE; + plinkcap = (u32 *)linkcap; + *plinkcap = ast_mindwm(ast, boot_address + offset); + if (linkcap[2] == 0) { + linkrate = linkcap[0]; + linklanes = linkcap[1]; + data = (linkrate == 0x0a) ? (90 * linklanes) : (54 * linklanes); + if (data > 0xff) + data = 0xff; + maxclk = (u8)data; + } + } else { + if (!ast->dp501_fw_buf) + return AST_DP501_DEFAULT_DCLK; /* 1024x768 as default */ + + /* dummy read */ + offset = 0x0000; + data = readl(ast->dp501_fw_buf + offset); + + /* validate FW version */ + offset = AST_DP501_GBL_VERSION; + data = readl(ast->dp501_fw_buf + offset); + if ((data & AST_DP501_FW_VERSION_MASK) != AST_DP501_FW_VERSION_1) /* version: 1x */ + return maxclk; + + /* Read Link Capability */ + offset = AST_DP501_LINKRATE; + plinkcap = (u32 *)linkcap; + *plinkcap = readl(ast->dp501_fw_buf + offset); + if (linkcap[2] == 0) { + linkrate = linkcap[0]; + linklanes = linkcap[1]; + data = (linkrate == 0x0a) ? (90 * linklanes) : (54 * linklanes); + if (data > 0xff) + data = 0xff; + maxclk = (u8)data; + } } return maxclk; } @@ -298,26 +334,57 @@ bool ast_dp501_read_edid(struct drm_device *dev, u8 *ediddata) { struct ast_private *ast = to_ast_private(dev); u32 i, boot_address, offset, data; + u32 *pEDIDidx;
- boot_address = get_fw_base(ast); - - /* validate FW version */ - offset = 0xf000; - data = ast_mindwm(ast, boot_address + offset); - if ((data & 0xf0) != 0x10) - return false; - - /* validate PnP Monitor */ - offset = 0xf010; - data = ast_mindwm(ast, boot_address + offset); - if (!(data & 0x01)) - return false; + if (ast->config_mode == ast_use_p2a) { + boot_address = get_fw_base(ast);
- /* Read EDID */ - offset = 0xf020; - for (i = 0; i < 128; i += 4) { - data = ast_mindwm(ast, boot_address + offset + i); - *(u32 *)(ediddata + i) = data; + /* validate FW version */ + offset = AST_DP501_GBL_VERSION; + data = ast_mindwm(ast, boot_address + offset); + if ((data & AST_DP501_FW_VERSION_MASK) != AST_DP501_FW_VERSION_1) + return false; + + /* validate PnP Monitor */ + offset = AST_DP501_PNPMONITOR; + data = ast_mindwm(ast, boot_address + offset); + if (!(data & AST_DP501_PNP_CONNECTED)) + return false; + + /* Read EDID */ + offset = AST_DP501_EDID_DATA; + for (i = 0; i < 128; i += 4) { + data = ast_mindwm(ast, boot_address + offset + i); + pEDIDidx = (u32 *)(ediddata + i); + *pEDIDidx = data; + } + } else { + if (!ast->dp501_fw_buf) + return false; + + /* dummy read */ + offset = 0x0000; + data = readl(ast->dp501_fw_buf + offset); + + /* validate FW version */ + offset = AST_DP501_GBL_VERSION; + data = readl(ast->dp501_fw_buf + offset); + if ((data & AST_DP501_FW_VERSION_MASK) != AST_DP501_FW_VERSION_1) + return false; + + /* validate PnP Monitor */ + offset = AST_DP501_PNPMONITOR; + data = readl(ast->dp501_fw_buf + offset); + if (!(data & AST_DP501_PNP_CONNECTED)) + return false; + + /* Read EDID */ + offset = AST_DP501_EDID_DATA; + for (i = 0; i < 128; i += 4) { + data = readl(ast->dp501_fw_buf + offset + i); + pEDIDidx = (u32 *)(ediddata + i); + *pEDIDidx = data; + } }
return true; diff --git a/drivers/gpu/drm/ast/ast_drv.h b/drivers/gpu/drm/ast/ast_drv.h index e82ab8628770..911f9f414774 100644 --- a/drivers/gpu/drm/ast/ast_drv.h +++ b/drivers/gpu/drm/ast/ast_drv.h @@ -150,6 +150,7 @@ struct ast_private {
void __iomem *regs; void __iomem *ioregs; + void __iomem *dp501_fw_buf;
enum ast_chip chip; bool vga2_clone; @@ -325,6 +326,17 @@ int ast_mode_config_init(struct ast_private *ast); #define AST_MM_ALIGN_SHIFT 4 #define AST_MM_ALIGN_MASK ((1 << AST_MM_ALIGN_SHIFT) - 1)
+#define AST_DP501_FW_VERSION_MASK GENMASK(7, 4) +#define AST_DP501_FW_VERSION_1 BIT(4) +#define AST_DP501_PNP_CONNECTED BIT(1) + +#define AST_DP501_DEFAULT_DCLK 65 + +#define AST_DP501_GBL_VERSION 0xf000 +#define AST_DP501_PNPMONITOR 0xf010 +#define AST_DP501_LINKRATE 0xf014 +#define AST_DP501_EDID_DATA 0xf020 + int ast_mm_init(struct ast_private *ast);
/* ast post */ diff --git a/drivers/gpu/drm/ast/ast_main.c b/drivers/gpu/drm/ast/ast_main.c index 0ac3c2039c4b..3976a258799f 100644 --- a/drivers/gpu/drm/ast/ast_main.c +++ b/drivers/gpu/drm/ast/ast_main.c @@ -99,7 +99,7 @@ static void ast_detect_config_mode(struct drm_device *dev, u32 *scu_rev) if (!(jregd0 & 0x80) || !(jregd1 & 0x10)) { /* Double check it's actually working */ data = ast_read32(ast, 0xf004); - if (data != 0xFFFFFFFF) { + if ((data != 0xFFFFFFFF) && (data != 0x00)) { /* P2A works, grab silicon revision */ ast->config_mode = ast_use_p2a;
@@ -411,6 +411,7 @@ struct ast_private *ast_device_create(const struct drm_driver *drv, return ast; dev = &ast->base;
+ dev->pdev = pdev; pci_set_drvdata(pdev, dev);
ast->regs = pci_iomap(pdev, 1, 0); @@ -450,6 +451,14 @@ struct ast_private *ast_device_create(const struct drm_driver *drv, if (ret) return ERR_PTR(ret);
+ /* map reserved buffer */ + ast->dp501_fw_buf = NULL; + if (dev->vram_mm->vram_size < pci_resource_len(dev->pdev, 0)) { + ast->dp501_fw_buf = pci_iomap_range(dev->pdev, 0, dev->vram_mm->vram_size, 0); + if (!ast->dp501_fw_buf) + drm_info(dev, "failed to map reserved buffer!\n"); + } + ret = ast_mode_config_init(ast); if (ret) return ERR_PTR(ret);
From: Brandon Syu Brandon.Syu@amd.com
[ Upstream commit 99c248c41c2199bd34232ce8e729d18c4b343b64 ]
[why] When setup is called after HDCP has already been set up, it would cause the disable-HDCP flow not to execute.
[how] Don't zero out the hdcp structure during setup.
Signed-off-by: Brandon Syu Brandon.Syu@amd.com Reviewed-by: Wenjing Liu Wenjing.Liu@amd.com Acked-by: Wayne Lin waynelin@amd.com Tested-by: Daniel Wheeler daniel.wheeler@amd.com Signed-off-by: Alex Deucher alexander.deucher@amd.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/gpu/drm/amd/display/modules/hdcp/hdcp.c | 1 - 1 file changed, 1 deletion(-)
diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp.c b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp.c index 68a6481d7f8f..b963226e8af4 100644 --- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp.c +++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp.c @@ -260,7 +260,6 @@ enum mod_hdcp_status mod_hdcp_setup(struct mod_hdcp *hdcp, struct mod_hdcp_output output; enum mod_hdcp_status status = MOD_HDCP_STATUS_SUCCESS;
- memset(hdcp, 0, sizeof(struct mod_hdcp)); memset(&output, 0, sizeof(output)); hdcp->config = *config; HDCP_TOP_INTERFACE_TRACE(hdcp);
From: Lewis Huang Lewis.Huang@amd.com
[ Upstream commit d5433a9f692f57c814286f8af2746c567ef79fc8 ]
[Why] The reverted change only fixes the case where the DPP clock switches to a lower frequency. A later solution, "dc: skip program clock when allow seamless boot", can fix both cases.
[How] This reverts commit "dc: wait vblank when stream enabled and update dpp clock"
Signed-off-by: Lewis Huang Lewis.Huang@amd.com Reviewed-by: Eric Yang eric.yang2@amd.com Acked-by: Wayne Lin waynelin@amd.com Tested-by: Daniel Wheeler daniel.wheeler@amd.com Signed-off-by: Alex Deucher alexander.deucher@amd.com Signed-off-by: Sasha Levin sashal@kernel.org --- .../drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c | 10 +--------- drivers/gpu/drm/amd/display/dc/core/dc.c | 13 ------------- drivers/gpu/drm/amd/display/dc/dc.h | 1 - 3 files changed, 1 insertion(+), 23 deletions(-)
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c index a06e86853bb9..49d19fdd750b 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c @@ -128,7 +128,7 @@ void rn_update_clocks(struct clk_mgr *clk_mgr_base, struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base); struct dc_clocks *new_clocks = &context->bw_ctx.bw.dcn.clk; struct dc *dc = clk_mgr_base->ctx->dc; - int display_count, i; + int display_count; bool update_dppclk = false; bool update_dispclk = false; bool dpp_clock_lowered = false; @@ -210,14 +210,6 @@ void rn_update_clocks(struct clk_mgr *clk_mgr_base, clk_mgr_base->clks.dppclk_khz, safe_to_lower);
- for (i = 0; i < context->stream_count; i++) { - if (context->streams[i]->signal == SIGNAL_TYPE_EDP && - context->streams[i]->apply_seamless_boot_optimization) { - dc_wait_for_vblank(dc, context->streams[i]); - break; - } - } - clk_mgr_base->clks.actual_dppclk_khz = rn_vbios_smu_set_dppclk(clk_mgr, clk_mgr_base->clks.dppclk_khz);
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 4713f09bcbf1..e57df2f6f824 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -3219,19 +3219,6 @@ void dc_link_remove_remote_sink(struct dc_link *link, struct dc_sink *sink) } }
-void dc_wait_for_vblank(struct dc *dc, struct dc_stream_state *stream) -{ - int i; - - for (i = 0; i < dc->res_pool->pipe_count; i++) - if (dc->current_state->res_ctx.pipe_ctx[i].stream == stream) { - struct timing_generator *tg = - dc->current_state->res_ctx.pipe_ctx[i].stream_res.tg; - tg->funcs->wait_for_state(tg, CRTC_STATE_VBLANK); - break; - } -} - void get_clock_requirements_for_state(struct dc_state *state, struct AsicStateEx *info) { info->displayClock = (unsigned int)state->bw_ctx.bw.dcn.clk.dispclk_khz; diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 100d434f7a03..65f801b50686 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -719,7 +719,6 @@ void dc_init_callbacks(struct dc *dc, void dc_deinit_callbacks(struct dc *dc); void dc_destroy(struct dc **dc);
-void dc_wait_for_vblank(struct dc *dc, struct dc_stream_state *stream); /******************************************************************************* * Surface Interfaces ******************************************************************************/
From: Chris Park Chris.Park@amd.com
[ Upstream commit b2d4b9f72fb14c1e6e4f0128964a84539a72d831 ]
[Why] clk_mgr is NULL for server settings.
[How] Guard the function with a NULL check.
Signed-off-by: Chris Park Chris.Park@amd.com Reviewed-by: Nicholas Kazlauskas Nicholas.Kazlauskas@amd.com Acked-by: Wayne Lin waynelin@amd.com Tested-by: Daniel Wheeler daniel.wheeler@amd.com Signed-off-by: Alex Deucher alexander.deucher@amd.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/gpu/drm/amd/display/dc/core/dc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index e57df2f6f824..a869702d77af 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -3274,7 +3274,7 @@ void dc_allow_idle_optimizations(struct dc *dc, bool allow) if (dc->debug.disable_idle_power_optimizations) return;
- if (dc->clk_mgr->funcs->is_smu_present) + if (dc->clk_mgr != NULL && dc->clk_mgr->funcs->is_smu_present) if (!dc->clk_mgr->funcs->is_smu_present(dc->clk_mgr)) return;
From: Jack Zhang Jack.Zhang1@amd.com
[ Upstream commit 95ea3dbc4e9548d35ab6fbf67675cef8c293e2f5 ]
Set each IP block's hw status to false before any hw_init, and only set it to true once that block's hw_init has executed.
The old 5.9 branch has this change, but somehow the 5.11 kernel does not have this fix.
Without this change, the SR-IOV TDR gfx IB test fails.
Signed-off-by: Jack Zhang Jack.Zhang1@amd.com Review-by: Emily Deng Emily.Deng@amd.com Signed-off-by: Alex Deucher alexander.deucher@amd.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 57ec108b5972..f008f001951b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2856,7 +2856,7 @@ static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev) AMD_IP_BLOCK_TYPE_IH, };
- for (i = 0; i < ARRAY_SIZE(ip_order); i++) { + for (i = 0; i < adev->num_ip_blocks; i++) { int j; struct amdgpu_ip_block *block;
From: Dan Carpenter dan.carpenter@oracle.com
[ Upstream commit e590c2b03a6143ba93ddad306bc9eaafa838c020 ]
Cppcheck complains that the declaration doesn't match the function definition. Obviously "left" should come before "right". The caller and the function implementation already use this order; only the declaration is wrong, so this doesn't affect runtime.
Reported-by: kernel test robot lkp@intel.com Signed-off-by: Dan Carpenter dan.carpenter@oracle.com Signed-off-by: Maxime Ripard maxime@cerno.tech Link: https://patchwork.freedesktop.org/patch/msgid/YH/720FD978TPhHp@mwanda Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/gpu/drm/vc4/vc4_drv.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/vc4/vc4_drv.h b/drivers/gpu/drm/vc4/vc4_drv.h index a7500716cf3f..5dceadc61600 100644 --- a/drivers/gpu/drm/vc4/vc4_drv.h +++ b/drivers/gpu/drm/vc4/vc4_drv.h @@ -825,7 +825,7 @@ void vc4_crtc_destroy_state(struct drm_crtc *crtc, void vc4_crtc_reset(struct drm_crtc *crtc); void vc4_crtc_handle_vblank(struct vc4_crtc *crtc); void vc4_crtc_get_margins(struct drm_crtc_state *state, - unsigned int *right, unsigned int *left, + unsigned int *left, unsigned int *right, unsigned int *top, unsigned int *bottom);
/* vc4_debugfs.c */
From: Liu Ying victor.liu@nxp.com
[ Upstream commit 3afb2a28fa2404d11cce1956a003f2aaca4da421 ]
This patch replaces ->mode_fixup() with ->atomic_check() so that a full modeset can be requested from there when crtc_state->active is changed to true (which typically means only the connector's DPMS is brought out of the "Off" state, though not necessarily). Bridge functions are added or changed to accommodate the ->atomic_check() callback. That full modeset is needed by the upcoming patch, which gets the MIPI DSI controller and PHY ready in ->mode_set(), because it makes sure ->mode_set() and ->atomic_disable() are called in pairs.
Cc: Andrzej Hajda a.hajda@samsung.com Cc: Neil Armstrong narmstrong@baylibre.com Cc: Robert Foss robert.foss@linaro.org Cc: Laurent Pinchart Laurent.pinchart@ideasonboard.com Cc: Jonas Karlman jonas@kwiboo.se Cc: Jernej Skrabec jernej.skrabec@siol.net Cc: David Airlie airlied@linux.ie Cc: Daniel Vetter daniel@ffwll.ch Cc: Guido Günther agx@sigxcpu.org Cc: Robert Chiras robert.chiras@nxp.com Cc: NXP Linux Team linux-imx@nxp.com Signed-off-by: Liu Ying victor.liu@nxp.com Reviewed-by: Neil Armstrong narmstrong@baylibre.com Signed-off-by: Neil Armstrong narmstrong@baylibre.com Link: https://patchwork.freedesktop.org/patch/msgid/1619170003-4817-2-git-send-ema... Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/gpu/drm/bridge/nwl-dsi.c | 61 ++++++++++++++++++++------------ 1 file changed, 39 insertions(+), 22 deletions(-)
diff --git a/drivers/gpu/drm/bridge/nwl-dsi.c b/drivers/gpu/drm/bridge/nwl-dsi.c index 66b67402f1ac..c65ca860712d 100644 --- a/drivers/gpu/drm/bridge/nwl-dsi.c +++ b/drivers/gpu/drm/bridge/nwl-dsi.c @@ -21,6 +21,7 @@ #include <linux/sys_soc.h> #include <linux/time64.h>
+#include <drm/drm_atomic_state_helper.h> #include <drm/drm_bridge.h> #include <drm/drm_mipi_dsi.h> #include <drm/drm_of.h> @@ -742,7 +743,9 @@ static int nwl_dsi_disable(struct nwl_dsi *dsi) return 0; }
-static void nwl_dsi_bridge_disable(struct drm_bridge *bridge) +static void +nwl_dsi_bridge_atomic_disable(struct drm_bridge *bridge, + struct drm_bridge_state *old_bridge_state) { struct nwl_dsi *dsi = bridge_to_dsi(bridge); int ret; @@ -803,17 +806,6 @@ static int nwl_dsi_get_dphy_params(struct nwl_dsi *dsi, return 0; }
-static bool nwl_dsi_bridge_mode_fixup(struct drm_bridge *bridge, - const struct drm_display_mode *mode, - struct drm_display_mode *adjusted_mode) -{ - /* At least LCDIF + NWL needs active high sync */ - adjusted_mode->flags |= (DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC); - adjusted_mode->flags &= ~(DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC); - - return true; -} - static enum drm_mode_status nwl_dsi_bridge_mode_valid(struct drm_bridge *bridge, const struct drm_display_info *info, @@ -831,6 +823,24 @@ nwl_dsi_bridge_mode_valid(struct drm_bridge *bridge, return MODE_OK; }
+static int nwl_dsi_bridge_atomic_check(struct drm_bridge *bridge, + struct drm_bridge_state *bridge_state, + struct drm_crtc_state *crtc_state, + struct drm_connector_state *conn_state) +{ + struct drm_display_mode *adjusted_mode = &crtc_state->adjusted_mode; + + /* At least LCDIF + NWL needs active high sync */ + adjusted_mode->flags |= (DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC); + adjusted_mode->flags &= ~(DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC); + + /* Do a full modeset if crtc_state->active is changed to be true. */ + if (crtc_state->active_changed && crtc_state->active) + crtc_state->mode_changed = true; + + return 0; +} + static void nwl_dsi_bridge_mode_set(struct drm_bridge *bridge, const struct drm_display_mode *mode, @@ -862,7 +872,9 @@ nwl_dsi_bridge_mode_set(struct drm_bridge *bridge, drm_mode_debug_printmodeline(adjusted_mode); }
-static void nwl_dsi_bridge_pre_enable(struct drm_bridge *bridge) +static void +nwl_dsi_bridge_atomic_pre_enable(struct drm_bridge *bridge, + struct drm_bridge_state *old_bridge_state) { struct nwl_dsi *dsi = bridge_to_dsi(bridge); int ret; @@ -897,7 +909,9 @@ static void nwl_dsi_bridge_pre_enable(struct drm_bridge *bridge) } }
-static void nwl_dsi_bridge_enable(struct drm_bridge *bridge) +static void +nwl_dsi_bridge_atomic_enable(struct drm_bridge *bridge, + struct drm_bridge_state *old_bridge_state) { struct nwl_dsi *dsi = bridge_to_dsi(bridge); int ret; @@ -942,14 +956,17 @@ static void nwl_dsi_bridge_detach(struct drm_bridge *bridge) }
static const struct drm_bridge_funcs nwl_dsi_bridge_funcs = { - .pre_enable = nwl_dsi_bridge_pre_enable, - .enable = nwl_dsi_bridge_enable, - .disable = nwl_dsi_bridge_disable, - .mode_fixup = nwl_dsi_bridge_mode_fixup, - .mode_set = nwl_dsi_bridge_mode_set, - .mode_valid = nwl_dsi_bridge_mode_valid, - .attach = nwl_dsi_bridge_attach, - .detach = nwl_dsi_bridge_detach, + .atomic_duplicate_state = drm_atomic_helper_bridge_duplicate_state, + .atomic_destroy_state = drm_atomic_helper_bridge_destroy_state, + .atomic_reset = drm_atomic_helper_bridge_reset, + .atomic_check = nwl_dsi_bridge_atomic_check, + .atomic_pre_enable = nwl_dsi_bridge_atomic_pre_enable, + .atomic_enable = nwl_dsi_bridge_atomic_enable, + .atomic_disable = nwl_dsi_bridge_atomic_disable, + .mode_set = nwl_dsi_bridge_mode_set, + .mode_valid = nwl_dsi_bridge_mode_valid, + .attach = nwl_dsi_bridge_attach, + .detach = nwl_dsi_bridge_detach, };
static int nwl_dsi_parse_dt(struct nwl_dsi *dsi)
From: Philipp Zabel p.zabel@pengutronix.de
[ Upstream commit 06841148c570832d4d247b0f6befc1922a84120b ]
Only planes that are displayed via the Display Processor (DP) path support color space conversion. Limit formats on planes that are shown via the direct Display Controller (DC) path to RGB.
Reported-by: Fabio Estevam festevam@gmail.com Signed-off-by: Philipp Zabel p.zabel@pengutronix.de Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/gpu/drm/imx/ipuv3-plane.c | 41 ++++++++++++++++++++++++++++--- 1 file changed, 37 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/imx/ipuv3-plane.c b/drivers/gpu/drm/imx/ipuv3-plane.c index fa5009705365..fc8f4834ed7b 100644 --- a/drivers/gpu/drm/imx/ipuv3-plane.c +++ b/drivers/gpu/drm/imx/ipuv3-plane.c @@ -35,7 +35,7 @@ static inline struct ipu_plane *to_ipu_plane(struct drm_plane *p) return container_of(p, struct ipu_plane, base); }
-static const uint32_t ipu_plane_formats[] = { +static const uint32_t ipu_plane_all_formats[] = { DRM_FORMAT_ARGB1555, DRM_FORMAT_XRGB1555, DRM_FORMAT_ABGR1555, @@ -72,6 +72,31 @@ static const uint32_t ipu_plane_formats[] = { DRM_FORMAT_BGRX8888_A8, };
+static const uint32_t ipu_plane_rgb_formats[] = { + DRM_FORMAT_ARGB1555, + DRM_FORMAT_XRGB1555, + DRM_FORMAT_ABGR1555, + DRM_FORMAT_XBGR1555, + DRM_FORMAT_RGBA5551, + DRM_FORMAT_BGRA5551, + DRM_FORMAT_ARGB4444, + DRM_FORMAT_ARGB8888, + DRM_FORMAT_XRGB8888, + DRM_FORMAT_ABGR8888, + DRM_FORMAT_XBGR8888, + DRM_FORMAT_RGBA8888, + DRM_FORMAT_RGBX8888, + DRM_FORMAT_BGRA8888, + DRM_FORMAT_BGRX8888, + DRM_FORMAT_RGB565, + DRM_FORMAT_RGB565_A8, + DRM_FORMAT_BGR565_A8, + DRM_FORMAT_RGB888_A8, + DRM_FORMAT_BGR888_A8, + DRM_FORMAT_RGBX8888_A8, + DRM_FORMAT_BGRX8888_A8, +}; + static const uint64_t ipu_format_modifiers[] = { DRM_FORMAT_MOD_LINEAR, DRM_FORMAT_MOD_INVALID @@ -830,16 +855,24 @@ struct ipu_plane *ipu_plane_init(struct drm_device *dev, struct ipu_soc *ipu, struct ipu_plane *ipu_plane; const uint64_t *modifiers = ipu_format_modifiers; unsigned int zpos = (type == DRM_PLANE_TYPE_PRIMARY) ? 0 : 1; + unsigned int format_count; + const uint32_t *formats; int ret;
DRM_DEBUG_KMS("channel %d, dp flow %d, possible_crtcs=0x%x\n", dma, dp, possible_crtcs);
+ if (dp == IPU_DP_FLOW_SYNC_BG || dp == IPU_DP_FLOW_SYNC_FG) { + formats = ipu_plane_all_formats; + format_count = ARRAY_SIZE(ipu_plane_all_formats); + } else { + formats = ipu_plane_rgb_formats; + format_count = ARRAY_SIZE(ipu_plane_rgb_formats); + } ipu_plane = drmm_universal_plane_alloc(dev, struct ipu_plane, base, possible_crtcs, &ipu_plane_funcs, - ipu_plane_formats, - ARRAY_SIZE(ipu_plane_formats), - modifiers, type, NULL); + formats, format_count, modifiers, + type, NULL); if (IS_ERR(ipu_plane)) { DRM_ERROR("failed to allocate and initialize %s plane\n", zpos ? "overlay" : "primary");
From: Sebastian Reichel sebastian.reichel@collabora.com
[ Upstream commit 94dfec48fca756cef90263a03e81f24dae24a5c6 ]
Some standard resolutions like 1366x768 do not work properly with i.MX6 SoCs, since the horizontal resolution needs to be aligned to 8 pixels (so 1360x768 or 1368x768 would work).
This patch aligns framebuffer allocations to 8 pixels. The extra time required to send the extra pixels is removed from the blanking time. In order to expose the correct display size to userspace, the stride is increased without increasing the width.
Without this patch systems with this display resolution hang indefinitely during boot up.
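To make the arithmetic concrete, here is a small standalone sketch (userspace, values assumed) of what 8-pixel alignment means for a 1366x768 XRGB8888 mode: the width reported to userspace stays 1366, the allocation and stride cover 1368 pixels, and the two extra pixels per line are hidden in the horizontal blanking.

#include <stdio.h>

/* Same rounding the kernel's ALIGN() macro performs. */
#define ALIGN_UP(x, a) (((x) + (a) - 1) / (a) * (a))

int main(void)
{
        unsigned int width = 1366;      /* mode width exposed to userspace */
        unsigned int cpp = 4;           /* XRGB8888: 4 bytes per pixel */

        unsigned int aligned = ALIGN_UP(width, 8);      /* 1368 */
        unsigned int pitch = aligned * cpp;             /* 5472 bytes per line */
        unsigned int extra = aligned - width;           /* 2 pixels per line */

        printf("width %u -> aligned %u, pitch %u bytes\n", width, aligned, pitch);
        printf("%u extra pixels per line are absorbed by horizontal blanking\n",
               extra);
        return 0;
}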
Suggested-by: Boris Brezillon boris.brezillon@collabora.com Signed-off-by: Sebastian Reichel sebastian.reichel@collabora.com Link: https://lore.kernel.org/r/20210428222953.235280-3-sebastian.reichel@collabor... Signed-off-by: Philipp Zabel p.zabel@pengutronix.de Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/gpu/drm/imx/imx-drm-core.c | 19 ++++++++++++++++++- drivers/gpu/drm/imx/imx-ldb.c | 5 +++++ drivers/gpu/drm/imx/ipuv3-crtc.c | 11 ++++++++++- drivers/gpu/drm/imx/ipuv3-plane.c | 19 +++++++++++++++---- drivers/gpu/ipu-v3/ipu-dc.c | 5 +++++ drivers/gpu/ipu-v3/ipu-di.c | 7 +++++++ 6 files changed, 60 insertions(+), 6 deletions(-)
diff --git a/drivers/gpu/drm/imx/imx-drm-core.c b/drivers/gpu/drm/imx/imx-drm-core.c index e6a88c8cbd69..8457b9788cda 100644 --- a/drivers/gpu/drm/imx/imx-drm-core.c +++ b/drivers/gpu/drm/imx/imx-drm-core.c @@ -145,9 +145,26 @@ static const struct drm_ioctl_desc imx_drm_ioctls[] = { /* none so far */ };
+static int imx_drm_dumb_create(struct drm_file *file_priv, + struct drm_device *drm, + struct drm_mode_create_dumb *args) +{ + u32 width = args->width; + int ret; + + args->width = ALIGN(width, 8); + + ret = drm_gem_cma_dumb_create(file_priv, drm, args); + if (ret) + return ret; + + args->width = width; + return ret; +} + static const struct drm_driver imx_drm_driver = { .driver_features = DRIVER_MODESET | DRIVER_GEM | DRIVER_ATOMIC, - DRM_GEM_CMA_DRIVER_OPS, + DRM_GEM_CMA_DRIVER_OPS_WITH_DUMB_CREATE(imx_drm_dumb_create), .ioctls = imx_drm_ioctls, .num_ioctls = ARRAY_SIZE(imx_drm_ioctls), .fops = &imx_drm_driver_fops, diff --git a/drivers/gpu/drm/imx/imx-ldb.c b/drivers/gpu/drm/imx/imx-ldb.c index ffdc492c5bc5..53132ddf9587 100644 --- a/drivers/gpu/drm/imx/imx-ldb.c +++ b/drivers/gpu/drm/imx/imx-ldb.c @@ -274,6 +274,11 @@ imx_ldb_encoder_atomic_mode_set(struct drm_encoder *encoder, "%s: mode exceeds 85 MHz pixel clock\n", __func__); }
+ if (!IS_ALIGNED(mode->hdisplay, 8)) { + dev_warn(ldb->dev, + "%s: hdisplay does not align to 8 byte\n", __func__); + } + if (dual) { serial_clk = 3500UL * mode->clock; imx_ldb_set_clock(ldb, mux, 0, serial_clk, di_clk); diff --git a/drivers/gpu/drm/imx/ipuv3-crtc.c b/drivers/gpu/drm/imx/ipuv3-crtc.c index e6431a227feb..9c8829f945b2 100644 --- a/drivers/gpu/drm/imx/ipuv3-crtc.c +++ b/drivers/gpu/drm/imx/ipuv3-crtc.c @@ -305,10 +305,19 @@ static void ipu_crtc_mode_set_nofb(struct drm_crtc *crtc) sig_cfg.vsync_pin = imx_crtc_state->di_vsync_pin;
drm_display_mode_to_videomode(mode, &sig_cfg.mode); + if (!IS_ALIGNED(sig_cfg.mode.hactive, 8)) { + unsigned int new_hactive = ALIGN(sig_cfg.mode.hactive, 8); + + dev_warn(ipu_crtc->dev, "8-pixel align hactive %d -> %d\n", + sig_cfg.mode.hactive, new_hactive); + + sig_cfg.mode.hfront_porch = new_hactive - sig_cfg.mode.hactive; + sig_cfg.mode.hactive = new_hactive; + }
ipu_dc_init_sync(ipu_crtc->dc, ipu_crtc->di, mode->flags & DRM_MODE_FLAG_INTERLACE, - imx_crtc_state->bus_format, mode->hdisplay); + imx_crtc_state->bus_format, sig_cfg.mode.hactive); ipu_di_init_sync_panel(ipu_crtc->di, &sig_cfg); }
diff --git a/drivers/gpu/drm/imx/ipuv3-plane.c b/drivers/gpu/drm/imx/ipuv3-plane.c index fc8f4834ed7b..9a7150ac34a6 100644 --- a/drivers/gpu/drm/imx/ipuv3-plane.c +++ b/drivers/gpu/drm/imx/ipuv3-plane.c @@ -30,6 +30,11 @@ to_ipu_plane_state(struct drm_plane_state *p) return container_of(p, struct ipu_plane_state, base); }
+static unsigned int ipu_src_rect_width(const struct drm_plane_state *state) +{ + return ALIGN(drm_rect_width(&state->src) >> 16, 8); +} + static inline struct ipu_plane *to_ipu_plane(struct drm_plane *p) { return container_of(p, struct ipu_plane, base); @@ -440,6 +445,12 @@ static int ipu_plane_atomic_check(struct drm_plane *plane, if (old_fb && fb->pitches[0] != old_fb->pitches[0]) crtc_state->mode_changed = true;
+ if (ALIGN(fb->width, 8) * fb->format->cpp[0] > + fb->pitches[0] + fb->offsets[0]) { + dev_warn(dev, "pitch is not big enough for 8 pixels alignment"); + return -EINVAL; + } + switch (fb->format->format) { case DRM_FORMAT_YUV420: case DRM_FORMAT_YVU420: @@ -615,7 +626,7 @@ static void ipu_plane_atomic_update(struct drm_plane *plane, if (ipu_state->use_pre) { axi_id = ipu_chan_assign_axi_id(ipu_plane->dma); ipu_prg_channel_configure(ipu_plane->ipu_ch, axi_id, - drm_rect_width(&new_state->src) >> 16, + ipu_src_rect_width(new_state), drm_rect_height(&new_state->src) >> 16, fb->pitches[0], fb->format->format, fb->modifier, &eba); @@ -648,9 +659,9 @@ static void ipu_plane_atomic_update(struct drm_plane *plane, break; }
- ipu_dmfc_config_wait4eot(ipu_plane->dmfc, drm_rect_width(dst)); + ipu_dmfc_config_wait4eot(ipu_plane->dmfc, ALIGN(drm_rect_width(dst), 8));
- width = drm_rect_width(&new_state->src) >> 16; + width = ipu_src_rect_width(new_state); height = drm_rect_height(&new_state->src) >> 16; info = drm_format_info(fb->format->format); ipu_calculate_bursts(width, info->cpp[0], fb->pitches[0], @@ -715,7 +726,7 @@ static void ipu_plane_atomic_update(struct drm_plane *plane,
ipu_cpmem_zero(ipu_plane->alpha_ch); ipu_cpmem_set_resolution(ipu_plane->alpha_ch, - drm_rect_width(&new_state->src) >> 16, + ipu_src_rect_width(new_state), drm_rect_height(&new_state->src) >> 16); ipu_cpmem_set_format_passthrough(ipu_plane->alpha_ch, 8); ipu_cpmem_set_high_priority(ipu_plane->alpha_ch); diff --git a/drivers/gpu/ipu-v3/ipu-dc.c b/drivers/gpu/ipu-v3/ipu-dc.c index 34b4075a6a8e..ca96b235491a 100644 --- a/drivers/gpu/ipu-v3/ipu-dc.c +++ b/drivers/gpu/ipu-v3/ipu-dc.c @@ -167,6 +167,11 @@ int ipu_dc_init_sync(struct ipu_dc *dc, struct ipu_di *di, bool interlaced,
dc->di = ipu_di_get_num(di);
+ if (!IS_ALIGNED(width, 8)) { + dev_warn(priv->dev, + "%s: hactive does not align to 8 byte\n", __func__); + } + map = ipu_bus_format_to_map(bus_format);
/* diff --git a/drivers/gpu/ipu-v3/ipu-di.c b/drivers/gpu/ipu-v3/ipu-di.c index e617f60afeea..666223c6bec4 100644 --- a/drivers/gpu/ipu-v3/ipu-di.c +++ b/drivers/gpu/ipu-v3/ipu-di.c @@ -506,6 +506,13 @@ int ipu_di_adjust_videomode(struct ipu_di *di, struct videomode *mode) { u32 diff;
+ if (!IS_ALIGNED(mode->hactive, 8) && + mode->hfront_porch < ALIGN(mode->hactive, 8) - mode->hactive) { + dev_err(di->ipu->dev, "hactive %d is not aligned to 8 and front porch is too small to compensate\n", + mode->hactive); + return -EINVAL; + } + if (mode->vfront_porch >= 2) return 0;
From: Roman Li Roman.Li@amd.com
[ Upstream commit cf8b92a75646735136053ce51107bfa8cfc23191 ]
[Why] During GPU reset, dc_lock is acquired in dm_suspend(). Asynchronously, handle_hpd_rx_irq() can also be called through amdgpu_dm_irq_suspend()->flush_work(), which also tries to acquire dc_lock. That causes a deadlock.
[How] Check if amdgpu executing reset before acquiring dc_lock.
Signed-off-by: Lang Yu Lang.Yu@amd.com Signed-off-by: Roman Li Roman.Li@amd.com Reviewed-by: Qingqing Zhuo Qingqing.Zhuo@amd.com Acked-by: Wayne Lin Wayne.Lin@amd.com Tested-by: Daniel Wheeler daniel.wheeler@amd.com Signed-off-by: Alex Deucher alexander.deucher@amd.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 652cc1a0e450..875fd187463e 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -2726,13 +2726,15 @@ static void handle_hpd_rx_irq(void *param) } }
- mutex_lock(&adev->dm.dc_lock); + if (!amdgpu_in_reset(adev)) + mutex_lock(&adev->dm.dc_lock); #ifdef CONFIG_DRM_AMD_DC_HDCP result = dc_link_handle_hpd_rx_irq(dc_link, &hpd_irq_data, NULL); #else result = dc_link_handle_hpd_rx_irq(dc_link, NULL, NULL); #endif - mutex_unlock(&adev->dm.dc_lock); + if (!amdgpu_in_reset(adev)) + mutex_unlock(&adev->dm.dc_lock);
out: if (result && !is_mst_root_connector) {
From: Alex Deucher alexander.deucher@amd.com
[ Upstream commit 67387dfe0f6630f2d4f412ce77debec23a49db7a ]
Change the default compute job timeout to 60s. This matches what we already do in virtualization. An infinite timeout can lead to deadlocks in the kernel.
Reviewed-by: Christian König christian.koenig@amd.com Acked-by: Daniel Vetter daniel.vetter@ffwll.ch Signed-off-by: Alex Deucher alexander.deucher@amd.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 8 +++----- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 4 ++-- 2 files changed, 5 insertions(+), 7 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index f008f001951b..d83f2ee150b8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3181,8 +3181,8 @@ static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev) int ret = 0;
/* - * By default timeout for non compute jobs is 10000. - * And there is no timeout enforced on compute jobs. + * By default timeout for non compute jobs is 10000 + * and 60000 for compute jobs. * In SR-IOV or passthrough mode, timeout for compute * jobs are 60000 by default. */ @@ -3191,10 +3191,8 @@ static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev) if (amdgpu_sriov_vf(adev)) adev->compute_timeout = amdgpu_sriov_is_pp_one_vf(adev) ? msecs_to_jiffies(60000) : msecs_to_jiffies(10000); - else if (amdgpu_passthrough(adev)) - adev->compute_timeout = msecs_to_jiffies(60000); else - adev->compute_timeout = MAX_SCHEDULE_TIMEOUT; + adev->compute_timeout = msecs_to_jiffies(60000);
if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) { while ((timeout_setting = strsep(&input, ",")) && diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index f93883db2b46..57a055aa854d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -287,9 +287,9 @@ module_param_named(msi, amdgpu_msi, int, 0444); * for SDMA and Video. * * By default(with no lockup_timeout settings), the timeout for all non-compute(GFX, SDMA and Video) - * jobs is 10000. And there is no timeout enforced on compute jobs. + * jobs is 10000. The timeout for compute is 60000. */ -MODULE_PARM_DESC(lockup_timeout, "GPU lockup timeout in ms (default: for bare metal 10000 for non-compute jobs and infinity timeout for compute jobs; " +MODULE_PARM_DESC(lockup_timeout, "GPU lockup timeout in ms (default: for bare metal 10000 for non-compute jobs and 60000 for compute jobs; " "for passthrough or sriov, 10000 for all jobs." " 0: keep default value. negative: infinity timeout), " "format: for bare metal [Non-Compute] or [GFX,Compute,SDMA,Video]; "
From: Ilya Bakoulin Ilya.Bakoulin@amd.com
[ Upstream commit c31bef1cb1203b26f901a511a3246204cfaf8a57 ]
[Why] Currently, the code that fills the clock table can miss filling information about some of the higher voltage states advertised by the SMU. This, in turn, may cause some of the higher pixel clock modes (e.g. 8k60) to fail validation.
[How] Fill the table with one entry per DCFCLK level instead of one entry per FCLK level. This is needed because the maximum FCLK does not necessarily need maximum voltage, whereas DCFCLK values from SMU cover the full voltage range.
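A condensed, self-contained sketch of the lookup idea (simplified stand-ins for the SMU DPM tables, with made-up values): for each DCFCLK voltage level, pick the highest clock from another table whose voltage requirement does not exceed that level.

#include <stdio.h>
#include <stdint.h>

/* Simplified stand-in for one SMU DPM table entry (Freq in MHz, Vol in mV). */
struct dpm_entry {
        uint32_t freq;
        uint32_t vol;
};

/* Highest clock usable at or below the given voltage, 0 if none. */
static uint32_t max_clk_for_voltage(const struct dpm_entry *tbl, int n,
                                    uint32_t voltage)
{
        uint32_t max_clk = 0;
        int i;

        for (i = 0; i < n; i++)
                if (tbl[i].vol <= voltage && tbl[i].freq > max_clk)
                        max_clk = tbl[i].freq;

        return max_clk;
}

int main(void)
{
        /* Made-up FCLK levels purely for illustration. */
        const struct dpm_entry fclk[] = {
                { 400, 700 }, { 800, 750 }, { 1200, 800 }, { 1600, 900 },
        };

        /* One clock-table entry per DCFCLK voltage level, as the patch does. */
        printf("max FCLK @ 800 mV: %u MHz\n",
               max_clk_for_voltage(fclk, 4, 800));      /* -> 1200 */
        return 0;
}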
Signed-off-by: Ilya Bakoulin Ilya.Bakoulin@amd.com Reviewed-by: Dmytro Laktyushkin Dmytro.Laktyushkin@amd.com Acked-by: Stylon Wang stylon.wang@amd.com Tested-by: Daniel Wheeler daniel.wheeler@amd.com Signed-off-by: Alex Deucher alexander.deucher@amd.com Signed-off-by: Sasha Levin sashal@kernel.org --- .../amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c | 80 ++++++++++++------- .../drm/amd/display/dc/dcn21/dcn21_resource.c | 33 +++++--- 2 files changed, 74 insertions(+), 39 deletions(-)
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c index 49d19fdd750b..1f56ceab5922 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c @@ -834,46 +834,67 @@ static struct wm_table lpddr4_wm_table_rn = { }, } }; -static unsigned int find_socclk_for_voltage(struct dpm_clocks *clock_table, unsigned int voltage) + +static unsigned int find_max_fclk_for_voltage(struct dpm_clocks *clock_table, + unsigned int voltage) { int i; + uint32_t max_clk = 0;
- for (i = 0; i < PP_SMU_NUM_SOCCLK_DPM_LEVELS; i++) { - if (clock_table->SocClocks[i].Vol == voltage) - return clock_table->SocClocks[i].Freq; + for (i = 0; i < PP_SMU_NUM_FCLK_DPM_LEVELS; i++) { + if (clock_table->FClocks[i].Vol <= voltage) { + max_clk = clock_table->FClocks[i].Freq > max_clk ? + clock_table->FClocks[i].Freq : max_clk; + } }
- ASSERT(0); - return 0; + return max_clk; } -static unsigned int find_dcfclk_for_voltage(struct dpm_clocks *clock_table, unsigned int voltage) + +static unsigned int find_max_memclk_for_voltage(struct dpm_clocks *clock_table, + unsigned int voltage) { int i; + uint32_t max_clk = 0;
- for (i = 0; i < PP_SMU_NUM_DCFCLK_DPM_LEVELS; i++) { - if (clock_table->DcfClocks[i].Vol == voltage) - return clock_table->DcfClocks[i].Freq; + for (i = 0; i < PP_SMU_NUM_MEMCLK_DPM_LEVELS; i++) { + if (clock_table->MemClocks[i].Vol <= voltage) { + max_clk = clock_table->MemClocks[i].Freq > max_clk ? + clock_table->MemClocks[i].Freq : max_clk; + } }
- ASSERT(0); - return 0; + return max_clk; +} + +static unsigned int find_max_socclk_for_voltage(struct dpm_clocks *clock_table, + unsigned int voltage) +{ + int i; + uint32_t max_clk = 0; + + for (i = 0; i < PP_SMU_NUM_SOCCLK_DPM_LEVELS; i++) { + if (clock_table->SocClocks[i].Vol <= voltage) { + max_clk = clock_table->SocClocks[i].Freq > max_clk ? + clock_table->SocClocks[i].Freq : max_clk; + } + } + + return max_clk; }
static void rn_clk_mgr_helper_populate_bw_params(struct clk_bw_params *bw_params, struct dpm_clocks *clock_table, struct integrated_info *bios_info) { int i, j = 0; + unsigned int volt;
j = -1;
- ASSERT(PP_SMU_NUM_FCLK_DPM_LEVELS <= MAX_NUM_DPM_LVL); - - /* Find lowest DPM, FCLK is filled in reverse order*/ - - for (i = PP_SMU_NUM_FCLK_DPM_LEVELS - 1; i >= 0; i--) { - if (clock_table->FClocks[i].Freq != 0 && clock_table->FClocks[i].Vol != 0) { + /* Find max DPM */ + for (i = 0; i < PP_SMU_NUM_DCFCLK_DPM_LEVELS; ++i) { + if (clock_table->DcfClocks[i].Freq != 0 && + clock_table->DcfClocks[i].Vol != 0) j = i; - break; - } }
if (j == -1) { @@ -884,13 +905,18 @@ static void rn_clk_mgr_helper_populate_bw_params(struct clk_bw_params *bw_params
bw_params->clk_table.num_entries = j + 1;
- for (i = 0; i < bw_params->clk_table.num_entries; i++, j--) { - bw_params->clk_table.entries[i].fclk_mhz = clock_table->FClocks[j].Freq; - bw_params->clk_table.entries[i].memclk_mhz = clock_table->MemClocks[j].Freq; - bw_params->clk_table.entries[i].voltage = clock_table->FClocks[j].Vol; - bw_params->clk_table.entries[i].dcfclk_mhz = find_dcfclk_for_voltage(clock_table, clock_table->FClocks[j].Vol); - bw_params->clk_table.entries[i].socclk_mhz = find_socclk_for_voltage(clock_table, - bw_params->clk_table.entries[i].voltage); + for (i = 0; i < bw_params->clk_table.num_entries; i++) { + volt = clock_table->DcfClocks[i].Vol; + + bw_params->clk_table.entries[i].voltage = volt; + bw_params->clk_table.entries[i].dcfclk_mhz = + clock_table->DcfClocks[i].Freq; + bw_params->clk_table.entries[i].fclk_mhz = + find_max_fclk_for_voltage(clock_table, volt); + bw_params->clk_table.entries[i].memclk_mhz = + find_max_memclk_for_voltage(clock_table, volt); + bw_params->clk_table.entries[i].socclk_mhz = + find_max_socclk_for_voltage(clock_table, volt); }
bw_params->vram_type = bios_info->memory_type; diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c index 8e3f1d0b4cc3..38a2aa87f5f5 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c @@ -1575,10 +1575,12 @@ static struct _vcs_dpi_voltage_scaling_st construct_low_pstate_lvl(struct clk_li low_pstate_lvl.phyclk_d18_mhz = dcn2_1_soc.clock_limits[high_voltage_lvl].phyclk_d18_mhz; low_pstate_lvl.phyclk_mhz = dcn2_1_soc.clock_limits[high_voltage_lvl].phyclk_mhz;
- for (i = clk_table->num_entries; i > 1; i--) - clk_table->entries[i] = clk_table->entries[i-1]; - clk_table->entries[1] = clk_table->entries[0]; - clk_table->num_entries++; + if (clk_table->num_entries < MAX_NUM_DPM_LVL) { + for (i = clk_table->num_entries; i > 1; i--) + clk_table->entries[i] = clk_table->entries[i-1]; + clk_table->entries[1] = clk_table->entries[0]; + clk_table->num_entries++; + }
return low_pstate_lvl; } @@ -1610,10 +1612,6 @@ static void update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_param } }
- /* clk_table[1] is reserved for min DF PState. skip here to fill in later. */ - if (i == 1) - k++; - clock_limits[k].state = k; clock_limits[k].dcfclk_mhz = clk_table->entries[i].dcfclk_mhz; clock_limits[k].fabricclk_mhz = clk_table->entries[i].fclk_mhz; @@ -1630,14 +1628,25 @@ static void update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_param
k++; } - for (i = 0; i < clk_table->num_entries + 1; i++) - dcn2_1_soc.clock_limits[i] = clock_limits[i]; + + if (clk_table->num_entries >= MAX_NUM_DPM_LVL) { + for (i = 0; i < clk_table->num_entries + 1; i++) + dcn2_1_soc.clock_limits[i] = clock_limits[i]; + } else { + dcn2_1_soc.clock_limits[0] = clock_limits[0]; + for (i = 2; i < clk_table->num_entries + 1; i++) { + dcn2_1_soc.clock_limits[i] = clock_limits[i - 1]; + dcn2_1_soc.clock_limits[i].state = i; + } + } + if (clk_table->num_entries) { - dcn2_1_soc.num_states = clk_table->num_entries + 1; /* fill in min DF PState */ dcn2_1_soc.clock_limits[1] = construct_low_pstate_lvl(clk_table, closest_clk_lvl); + dcn2_1_soc.num_states = clk_table->num_entries; /* duplicate last level */ - dcn2_1_soc.clock_limits[dcn2_1_soc.num_states] = dcn2_1_soc.clock_limits[dcn2_1_soc.num_states - 1]; + dcn2_1_soc.clock_limits[dcn2_1_soc.num_states] = + dcn2_1_soc.clock_limits[dcn2_1_soc.num_states - 1]; dcn2_1_soc.clock_limits[dcn2_1_soc.num_states].state = dcn2_1_soc.num_states; }
From: Dmytro Laktyushkin Dmytro.Laktyushkin@amd.com
[ Upstream commit 8809a7a4afe90ad9ffb42f72154d27e7c47551ae ]
Right now the flag simply selects memory config 0 when the flag is true; however, 4:2:0 modes benefit more from memory config 3.
Signed-off-by: Dmytro Laktyushkin Dmytro.Laktyushkin@amd.com Reviewed-by: Aric Cyr Aric.Cyr@amd.com Acked-by: Stylon Wang stylon.wang@amd.com Tested-by: Daniel Wheeler daniel.wheeler@amd.com Signed-off-by: Alex Deucher alexander.deucher@amd.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_dscl.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_dscl.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_dscl.c index efa86d5c6847..98ab4b776924 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_dscl.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_dscl.c @@ -496,10 +496,13 @@ static enum lb_memory_config dpp1_dscl_find_lb_memory_config(struct dcn10_dpp *d int vtaps_c = scl_data->taps.v_taps_c; int ceil_vratio = dc_fixpt_ceil(scl_data->ratios.vert); int ceil_vratio_c = dc_fixpt_ceil(scl_data->ratios.vert_c); - enum lb_memory_config mem_cfg = LB_MEMORY_CONFIG_0;
- if (dpp->base.ctx->dc->debug.use_max_lb) - return mem_cfg; + if (dpp->base.ctx->dc->debug.use_max_lb) { + if (scl_data->format == PIXEL_FORMAT_420BPP8 + || scl_data->format == PIXEL_FORMAT_420BPP10) + return LB_MEMORY_CONFIG_3; + return LB_MEMORY_CONFIG_0; + }
dpp->base.caps->dscl_calc_lb_num_partitions( scl_data, LB_MEMORY_CONFIG_1, &num_part_y, &num_part_c);
From: Wang Li wangli74@huawei.com
[ Upstream commit 69777e6ca396f0a7e1baff40fcad4a9d3d445b7a ]
pm_runtime_get_sync() will increment the PM usage counter even when it fails. Forgetting the matching put operation results in a reference leak here. Fix it by replacing it with pm_runtime_resume_and_get() to keep the usage counter balanced.
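For reference, a minimal sketch of the two patterns (device pointer and error handling reduced to the essentials, function names invented for illustration): pm_runtime_get_sync() increments the usage counter even when resume fails, so the caller has to drop the reference; pm_runtime_resume_and_get() drops it itself on failure.

#include <linux/pm_runtime.h>

/* Old pattern: the counter must be dropped manually on failure. */
static int enable_with_get_sync(struct device *dev)
{
        int ret = pm_runtime_get_sync(dev);

        if (ret < 0) {
                pm_runtime_put_noidle(dev);     /* undo the increment */
                return ret;
        }
        return 0;
}

/* New pattern: the helper drops the reference itself if resume fails. */
static int enable_with_resume_and_get(struct device *dev)
{
        int ret = pm_runtime_resume_and_get(dev);

        if (ret < 0)
                return ret;
        return 0;
}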
Reported-by: Hulk Robot hulkci@huawei.com Signed-off-by: Wang Li wangli74@huawei.com Signed-off-by: Chun-Kuang Hu chunkuang.hu@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/gpu/drm/mediatek/mtk_drm_crtc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c index 40df2c823187..474efb844249 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c @@ -260,7 +260,7 @@ static int mtk_crtc_ddp_hw_init(struct mtk_drm_crtc *mtk_crtc) drm_connector_list_iter_end(&conn_iter); }
- ret = pm_runtime_get_sync(crtc->dev->dev); + ret = pm_runtime_resume_and_get(crtc->dev->dev); if (ret < 0) { DRM_ERROR("Failed to enable power domain: %d\n", ret); return ret;
From: Nicolas Boichat drinkcat@chromium.org
[ Upstream commit 09da3191827f2fd326205fb58881838e6ea36fb7 ]
GPUs with more than a single regulator (e.g. G72 on MT8183) will require platform-specific handling for devfreq, for 2 reasons: 1. The opp core (drivers/opp/core.c:_generic_set_opp_regulator) does not support multiple regulators, so we'll need custom handlers. 2. Generally, platforms with 2 regulators have platform-specific constraints on how the voltages should be set (e.g. minimum/maximum voltage difference between them), so we should not just create generic handlers that simply change the voltages without taking care of those constraints.
Disable devfreq for now on those GPUs.
Signed-off-by: Nicolas Boichat drinkcat@chromium.org Reviewed-by: Tomeu Vizoso tomeu.vizoso@collabora.com Reviewed-by: Steven Price steven.price@arm.com Signed-off-by: Steven Price steven.price@arm.com Link: https://patchwork.freedesktop.org/patch/msgid/20210421132841.v13.3.I3af068ab... Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/gpu/drm/panfrost/panfrost_devfreq.c | 9 +++++++++ 1 file changed, 9 insertions(+)
diff --git a/drivers/gpu/drm/panfrost/panfrost_devfreq.c b/drivers/gpu/drm/panfrost/panfrost_devfreq.c index 47d27e54a34f..3644652f726f 100644 --- a/drivers/gpu/drm/panfrost/panfrost_devfreq.c +++ b/drivers/gpu/drm/panfrost/panfrost_devfreq.c @@ -92,6 +92,15 @@ int panfrost_devfreq_init(struct panfrost_device *pfdev) struct thermal_cooling_device *cooling; struct panfrost_devfreq *pfdevfreq = &pfdev->pfdevfreq;
+ if (pfdev->comp->num_supplies > 1) { + /* + * GPUs with more than 1 supply require platform-specific handling: + * continue without devfreq + */ + DRM_DEV_INFO(dev, "More than 1 supply is not supported yet\n"); + return 0; + } + ret = devm_pm_opp_set_regulators(dev, pfdev->comp->supply_names, pfdev->comp->num_supplies); if (ret) {
From: Zou Wei zou_wei@huawei.com
[ Upstream commit 8d0b1fe81e18eb66a2d4406386760795fe0d77d9 ]
This patch adds the missing MODULE_DEVICE_TABLE definition, which generates the correct modalias for automatic loading of this driver when it is built as a module.
Reported-by: Hulk Robot hulkci@huawei.com Signed-off-by: Zou Wei zou_wei@huawei.com Reviewed-by: Robert Foss robert.foss@linaro.org Signed-off-by: Robert Foss robert.foss@linaro.org Link: https://patchwork.freedesktop.org/patch/msgid/1620801955-19188-1-git-send-em... Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/gpu/drm/bridge/lontium-lt9611.c | 1 + 1 file changed, 1 insertion(+)
diff --git a/drivers/gpu/drm/bridge/lontium-lt9611.c b/drivers/gpu/drm/bridge/lontium-lt9611.c index e8eb8deb444b..29b1ce2140ab 100644 --- a/drivers/gpu/drm/bridge/lontium-lt9611.c +++ b/drivers/gpu/drm/bridge/lontium-lt9611.c @@ -1215,6 +1215,7 @@ static struct i2c_device_id lt9611_id[] = { { "lontium,lt9611", 0 }, {} }; +MODULE_DEVICE_TABLE(i2c, lt9611_id);
static const struct of_device_id lt9611_match_table[] = { { .compatible = "lontium,lt9611" },
From: Xie Yongji xieyongji@bytedance.com
[ Upstream commit 17f46f488a5d82c5568e6e786cd760bba1c2ee09 ]
dev->dev_private might not be allocated if virtio_gpu_pci_quirk() or virtio_gpu_init() failed. In this case, we should avoid the cleanup in virtio_gpu_release().
Signed-off-by: Xie Yongji xieyongji@bytedance.com Link: http://patchwork.freedesktop.org/patch/msgid/20210517084913.403-1-xieyongji@... Signed-off-by: Gerd Hoffmann kraxel@redhat.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/gpu/drm/virtio/virtgpu_kms.c | 3 +++ 1 file changed, 3 insertions(+)
diff --git a/drivers/gpu/drm/virtio/virtgpu_kms.c b/drivers/gpu/drm/virtio/virtgpu_kms.c index b375394193be..aa532ad31a23 100644 --- a/drivers/gpu/drm/virtio/virtgpu_kms.c +++ b/drivers/gpu/drm/virtio/virtgpu_kms.c @@ -264,6 +264,9 @@ void virtio_gpu_release(struct drm_device *dev) { struct virtio_gpu_device *vgdev = dev->dev_private;
+ if (!vgdev) + return; + virtio_gpu_modeset_fini(vgdev); virtio_gpu_free_vbufs(vgdev); virtio_gpu_cleanup_cap_cache(vgdev);
From: Xie Yongji xieyongji@bytedance.com
[ Upstream commit cec7f1774605a5ef47c134af62afe7c75c30b0ee ]
virtio_gpu_init() will free vgdev and vgdev->vbufs on failure. But such a failure will be caught by virtio_gpu_probe(), and then virtio_gpu_release() will be called to do some cleanup, which would free vgdev and vgdev->vbufs again. So let's set dev->dev_private to NULL to avoid the double free.
Signed-off-by: Xie Yongji xieyongji@bytedance.com Link: http://patchwork.freedesktop.org/patch/msgid/20210517084913.403-2-xieyongji@... Signed-off-by: Gerd Hoffmann kraxel@redhat.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/gpu/drm/virtio/virtgpu_kms.c | 1 + 1 file changed, 1 insertion(+)
diff --git a/drivers/gpu/drm/virtio/virtgpu_kms.c b/drivers/gpu/drm/virtio/virtgpu_kms.c index aa532ad31a23..f3379059f324 100644 --- a/drivers/gpu/drm/virtio/virtgpu_kms.c +++ b/drivers/gpu/drm/virtio/virtgpu_kms.c @@ -234,6 +234,7 @@ int virtio_gpu_init(struct drm_device *dev) err_vbufs: vgdev->vdev->config->del_vqs(vgdev->vdev); err_vqs: + dev->dev_private = NULL; kfree(vgdev); return ret; }
From: Alex Deucher alexander.deucher@amd.com
[ Upstream commit 7230362c78d441020a47d7d5ca81f8a3d07bd9f0 ]
To stay consistent with the user's setting.
v2: rebase on multi-eDP support
Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/1337 Reviewed-by: Harry Wentland harry.wentland@amd.com Signed-off-by: Alex Deucher alexander.deucher@amd.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 6 ++++++ 1 file changed, 6 insertions(+)
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 875fd187463e..62663e287b21 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -8967,6 +8967,12 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) /* Update audio instances for each connector. */ amdgpu_dm_commit_audio(dev, state);
+#if defined(CONFIG_BACKLIGHT_CLASS_DEVICE) || \ + defined(CONFIG_BACKLIGHT_CLASS_DEVICE_MODULE) + /* restore the backlight level */ + if (dm->backlight_dev) + amdgpu_dm_backlight_set_level(dm, dm->brightness[0]); +#endif /* * send vblank event on all events not handled in flip and * mark consumed event for drm_atomic_helper_commit_hw_done
From: Andrey Grodzovsky andrey.grodzovsky@amd.com
[ Upstream commit c61cdbdbffc169dc7f1e6fe94dfffaf574fe672a ]
Problem: If the scheduler is already stopped by the time the sched_entity is released and the entity's job_queue is not empty, I encountered a hang in drm_sched_entity_flush. This is because drm_sched_entity_is_idle never becomes true.
Fix: In drm_sched_fini, detach all sched_entities from the scheduler's run queues. This will satisfy drm_sched_entity_is_idle. Also wake up all those processes stuck in sched_entity flushing, as the scheduler main thread which would normally wake them up is stopped by now.
v2: Reverse the order of drm_sched_rq_remove_entity and marking s_entity as stopped to prevent reinsertion back to the rq due to a race.
v3: Drop drm_sched_rq_remove_entity; only modify entity->stopped and check for it in drm_sched_entity_is_idle.
Signed-off-by: Andrey Grodzovsky andrey.grodzovsky@amd.com Reviewed-by: Christian König christian.koenig@amd.com Link: https://patchwork.freedesktop.org/patch/msgid/20210512142648.666476-14-andre... Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/gpu/drm/scheduler/sched_entity.c | 3 ++- drivers/gpu/drm/scheduler/sched_main.c | 24 ++++++++++++++++++++++++ 2 files changed, 26 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c index f0790e9471d1..cb58f692dad9 100644 --- a/drivers/gpu/drm/scheduler/sched_entity.c +++ b/drivers/gpu/drm/scheduler/sched_entity.c @@ -116,7 +116,8 @@ static bool drm_sched_entity_is_idle(struct drm_sched_entity *entity) rmb(); /* for list_empty to work without lock */
if (list_empty(&entity->list) || - spsc_queue_count(&entity->job_queue) == 0) + spsc_queue_count(&entity->job_queue) == 0 || + entity->stopped) return true;
return false; diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c index 92d8de24d0a1..c105c807d7e5 100644 --- a/drivers/gpu/drm/scheduler/sched_main.c +++ b/drivers/gpu/drm/scheduler/sched_main.c @@ -888,9 +888,33 @@ EXPORT_SYMBOL(drm_sched_init); */ void drm_sched_fini(struct drm_gpu_scheduler *sched) { + struct drm_sched_entity *s_entity; + int i; + if (sched->thread) kthread_stop(sched->thread);
+ for (i = DRM_SCHED_PRIORITY_COUNT - 1; i >= DRM_SCHED_PRIORITY_MIN; i--) { + struct drm_sched_rq *rq = &sched->sched_rq[i]; + + if (!rq) + continue; + + spin_lock(&rq->lock); + list_for_each_entry(s_entity, &rq->entities, list) + /* + * Prevents reinsertion and marks job_queue as idle, + * it will removed from rq in drm_sched_entity_fini + * eventually + */ + s_entity->stopped = true; + spin_unlock(&rq->lock); + + } + + /* Wakeup everyone stuck in drm_sched_entity_flush for this scheduler */ + wake_up_all(&sched->job_scheduled); + /* Confirm no work left behind accessing device structures */ cancel_delayed_work_sync(&sched->work_tdr);
From: Andrey Grodzovsky andrey.grodzovsky@amd.com
[ Upstream commit 0b10ab80695d61422337ede6ff496552d8ace99d ]
Wait for all dependencies of a job to complete before killing it to avoid data corruption.
Signed-off-by: Andrey Grodzovsky andrey.grodzovsky@amd.com Reviewed-by: Christian König christian.koenig@amd.com Link: https://patchwork.freedesktop.org/patch/msgid/20210519141407.88444-1-andrey.... Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/gpu/drm/scheduler/sched_entity.c | 5 +++++ 1 file changed, 5 insertions(+)
diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c index cb58f692dad9..86a4209d8c77 100644 --- a/drivers/gpu/drm/scheduler/sched_entity.c +++ b/drivers/gpu/drm/scheduler/sched_entity.c @@ -222,11 +222,16 @@ static void drm_sched_entity_kill_jobs_cb(struct dma_fence *f, static void drm_sched_entity_kill_jobs(struct drm_sched_entity *entity) { struct drm_sched_job *job; + struct dma_fence *f; int r;
while ((job = to_drm_sched_job(spsc_queue_pop(&entity->job_queue)))) { struct drm_sched_fence *s_fence = job->s_fence;
+ /* Wait for all dependencies to avoid data corruptions */ + while ((f = job->sched->ops->dependency(job, entity))) + dma_fence_wait(f, false); + drm_sched_fence_scheduled(s_fence); dma_fence_set_error(&s_fence->finished, -ESRCH);
From: Feifei Xu Feifei.Xu@amd.com
[ Upstream commit 5051cb794ac5d92154e186d87cdc12cba613f4f6 ]
For default cases, we should return 0. Otherwise, resume will abort because of the wrong return value.
Signed-off-by: Feifei Xu Feifei.Xu@amd.com Reviewed-by: Lijo Lazar lijo.lazar@amd.com Signed-off-by: Alex Deucher alexander.deucher@amd.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c index dcbe3a72da09..16ad4683eb69 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c @@ -1779,10 +1779,8 @@ static int aldebaran_set_mp1_state(struct smu_context *smu, case PP_MP1_STATE_UNLOAD: return smu_cmn_set_mp1_state(smu, mp1_state); default: - return -EINVAL; + return 0; } - - return 0; }
static const struct pptable_funcs aldebaran_ppt_funcs = {
From: Mateusz Kwiatkowski kfyatek+publicgit@gmail.com
[ Upstream commit fc7a8abcee2225d6279ff785d33e24d70c738c6e ]
On the BCM2711 (Raspberry Pi 4), the VEC is actually connected to output 2 of pixelvalve3.
NOTE: This contradicts the Broadcom docs, but has been empirically tested and confirmed by Raspberry Pi firmware devs.
Signed-off-by: Mateusz Kwiatkowski kfyatek+publicgit@gmail.com Signed-off-by: Maxime Ripard maxime@cerno.tech Reviewed-by: Dave Stevenson dave.stevenson@raspberrypi.com Link: https://patchwork.freedesktop.org/patch/msgid/20210520150344.273900-2-maxime... Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/gpu/drm/vc4/vc4_crtc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/vc4/vc4_crtc.c b/drivers/gpu/drm/vc4/vc4_crtc.c index 76657dcdf9b0..665ddf8f347f 100644 --- a/drivers/gpu/drm/vc4/vc4_crtc.c +++ b/drivers/gpu/drm/vc4/vc4_crtc.c @@ -994,7 +994,7 @@ static const struct vc4_pv_data bcm2711_pv3_data = { .fifo_depth = 64, .pixels_per_clock = 1, .encoder_types = { - [0] = VC4_ENCODER_TYPE_VEC, + [PV_CONTROL_CLK_SELECT_VEC] = VC4_ENCODER_TYPE_VEC, }, };
From: Zou Wei zou_wei@huawei.com
[ Upstream commit 5e4322a8b266bc9f5ee7ea4895f661c01dbd7cb3 ]
pm_runtime_get_sync will increment the pm usage counter even if it fails. Forgetting to put the reference will result in a reference leak here. Fix it by replacing it with pm_runtime_resume_and_get to keep the usage counter balanced.
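For reference, a minimal sketch of why the two helpers behave differently on failure (a hypothetical probe-style helper, not code from any of the drivers patched here):

#include <linux/pm_runtime.h>

/* Hypothetical helpers contrasting the two error-path styles. */
static int demo_get_sync_style(struct device *dev)
{
	/* pm_runtime_get_sync() bumps the usage counter even on failure,
	 * so a bare "return ret" here would leak a reference. */
	int ret = pm_runtime_get_sync(dev);

	if (ret < 0) {
		pm_runtime_put_noidle(dev); /* must undo the increment by hand */
		return ret;
	}
	return 0;
}

static int demo_resume_and_get_style(struct device *dev)
{
	/* pm_runtime_resume_and_get() drops the reference itself on failure,
	 * so the error path stays balanced without an explicit put. */
	int ret = pm_runtime_resume_and_get(dev);

	if (ret < 0)
		return ret;
	return 0;
}

The conversion below (and the two similar ones later in this series) removes the easy-to-miss put in the error path, which is exactly the class of leak being closed.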
Reported-by: Hulk Robot hulkci@huawei.com Signed-off-by: Zou Wei zou_wei@huawei.com Signed-off-by: Maxime Ripard maxime@cerno.tech Link: https://patchwork.freedesktop.org/patch/msgid/1621840854-105978-1-git-send-e... Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/gpu/drm/vc4/vc4_hdmi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.c b/drivers/gpu/drm/vc4/vc4_hdmi.c index 8106b5634fe1..d1c9819ea9f9 100644 --- a/drivers/gpu/drm/vc4/vc4_hdmi.c +++ b/drivers/gpu/drm/vc4/vc4_hdmi.c @@ -745,7 +745,7 @@ static void vc4_hdmi_encoder_pre_crtc_configure(struct drm_encoder *encoder, unsigned long pixel_rate, hsm_rate; int ret;
- ret = pm_runtime_get_sync(&vc4_hdmi->pdev->dev); + ret = pm_runtime_resume_and_get(&vc4_hdmi->pdev->dev); if (ret < 0) { DRM_ERROR("Failed to retain power domain: %d\n", ret); return;
From: Zou Wei zou_wei@huawei.com
[ Upstream commit 33f90f27e1c5ccd648d3e78a1c28be9ee8791cf1 ]
pm_runtime_get_sync will increment the pm usage counter even if it fails. Forgetting to put the reference will result in a reference leak here. Fix it by replacing it with pm_runtime_resume_and_get to keep the usage counter balanced.
Reported-by: Hulk Robot hulkci@huawei.com Signed-off-by: Zou Wei zou_wei@huawei.com Reviewed-by: Robert Foss robert.foss@linaro.org Signed-off-by: Robert Foss robert.foss@linaro.org Link: https://patchwork.freedesktop.org/patch/msgid/1621840862-106024-1-git-send-e... Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/gpu/drm/bridge/cdns-dsi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/bridge/cdns-dsi.c b/drivers/gpu/drm/bridge/cdns-dsi.c index 76373e31df92..b31281f76117 100644 --- a/drivers/gpu/drm/bridge/cdns-dsi.c +++ b/drivers/gpu/drm/bridge/cdns-dsi.c @@ -1028,7 +1028,7 @@ static ssize_t cdns_dsi_transfer(struct mipi_dsi_host *host, struct mipi_dsi_packet packet; int ret, i, tx_len, rx_len;
- ret = pm_runtime_get_sync(host->dev); + ret = pm_runtime_resume_and_get(host->dev); if (ret < 0) return ret;
From: Dmytro Laktyushkin Dmytro.Laktyushkin@amd.com
[ Upstream commit 6566cae7aef30da8833f1fa0eb854baf33b96676 ]
There are two issues with the scaling calculations: the ODM recout calculation and matching the viewport to the actual recout.
This change fixes both issues: the ODM recout calculation via special casing, and the viewport matching issue by reworking the viewport calculation to use the scaling ratios and recout to derive the required offset and size.
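As a rough illustration of the init arithmetic the rework is built around (plain doubles and made-up example numbers, not the driver's dc_fixpt 31.32 fixed-point type):

#include <math.h>
#include <stdio.h>

int main(void)
{
	double ratio = 2.0;	/* example 2:1 downscale ratio */
	int taps = 4;		/* example filter tap count */
	int recout_size = 960;	/* example recout width for this pipe */

	/* init = (scaling_ratio + number_of_taps + 1) / 2 */
	double init = (ratio + taps + 1) / 2.0;

	/* the last recout pixel samples init + ratio * (recout_size - 1),
	 * so the viewport must extend at least that far into the source */
	int vp_size = (int)floor(init + ratio * (recout_size - 1));

	printf("init = %.2f, viewport size = %d source pixels\n", init, vp_size);
	return 0;
}

The patch expresses the same relationship with the dc_fixpt_* helpers, truncates init to 19 fractional bits, and additionally clamps the viewport against the surface and adjusts the offset when the taps would otherwise sample outside it.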
Signed-off-by: Dmytro Laktyushkin Dmytro.Laktyushkin@amd.com Reviewed-by: Jun Lei Jun.Lei@amd.com Acked-by: Qingqing Zhuo qingqing.zhuo@amd.com Tested-by: Daniel Wheeler daniel.wheeler@amd.com Signed-off-by: Alex Deucher alexander.deucher@amd.com Signed-off-by: Sasha Levin sashal@kernel.org --- .../gpu/drm/amd/display/dc/core/dc_resource.c | 568 +++++++----------- drivers/gpu/drm/amd/display/dc/dc_types.h | 5 - .../drm/amd/display/dc/dcn10/dcn10_dpp_dscl.c | 12 +- .../drm/amd/display/dc/dcn20/dcn20_resource.c | 14 +- .../amd/display/dc/dml/display_mode_structs.h | 2 + .../drm/amd/display/dc/dml/display_mode_vba.c | 13 + .../gpu/drm/amd/display/dc/inc/hw/transform.h | 4 - 7 files changed, 232 insertions(+), 386 deletions(-)
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index 8cb937c046aa..78278a10d899 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -695,124 +695,23 @@ static void calculate_split_count_and_index(struct pipe_ctx *pipe_ctx, int *spli } }
-static void calculate_viewport(struct pipe_ctx *pipe_ctx) +/* + * This is a preliminary vp size calculation to allow us to check taps support. + * The result is completely overridden afterwards. + */ +static void calculate_viewport_size(struct pipe_ctx *pipe_ctx) { - const struct dc_plane_state *plane_state = pipe_ctx->plane_state; - const struct dc_stream_state *stream = pipe_ctx->stream; struct scaler_data *data = &pipe_ctx->plane_res.scl_data; - struct rect surf_src = plane_state->src_rect; - struct rect clip, dest; - int vpc_div = (data->format == PIXEL_FORMAT_420BPP8 - || data->format == PIXEL_FORMAT_420BPP10) ? 2 : 1; - int split_count = 0; - int split_idx = 0; - bool orthogonal_rotation, flip_y_start, flip_x_start; - - calculate_split_count_and_index(pipe_ctx, &split_count, &split_idx);
- if (stream->view_format == VIEW_3D_FORMAT_SIDE_BY_SIDE || - stream->view_format == VIEW_3D_FORMAT_TOP_AND_BOTTOM) { - split_count = 0; - split_idx = 0; - } - - /* The actual clip is an intersection between stream - * source and surface clip - */ - dest = plane_state->dst_rect; - clip.x = stream->src.x > plane_state->clip_rect.x ? - stream->src.x : plane_state->clip_rect.x; - - clip.width = stream->src.x + stream->src.width < - plane_state->clip_rect.x + plane_state->clip_rect.width ? - stream->src.x + stream->src.width - clip.x : - plane_state->clip_rect.x + plane_state->clip_rect.width - clip.x ; - - clip.y = stream->src.y > plane_state->clip_rect.y ? - stream->src.y : plane_state->clip_rect.y; - - clip.height = stream->src.y + stream->src.height < - plane_state->clip_rect.y + plane_state->clip_rect.height ? - stream->src.y + stream->src.height - clip.y : - plane_state->clip_rect.y + plane_state->clip_rect.height - clip.y ; - - /* - * Need to calculate how scan origin is shifted in vp space - * to correctly rotate clip and dst - */ - get_vp_scan_direction( - plane_state->rotation, - plane_state->horizontal_mirror, - &orthogonal_rotation, - &flip_y_start, - &flip_x_start); - - if (orthogonal_rotation) { - swap(clip.x, clip.y); - swap(clip.width, clip.height); - swap(dest.x, dest.y); - swap(dest.width, dest.height); - } - if (flip_x_start) { - clip.x = dest.x + dest.width - clip.x - clip.width; - dest.x = 0; - } - if (flip_y_start) { - clip.y = dest.y + dest.height - clip.y - clip.height; - dest.y = 0; - } - - /* offset = surf_src.ofs + (clip.ofs - surface->dst_rect.ofs) * scl_ratio - * num_pixels = clip.num_pix * scl_ratio - */ - data->viewport.x = surf_src.x + (clip.x - dest.x) * surf_src.width / dest.width; - data->viewport.width = clip.width * surf_src.width / dest.width; - - data->viewport.y = surf_src.y + (clip.y - dest.y) * surf_src.height / dest.height; - data->viewport.height = clip.height * surf_src.height / dest.height; - - /* Handle split */ - if (split_count) { - /* extra pixels in the division remainder need to go to pipes after - * the extra pixel index minus one(epimo) defined here as: - */ - int epimo = 0; - - if (orthogonal_rotation) { - if (flip_y_start) - split_idx = split_count - split_idx; - - epimo = split_count - data->viewport.height % (split_count + 1); - - data->viewport.y += (data->viewport.height / (split_count + 1)) * split_idx; - if (split_idx > epimo) - data->viewport.y += split_idx - epimo - 1; - data->viewport.height = data->viewport.height / (split_count + 1) + (split_idx > epimo ? 1 : 0); - } else { - if (flip_x_start) - split_idx = split_count - split_idx; - - epimo = split_count - data->viewport.width % (split_count + 1); - - data->viewport.x += (data->viewport.width / (split_count + 1)) * split_idx; - if (split_idx > epimo) - data->viewport.x += split_idx - epimo - 1; - data->viewport.width = data->viewport.width / (split_count + 1) + (split_idx > epimo ? 
1 : 0); - } + data->viewport.width = dc_fixpt_ceil(dc_fixpt_mul_int(data->ratios.horz, data->recout.width)); + data->viewport.height = dc_fixpt_ceil(dc_fixpt_mul_int(data->ratios.vert, data->recout.height)); + data->viewport_c.width = dc_fixpt_ceil(dc_fixpt_mul_int(data->ratios.horz_c, data->recout.width)); + data->viewport_c.height = dc_fixpt_ceil(dc_fixpt_mul_int(data->ratios.vert_c, data->recout.height)); + if (pipe_ctx->plane_state->rotation == ROTATION_ANGLE_90 || + pipe_ctx->plane_state->rotation == ROTATION_ANGLE_270) { + swap(data->viewport.width, data->viewport.height); + swap(data->viewport_c.width, data->viewport_c.height); } - - /* Round down, compensate in init */ - data->viewport_c.x = data->viewport.x / vpc_div; - data->viewport_c.y = data->viewport.y / vpc_div; - data->inits.h_c = (data->viewport.x % vpc_div) != 0 ? dc_fixpt_half : dc_fixpt_zero; - data->inits.v_c = (data->viewport.y % vpc_div) != 0 ? dc_fixpt_half : dc_fixpt_zero; - - /* Round up, assume original video size always even dimensions */ - data->viewport_c.width = (data->viewport.width + vpc_div - 1) / vpc_div; - data->viewport_c.height = (data->viewport.height + vpc_div - 1) / vpc_div; - - data->viewport_unadjusted = data->viewport; - data->viewport_c_unadjusted = data->viewport_c; }
static void calculate_recout(struct pipe_ctx *pipe_ctx) @@ -821,26 +720,21 @@ static void calculate_recout(struct pipe_ctx *pipe_ctx) const struct dc_stream_state *stream = pipe_ctx->stream; struct scaler_data *data = &pipe_ctx->plane_res.scl_data; struct rect surf_clip = plane_state->clip_rect; - bool pri_split_tb = pipe_ctx->bottom_pipe && - pipe_ctx->bottom_pipe->plane_state == pipe_ctx->plane_state && - stream->view_format == VIEW_3D_FORMAT_TOP_AND_BOTTOM; - bool sec_split_tb = pipe_ctx->top_pipe && - pipe_ctx->top_pipe->plane_state == pipe_ctx->plane_state && - stream->view_format == VIEW_3D_FORMAT_TOP_AND_BOTTOM; - int split_count = 0; - int split_idx = 0; + bool split_tb = stream->view_format == VIEW_3D_FORMAT_TOP_AND_BOTTOM; + int split_count, split_idx;
calculate_split_count_and_index(pipe_ctx, &split_count, &split_idx); + if (stream->view_format == VIEW_3D_FORMAT_SIDE_BY_SIDE) + split_idx = 0;
/* * Only the leftmost ODM pipe should be offset by a nonzero distance */ - if (!pipe_ctx->prev_odm_pipe) { + if (!pipe_ctx->prev_odm_pipe || split_idx == split_count) { data->recout.x = stream->dst.x; if (stream->src.x < surf_clip.x) data->recout.x += (surf_clip.x - stream->src.x) * stream->dst.width / stream->src.width; - } else data->recout.x = 0;
@@ -861,26 +755,31 @@ static void calculate_recout(struct pipe_ctx *pipe_ctx) if (data->recout.height + data->recout.y > stream->dst.y + stream->dst.height) data->recout.height = stream->dst.y + stream->dst.height - data->recout.y;
- /* Handle h & v split, handle rotation using viewport */ - if (sec_split_tb) { - data->recout.y += data->recout.height / 2; - /* Floor primary pipe, ceil 2ndary pipe */ - data->recout.height = (data->recout.height + 1) / 2; - } else if (pri_split_tb) + /* Handle h & v split */ + if (split_tb) { + ASSERT(data->recout.height % 2 == 0); data->recout.height /= 2; - else if (split_count) { - /* extra pixels in the division remainder need to go to pipes after - * the extra pixel index minus one(epimo) defined here as: - */ - int epimo = split_count - data->recout.width % (split_count + 1); - - /*no recout offset due to odm */ + } else if (split_count) { if (!pipe_ctx->next_odm_pipe && !pipe_ctx->prev_odm_pipe) { + /* extra pixels in the division remainder need to go to pipes after + * the extra pixel index minus one(epimo) defined here as: + */ + int epimo = split_count - data->recout.width % (split_count + 1); + data->recout.x += (data->recout.width / (split_count + 1)) * split_idx; if (split_idx > epimo) data->recout.x += split_idx - epimo - 1; + ASSERT(stream->view_format != VIEW_3D_FORMAT_SIDE_BY_SIDE || data->recout.width % 2 == 0); + data->recout.width = data->recout.width / (split_count + 1) + (split_idx > epimo ? 1 : 0); + } else { + /* odm */ + if (split_idx == split_count) { + /* rightmost pipe is the remainder recout */ + data->recout.width -= data->h_active * split_count - data->recout.x; + data->recout.x = 0; + } else + data->recout.width = data->h_active - data->recout.x; } - data->recout.width = data->recout.width / (split_count + 1) + (split_idx > epimo ? 1 : 0); } }
@@ -934,9 +833,15 @@ static void calculate_scaling_ratios(struct pipe_ctx *pipe_ctx) pipe_ctx->plane_res.scl_data.ratios.vert_c, 19); }
-static inline void adjust_vp_and_init_for_seamless_clip( + +/* + * We completely calculate vp offset, size and inits here based entirely on scaling + * ratios and recout for pixel perfect pipe combine. + */ +static void calculate_init_and_vp( bool flip_scan_dir, - int recout_skip, + int recout_offset_within_recout_full, + int recout_size, int src_size, int taps, struct fixed31_32 ratio, @@ -944,91 +849,87 @@ static inline void adjust_vp_and_init_for_seamless_clip( int *vp_offset, int *vp_size) { - if (!flip_scan_dir) { - /* Adjust for viewport end clip-off */ - if ((*vp_offset + *vp_size) < src_size) { - int vp_clip = src_size - *vp_size - *vp_offset; - int int_part = dc_fixpt_floor(dc_fixpt_sub(*init, ratio)); - - int_part = int_part > 0 ? int_part : 0; - *vp_size += int_part < vp_clip ? int_part : vp_clip; - } - - /* Adjust for non-0 viewport offset */ - if (*vp_offset) { - int int_part; - - *init = dc_fixpt_add(*init, dc_fixpt_mul_int(ratio, recout_skip)); - int_part = dc_fixpt_floor(*init) - *vp_offset; - if (int_part < taps) { - int int_adj = *vp_offset >= (taps - int_part) ? - (taps - int_part) : *vp_offset; - *vp_offset -= int_adj; - *vp_size += int_adj; - int_part += int_adj; - } else if (int_part > taps) { - *vp_offset += int_part - taps; - *vp_size -= int_part - taps; - int_part = taps; - } - init->value &= 0xffffffff; - *init = dc_fixpt_add_int(*init, int_part); - } - } else { - /* Adjust for non-0 viewport offset */ - if (*vp_offset) { - int int_part = dc_fixpt_floor(dc_fixpt_sub(*init, ratio)); - - int_part = int_part > 0 ? int_part : 0; - *vp_size += int_part < *vp_offset ? int_part : *vp_offset; - *vp_offset -= int_part < *vp_offset ? int_part : *vp_offset; - } + struct fixed31_32 temp; + int int_part;
- /* Adjust for viewport end clip-off */ - if ((*vp_offset + *vp_size) < src_size) { - int int_part; - int end_offset = src_size - *vp_offset - *vp_size; - - /* - * this is init if vp had no offset, keep in mind this is from the - * right side of vp due to scan direction - */ - *init = dc_fixpt_add(*init, dc_fixpt_mul_int(ratio, recout_skip)); - /* - * this is the difference between first pixel of viewport available to read - * and init position, takning into account scan direction - */ - int_part = dc_fixpt_floor(*init) - end_offset; - if (int_part < taps) { - int int_adj = end_offset >= (taps - int_part) ? - (taps - int_part) : end_offset; - *vp_size += int_adj; - int_part += int_adj; - } else if (int_part > taps) { - *vp_size += int_part - taps; - int_part = taps; - } - init->value &= 0xffffffff; - *init = dc_fixpt_add_int(*init, int_part); - } + /* + * First of the taps starts sampling pixel number <init_int_part> corresponding to recout + * pixel 1. Next recout pixel samples int part of <init + scaling ratio> and so on. + * All following calculations are based on this logic. + * + * Init calculated according to formula: + * init = (scaling_ratio + number_of_taps + 1) / 2 + * init_bot = init + scaling_ratio + * to get pixel perfect combine add the fraction from calculating vp offset + */ + temp = dc_fixpt_mul_int(ratio, recout_offset_within_recout_full); + *vp_offset = dc_fixpt_floor(temp); + temp.value &= 0xffffffff; + *init = dc_fixpt_truncate(dc_fixpt_add(dc_fixpt_div_int( + dc_fixpt_add_int(ratio, taps + 1), 2), temp), 19); + /* + * If viewport has non 0 offset and there are more taps than covered by init then + * we should decrease the offset and increase init so we are never sampling + * outside of viewport. + */ + int_part = dc_fixpt_floor(*init); + if (int_part < taps) { + int_part = taps - int_part; + if (int_part > *vp_offset) + int_part = *vp_offset; + *vp_offset -= int_part; + *init = dc_fixpt_add_int(*init, int_part); } + /* + * If taps are sampling outside of viewport at end of recout and there are more pixels + * available in the surface we should increase the viewport size, regardless set vp to + * only what is used. + */ + temp = dc_fixpt_add(*init, dc_fixpt_mul_int(ratio, recout_size - 1)); + *vp_size = dc_fixpt_floor(temp); + if (*vp_size + *vp_offset > src_size) + *vp_size = src_size - *vp_offset; + + /* We did all the math assuming we are scanning same direction as display does, + * however mirror/rotation changes how vp scans vs how it is offset. If scan direction + * is flipped we simply need to calculate offset from the other side of plane. + * Note that outside of viewport all scaling hardware works in recout space. + */ + if (flip_scan_dir) + *vp_offset = src_size - *vp_offset - *vp_size; }
-static void calculate_inits_and_adj_vp(struct pipe_ctx *pipe_ctx) +static void calculate_inits_and_viewports(struct pipe_ctx *pipe_ctx) { const struct dc_plane_state *plane_state = pipe_ctx->plane_state; const struct dc_stream_state *stream = pipe_ctx->stream; - struct pipe_ctx *odm_pipe = pipe_ctx; struct scaler_data *data = &pipe_ctx->plane_res.scl_data; - struct rect src = pipe_ctx->plane_state->src_rect; - int recout_skip_h, recout_skip_v, surf_size_h, surf_size_v; + struct rect src = plane_state->src_rect; int vpc_div = (data->format == PIXEL_FORMAT_420BPP8 - || data->format == PIXEL_FORMAT_420BPP10) ? 2 : 1; + || data->format == PIXEL_FORMAT_420BPP10) ? 2 : 1; + int split_count, split_idx, ro_lb, ro_tb, recout_full_x, recout_full_y; bool orthogonal_rotation, flip_vert_scan_dir, flip_horz_scan_dir; - int odm_idx = 0;
+ calculate_split_count_and_index(pipe_ctx, &split_count, &split_idx); /* - * Need to calculate the scan direction for viewport to make adjustments + * recout full is what the recout would have been if we didnt clip + * the source plane at all. We only care about left(ro_lb) and top(ro_tb) + * offsets of recout within recout full because those are the directions + * we scan from and therefore the only ones that affect inits. + */ + recout_full_x = stream->dst.x + (plane_state->dst_rect.x - stream->src.x) + * stream->dst.width / stream->src.width; + recout_full_y = stream->dst.y + (plane_state->dst_rect.y - stream->src.y) + * stream->dst.height / stream->src.height; + if (pipe_ctx->prev_odm_pipe && split_idx) + ro_lb = data->h_active * split_idx - recout_full_x; + else + ro_lb = data->recout.x - recout_full_x; + ro_tb = data->recout.y - recout_full_y; + ASSERT(ro_lb >= 0 && ro_tb >= 0); + + /* + * Work in recout rotation since that requires less transformations */ get_vp_scan_direction( plane_state->rotation, @@ -1037,145 +938,62 @@ static void calculate_inits_and_adj_vp(struct pipe_ctx *pipe_ctx) &flip_vert_scan_dir, &flip_horz_scan_dir);
- /* Calculate src rect rotation adjusted to recout space */ - surf_size_h = src.x + src.width; - surf_size_v = src.y + src.height; - if (flip_horz_scan_dir) - src.x = 0; - if (flip_vert_scan_dir) - src.y = 0; if (orthogonal_rotation) { - swap(src.x, src.y); swap(src.width, src.height); + swap(flip_vert_scan_dir, flip_horz_scan_dir); }
- /*modified recout_skip_h calculation due to odm having no recout offset*/ - while (odm_pipe->prev_odm_pipe) { - odm_idx++; - odm_pipe = odm_pipe->prev_odm_pipe; - } - /*odm_pipe is the leftmost pipe in the ODM group*/ - recout_skip_h = odm_idx * data->recout.width; - - /* Recout matching initial vp offset = recout_offset - (stream dst offset + - * ((surf dst offset - stream src offset) * 1/ stream scaling ratio) - * - (surf surf_src offset * 1/ full scl ratio)) - */ - recout_skip_h += odm_pipe->plane_res.scl_data.recout.x - - (stream->dst.x + (plane_state->dst_rect.x - stream->src.x) - * stream->dst.width / stream->src.width - - src.x * plane_state->dst_rect.width / src.width - * stream->dst.width / stream->src.width); - - - recout_skip_v = data->recout.y - (stream->dst.y + (plane_state->dst_rect.y - stream->src.y) - * stream->dst.height / stream->src.height - - src.y * plane_state->dst_rect.height / src.height - * stream->dst.height / stream->src.height); - if (orthogonal_rotation) - swap(recout_skip_h, recout_skip_v); - /* - * Init calculated according to formula: - * init = (scaling_ratio + number_of_taps + 1) / 2 - * init_bot = init + scaling_ratio - * init_c = init + truncated_vp_c_offset(from calculate viewport) - */ - data->inits.h = dc_fixpt_truncate(dc_fixpt_div_int( - dc_fixpt_add_int(data->ratios.horz, data->taps.h_taps + 1), 2), 19); - - data->inits.h_c = dc_fixpt_truncate(dc_fixpt_add(data->inits.h_c, dc_fixpt_div_int( - dc_fixpt_add_int(data->ratios.horz_c, data->taps.h_taps_c + 1), 2)), 19); - - data->inits.v = dc_fixpt_truncate(dc_fixpt_div_int( - dc_fixpt_add_int(data->ratios.vert, data->taps.v_taps + 1), 2), 19); - - data->inits.v_c = dc_fixpt_truncate(dc_fixpt_add(data->inits.v_c, dc_fixpt_div_int( - dc_fixpt_add_int(data->ratios.vert_c, data->taps.v_taps_c + 1), 2)), 19); - - /* - * Taps, inits and scaling ratios are in recout space need to rotate - * to viewport rotation before adjustment - */ - adjust_vp_and_init_for_seamless_clip( + calculate_init_and_vp( flip_horz_scan_dir, - recout_skip_h, - surf_size_h, - orthogonal_rotation ? data->taps.v_taps : data->taps.h_taps, - orthogonal_rotation ? data->ratios.vert : data->ratios.horz, - orthogonal_rotation ? &data->inits.v : &data->inits.h, + ro_lb, + data->recout.width, + src.width, + data->taps.h_taps, + data->ratios.horz, + &data->inits.h, &data->viewport.x, &data->viewport.width); - adjust_vp_and_init_for_seamless_clip( + calculate_init_and_vp( flip_horz_scan_dir, - recout_skip_h, - surf_size_h / vpc_div, - orthogonal_rotation ? data->taps.v_taps_c : data->taps.h_taps_c, - orthogonal_rotation ? data->ratios.vert_c : data->ratios.horz_c, - orthogonal_rotation ? &data->inits.v_c : &data->inits.h_c, + ro_lb, + data->recout.width, + src.width / vpc_div, + data->taps.h_taps_c, + data->ratios.horz_c, + &data->inits.h_c, &data->viewport_c.x, &data->viewport_c.width); - adjust_vp_and_init_for_seamless_clip( + calculate_init_and_vp( flip_vert_scan_dir, - recout_skip_v, - surf_size_v, - orthogonal_rotation ? data->taps.h_taps : data->taps.v_taps, - orthogonal_rotation ? data->ratios.horz : data->ratios.vert, - orthogonal_rotation ? &data->inits.h : &data->inits.v, + ro_tb, + data->recout.height, + src.height, + data->taps.v_taps, + data->ratios.vert, + &data->inits.v, &data->viewport.y, &data->viewport.height); - adjust_vp_and_init_for_seamless_clip( + calculate_init_and_vp( flip_vert_scan_dir, - recout_skip_v, - surf_size_v / vpc_div, - orthogonal_rotation ? data->taps.h_taps_c : data->taps.v_taps_c, - orthogonal_rotation ? 
data->ratios.horz_c : data->ratios.vert_c, - orthogonal_rotation ? &data->inits.h_c : &data->inits.v_c, + ro_tb, + data->recout.height, + src.height / vpc_div, + data->taps.v_taps_c, + data->ratios.vert_c, + &data->inits.v_c, &data->viewport_c.y, &data->viewport_c.height); - - /* Interlaced inits based on final vert inits */ - data->inits.v_bot = dc_fixpt_add(data->inits.v, data->ratios.vert); - data->inits.v_c_bot = dc_fixpt_add(data->inits.v_c, data->ratios.vert_c); - -} - -/* - * When handling 270 rotation in mixed SLS mode, we have - * stream->timing.h_border_left that is non zero. If we are doing - * pipe-splitting, this h_border_left value gets added to recout.x and when it - * calls calculate_inits_and_adj_vp() and - * adjust_vp_and_init_for_seamless_clip(), it can cause viewport.height for a - * pipe to be incorrect. - * - * To fix this, instead of using stream->timing.h_border_left, we can use - * stream->dst.x to represent the border instead. So we will set h_border_left - * to 0 and shift the appropriate amount in stream->dst.x. We will then - * perform all calculations in resource_build_scaling_params() based on this - * and then restore the h_border_left and stream->dst.x to their original - * values. - * - * shift_border_left_to_dst() will shift the amount of h_border_left to - * stream->dst.x and set h_border_left to 0. restore_border_left_from_dst() - * will restore h_border_left and stream->dst.x back to their original values - * We also need to make sure pipe_ctx->plane_res.scl_data.h_active uses the - * original h_border_left value in its calculation. - */ -static int shift_border_left_to_dst(struct pipe_ctx *pipe_ctx) -{ - int store_h_border_left = pipe_ctx->stream->timing.h_border_left; - - if (store_h_border_left) { - pipe_ctx->stream->timing.h_border_left = 0; - pipe_ctx->stream->dst.x += store_h_border_left; + if (orthogonal_rotation) { + swap(data->viewport.x, data->viewport.y); + swap(data->viewport.width, data->viewport.height); + swap(data->viewport_c.x, data->viewport_c.y); + swap(data->viewport_c.width, data->viewport_c.height); } - return store_h_border_left; -} - -static void restore_border_left_from_dst(struct pipe_ctx *pipe_ctx, - int store_h_border_left) -{ - pipe_ctx->stream->dst.x -= store_h_border_left; - pipe_ctx->stream->timing.h_border_left = store_h_border_left; + data->viewport.x += src.x; + data->viewport.y += src.y; + ASSERT(src.x % vpc_div == 0 && src.y % vpc_div == 0); + data->viewport_c.x += src.x / vpc_div; + data->viewport_c.y += src.y / vpc_div; }
bool resource_build_scaling_params(struct pipe_ctx *pipe_ctx) @@ -1183,48 +1001,42 @@ bool resource_build_scaling_params(struct pipe_ctx *pipe_ctx) const struct dc_plane_state *plane_state = pipe_ctx->plane_state; struct dc_crtc_timing *timing = &pipe_ctx->stream->timing; bool res = false; - int store_h_border_left = shift_border_left_to_dst(pipe_ctx); DC_LOGGER_INIT(pipe_ctx->stream->ctx->logger); - /* Important: scaling ratio calculation requires pixel format, - * lb depth calculation requires recout and taps require scaling ratios. - * Inits require viewport, taps, ratios and recout of split pipe - */ + pipe_ctx->plane_res.scl_data.format = convert_pixel_format_to_dalsurface( pipe_ctx->plane_state->format);
- calculate_scaling_ratios(pipe_ctx); - - calculate_viewport(pipe_ctx); + /* Timing borders are part of vactive that we are also supposed to skip in addition + * to any stream dst offset. Since dm logic assumes dst is in addressable + * space we need to add the the left and top borders to dst offsets temporarily. + * TODO: fix in DM, stream dst is supposed to be in vactive + */ + pipe_ctx->stream->dst.x += timing->h_border_left; + pipe_ctx->stream->dst.y += timing->v_border_top;
- if (pipe_ctx->plane_res.scl_data.viewport.height < MIN_VIEWPORT_SIZE || - pipe_ctx->plane_res.scl_data.viewport.width < MIN_VIEWPORT_SIZE) { - if (store_h_border_left) { - restore_border_left_from_dst(pipe_ctx, - store_h_border_left); - } - return false; - } + /* Calculate H and V active size */ + pipe_ctx->plane_res.scl_data.h_active = timing->h_addressable + + timing->h_border_left + timing->h_border_right; + pipe_ctx->plane_res.scl_data.v_active = timing->v_addressable + + timing->v_border_top + timing->v_border_bottom; + if (pipe_ctx->next_odm_pipe || pipe_ctx->prev_odm_pipe) + pipe_ctx->plane_res.scl_data.h_active /= get_num_odm_splits(pipe_ctx) + 1;
+ /* depends on h_active */ calculate_recout(pipe_ctx); + /* depends on pixel format */ + calculate_scaling_ratios(pipe_ctx); + /* depends on scaling ratios and recout, does not calculate offset yet */ + calculate_viewport_size(pipe_ctx);
- /** + /* + * LB calculations depend on vp size, h/v_active and scaling ratios * Setting line buffer pixel depth to 24bpp yields banding * on certain displays, such as the Sharp 4k */ pipe_ctx->plane_res.scl_data.lb_params.depth = LB_PIXEL_DEPTH_30BPP; pipe_ctx->plane_res.scl_data.lb_params.alpha_en = plane_state->per_pixel_alpha;
- pipe_ctx->plane_res.scl_data.recout.x += timing->h_border_left; - pipe_ctx->plane_res.scl_data.recout.y += timing->v_border_top; - - pipe_ctx->plane_res.scl_data.h_active = timing->h_addressable + - store_h_border_left + timing->h_border_right; - pipe_ctx->plane_res.scl_data.v_active = timing->v_addressable + - timing->v_border_top + timing->v_border_bottom; - if (pipe_ctx->next_odm_pipe || pipe_ctx->prev_odm_pipe) - pipe_ctx->plane_res.scl_data.h_active /= get_num_odm_splits(pipe_ctx) + 1; - - /* Taps calculations */ if (pipe_ctx->plane_res.xfm != NULL) res = pipe_ctx->plane_res.xfm->funcs->transform_get_optimal_number_of_taps( pipe_ctx->plane_res.xfm, &pipe_ctx->plane_res.scl_data, &plane_state->scaling_quality); @@ -1251,9 +1063,31 @@ bool resource_build_scaling_params(struct pipe_ctx *pipe_ctx) &plane_state->scaling_quality); }
+ /* + * Depends on recout, scaling ratios, h_active and taps + * May need to re-check lb size after this in some obscure scenario + */ if (res) - /* May need to re-check lb size after this in some obscure scenario */ - calculate_inits_and_adj_vp(pipe_ctx); + calculate_inits_and_viewports(pipe_ctx); + + /* + * Handle side by side and top bottom 3d recout offsets after vp calculation + * since 3d is special and needs to calculate vp as if there is no recout offset + * This may break with rotation, good thing we aren't mixing hw rotation and 3d + */ + if (pipe_ctx->top_pipe && pipe_ctx->top_pipe->plane_state == plane_state) { + ASSERT(plane_state->rotation == ROTATION_ANGLE_0 || + (pipe_ctx->stream->view_format != VIEW_3D_FORMAT_TOP_AND_BOTTOM && + pipe_ctx->stream->view_format != VIEW_3D_FORMAT_SIDE_BY_SIDE)); + if (pipe_ctx->stream->view_format == VIEW_3D_FORMAT_TOP_AND_BOTTOM) + pipe_ctx->plane_res.scl_data.recout.y += pipe_ctx->plane_res.scl_data.recout.height; + else if (pipe_ctx->stream->view_format == VIEW_3D_FORMAT_SIDE_BY_SIDE) + pipe_ctx->plane_res.scl_data.recout.x += pipe_ctx->plane_res.scl_data.recout.width; + } + + if (pipe_ctx->plane_res.scl_data.viewport.height < MIN_VIEWPORT_SIZE || + pipe_ctx->plane_res.scl_data.viewport.width < MIN_VIEWPORT_SIZE) + res = false;
DC_LOG_SCALER("%s pipe %d:\nViewport: height:%d width:%d x:%d y:%d Recout: height:%d width:%d x:%d y:%d HACTIVE:%d VACTIVE:%d\n" "src_rect: height:%d width:%d x:%d y:%d dst_rect: height:%d width:%d x:%d y:%d clip_rect: height:%d width:%d x:%d y:%d\n", @@ -1282,8 +1116,8 @@ bool resource_build_scaling_params(struct pipe_ctx *pipe_ctx) plane_state->clip_rect.x, plane_state->clip_rect.y);
- if (store_h_border_left) - restore_border_left_from_dst(pipe_ctx, store_h_border_left); + pipe_ctx->stream->dst.x -= timing->h_border_left; + pipe_ctx->stream->dst.y -= timing->v_border_top;
return res; } diff --git a/drivers/gpu/drm/amd/display/dc/dc_types.h b/drivers/gpu/drm/amd/display/dc/dc_types.h index 432754eaf10b..a6f21f9de6e4 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_types.h @@ -271,11 +271,6 @@ struct dc_edid_caps { struct dc_panel_patch panel_patch; };
-struct view { - uint32_t width; - uint32_t height; -}; - struct dc_mode_flags { /* note: part of refresh rate flag*/ uint32_t INTERLACE :1; diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_dscl.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_dscl.c index 98ab4b776924..a33f522a2648 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_dscl.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_dscl.c @@ -631,8 +631,10 @@ static void dpp1_dscl_set_manual_ratio_init( SCL_V_INIT_INT, init_int);
if (REG(SCL_VERT_FILTER_INIT_BOT)) { - init_frac = dc_fixpt_u0d19(data->inits.v_bot) << 5; - init_int = dc_fixpt_floor(data->inits.v_bot); + struct fixed31_32 bot = dc_fixpt_add(data->inits.v, data->ratios.vert); + + init_frac = dc_fixpt_u0d19(bot) << 5; + init_int = dc_fixpt_floor(bot); REG_SET_2(SCL_VERT_FILTER_INIT_BOT, 0, SCL_V_INIT_FRAC_BOT, init_frac, SCL_V_INIT_INT_BOT, init_int); @@ -645,8 +647,10 @@ static void dpp1_dscl_set_manual_ratio_init( SCL_V_INIT_INT_C, init_int);
if (REG(SCL_VERT_FILTER_INIT_BOT_C)) { - init_frac = dc_fixpt_u0d19(data->inits.v_c_bot) << 5; - init_int = dc_fixpt_floor(data->inits.v_c_bot); + struct fixed31_32 bot = dc_fixpt_add(data->inits.v_c, data->ratios.vert_c); + + init_frac = dc_fixpt_u0d19(bot) << 5; + init_int = dc_fixpt_floor(bot); REG_SET_2(SCL_VERT_FILTER_INIT_BOT_C, 0, SCL_V_INIT_FRAC_BOT_C, init_frac, SCL_V_INIT_INT_BOT_C, init_int); diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c index 8357aa3c41d5..d7d70b9bb387 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c @@ -2289,12 +2289,14 @@ int dcn20_populate_dml_pipes_from_context(
pipes[pipe_cnt].pipe.src.source_scan = pln->rotation == ROTATION_ANGLE_90 || pln->rotation == ROTATION_ANGLE_270 ? dm_vert : dm_horz; - pipes[pipe_cnt].pipe.src.viewport_y_y = scl->viewport_unadjusted.y; - pipes[pipe_cnt].pipe.src.viewport_y_c = scl->viewport_c_unadjusted.y; - pipes[pipe_cnt].pipe.src.viewport_width = scl->viewport_unadjusted.width; - pipes[pipe_cnt].pipe.src.viewport_width_c = scl->viewport_c_unadjusted.width; - pipes[pipe_cnt].pipe.src.viewport_height = scl->viewport_unadjusted.height; - pipes[pipe_cnt].pipe.src.viewport_height_c = scl->viewport_c_unadjusted.height; + pipes[pipe_cnt].pipe.src.viewport_y_y = scl->viewport.y; + pipes[pipe_cnt].pipe.src.viewport_y_c = scl->viewport_c.y; + pipes[pipe_cnt].pipe.src.viewport_width = scl->viewport.width; + pipes[pipe_cnt].pipe.src.viewport_width_c = scl->viewport_c.width; + pipes[pipe_cnt].pipe.src.viewport_height = scl->viewport.height; + pipes[pipe_cnt].pipe.src.viewport_height_c = scl->viewport_c.height; + pipes[pipe_cnt].pipe.src.viewport_width_max = pln->src_rect.width; + pipes[pipe_cnt].pipe.src.viewport_height_max = pln->src_rect.height; pipes[pipe_cnt].pipe.src.surface_width_y = pln->plane_size.surface_size.width; pipes[pipe_cnt].pipe.src.surface_height_y = pln->plane_size.surface_size.height; pipes[pipe_cnt].pipe.src.surface_width_c = pln->plane_size.chroma_size.width; diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h index 2ece3690bfa3..a0f0c54c863b 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h @@ -253,6 +253,8 @@ struct _vcs_dpi_display_pipe_source_params_st { unsigned int viewport_y_c; unsigned int viewport_width_c; unsigned int viewport_height_c; + unsigned int viewport_width_max; + unsigned int viewport_height_max; unsigned int data_pitch; unsigned int data_pitch_c; unsigned int meta_pitch; diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c index 2a967458065b..8e5c9d22b364 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c @@ -630,6 +630,19 @@ static void fetch_pipe_params(struct display_mode_lib *mode_lib) } } } + if (src->viewport_width_max) { + int hdiv_c = src->source_format >= dm_420_8 && src->source_format <= dm_422_10 ? 2 : 1; + int vdiv_c = src->source_format >= dm_420_8 && src->source_format <= dm_420_12 ? 2 : 1; + + if (mode_lib->vba.ViewportWidth[mode_lib->vba.NumberOfActivePlanes] > src->viewport_width_max) + mode_lib->vba.ViewportWidth[mode_lib->vba.NumberOfActivePlanes] = src->viewport_width_max; + if (mode_lib->vba.ViewportHeight[mode_lib->vba.NumberOfActivePlanes] > src->viewport_height_max) + mode_lib->vba.ViewportHeight[mode_lib->vba.NumberOfActivePlanes] = src->viewport_height_max; + if (mode_lib->vba.ViewportWidthChroma[mode_lib->vba.NumberOfActivePlanes] > src->viewport_width_max / hdiv_c) + mode_lib->vba.ViewportWidthChroma[mode_lib->vba.NumberOfActivePlanes] = src->viewport_width_max / hdiv_c; + if (mode_lib->vba.ViewportHeightChroma[mode_lib->vba.NumberOfActivePlanes] > src->viewport_height_max / vdiv_c) + mode_lib->vba.ViewportHeightChroma[mode_lib->vba.NumberOfActivePlanes] = src->viewport_height_max / vdiv_c; + }
if (pipes[k].pipe.src.immediate_flip) { mode_lib->vba.ImmediateFlipSupport = true; diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/transform.h b/drivers/gpu/drm/amd/display/dc/inc/hw/transform.h index 2947d1b15512..2a0db2b03047 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/transform.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/transform.h @@ -162,9 +162,7 @@ struct scl_inits { struct fixed31_32 h; struct fixed31_32 h_c; struct fixed31_32 v; - struct fixed31_32 v_bot; struct fixed31_32 v_c; - struct fixed31_32 v_c_bot; };
struct scaler_data { @@ -173,8 +171,6 @@ struct scaler_data { struct scaling_taps taps; struct rect viewport; struct rect viewport_c; - struct rect viewport_unadjusted; - struct rect viewport_c_unadjusted; struct rect recout; struct scaling_ratios ratios; struct scl_inits inits;
From: Alex Deucher alexander.deucher@amd.com
[ Upstream commit dd1d82c04e111b5a864638ede8965db2fe6d8653 ]
If smu_cmn_get_enabled_mask() fails, return false to be consistent with other asics.
Reviewed-by: Lijo Lazar lijo.lazar@amd.com Signed-off-by: Alex Deucher alexander.deucher@amd.com Cc: Lee Jones lee.jones@linaro.org Reviewed-by: Lee Jones lee.jones@linaro.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c index 16ad4683eb69..0d2f61f56f45 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c @@ -1290,10 +1290,13 @@ static int aldebaran_usr_edit_dpm_table(struct smu_context *smu, enum PP_OD_DPM_
static bool aldebaran_is_dpm_running(struct smu_context *smu) { - int ret = 0; + int ret; uint32_t feature_mask[2]; unsigned long feature_enabled; + ret = smu_cmn_get_enabled_mask(smu, feature_mask, 2); + if (ret) + return false; feature_enabled = (unsigned long)((uint64_t)feature_mask[0] | ((uint64_t)feature_mask[1] << 32)); return !!(feature_enabled & SMC_DPM_FEATURE);
From: Alex Bee knaerzche@gmail.com
[ Upstream commit ab64b448a175b8a5a4bd323b8f74758c2574482c ]
Add dither_up, dsp_lut_en and data_blank registers to enable their respective functionality for RK3188's VOP. While at it, also fix the .dsp_blank register, which is (only) set with BIT24 (same as RK3066).
Signed-off-by: Alex Bee knaerzche@gmail.com Signed-off-by: Heiko Stuebner heiko@sntech.de Link: https://patchwork.freedesktop.org/patch/msgid/20210528130554.72191-3-knaerzc... Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/gpu/drm/rockchip/rockchip_vop_reg.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/rockchip/rockchip_vop_reg.c b/drivers/gpu/drm/rockchip/rockchip_vop_reg.c index 80053d91a301..b8dcee64a1f7 100644 --- a/drivers/gpu/drm/rockchip/rockchip_vop_reg.c +++ b/drivers/gpu/drm/rockchip/rockchip_vop_reg.c @@ -505,7 +505,10 @@ static const struct vop_common rk3188_common = { .dither_down_sel = VOP_REG(RK3188_DSP_CTRL0, 0x1, 27), .dither_down_en = VOP_REG(RK3188_DSP_CTRL0, 0x1, 11), .dither_down_mode = VOP_REG(RK3188_DSP_CTRL0, 0x1, 10), - .dsp_blank = VOP_REG(RK3188_DSP_CTRL1, 0x3, 24), + .dsp_blank = VOP_REG(RK3188_DSP_CTRL1, 0x1, 24), + .dither_up = VOP_REG(RK3188_DSP_CTRL0, 0x1, 9), + .dsp_lut_en = VOP_REG(RK3188_SYS_CTRL, 0x1, 28), + .data_blank = VOP_REG(RK3188_DSP_CTRL1, 0x1, 25), };
static const struct vop_win_data rk3188_vop_win_data[] = {
From: Alex Bee knaerzche@gmail.com
[ Upstream commit 742203cd56d150eb7884eb45abb7d9dbc2bdbf04 ]
Add dither_up, dsp_lut_en and data_blank registers to enable their respective functionality for RK3066's VOP.
While at it, also fix the .rb_swap and .format registers for all windows, which have to be set through the RK3066_SYS_CTRL1 register. Also remove .scl from win1: scaling is only supported on the primary plane.
Signed-off-by: Alex Bee knaerzche@gmail.com Signed-off-by: Heiko Stuebner heiko@sntech.de Link: https://patchwork.freedesktop.org/patch/msgid/20210528130554.72191-4-knaerzc... Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/gpu/drm/rockchip/rockchip_vop_reg.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-)
diff --git a/drivers/gpu/drm/rockchip/rockchip_vop_reg.c b/drivers/gpu/drm/rockchip/rockchip_vop_reg.c index b8dcee64a1f7..a6fe03c3748a 100644 --- a/drivers/gpu/drm/rockchip/rockchip_vop_reg.c +++ b/drivers/gpu/drm/rockchip/rockchip_vop_reg.c @@ -349,8 +349,8 @@ static const struct vop_win_phy rk3066_win0_data = { .nformats = ARRAY_SIZE(formats_win_full), .format_modifiers = format_modifiers_win_full, .enable = VOP_REG(RK3066_SYS_CTRL1, 0x1, 0), - .format = VOP_REG(RK3066_SYS_CTRL0, 0x7, 4), - .rb_swap = VOP_REG(RK3066_SYS_CTRL0, 0x1, 19), + .format = VOP_REG(RK3066_SYS_CTRL1, 0x7, 4), + .rb_swap = VOP_REG(RK3066_SYS_CTRL1, 0x1, 19), .act_info = VOP_REG(RK3066_WIN0_ACT_INFO, 0x1fff1fff, 0), .dsp_info = VOP_REG(RK3066_WIN0_DSP_INFO, 0x0fff0fff, 0), .dsp_st = VOP_REG(RK3066_WIN0_DSP_ST, 0x1fff1fff, 0), @@ -361,13 +361,12 @@ static const struct vop_win_phy rk3066_win0_data = { };
static const struct vop_win_phy rk3066_win1_data = { - .scl = &rk3066_win_scl, .data_formats = formats_win_full, .nformats = ARRAY_SIZE(formats_win_full), .format_modifiers = format_modifiers_win_full, .enable = VOP_REG(RK3066_SYS_CTRL1, 0x1, 1), - .format = VOP_REG(RK3066_SYS_CTRL0, 0x7, 7), - .rb_swap = VOP_REG(RK3066_SYS_CTRL0, 0x1, 23), + .format = VOP_REG(RK3066_SYS_CTRL1, 0x7, 7), + .rb_swap = VOP_REG(RK3066_SYS_CTRL1, 0x1, 23), .act_info = VOP_REG(RK3066_WIN1_ACT_INFO, 0x1fff1fff, 0), .dsp_info = VOP_REG(RK3066_WIN1_DSP_INFO, 0x0fff0fff, 0), .dsp_st = VOP_REG(RK3066_WIN1_DSP_ST, 0x1fff1fff, 0), @@ -382,8 +381,8 @@ static const struct vop_win_phy rk3066_win2_data = { .nformats = ARRAY_SIZE(formats_win_lite), .format_modifiers = format_modifiers_win_lite, .enable = VOP_REG(RK3066_SYS_CTRL1, 0x1, 2), - .format = VOP_REG(RK3066_SYS_CTRL0, 0x7, 10), - .rb_swap = VOP_REG(RK3066_SYS_CTRL0, 0x1, 27), + .format = VOP_REG(RK3066_SYS_CTRL1, 0x7, 10), + .rb_swap = VOP_REG(RK3066_SYS_CTRL1, 0x1, 27), .dsp_info = VOP_REG(RK3066_WIN2_DSP_INFO, 0x0fff0fff, 0), .dsp_st = VOP_REG(RK3066_WIN2_DSP_ST, 0x1fff1fff, 0), .yrgb_mst = VOP_REG(RK3066_WIN2_MST, 0xffffffff, 0), @@ -408,6 +407,9 @@ static const struct vop_common rk3066_common = { .dither_down_en = VOP_REG(RK3066_DSP_CTRL0, 0x1, 11), .dither_down_mode = VOP_REG(RK3066_DSP_CTRL0, 0x1, 10), .dsp_blank = VOP_REG(RK3066_DSP_CTRL1, 0x1, 24), + .dither_up = VOP_REG(RK3066_DSP_CTRL0, 0x1, 9), + .dsp_lut_en = VOP_REG(RK3066_SYS_CTRL1, 0x1, 31), + .data_blank = VOP_REG(RK3066_DSP_CTRL1, 0x1, 25), };
static const struct vop_win_data rk3066_vop_win_data[] = {
From: Thierry Reding treding@nvidia.com
[ Upstream commit e16efff4e5f490ce34a8c60d9ae7297dca5eb616 ]
The driver currently exposes several YUV formats but fails to properly program all the registers needed to display such formats. Add the right programming sequences so that overlay windows can be used to accelerate color format conversions in multimedia playback use-cases.
Signed-off-by: Thierry Reding treding@nvidia.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/gpu/drm/tegra/dc.c | 2 +- drivers/gpu/drm/tegra/dc.h | 7 +++++ drivers/gpu/drm/tegra/hub.c | 52 +++++++++++++++++++++++++++++++---- drivers/gpu/drm/tegra/plane.c | 23 ++++++++++++++-- drivers/gpu/drm/tegra/plane.h | 3 +- 5 files changed, 78 insertions(+), 9 deletions(-)
diff --git a/drivers/gpu/drm/tegra/dc.c b/drivers/gpu/drm/tegra/dc.c index f9120dc24682..c5ea88a686d1 100644 --- a/drivers/gpu/drm/tegra/dc.c +++ b/drivers/gpu/drm/tegra/dc.c @@ -348,7 +348,7 @@ static void tegra_dc_setup_window(struct tegra_plane *plane, * For YUV planar modes, the number of bytes per pixel takes into * account only the luma component and therefore is 1. */ - yuv = tegra_plane_format_is_yuv(window->format, &planar); + yuv = tegra_plane_format_is_yuv(window->format, &planar, NULL); if (!yuv) bpp = window->bits_per_pixel / 8; else diff --git a/drivers/gpu/drm/tegra/dc.h b/drivers/gpu/drm/tegra/dc.h index 29f19c3c6149..455c3fdef8dc 100644 --- a/drivers/gpu/drm/tegra/dc.h +++ b/drivers/gpu/drm/tegra/dc.h @@ -696,6 +696,9 @@ int tegra_dc_rgb_exit(struct tegra_dc *dc);
#define DC_WINBUF_START_ADDR_HI 0x80d
+#define DC_WINBUF_START_ADDR_HI_U 0x80f +#define DC_WINBUF_START_ADDR_HI_V 0x811 + #define DC_WINBUF_CDE_CONTROL 0x82f #define ENABLE_SURFACE (1 << 0)
@@ -720,6 +723,10 @@ int tegra_dc_rgb_exit(struct tegra_dc *dc); #define DC_WIN_PLANAR_STORAGE 0x709 #define PITCH(x) (((x) >> 6) & 0x1fff)
+#define DC_WIN_PLANAR_STORAGE_UV 0x70a +#define PITCH_U(x) ((((x) >> 6) & 0x1fff) << 0) +#define PITCH_V(x) ((((x) >> 6) & 0x1fff) << 16) + #define DC_WIN_SET_PARAMS 0x70d #define CLAMP_BEFORE_BLEND (1 << 15) #define DEGAMMA_NONE (0 << 13) diff --git a/drivers/gpu/drm/tegra/hub.c b/drivers/gpu/drm/tegra/hub.c index bfae8a02f55b..94e1ccfb6235 100644 --- a/drivers/gpu/drm/tegra/hub.c +++ b/drivers/gpu/drm/tegra/hub.c @@ -454,7 +454,9 @@ static void tegra_shared_plane_atomic_update(struct drm_plane *plane, unsigned int zpos = new_state->normalized_zpos; struct drm_framebuffer *fb = new_state->fb; struct tegra_plane *p = to_tegra_plane(plane); - dma_addr_t base; + dma_addr_t base, addr_flag = 0; + unsigned int bpc; + bool yuv, planar; u32 value; int err;
@@ -473,6 +475,8 @@ static void tegra_shared_plane_atomic_update(struct drm_plane *plane, return; }
+ yuv = tegra_plane_format_is_yuv(tegra_plane_state->format, &planar, &bpc); + tegra_dc_assign_shared_plane(dc, p);
tegra_plane_writel(p, VCOUNTER, DC_WIN_CORE_ACT_CONTROL); @@ -501,8 +505,6 @@ static void tegra_shared_plane_atomic_update(struct drm_plane *plane, /* disable compression */ tegra_plane_writel(p, 0, DC_WINBUF_CDE_CONTROL);
- base = tegra_plane_state->iova[0] + fb->offsets[0]; - #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT /* * Physical address bit 39 in Tegra194 is used as a switch for special @@ -510,9 +512,12 @@ static void tegra_shared_plane_atomic_update(struct drm_plane *plane, * dGPU sector layout. */ if (tegra_plane_state->tiling.sector_layout == TEGRA_BO_SECTOR_LAYOUT_GPU) - base |= BIT_ULL(39); + addr_flag = BIT_ULL(39); #endif
+ base = tegra_plane_state->iova[0] + fb->offsets[0]; + base |= addr_flag; + tegra_plane_writel(p, tegra_plane_state->format, DC_WIN_COLOR_DEPTH); tegra_plane_writel(p, 0, DC_WIN_PRECOMP_WGRP_PARAMS);
@@ -535,7 +540,44 @@ static void tegra_shared_plane_atomic_update(struct drm_plane *plane, value = PITCH(fb->pitches[0]); tegra_plane_writel(p, value, DC_WIN_PLANAR_STORAGE);
- value = CLAMP_BEFORE_BLEND | DEGAMMA_SRGB | INPUT_RANGE_FULL; + if (yuv && planar) { + base = tegra_plane_state->iova[1] + fb->offsets[1]; + base |= addr_flag; + + tegra_plane_writel(p, upper_32_bits(base), DC_WINBUF_START_ADDR_HI_U); + tegra_plane_writel(p, lower_32_bits(base), DC_WINBUF_START_ADDR_U); + + base = tegra_plane_state->iova[2] + fb->offsets[2]; + base |= addr_flag; + + tegra_plane_writel(p, upper_32_bits(base), DC_WINBUF_START_ADDR_HI_V); + tegra_plane_writel(p, lower_32_bits(base), DC_WINBUF_START_ADDR_V); + + value = PITCH_U(fb->pitches[2]) | PITCH_V(fb->pitches[2]); + tegra_plane_writel(p, value, DC_WIN_PLANAR_STORAGE_UV); + } else { + tegra_plane_writel(p, 0, DC_WINBUF_START_ADDR_U); + tegra_plane_writel(p, 0, DC_WINBUF_START_ADDR_HI_U); + tegra_plane_writel(p, 0, DC_WINBUF_START_ADDR_V); + tegra_plane_writel(p, 0, DC_WINBUF_START_ADDR_HI_V); + tegra_plane_writel(p, 0, DC_WIN_PLANAR_STORAGE_UV); + } + + value = CLAMP_BEFORE_BLEND | INPUT_RANGE_FULL; + + if (yuv) { + if (bpc < 12) + value |= DEGAMMA_YUV8_10; + else + value |= DEGAMMA_YUV12; + + /* XXX parameterize */ + value |= COLOR_SPACE_YUV_2020; + } else { + if (!tegra_plane_format_is_indexed(tegra_plane_state->format)) + value |= DEGAMMA_SRGB; + } + tegra_plane_writel(p, value, DC_WIN_SET_PARAMS);
value = OFFSET_X(new_state->src_y >> 16) | diff --git a/drivers/gpu/drm/tegra/plane.c b/drivers/gpu/drm/tegra/plane.c index 2e11b4b1f702..2e65b4075ce6 100644 --- a/drivers/gpu/drm/tegra/plane.c +++ b/drivers/gpu/drm/tegra/plane.c @@ -375,7 +375,20 @@ int tegra_plane_format(u32 fourcc, u32 *format, u32 *swap) return 0; }
-bool tegra_plane_format_is_yuv(unsigned int format, bool *planar) +bool tegra_plane_format_is_indexed(unsigned int format) +{ + switch (format) { + case WIN_COLOR_DEPTH_P1: + case WIN_COLOR_DEPTH_P2: + case WIN_COLOR_DEPTH_P4: + case WIN_COLOR_DEPTH_P8: + return true; + } + + return false; +} + +bool tegra_plane_format_is_yuv(unsigned int format, bool *planar, unsigned int *bpc) { switch (format) { case WIN_COLOR_DEPTH_YCbCr422: @@ -383,6 +396,9 @@ bool tegra_plane_format_is_yuv(unsigned int format, bool *planar) if (planar) *planar = false;
+ if (bpc) + *bpc = 8; + return true;
case WIN_COLOR_DEPTH_YCbCr420P: @@ -396,6 +412,9 @@ bool tegra_plane_format_is_yuv(unsigned int format, bool *planar) if (planar) *planar = true;
+ if (bpc) + *bpc = 8; + return true; }
@@ -421,7 +440,7 @@ static bool __drm_format_has_alpha(u32 format) static int tegra_plane_format_get_alpha(unsigned int opaque, unsigned int *alpha) { - if (tegra_plane_format_is_yuv(opaque, NULL)) { + if (tegra_plane_format_is_yuv(opaque, NULL, NULL)) { *alpha = opaque; return 0; } diff --git a/drivers/gpu/drm/tegra/plane.h b/drivers/gpu/drm/tegra/plane.h index c691dd79b27b..1785c1559c0c 100644 --- a/drivers/gpu/drm/tegra/plane.h +++ b/drivers/gpu/drm/tegra/plane.h @@ -74,7 +74,8 @@ int tegra_plane_state_add(struct tegra_plane *plane, struct drm_plane_state *state);
int tegra_plane_format(u32 fourcc, u32 *format, u32 *swap); -bool tegra_plane_format_is_yuv(unsigned int format, bool *planar); +bool tegra_plane_format_is_indexed(unsigned int format); +bool tegra_plane_format_is_yuv(unsigned int format, bool *planar, unsigned int *bpc); int tegra_plane_setup_legacy_state(struct tegra_plane *tegra, struct tegra_plane_state *state);
From: Yu Kuai yukuai3@huawei.com
[ Upstream commit f674555ee5444c8987dfea0922f1cf6bf0c12847 ]
pm_runtime_get_sync will increment the pm usage counter even if it fails. Forgetting to put the reference will result in a reference leak here. Fix it by replacing it with pm_runtime_resume_and_get to keep the usage counter balanced.
Reported-by: Hulk Robot hulkci@huawei.com Signed-off-by: Yu Kuai yukuai3@huawei.com Reviewed-by: Robert Foss robert.foss@linaro.org Signed-off-by: Robert Foss robert.foss@linaro.org Link: https://patchwork.freedesktop.org/patch/msgid/20210531135622.3348252-1-yukua... Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/gpu/drm/bridge/cadence/cdns-mhdp8546-core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/bridge/cadence/cdns-mhdp8546-core.c b/drivers/gpu/drm/bridge/cadence/cdns-mhdp8546-core.c index 989a05bc8197..ddcd5b6ad37a 100644 --- a/drivers/gpu/drm/bridge/cadence/cdns-mhdp8546-core.c +++ b/drivers/gpu/drm/bridge/cadence/cdns-mhdp8546-core.c @@ -2369,9 +2369,9 @@ static int cdns_mhdp_probe(struct platform_device *pdev) clk_prepare_enable(clk);
pm_runtime_enable(dev); - ret = pm_runtime_get_sync(dev); + ret = pm_runtime_resume_and_get(dev); if (ret < 0) { - dev_err(dev, "pm_runtime_get_sync failed\n"); + dev_err(dev, "pm_runtime_resume_and_get failed\n"); pm_runtime_disable(dev); goto clk_disable; }
From: Shiwu Zhang shiwu.zhang@amd.com
[ Upstream commit eba98523724be7ad3539f2c975de1527e0c99dd6 ]
Even if kfd_ioctl_get_dmabuf_info() still fails, it will indicate the right metadata_size to the caller, which is useful for the next call of the same kfd ioctl.
Signed-off-by: Shiwu Zhang shiwu.zhang@amd.com Reviewed-by: Nirmoy Das nirmoy.das@amd.com Signed-off-by: Alex Deucher alexander.deucher@amd.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index f9434bc2f9b2..db00de33caa3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -1246,6 +1246,9 @@ int amdgpu_bo_get_metadata(struct amdgpu_bo *bo, void *buffer,
BUG_ON(bo->tbo.type == ttm_bo_type_kernel); ubo = to_amdgpu_bo_user(bo); + if (metadata_size) + *metadata_size = ubo->metadata_size; + if (buffer) { if (buffer_size < ubo->metadata_size) return -EINVAL; @@ -1254,8 +1257,6 @@ int amdgpu_bo_get_metadata(struct amdgpu_bo *bo, void *buffer, memcpy(buffer, ubo->metadata, ubo->metadata_size); }
- if (metadata_size) - *metadata_size = ubo->metadata_size; if (flags) *flags = ubo->metadata_flags;
From: Kevin Wang kevin1.wang@amd.com
[ Upstream commit 2b8f731849800e3948763ccaff31cceac526789b ]
Re-adjust the function return order to avoid reporting an empty SDMA firmware version in the SR-IOV environment (as read via amdgpu_firmware_info).
Signed-off-by: Kevin Wang kevin1.wang@amd.com Reviewed-by: Stanley.Yang Stanley.Yang@amd.com Signed-off-by: Alex Deucher alexander.deucher@amd.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c index 240596b25fe4..9ab23947a151 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c @@ -145,9 +145,6 @@ static int sdma_v5_2_init_microcode(struct amdgpu_device *adev) struct amdgpu_firmware_info *info = NULL; const struct common_firmware_header *header = NULL;
- if (amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_SIENNA_CICHLID)) - return 0; - DRM_DEBUG("\n");
switch (adev->asic_type) { @@ -182,6 +179,9 @@ static int sdma_v5_2_init_microcode(struct amdgpu_device *adev) (void *)&adev->sdma.instance[0], sizeof(struct amdgpu_sdma_instance));
+ if (amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_SIENNA_CICHLID)) + return 0; + DRM_DEBUG("psp_load == '%s'\n", adev->firmware.load_type == AMDGPU_FW_LOAD_PSP ? "true" : "false");
From: Kees Cook keescook@chromium.org
[ Upstream commit 06888d571b513cbfc0b41949948def6cb81021b2 ]
Instead of reading the desired 5 bytes of the actual target field, the code was reading 8. This could result in a corrupted value if the trailing 3 bytes were non-zero, so instead use an appropriately sized and zero-initialized bounce buffer, and read only 5 bytes before casting to u64.
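A minimal sketch of the bounce-buffer idea (the helper name is illustrative; the real field is the 5-byte bksv shown in the hunk below):

  #include <linux/string.h>
  #include <linux/types.h>

  static u64 read_40bit_field(const u8 src[5])
  {
          u64 n = 0;
          u8 bounce[sizeof(n)] = { };     /* zero-initialized, u64-sized */

          /* Copy only the 5 valid bytes; the trailing 3 stay zero
           * instead of picking up whatever follows the field. */
          memcpy(bounce, src, 5);
          memcpy(&n, bounce, sizeof(n));
          return n;
  }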
Signed-off-by: Kees Cook keescook@chromium.org Signed-off-by: Alex Deucher alexander.deucher@amd.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/gpu/drm/amd/display/modules/hdcp/hdcp1_execution.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp1_execution.c b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp1_execution.c index 2cbd931363bd..6d26d9c63ab2 100644 --- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp1_execution.c +++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp1_execution.c @@ -29,8 +29,10 @@ static inline enum mod_hdcp_status validate_bksv(struct mod_hdcp *hdcp) { uint64_t n = 0; uint8_t count = 0; + u8 bksv[sizeof(n)] = { };
- memcpy(&n, hdcp->auth.msg.hdcp1.bksv, sizeof(uint64_t)); + memcpy(bksv, hdcp->auth.msg.hdcp1.bksv, sizeof(hdcp->auth.msg.hdcp1.bksv)); + n = *(uint64_t *)bksv;
while (n) { count++;
From: Jiansong Chen Jiansong.Chen@amd.com
[ Upstream commit 7d9c70d23550eb86a1bec1954ccaa8d6ec3a3328 ]
In a situation with gfxoff, the optimization may corrupt the CE RAM contents. In addition, the emit_cntxcntl callback has a similar optimization that the firmware can handle properly even with the power feature enabled.
Signed-off-by: Jiansong Chen Jiansong.Chen@amd.com Reviewed-by: Hawking Zhang Hawking.Zhang@amd.com Signed-off-by: Alex Deucher alexander.deucher@amd.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index a2fe2dac32c1..98906a43fda3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -130,7 +130,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, struct amdgpu_device *adev = ring->adev; struct amdgpu_ib *ib = &ibs[0]; struct dma_fence *tmp = NULL; - bool skip_preamble, need_ctx_switch; + bool need_ctx_switch; unsigned patch_offset = ~0; struct amdgpu_vm *vm; uint64_t fence_ctx; @@ -227,7 +227,6 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, if (need_ctx_switch) status |= AMDGPU_HAVE_CTX_SWITCH;
- skip_preamble = ring->current_ctx == fence_ctx; if (job && ring->funcs->emit_cntxcntl) { status |= job->preamble_status; status |= job->preemption_status; @@ -245,14 +244,6 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, for (i = 0; i < num_ibs; ++i) { ib = &ibs[i];
- /* drop preamble IBs if we don't have a context switch */ - if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) && - skip_preamble && - !(status & AMDGPU_PREAMBLE_IB_PRESENT_FIRST) && - !amdgpu_mcbp && - !amdgpu_sriov_vf(adev)) /* for SRIOV preemption, Preamble CE ib must be inserted anyway */ - continue; - if (job && ring->funcs->emit_frame_cntl) { if (secure != !!(ib->flags & AMDGPU_IB_FLAGS_SECURE)) { amdgpu_ring_emit_frame_cntl(ring, false, secure);
From: Nikola Cornij nikola.cornij@amd.com
[ Upstream commit 346cf627fb27c0fea63a041cedbaa4f31784e504 ]
[why] DSCCLK validation is not necessary because DSCCLK is derived from DISPCLK; therefore, if DISPCLK validation passes, DSCCLK is valid too. Doing DSCCLK validation in addition to DISPCLK leads to modes being wrongly rejected when DSCCLK was incorrectly set outside of DML.
[how] Remove DSCCLK validation because it's implicitly validated under DISPCLK
Signed-off-by: Nikola Cornij nikola.cornij@amd.com Reviewed-by: Dmytro Laktyushkin Dmytro.Laktyushkin@amd.com Acked-by: Stylon Wang stylon.wang@amd.com Tested-by: Daniel Wheeler daniel.wheeler@amd.com Signed-off-by: Alex Deucher alexander.deucher@amd.com Signed-off-by: Sasha Levin sashal@kernel.org --- .../dc/dml/dcn30/display_mode_vba_30.c | 64 ++++++------------- 1 file changed, 21 insertions(+), 43 deletions(-)
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c index cb3f70a71b51..af7d57602b2c 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c @@ -64,6 +64,7 @@ typedef struct { #define BPP_INVALID 0 #define BPP_BLENDED_PIPE 0xffffffff #define DCN30_MAX_DSC_IMAGE_WIDTH 5184 +#define DCN30_MAX_FMT_420_BUFFER_WIDTH 4096
static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib); static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation( @@ -3987,19 +3988,30 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l } else if (v->PlaneRequiredDISPCLKWithoutODMCombine > v->MaxDispclkRoundedDownToDFSGranularity) { v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1; v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1; - } else if (v->DSCEnabled[k] && (v->HActive[k] > DCN30_MAX_DSC_IMAGE_WIDTH)) { - v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1; - v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1; } else { v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled; v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithoutODMCombine; - /*420 format workaround*/ - if (v->HActive[k] > 4096 && v->OutputFormat[k] == dm_420) { + } + if (v->DSCEnabled[k] && v->HActive[k] > DCN30_MAX_DSC_IMAGE_WIDTH + && v->ODMCombineEnablePerState[i][k] != dm_odm_combine_mode_4to1) { + if (v->HActive[k] / 2 > DCN30_MAX_DSC_IMAGE_WIDTH) { + v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1; + v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1; + } else { + v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1; + v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1; + } + } + if (v->OutputFormat[k] == dm_420 && v->HActive[k] > DCN30_MAX_FMT_420_BUFFER_WIDTH + && v->ODMCombineEnablePerState[i][k] != dm_odm_combine_mode_4to1) { + if (v->HActive[k] / 2 > DCN30_MAX_FMT_420_BUFFER_WIDTH) { + v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1; + v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1; + } else { v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1; v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1; } } - if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1) { v->MPCCombine[i][j][k] = false; v->NoOfDPP[i][j][k] = 4; @@ -4281,42 +4293,8 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l } }
- for (i = 0; i < v->soc.num_states; i++) { - v->DSCCLKRequiredMoreThanSupported[i] = false; - for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) { - if (v->BlendingAndTiming[k] == k) { - if (v->Output[k] == dm_dp || v->Output[k] == dm_edp) { - if (v->OutputFormat[k] == dm_420) { - v->DSCFormatFactor = 2; - } else if (v->OutputFormat[k] == dm_444) { - v->DSCFormatFactor = 1; - } else if (v->OutputFormat[k] == dm_n422) { - v->DSCFormatFactor = 2; - } else { - v->DSCFormatFactor = 1; - } - if (v->RequiresDSC[i][k] == true) { - if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1) { - if (v->PixelClockBackEnd[k] / 12.0 / v->DSCFormatFactor - > (1.0 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) * v->MaxDSCCLK[i]) { - v->DSCCLKRequiredMoreThanSupported[i] = true; - } - } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) { - if (v->PixelClockBackEnd[k] / 6.0 / v->DSCFormatFactor - > (1.0 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) * v->MaxDSCCLK[i]) { - v->DSCCLKRequiredMoreThanSupported[i] = true; - } - } else { - if (v->PixelClockBackEnd[k] / 3.0 / v->DSCFormatFactor - > (1.0 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) * v->MaxDSCCLK[i]) { - v->DSCCLKRequiredMoreThanSupported[i] = true; - } - } - } - } - } - } - } + /* Skip dscclk validation: as long as dispclk is supported, dscclk is also implicitly supported */ + for (i = 0; i < v->soc.num_states; i++) { v->NotEnoughDSCUnits[i] = false; v->TotalDSCUnitsRequired = 0.0; @@ -5319,7 +5297,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l for (j = 0; j < 2; j++) { if (v->ScaleRatioAndTapsSupport == 1 && v->SourceFormatPixelAndScanSupport == 1 && v->ViewportSizeSupport[i][j] == 1 && v->DIOSupport[i] == 1 && v->ODMCombine4To1SupportCheckOK[i] == 1 - && v->NotEnoughDSCUnits[i] == 0 && v->DSCCLKRequiredMoreThanSupported[i] == 0 + && v->NotEnoughDSCUnits[i] == 0 && v->DTBCLKRequiredMoreThanSupported[i] == 0 && v->ROBSupport[i][j] == 1 && v->DISPCLK_DPPCLK_Support[i][j] == 1 && v->TotalAvailablePipesSupport[i][j] == 1 && EnoughWritebackUnits == 1 && WritebackModeSupport == 1
From: Ilya Bakoulin Ilya.Bakoulin@amd.com
[ Upstream commit ae88357c7966ec2a52cb1e70fd42f74a40c9dfcb ]
[Why] This change was found to break some high-refresh modes. Reverting to unblock mainline.
Signed-off-by: Ilya Bakoulin Ilya.Bakoulin@amd.com Reviewed-by: Sung Lee Sung.Lee@amd.com Acked-by: Stylon Wang stylon.wang@amd.com Tested-by: Daniel Wheeler daniel.wheeler@amd.com Signed-off-by: Alex Deucher alexander.deucher@amd.com Signed-off-by: Sasha Levin sashal@kernel.org --- .../amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c | 78 +++++++------------ .../drm/amd/display/dc/dcn21/dcn21_resource.c | 33 +++----- 2 files changed, 39 insertions(+), 72 deletions(-)
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c index 1f56ceab5922..75ba86f951f8 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c @@ -835,66 +835,47 @@ static struct wm_table lpddr4_wm_table_rn = { } };
-static unsigned int find_max_fclk_for_voltage(struct dpm_clocks *clock_table, - unsigned int voltage) +static unsigned int find_socclk_for_voltage(struct dpm_clocks *clock_table, unsigned int voltage) { int i; - uint32_t max_clk = 0;
- for (i = 0; i < PP_SMU_NUM_FCLK_DPM_LEVELS; i++) { - if (clock_table->FClocks[i].Vol <= voltage) { - max_clk = clock_table->FClocks[i].Freq > max_clk ? - clock_table->FClocks[i].Freq : max_clk; - } - } - - return max_clk; -} - -static unsigned int find_max_memclk_for_voltage(struct dpm_clocks *clock_table, - unsigned int voltage) -{ - int i; - uint32_t max_clk = 0; - - for (i = 0; i < PP_SMU_NUM_MEMCLK_DPM_LEVELS; i++) { - if (clock_table->MemClocks[i].Vol <= voltage) { - max_clk = clock_table->MemClocks[i].Freq > max_clk ? - clock_table->MemClocks[i].Freq : max_clk; - } + for (i = 0; i < PP_SMU_NUM_SOCCLK_DPM_LEVELS; i++) { + if (clock_table->SocClocks[i].Vol == voltage) + return clock_table->SocClocks[i].Freq; }
- return max_clk; + ASSERT(0); + return 0; }
-static unsigned int find_max_socclk_for_voltage(struct dpm_clocks *clock_table, - unsigned int voltage) +static unsigned int find_dcfclk_for_voltage(struct dpm_clocks *clock_table, unsigned int voltage) { int i; - uint32_t max_clk = 0;
- for (i = 0; i < PP_SMU_NUM_SOCCLK_DPM_LEVELS; i++) { - if (clock_table->SocClocks[i].Vol <= voltage) { - max_clk = clock_table->SocClocks[i].Freq > max_clk ? - clock_table->SocClocks[i].Freq : max_clk; - } + for (i = 0; i < PP_SMU_NUM_DCFCLK_DPM_LEVELS; i++) { + if (clock_table->DcfClocks[i].Vol == voltage) + return clock_table->DcfClocks[i].Freq; }
- return max_clk; + ASSERT(0); + return 0; }
static void rn_clk_mgr_helper_populate_bw_params(struct clk_bw_params *bw_params, struct dpm_clocks *clock_table, struct integrated_info *bios_info) { int i, j = 0; - unsigned int volt;
j = -1;
- /* Find max DPM */ - for (i = 0; i < PP_SMU_NUM_DCFCLK_DPM_LEVELS; ++i) { - if (clock_table->DcfClocks[i].Freq != 0 && - clock_table->DcfClocks[i].Vol != 0) + ASSERT(PP_SMU_NUM_FCLK_DPM_LEVELS <= MAX_NUM_DPM_LVL); + + /* Find lowest DPM, FCLK is filled in reverse order*/ + + for (i = PP_SMU_NUM_FCLK_DPM_LEVELS - 1; i >= 0; i--) { + if (clock_table->FClocks[i].Freq != 0 && clock_table->FClocks[i].Vol != 0) { j = i; + break; + } }
if (j == -1) { @@ -905,18 +886,13 @@ static void rn_clk_mgr_helper_populate_bw_params(struct clk_bw_params *bw_params
bw_params->clk_table.num_entries = j + 1;
- for (i = 0; i < bw_params->clk_table.num_entries; i++) { - volt = clock_table->DcfClocks[i].Vol; - - bw_params->clk_table.entries[i].voltage = volt; - bw_params->clk_table.entries[i].dcfclk_mhz = - clock_table->DcfClocks[i].Freq; - bw_params->clk_table.entries[i].fclk_mhz = - find_max_fclk_for_voltage(clock_table, volt); - bw_params->clk_table.entries[i].memclk_mhz = - find_max_memclk_for_voltage(clock_table, volt); - bw_params->clk_table.entries[i].socclk_mhz = - find_max_socclk_for_voltage(clock_table, volt); + for (i = 0; i < bw_params->clk_table.num_entries; i++, j--) { + bw_params->clk_table.entries[i].fclk_mhz = clock_table->FClocks[j].Freq; + bw_params->clk_table.entries[i].memclk_mhz = clock_table->MemClocks[j].Freq; + bw_params->clk_table.entries[i].voltage = clock_table->FClocks[j].Vol; + bw_params->clk_table.entries[i].dcfclk_mhz = find_dcfclk_for_voltage(clock_table, clock_table->FClocks[j].Vol); + bw_params->clk_table.entries[i].socclk_mhz = find_socclk_for_voltage(clock_table, + bw_params->clk_table.entries[i].voltage); }
bw_params->vram_type = bios_info->memory_type; diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c index 38a2aa87f5f5..8e3f1d0b4cc3 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c @@ -1575,12 +1575,10 @@ static struct _vcs_dpi_voltage_scaling_st construct_low_pstate_lvl(struct clk_li low_pstate_lvl.phyclk_d18_mhz = dcn2_1_soc.clock_limits[high_voltage_lvl].phyclk_d18_mhz; low_pstate_lvl.phyclk_mhz = dcn2_1_soc.clock_limits[high_voltage_lvl].phyclk_mhz;
- if (clk_table->num_entries < MAX_NUM_DPM_LVL) { - for (i = clk_table->num_entries; i > 1; i--) - clk_table->entries[i] = clk_table->entries[i-1]; - clk_table->entries[1] = clk_table->entries[0]; - clk_table->num_entries++; - } + for (i = clk_table->num_entries; i > 1; i--) + clk_table->entries[i] = clk_table->entries[i-1]; + clk_table->entries[1] = clk_table->entries[0]; + clk_table->num_entries++;
return low_pstate_lvl; } @@ -1612,6 +1610,10 @@ static void update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_param } }
+ /* clk_table[1] is reserved for min DF PState. skip here to fill in later. */ + if (i == 1) + k++; + clock_limits[k].state = k; clock_limits[k].dcfclk_mhz = clk_table->entries[i].dcfclk_mhz; clock_limits[k].fabricclk_mhz = clk_table->entries[i].fclk_mhz; @@ -1628,25 +1630,14 @@ static void update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_param
k++; } - - if (clk_table->num_entries >= MAX_NUM_DPM_LVL) { - for (i = 0; i < clk_table->num_entries + 1; i++) - dcn2_1_soc.clock_limits[i] = clock_limits[i]; - } else { - dcn2_1_soc.clock_limits[0] = clock_limits[0]; - for (i = 2; i < clk_table->num_entries + 1; i++) { - dcn2_1_soc.clock_limits[i] = clock_limits[i - 1]; - dcn2_1_soc.clock_limits[i].state = i; - } - } - + for (i = 0; i < clk_table->num_entries + 1; i++) + dcn2_1_soc.clock_limits[i] = clock_limits[i]; if (clk_table->num_entries) { + dcn2_1_soc.num_states = clk_table->num_entries + 1; /* fill in min DF PState */ dcn2_1_soc.clock_limits[1] = construct_low_pstate_lvl(clk_table, closest_clk_lvl); - dcn2_1_soc.num_states = clk_table->num_entries; /* duplicate last level */ - dcn2_1_soc.clock_limits[dcn2_1_soc.num_states] = - dcn2_1_soc.clock_limits[dcn2_1_soc.num_states - 1]; + dcn2_1_soc.clock_limits[dcn2_1_soc.num_states] = dcn2_1_soc.clock_limits[dcn2_1_soc.num_states - 1]; dcn2_1_soc.clock_limits[dcn2_1_soc.num_states].state = dcn2_1_soc.num_states; }
From: Roman Li roman.li@amd.com
[ Upstream commit c521fc316d12fb9ea7b7680e301d673bceda922e ]
[Why] We update the scaling settings when the scaling mode has been changed. However, when changing the mode from the native resolution, the previously set scaling mode gets ignored.
[How] Perform scaling settings update on modeset.
Signed-off-by: Roman Li roman.li@amd.com Reviewed-by: Nicholas Kazlauskas Nicholas.Kazlauskas@amd.com Acked-by: Stylon Wang stylon.wang@amd.com Tested-by: Daniel Wheeler daniel.wheeler@amd.com Signed-off-by: Alex Deucher alexander.deucher@amd.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 62663e287b21..58577f7a57c1 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -9472,7 +9472,8 @@ static int dm_update_crtc_state(struct amdgpu_display_manager *dm, BUG_ON(dm_new_crtc_state->stream == NULL);
/* Scaling or underscan settings */ - if (is_scaling_state_different(dm_old_conn_state, dm_new_conn_state)) + if (is_scaling_state_different(dm_old_conn_state, dm_new_conn_state) || + drm_atomic_crtc_needs_modeset(new_crtc_state)) update_stream_scaling_settings( &new_crtc_state->mode, dm_new_conn_state, dm_new_crtc_state->stream);
From: Vladimir Stempen vladimir.stempen@amd.com
[ Upstream commit 3f8518b60c10aa96f3efa38a967a0b4eb9211ac0 ]
[why] When the OS overrides the link training parameters for an MST device to SST mode, the MST resources are not released, and the resulting leak may cause a crash and incorrect MST discovery during subsequent hot plugs.
[how] Retain the sink object so it can be reused by the SST link, and release the MST resources.
Signed-off-by: Vladimir Stempen vladimir.stempen@amd.com Reviewed-by: Wenjing Liu Wenjing.Liu@amd.com Acked-by: Stylon Wang stylon.wang@amd.com Tested-by: Daniel Wheeler daniel.wheeler@amd.com Signed-off-by: Alex Deucher alexander.deucher@amd.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c | 2 ++ 1 file changed, 2 insertions(+)
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c index 3ff3d9e90983..f26ed50a9660 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c @@ -1784,6 +1784,8 @@ static void set_dp_mst_mode(struct dc_link *link, bool mst_enable) link->type = dc_connection_single; link->local_sink = link->remote_sinks[0]; link->local_sink->sink_signal = SIGNAL_TYPE_DISPLAY_PORT; + dc_sink_retain(link->local_sink); + dm_helpers_dp_mst_stop_top_mgr(link->ctx, link); } else if (mst_enable == true && link->type == dc_connection_single && link->remote_sinks[0] != NULL) {
From: Wesley Chalmers Wesley.Chalmers@amd.com
[ Upstream commit 3577e1678772ce3ede92af3a75b44a4b76f9b4ad ]
[WHY] DISPCLK_MAX_ERRDET_CYCLES must be 7 to prevent connection loss when changing DENTIST_DISPCLK_WDIVIDER from 126 to 127 and back.
Signed-off-by: Wesley Chalmers Wesley.Chalmers@amd.com Reviewed-by: Dmytro Laktyushkin Dmytro.Laktyushkin@amd.com Acked-by: Stylon Wang stylon.wang@amd.com Tested-by: Daniel Wheeler daniel.wheeler@amd.com Signed-off-by: Alex Deucher alexander.deucher@amd.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c index 6a10daec15cc..793554e61c52 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c @@ -243,7 +243,7 @@ void dcn20_dccg_init(struct dce_hwseq *hws) REG_WRITE(MILLISECOND_TIME_BASE_DIV, 0x1186a0);
/* This value is dependent on the hardware pipeline delay so set once per SOC */ - REG_WRITE(DISPCLK_FREQ_CHANGE_CNTL, 0x801003c); + REG_WRITE(DISPCLK_FREQ_CHANGE_CNTL, 0xe01003c); }
void dcn20_disable_vga(
From: Wesley Chalmers Wesley.Chalmers@amd.com
[ Upstream commit e4e3678260e9734f6f41b4325aac0b171833a618 ]
[WHY] For DCN30 and later, there is no data in DML arrays indexed by state at index num_states.
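An illustrative sketch of the off-by-one being fixed (struct and function names are made up for the example; the real arrays live in the DML soc bounding box):

  struct example_state { double dispclk_mhz; };

  struct example_soc {
          unsigned int num_states;
          struct example_state clock_limits[9];
  };

  /* With num_states populated entries, the last valid state sits at
   * index num_states - 1; index num_states holds no data on DCN30+. */
  static double example_max_dispclk(const struct example_soc *soc)
  {
          return soc->clock_limits[soc->num_states - 1].dispclk_mhz;
  }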
Signed-off-by: Wesley Chalmers Wesley.Chalmers@amd.com Reviewed-by: Dmytro Laktyushkin Dmytro.Laktyushkin@amd.com Acked-by: Stylon Wang stylon.wang@amd.com Tested-by: Daniel Wheeler daniel.wheeler@amd.com Signed-off-by: Alex Deucher alexander.deucher@amd.com Signed-off-by: Sasha Levin sashal@kernel.org --- .../amd/display/dc/dml/dcn30/display_mode_vba_30.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-)
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c index af7d57602b2c..db6bb7ea5316 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c @@ -2053,7 +2053,7 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman v->DISPCLKWithoutRamping, v->DISPCLKDPPCLKVCOSpeed); v->MaxDispclkRoundedToDFSGranularity = RoundToDFSGranularityDown( - v->soc.clock_limits[mode_lib->soc.num_states].dispclk_mhz, + v->soc.clock_limits[mode_lib->soc.num_states - 1].dispclk_mhz, v->DISPCLKDPPCLKVCOSpeed); if (v->DISPCLKWithoutRampingRoundedToDFSGranularity > v->MaxDispclkRoundedToDFSGranularity) { @@ -3958,20 +3958,20 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) { v->PlaneRequiredDISPCLKWithoutODMCombine = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) * (1.0 + v->DISPCLKRampingMargin / 100.0); - if ((v->PlaneRequiredDISPCLKWithoutODMCombine >= v->MaxDispclk[i] && v->MaxDispclk[i] == v->MaxDispclk[mode_lib->soc.num_states] - && v->MaxDppclk[i] == v->MaxDppclk[mode_lib->soc.num_states])) { + if ((v->PlaneRequiredDISPCLKWithoutODMCombine >= v->MaxDispclk[i] && v->MaxDispclk[i] == v->MaxDispclk[mode_lib->soc.num_states - 1] + && v->MaxDppclk[i] == v->MaxDppclk[mode_lib->soc.num_states - 1])) { v->PlaneRequiredDISPCLKWithoutODMCombine = v->PixelClock[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); } v->PlaneRequiredDISPCLKWithODMCombine2To1 = v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) * (1 + v->DISPCLKRampingMargin / 100.0); - if ((v->PlaneRequiredDISPCLKWithODMCombine2To1 >= v->MaxDispclk[i] && v->MaxDispclk[i] == v->MaxDispclk[mode_lib->soc.num_states] - && v->MaxDppclk[i] == v->MaxDppclk[mode_lib->soc.num_states])) { + if ((v->PlaneRequiredDISPCLKWithODMCombine2To1 >= v->MaxDispclk[i] && v->MaxDispclk[i] == v->MaxDispclk[mode_lib->soc.num_states - 1] + && v->MaxDppclk[i] == v->MaxDppclk[mode_lib->soc.num_states - 1])) { v->PlaneRequiredDISPCLKWithODMCombine2To1 = v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); } v->PlaneRequiredDISPCLKWithODMCombine4To1 = v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) * (1 + v->DISPCLKRampingMargin / 100.0); - if ((v->PlaneRequiredDISPCLKWithODMCombine4To1 >= v->MaxDispclk[i] && v->MaxDispclk[i] == v->MaxDispclk[mode_lib->soc.num_states] - && v->MaxDppclk[i] == v->MaxDppclk[mode_lib->soc.num_states])) { + if ((v->PlaneRequiredDISPCLKWithODMCombine4To1 >= v->MaxDispclk[i] && v->MaxDispclk[i] == v->MaxDispclk[mode_lib->soc.num_states - 1] + && v->MaxDppclk[i] == v->MaxDppclk[mode_lib->soc.num_states - 1])) { v->PlaneRequiredDISPCLKWithODMCombine4To1 = v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); }
From: Aric Cyr aric.cyr@amd.com
[ Upstream commit 665f28507a2a3d8d72ed9afa9a2b9b17fd43add1 ]
[Why] When calculating the recout width for an MPO plane on a mode that's using ODM combine, the driver can calculate a negative value, resulting in a crash.
[How] For negative widths, use zero such that validation will prune the configuration correctly and disallow MPO.
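A minimal sketch of the guard (types and names are illustrative; the real code operates on the pipe's recout rectangle as in the hunk below):

  struct example_rect { int x; int width; };

  static void example_clamp_remainder(struct example_rect *recout,
                                      int h_active, int split_count)
  {
          /* Rightmost pipe takes the remainder of the plane. */
          recout->width -= h_active * split_count - recout->x;

          /* ODM combine with MPO can drive this negative; clamp to 0 so
           * validation prunes the configuration instead of crashing. */
          if (recout->width < 0)
                  recout->width = 0;

          recout->x = 0;
  }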
Signed-off-by: Aric Cyr aric.cyr@amd.com Reviewed-by: Krunoslav Kovac Krunoslav.Kovac@amd.com Acked-by: Stylon Wang stylon.wang@amd.com Tested-by: Daniel Wheeler daniel.wheeler@amd.com Signed-off-by: Alex Deucher alexander.deucher@amd.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/gpu/drm/amd/display/dc/core/dc_resource.c | 5 +++++ 1 file changed, 5 insertions(+)
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index 78278a10d899..3b1068a09095 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -776,6 +776,11 @@ static void calculate_recout(struct pipe_ctx *pipe_ctx) if (split_idx == split_count) { /* rightmost pipe is the remainder recout */ data->recout.width -= data->h_active * split_count - data->recout.x; + + /* ODM combine cases with MPO we can get negative widths */ + if (data->recout.width < 0) + data->recout.width = 0; + data->recout.x = 0; } else data->recout.width = data->h_active - data->recout.x;
From: Nirmoy Das nirmoy.das@amd.com
[ Upstream commit bc05716d4fdd065013633602c5960a2bf1511b9c ]
Fixes handling when page tables are in system memory.
v3: remove struct amdgpu_vm_parser. v2: remove unwanted variable. change amdgpu_amdkfd_validate instead of amdgpu_amdkfd_bo_validate.
Signed-off-by: Nirmoy Das nirmoy.das@amd.com Reviewed-by: Christian König christian.koenig@amd.com Reviewed-by: Felix Kuehling Felix.Kuehling@amd.com Signed-off-by: Alex Deucher alexander.deucher@amd.com Signed-off-by: Sasha Levin sashal@kernel.org --- .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 21 ++++--------------- 1 file changed, 4 insertions(+), 17 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 7d4118c8128a..5e69b5b50a19 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -50,12 +50,6 @@ static struct { spinlock_t mem_limit_lock; } kfd_mem_limit;
-/* Struct used for amdgpu_amdkfd_bo_validate */ -struct amdgpu_vm_parser { - uint32_t domain; - bool wait; -}; - static const char * const domain_bit_to_string[] = { "CPU", "GTT", @@ -346,11 +340,9 @@ static int amdgpu_amdkfd_bo_validate(struct amdgpu_bo *bo, uint32_t domain, return ret; }
-static int amdgpu_amdkfd_validate(void *param, struct amdgpu_bo *bo) +static int amdgpu_amdkfd_validate_vm_bo(void *_unused, struct amdgpu_bo *bo) { - struct amdgpu_vm_parser *p = param; - - return amdgpu_amdkfd_bo_validate(bo, p->domain, p->wait); + return amdgpu_amdkfd_bo_validate(bo, bo->allowed_domains, false); }
/* vm_validate_pt_pd_bos - Validate page table and directory BOs @@ -364,20 +356,15 @@ static int vm_validate_pt_pd_bos(struct amdgpu_vm *vm) { struct amdgpu_bo *pd = vm->root.base.bo; struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev); - struct amdgpu_vm_parser param; int ret;
- param.domain = AMDGPU_GEM_DOMAIN_VRAM; - param.wait = false; - - ret = amdgpu_vm_validate_pt_bos(adev, vm, amdgpu_amdkfd_validate, - &param); + ret = amdgpu_vm_validate_pt_bos(adev, vm, amdgpu_amdkfd_validate_vm_bo, NULL); if (ret) { pr_err("failed to validate PT BOs\n"); return ret; }
- ret = amdgpu_amdkfd_validate(&param, pd); + ret = amdgpu_amdkfd_validate_vm_bo(NULL, pd); if (ret) { pr_err("failed to validate PD\n"); return ret;
From: Mark Yacoub markyacoub@chromium.org
[ Upstream commit 03fc4cf45d30533d54f0f4ebc02aacfa12f52ce2 ]
For each CRTC state, check the size of the Gamma and Degamma LUTs so that unexpected or oversized LUTs don't slip through.
TEST: IGT:kms_color::pipe-invalid-gamma-lut-sizes
v2: fix assignments in if clauses, Mark's email.
Reviewed-by: Harry Wentland harry.wentland@amd.com Signed-off-by: Mark Yacoub markyacoub@chromium.org Signed-off-by: Alex Deucher alexander.deucher@amd.com Signed-off-by: Sasha Levin sashal@kernel.org --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 4 ++ .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h | 1 + .../amd/display/amdgpu_dm/amdgpu_dm_color.c | 41 ++++++++++++++++--- 3 files changed, 40 insertions(+), 6 deletions(-)
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 58577f7a57c1..35def321bae4 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -10040,6 +10040,10 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev, dm_old_crtc_state->dsc_force_changed == false) continue;
+ ret = amdgpu_dm_verify_lut_sizes(new_crtc_state); + if (ret) + goto fail; + if (!new_crtc_state->enable) continue;
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h index b2f2ccfc20bb..c8e5bbbb8bce 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h @@ -591,6 +591,7 @@ void amdgpu_dm_trigger_timing_sync(struct drm_device *dev); #define MAX_COLOR_LEGACY_LUT_ENTRIES 256
void amdgpu_dm_init_color_mod(void); +int amdgpu_dm_verify_lut_sizes(const struct drm_crtc_state *crtc_state); int amdgpu_dm_update_crtc_color_mgmt(struct dm_crtc_state *crtc); int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc, struct dc_plane_state *dc_plane_state); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c index 157fe4efbb59..a022e5bb30a5 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c @@ -284,6 +284,37 @@ static int __set_input_tf(struct dc_transfer_func *func, return res ? 0 : -ENOMEM; }
+/** + * Verifies that the Degamma and Gamma LUTs attached to the |crtc_state| are of + * the expected size. + * Returns 0 on success. + */ +int amdgpu_dm_verify_lut_sizes(const struct drm_crtc_state *crtc_state) +{ + const struct drm_color_lut *lut = NULL; + uint32_t size = 0; + + lut = __extract_blob_lut(crtc_state->degamma_lut, &size); + if (lut && size != MAX_COLOR_LUT_ENTRIES) { + DRM_DEBUG_DRIVER( + "Invalid Degamma LUT size. Should be %u but got %u.\n", + MAX_COLOR_LUT_ENTRIES, size); + return -EINVAL; + } + + lut = __extract_blob_lut(crtc_state->gamma_lut, &size); + if (lut && size != MAX_COLOR_LUT_ENTRIES && + size != MAX_COLOR_LEGACY_LUT_ENTRIES) { + DRM_DEBUG_DRIVER( + "Invalid Gamma LUT size. Should be %u (or %u for legacy) but got %u.\n", + MAX_COLOR_LUT_ENTRIES, MAX_COLOR_LEGACY_LUT_ENTRIES, + size); + return -EINVAL; + } + + return 0; +} + /** * amdgpu_dm_update_crtc_color_mgmt: Maps DRM color management to DC stream. * @crtc: amdgpu_dm crtc state @@ -317,14 +348,12 @@ int amdgpu_dm_update_crtc_color_mgmt(struct dm_crtc_state *crtc) bool is_legacy; int r;
- degamma_lut = __extract_blob_lut(crtc->base.degamma_lut, &degamma_size); - if (degamma_lut && degamma_size != MAX_COLOR_LUT_ENTRIES) - return -EINVAL; + r = amdgpu_dm_verify_lut_sizes(&crtc->base); + if (r) + return r;
+ degamma_lut = __extract_blob_lut(crtc->base.degamma_lut, &degamma_size); regamma_lut = __extract_blob_lut(crtc->base.gamma_lut, &regamma_size); - if (regamma_lut && regamma_size != MAX_COLOR_LUT_ENTRIES && - regamma_size != MAX_COLOR_LEGACY_LUT_ENTRIES) - return -EINVAL;
has_degamma = degamma_lut && !__is_lut_linear(degamma_lut, degamma_size);
From: Jonathan Kim jonathan.kim@amd.com
[ Upstream commit 63f6e01237257e7226efc5087f3f0b525d320f54 ]
get_wave_state() acquires the mmap_lock for copy_to_user(), but so do MMU notifiers. MMU notifiers can take DQM locks, so call get_wave_state() outside the dqm_lock to prevent circular locking.
v2: squash in unused variable removal.
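A generic sketch of the locking rule being applied (this is not the driver code; names are illustrative):

  #include <linux/errno.h>
  #include <linux/mutex.h>
  #include <linux/types.h>
  #include <linux/uaccess.h>

  struct example_ctx {
          struct mutex lock;      /* also taken from MMU notifiers */
          u32 state;
  };

  static int example_read_state(struct example_ctx *c, u32 __user *ubuf)
  {
          u32 snapshot;

          /* Snapshot under the lock ... */
          mutex_lock(&c->lock);
          snapshot = c->state;
          mutex_unlock(&c->lock);

          /* ... but copy to user space only after dropping it:
           * copy_to_user() may fault and take mmap_lock, and
           * mmap_lock -> MMU notifier -> c->lock would otherwise
           * close a circular dependency. */
          if (copy_to_user(ubuf, &snapshot, sizeof(snapshot)))
                  return -EFAULT;
          return 0;
  }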
Signed-off-by: Jonathan Kim jonathan.kim@amd.com Reviewed-by: Felix Kuehling felix.kuehling@amd.com Signed-off-by: Alex Deucher alexander.deucher@amd.com Signed-off-by: Sasha Levin sashal@kernel.org --- .../drm/amd/amdkfd/kfd_device_queue_manager.c | 28 +++++++++---------- 1 file changed, 13 insertions(+), 15 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index d3eaa1549bd7..ccc9d108caff 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -1674,29 +1674,27 @@ static int get_wave_state(struct device_queue_manager *dqm, u32 *save_area_used_size) { struct mqd_manager *mqd_mgr; - int r;
dqm_lock(dqm);
- if (q->properties.type != KFD_QUEUE_TYPE_COMPUTE || - q->properties.is_active || !q->device->cwsr_enabled) { - r = -EINVAL; - goto dqm_unlock; - } - mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_CP];
- if (!mqd_mgr->get_wave_state) { - r = -EINVAL; - goto dqm_unlock; + if (q->properties.type != KFD_QUEUE_TYPE_COMPUTE || + q->properties.is_active || !q->device->cwsr_enabled || + !mqd_mgr->get_wave_state) { + dqm_unlock(dqm); + return -EINVAL; }
- r = mqd_mgr->get_wave_state(mqd_mgr, q->mqd, ctl_stack, - ctl_stack_used_size, save_area_used_size); - -dqm_unlock: dqm_unlock(dqm); - return r; + + /* + * get_wave_state is outside the dqm lock to prevent circular locking + * and the queue should be protected against destruction by the process + * lock. + */ + return mqd_mgr->get_wave_state(mqd_mgr, q->mqd, ctl_stack, + ctl_stack_used_size, save_area_used_size); }
static int process_termination_cpsch(struct device_queue_manager *dqm,
From: Wesley Chalmers Wesley.Chalmers@amd.com
[ Upstream commit 78ebca321999699f30ea19029726d1a3908b395f ]
[WHY] When changing the DISPCLK_WDIVIDER value from 126 to 127, the change in clock rate is too great for the FIFOs to handle. This can cause visible corruption during clock change.
HW has handed down this register sequence to fix the issue.
[HOW] The sequence, from HW:
a. 127 -> 126
   Read DIG_FIFO_CAL_AVERAGE_LEVEL FIFO level
   N = DIG_FIFO_CAL_AVERAGE_LEVEL / 4
   Set DCCG_FIFO_ERRDET_OVR_EN = 1
   Write 1 to OTGx_DROP_PIXEL for (N-4) times
   Set DCCG_FIFO_ERRDET_OVR_EN = 0
   Write DENTIST_DISPCLK_RDIVIDER = 126
Because of frequency stepping, sequence a can be executed to change the divider from 127 to any other divider value.
b. 126 -> 127
   Read DIG_FIFO_CAL_AVERAGE_LEVEL FIFO level
   N = DIG_FIFO_CAL_AVERAGE_LEVEL / 4
   Set DCCG_FIFO_ERRDET_OVR_EN = 1
   Write 1 to OTGx_ADD_PIXEL for (12-N) times
   Set DCCG_FIFO_ERRDET_OVR_EN = 0
   Write DENTIST_DISPCLK_RDIVIDER = 127
Because of frequency stepping, divider must first be set from any other divider value to 126 before executing sequence b.
Signed-off-by: Wesley Chalmers Wesley.Chalmers@amd.com Reviewed-by: Dmytro Laktyushkin Dmytro.Laktyushkin@amd.com Acked-by: Anson Jacob Anson.Jacob@amd.com Tested-by: Daniel Wheeler daniel.wheeler@amd.com Signed-off-by: Alex Deucher alexander.deucher@amd.com Signed-off-by: Sasha Levin sashal@kernel.org --- .../display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c | 68 ++++++++++++++++++- .../display/dc/clk_mgr/dcn20/dcn20_clk_mgr.h | 3 +- .../display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c | 4 +- 3 files changed, 69 insertions(+), 6 deletions(-)
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c index 372d53b5a34d..3d335b4e9891 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c @@ -123,7 +123,7 @@ void dcn20_update_clocks_update_dpp_dto(struct clk_mgr_internal *clk_mgr, } }
-void dcn20_update_clocks_update_dentist(struct clk_mgr_internal *clk_mgr) +void dcn20_update_clocks_update_dentist(struct clk_mgr_internal *clk_mgr, struct dc_state *context) { int dpp_divider = DENTIST_DIVIDER_RANGE_SCALE_FACTOR * clk_mgr->base.dentist_vco_freq_khz / clk_mgr->base.clks.dppclk_khz; @@ -132,6 +132,68 @@ void dcn20_update_clocks_update_dentist(struct clk_mgr_internal *clk_mgr)
uint32_t dppclk_wdivider = dentist_get_did_from_divider(dpp_divider); uint32_t dispclk_wdivider = dentist_get_did_from_divider(disp_divider); + uint32_t current_dispclk_wdivider; + uint32_t i; + + REG_GET(DENTIST_DISPCLK_CNTL, + DENTIST_DISPCLK_WDIVIDER, &current_dispclk_wdivider); + + /* When changing divider to or from 127, some extra programming is required to prevent corruption */ + if (current_dispclk_wdivider == 127 && dispclk_wdivider != 127) { + for (i = 0; i < clk_mgr->base.ctx->dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i]; + uint32_t fifo_level; + struct dccg *dccg = clk_mgr->base.ctx->dc->res_pool->dccg; + struct stream_encoder *stream_enc = pipe_ctx->stream_res.stream_enc; + int32_t N; + int32_t j; + + if (!pipe_ctx->stream) + continue; + /* Virtual encoders don't have this function */ + if (!stream_enc->funcs->get_fifo_cal_average_level) + continue; + fifo_level = stream_enc->funcs->get_fifo_cal_average_level( + stream_enc); + N = fifo_level / 4; + dccg->funcs->set_fifo_errdet_ovr_en( + dccg, + true); + for (j = 0; j < N - 4; j++) + dccg->funcs->otg_drop_pixel( + dccg, + pipe_ctx->stream_res.tg->inst); + dccg->funcs->set_fifo_errdet_ovr_en( + dccg, + false); + } + } else if (dispclk_wdivider == 127 && current_dispclk_wdivider != 127) { + REG_UPDATE(DENTIST_DISPCLK_CNTL, + DENTIST_DISPCLK_WDIVIDER, 126); + REG_WAIT(DENTIST_DISPCLK_CNTL, DENTIST_DISPCLK_CHG_DONE, 1, 50, 100); + for (i = 0; i < clk_mgr->base.ctx->dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i]; + struct dccg *dccg = clk_mgr->base.ctx->dc->res_pool->dccg; + struct stream_encoder *stream_enc = pipe_ctx->stream_res.stream_enc; + uint32_t fifo_level; + int32_t N; + int32_t j; + + if (!pipe_ctx->stream) + continue; + /* Virtual encoders don't have this function */ + if (!stream_enc->funcs->get_fifo_cal_average_level) + continue; + fifo_level = stream_enc->funcs->get_fifo_cal_average_level( + stream_enc); + N = fifo_level / 4; + dccg->funcs->set_fifo_errdet_ovr_en(dccg, true); + for (j = 0; j < 12 - N; j++) + dccg->funcs->otg_add_pixel(dccg, + pipe_ctx->stream_res.tg->inst); + dccg->funcs->set_fifo_errdet_ovr_en(dccg, false); + } + }
REG_UPDATE(DENTIST_DISPCLK_CNTL, DENTIST_DISPCLK_WDIVIDER, dispclk_wdivider); @@ -251,11 +313,11 @@ void dcn2_update_clocks(struct clk_mgr *clk_mgr_base, if (dpp_clock_lowered) { // if clock is being lowered, increase DTO before lowering refclk dcn20_update_clocks_update_dpp_dto(clk_mgr, context, safe_to_lower); - dcn20_update_clocks_update_dentist(clk_mgr); + dcn20_update_clocks_update_dentist(clk_mgr, context); } else { // if clock is being raised, increase refclk before lowering DTO if (update_dppclk || update_dispclk) - dcn20_update_clocks_update_dentist(clk_mgr); + dcn20_update_clocks_update_dentist(clk_mgr, context); // always update dtos unless clock is lowered and not safe to lower dcn20_update_clocks_update_dpp_dto(clk_mgr, context, safe_to_lower); } diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.h index 0b9c045b0c8e..d254d0b6fba1 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.h +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.h @@ -50,7 +50,8 @@ void dcn2_get_clock(struct clk_mgr *clk_mgr, enum dc_clock_type clock_type, struct dc_clock_config *clock_cfg);
-void dcn20_update_clocks_update_dentist(struct clk_mgr_internal *clk_mgr); +void dcn20_update_clocks_update_dentist(struct clk_mgr_internal *clk_mgr, + struct dc_state *context);
void dcn2_read_clocks_from_hw_dentist(struct clk_mgr *clk_mgr_base);
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c index 652fa89fae5f..513676a6f52b 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c @@ -334,11 +334,11 @@ static void dcn3_update_clocks(struct clk_mgr *clk_mgr_base, if (dpp_clock_lowered) { /* if clock is being lowered, increase DTO before lowering refclk */ dcn20_update_clocks_update_dpp_dto(clk_mgr, context, safe_to_lower); - dcn20_update_clocks_update_dentist(clk_mgr); + dcn20_update_clocks_update_dentist(clk_mgr, context); } else { /* if clock is being raised, increase refclk before lowering DTO */ if (update_dppclk || update_dispclk) - dcn20_update_clocks_update_dentist(clk_mgr); + dcn20_update_clocks_update_dentist(clk_mgr, context); /* There is a check inside dcn20_update_clocks_update_dpp_dto which ensures * that we do not lower dto when it is not safe to lower. We do not need to * compare the current and new dppclk before calling this function.*/
From: Yifan Zhang yifan1.zhang@amd.com
[ Upstream commit 631003101c516ea29a74aee59666708857b9a805 ]
If GC has entered CGPG, ringing a doorbell beyond the first page doesn't wake up GC. Enlarge CP_MEC_DOORBELL_RANGE_UPPER to work around this issue.
Signed-off-by: Yifan Zhang yifan1.zhang@amd.com Reviewed-by: Felix Kuehling Felix.Kuehling@amd.com Reviewed-by: Alex Deucher alexander.deucher@amd.com Signed-off-by: Alex Deucher alexander.deucher@amd.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 516467e962b7..c09225d065c2 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -3673,8 +3673,12 @@ static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring) if (ring->use_doorbell) { WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER, (adev->doorbell_index.kiq * 2) << 2); + /* If GC has entered CGPG, ringing doorbell > first page doesn't + * wakeup GC. Enlarge CP_MEC_DOORBELL_RANGE_UPPER to workaround + * this issue. + */ WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER, - (adev->doorbell_index.userqueue_end * 2) << 2); + (adev->doorbell.size - 4)); }
WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
On Tue, Jul 6, 2021 at 7:16 AM Sasha Levin sashal@kernel.org wrote:
This should be dropped. It was already reverted.
Alex
From: Amber Lin Amber.Lin@amd.com
[ Upstream commit a7b2451d31cfa2e8aeccf3b35612ce33f02371fc ]
Calling free_mqd inside destroy_queue_nocpsch_locked can cause a circular lock. destroy_queue_nocpsch_locked is called under the DQM lock, which is taken in MMU notifiers, potentially in FS reclaim context. Taking another lock (the BO reservation lock in free_mqd) and triggering FS reclaim while inside the DQM lock creates a problematic circular lock dependency. Therefore, move free_mqd out of destroy_queue_nocpsch_locked and call it after unlocking DQM.
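A generic sketch of the same rule (illustrative names; kfree() stands in for free_mqd(), and the mutex for the DQM lock reachable from reclaim):

  #include <linux/list.h>
  #include <linux/mutex.h>
  #include <linux/slab.h>

  struct example_queue { struct list_head list; };

  struct example_mgr {
          struct mutex lock;      /* taken in MMU notifiers / reclaim */
          struct list_head queues;
  };

  static void example_destroy(struct example_mgr *m, struct example_queue *q)
  {
          /* Unlink under the lock ... */
          mutex_lock(&m->lock);
          list_del(&q->list);
          mutex_unlock(&m->lock);

          /* ... free only after unlocking: freeing may take other
           * sleeping locks (BO reservation in the real code) and
           * trigger reclaim, which could recurse into m->lock. */
          kfree(q);
  }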
Signed-off-by: Amber Lin Amber.Lin@amd.com Reviewed-by: Felix Kuehling Felix.Kuehling@amd.com Signed-off-by: Alex Deucher alexander.deucher@amd.com Signed-off-by: Sasha Levin sashal@kernel.org --- .../drm/amd/amdkfd/kfd_device_queue_manager.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index ccc9d108caff..e9b3e2e32bf8 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -486,9 +486,6 @@ static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm, if (retval == -ETIME) qpd->reset_wavefronts = true;
- - mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); - list_del(&q->list); if (list_empty(&qpd->queues_list)) { if (qpd->reset_wavefronts) { @@ -523,6 +520,8 @@ static int destroy_queue_nocpsch(struct device_queue_manager *dqm, int retval; uint64_t sdma_val = 0; struct kfd_process_device *pdd = qpd_to_pdd(qpd); + struct mqd_manager *mqd_mgr = + dqm->mqd_mgrs[get_mqd_type_from_queue_type(q->properties.type)];
/* Get the SDMA queue stats */ if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) || @@ -540,6 +539,8 @@ static int destroy_queue_nocpsch(struct device_queue_manager *dqm, pdd->sdma_past_activity_counter += sdma_val; dqm_unlock(dqm);
+ mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); + return retval; }
@@ -1629,7 +1630,7 @@ static bool set_cache_memory_policy(struct device_queue_manager *dqm, static int process_termination_nocpsch(struct device_queue_manager *dqm, struct qcm_process_device *qpd) { - struct queue *q, *next; + struct queue *q; struct device_process_node *cur, *next_dpn; int retval = 0; bool found = false; @@ -1637,12 +1638,19 @@ static int process_termination_nocpsch(struct device_queue_manager *dqm, dqm_lock(dqm);
/* Clear all user mode queues */ - list_for_each_entry_safe(q, next, &qpd->queues_list, list) { + while (!list_empty(&qpd->queues_list)) { + struct mqd_manager *mqd_mgr; int ret;
+ q = list_first_entry(&qpd->queues_list, struct queue, list); + mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( + q->properties.type)]; ret = destroy_queue_nocpsch_locked(dqm, qpd, q); if (ret) retval = ret; + dqm_unlock(dqm); + mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); + dqm_lock(dqm); }
/* Unregister process */
From: "Stanley.Yang" Stanley.Yang@amd.com
[ Upstream commit 6ec598cc9dfbf40433e94a2ed1a622e3ef80268b ]
Signed-off-by: Stanley.Yang Stanley.Yang@amd.com Reviewed-by: Hawking Zhang Hawking.Zhang@amd.com Signed-off-by: Alex Deucher alexander.deucher@amd.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h | 5 +++++ drivers/gpu/drm/amd/amdgpu/umc_v8_7.c | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h index bbcccf53080d..e5a75fb788dd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h @@ -21,6 +21,11 @@ #ifndef __AMDGPU_UMC_H__ #define __AMDGPU_UMC_H__
+/* + * (addr / 256) * 4096, the higher 26 bits in ErrorAddr + * is the index of 4KB block + */ +#define ADDR_OF_4KB_BLOCK(addr) (((addr) & ~0xffULL) << 4) /* * (addr / 256) * 8192, the higher 26 bits in ErrorAddr * is the index of 8KB block diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v8_7.c b/drivers/gpu/drm/amd/amdgpu/umc_v8_7.c index 89d20adfa001..af59a35788e3 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v8_7.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v8_7.c @@ -234,7 +234,7 @@ static void umc_v8_7_query_error_address(struct amdgpu_device *adev, err_addr &= ~((0x1ULL << lsb) - 1);
/* translate umc channel address to soc pa, 3 parts are included */ - retired_page = ADDR_OF_8KB_BLOCK(err_addr) | + retired_page = ADDR_OF_4KB_BLOCK(err_addr) | ADDR_OF_256B_BLOCK(channel_index) | OFFSET_IN_256B_BLOCK(err_addr);
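For reference, ADDR_OF_4KB_BLOCK() is just the comment's arithmetic in bit form: (addr / 256) * 4096 == (addr >> 8) << 12 == (addr & ~0xff) << 4. A minimal sketch of the same computation (the inline helper name is illustrative):

  #include <linux/types.h>

  /* Clear the low 8 bits (divide by 256, keeping the 4KB-block index
   * in the high bits), then shift left by 4 more for the * 4096. */
  static inline u64 example_addr_of_4kb_block(u64 addr)
  {
          return (addr & ~0xffULL) << 4;
  }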
From: xinhui pan xinhui.pan@amd.com
[ Upstream commit 56f221b6389e7ab99c30bbf01c71998ae92fc584 ]
To avoid any list corruption.
Signed-off-by: xinhui pan xinhui.pan@amd.com Reviewed-by: Felix Kuehling Felix.Kuehling@amd.com Signed-off-by: Alex Deucher alexander.deucher@amd.com Signed-off-by: Sasha Levin sashal@kernel.org --- .../drm/amd/amdkfd/kfd_device_queue_manager.c | 22 ++++++++++--------- 1 file changed, 12 insertions(+), 10 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index e9b3e2e32bf8..f0bad74af230 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -1709,7 +1709,7 @@ static int process_termination_cpsch(struct device_queue_manager *dqm, struct qcm_process_device *qpd) { int retval; - struct queue *q, *next; + struct queue *q; struct kernel_queue *kq, *kq_next; struct mqd_manager *mqd_mgr; struct device_process_node *cur, *next_dpn; @@ -1766,24 +1766,26 @@ static int process_termination_cpsch(struct device_queue_manager *dqm, qpd->reset_wavefronts = false; }
- dqm_unlock(dqm); - - /* Outside the DQM lock because under the DQM lock we can't do - * reclaim or take other locks that others hold while reclaiming. - */ - if (found) - kfd_dec_compute_active(dqm->dev); - /* Lastly, free mqd resources. * Do free_mqd() after dqm_unlock to avoid circular locking. */ - list_for_each_entry_safe(q, next, &qpd->queues_list, list) { + while (!list_empty(&qpd->queues_list)) { + q = list_first_entry(&qpd->queues_list, struct queue, list); mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( q->properties.type)]; list_del(&q->list); qpd->queue_count--; + dqm_unlock(dqm); mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); + dqm_lock(dqm); } + dqm_unlock(dqm); + + /* Outside the DQM lock because under the DQM lock we can't do + * reclaim or take other locks that others hold while reclaiming. + */ + if (found) + kfd_dec_compute_active(dqm->dev);
return retval; }
From: Yifan Zhang yifan1.zhang@amd.com
[ Upstream commit 962f2f1ae273399e357a3192d5413ca57f9b4885 ]
This reverts commit 631003101c516ea29a74aee59666708857b9a805.
Reason for revert: a side effect of enlarging CP_MEC_DOORBELL_RANGE may cause some APUs to fail to enter gfxoff in certain use cases.
Signed-off-by: Yifan Zhang yifan1.zhang@amd.com Acked-by: Alex Deucher alexander.deucher@amd.com Signed-off-by: Alex Deucher alexander.deucher@amd.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index c09225d065c2..516467e962b7 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -3673,12 +3673,8 @@ static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring) if (ring->use_doorbell) { WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER, (adev->doorbell_index.kiq * 2) << 2); - /* If GC has entered CGPG, ringing doorbell > first page doesn't - * wakeup GC. Enlarge CP_MEC_DOORBELL_RANGE_UPPER to workaround - * this issue. - */ WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER, - (adev->doorbell.size - 4)); + (adev->doorbell_index.userqueue_end * 2) << 2); }
WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
From: Logush Oliver ollogush@amd.com
[ Upstream commit eeb90e26ed05dd44553d557057bf35f08f853af8 ]
[why] Updating the file to fix the missing line
Signed-off-by: Logush Oliver ollogush@amd.com Reviewed-by: Charlene Liu Charlene.Liu@amd.com Acked-by: Bindu Ramamurthy bindu.r@amd.com Tested-by: Daniel Wheeler daniel.wheeler@amd.com Signed-off-by: Alex Deucher alexander.deucher@amd.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c index d79f4fe06c47..4812a72f8aad 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c +++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c @@ -2131,7 +2131,7 @@ static enum bp_result get_integrated_info_v2_1( info_v2_1->edp1_info.edp_pwr_down_bloff_to_vary_bloff; info->edp1_info.edp_panel_bpc = info_v2_1->edp1_info.edp_panel_bpc; - info->edp1_info.edp_bootup_bl_level = + info->edp1_info.edp_bootup_bl_level = info_v2_1->edp1_info.edp_bootup_bl_level;
info->edp2_info.edp_backlight_pwm_hz = le16_to_cpu(info_v2_1->edp2_info.edp_backlight_pwm_hz);
Am Dienstag, dem 06.07.2021 um 07:11 -0400 schrieb Sasha Levin:
While the subject contains "fix", this only removes a redundant NULL check, so the code is correct both before and after this change. Is this really stable material? Doesn't it just add commit noise to the stable kernels?
Regards, Lucas
dri-devel@lists.freedesktop.org