From: Christian König christian.koenig@amd.com
Previously we were completely over allocating, fix this by actually implementing the size calculation.
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/radeon/radeon.h | 2 -- drivers/gpu/drm/radeon/radeon_asic.h | 1 + drivers/gpu/drm/radeon/radeon_vce.c | 3 +-- drivers/gpu/drm/radeon/vce_v2_0.c | 16 +++++++++++++--- 4 files changed, 15 insertions(+), 7 deletions(-)
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 46eb0fa..34c51a2 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -1709,8 +1709,6 @@ int radeon_uvd_send_upll_ctlreq(struct radeon_device *rdev, * VCE */ #define RADEON_MAX_VCE_HANDLES 16 -#define RADEON_VCE_STACK_SIZE (1024*1024) -#define RADEON_VCE_HEAP_SIZE (4*1024*1024)
struct radeon_vce { struct radeon_bo *vcpu_bo; diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h index a3ca8cd..cb7787f 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.h +++ b/drivers/gpu/drm/radeon/radeon_asic.h @@ -974,6 +974,7 @@ int vce_v1_0_init(struct radeon_device *rdev); int vce_v1_0_start(struct radeon_device *rdev);
/* vce v2.0 */ +unsigned vce_v2_0_bo_size(struct radeon_device *rdev); int vce_v2_0_resume(struct radeon_device *rdev);
#endif diff --git a/drivers/gpu/drm/radeon/radeon_vce.c b/drivers/gpu/drm/radeon/radeon_vce.c index 0de5711..5b952bd 100644 --- a/drivers/gpu/drm/radeon/radeon_vce.c +++ b/drivers/gpu/drm/radeon/radeon_vce.c @@ -123,8 +123,7 @@ int radeon_vce_init(struct radeon_device *rdev)
/* allocate firmware, stack and heap BO */
- size = RADEON_GPU_PAGE_ALIGN(rdev->vce_fw->size) + - RADEON_VCE_STACK_SIZE + RADEON_VCE_HEAP_SIZE; + size = vce_v2_0_bo_size(rdev); r = radeon_bo_create(rdev, size, PAGE_SIZE, true, RADEON_GEM_DOMAIN_VRAM, 0, NULL, NULL, &rdev->vce.vcpu_bo); diff --git a/drivers/gpu/drm/radeon/vce_v2_0.c b/drivers/gpu/drm/radeon/vce_v2_0.c index fbbe78f..cdeaab7 100644 --- a/drivers/gpu/drm/radeon/vce_v2_0.c +++ b/drivers/gpu/drm/radeon/vce_v2_0.c @@ -31,6 +31,10 @@ #include "radeon_asic.h" #include "cikd.h"
+#define VCE_V2_0_FW_SIZE (256 * 1024) +#define VCE_V2_0_STACK_SIZE (64 * 1024) +#define VCE_V2_0_DATA_SIZE (23552 * RADEON_MAX_VCE_HANDLES) + static void vce_v2_0_set_sw_cg(struct radeon_device *rdev, bool gated) { u32 tmp; @@ -140,6 +144,12 @@ static void vce_v2_0_init_cg(struct radeon_device *rdev) WREG32(VCE_CLOCK_GATING_B, tmp); }
+unsigned vce_v2_0_bo_size(struct radeon_device *rdev) +{ + WARN_ON(rdev->vce_fw->size > VCE_V2_0_FW_SIZE); + return VCE_V2_0_FW_SIZE + VCE_V2_0_STACK_SIZE + VCE_V2_0_DATA_SIZE; +} + int vce_v2_0_resume(struct radeon_device *rdev) { uint64_t addr = rdev->vce.gpu_addr; @@ -159,17 +169,17 @@ int vce_v2_0_resume(struct radeon_device *rdev) WREG32(VCE_LMI_VCPU_CACHE_40BIT_BAR, addr >> 8);
addr &= 0xff; - size = RADEON_GPU_PAGE_ALIGN(rdev->vce_fw->size); + size = VCE_V2_0_FW_SIZE; WREG32(VCE_VCPU_CACHE_OFFSET0, addr & 0x7fffffff); WREG32(VCE_VCPU_CACHE_SIZE0, size);
addr += size; - size = RADEON_VCE_STACK_SIZE; + size = VCE_V2_0_STACK_SIZE; WREG32(VCE_VCPU_CACHE_OFFSET1, addr & 0x7fffffff); WREG32(VCE_VCPU_CACHE_SIZE1, size);
addr += size; - size = RADEON_VCE_HEAP_SIZE; + size = VCE_V2_0_DATA_SIZE; WREG32(VCE_VCPU_CACHE_OFFSET2, addr & 0x7fffffff); WREG32(VCE_VCPU_CACHE_SIZE2, size);
From: Christian König christian.koenig@amd.com
They seem to work fine with the kernel interface.
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/radeon/radeon_vce.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/radeon/radeon_vce.c b/drivers/gpu/drm/radeon/radeon_vce.c index 5b952bd..aa5d9ba 100644 --- a/drivers/gpu/drm/radeon/radeon_vce.c +++ b/drivers/gpu/drm/radeon/radeon_vce.c @@ -118,7 +118,9 @@ int radeon_vce_init(struct radeon_device *rdev) rdev->vce.fw_version = (start << 24) | (mid << 16) | (end << 8);
/* we can only work with this fw version for now */ - if (rdev->vce.fw_version != ((40 << 24) | (2 << 16) | (2 << 8))) + if ((rdev->vce.fw_version != ((40 << 24) | (2 << 16) | (2 << 8))) && + (rdev->vce.fw_version != ((50 << 24) | (0 << 16) | (1 << 8))) && + (rdev->vce.fw_version != ((50 << 24) | (1 << 16) | (2 << 8)))) return -EINVAL;
/* allocate firmware, stack and heap BO */
From: Christian König christian.koenig@amd.com
For setting clocks with VCE v1.0
v2: (chk) rebased on current tree
Signed-off-by: Christian König christian.koenig@amd.com Signed-off-by: Alex Deucher alexander.deucher@amd.com --- drivers/gpu/drm/radeon/radeon.h | 7 ++ drivers/gpu/drm/radeon/radeon_asic.c | 1 + drivers/gpu/drm/radeon/radeon_asic.h | 1 + drivers/gpu/drm/radeon/si.c | 121 +++++++++++++++++++++++++++++++++++ drivers/gpu/drm/radeon/sid.h | 27 ++++++++ 5 files changed, 157 insertions(+)
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 34c51a2..38603b1 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -2577,6 +2577,13 @@ static inline struct radeon_fence *to_radeon_fence(struct fence *f) tmp_ |= ((val) & ~(mask)); \ WREG32_PLL(reg, tmp_); \ } while (0) +#define WREG32_SMC_P(reg, val, mask) \ + do { \ + uint32_t tmp_ = RREG32_SMC(reg); \ + tmp_ &= (mask); \ + tmp_ |= ((val) & ~(mask)); \ + WREG32_SMC(reg, tmp_); \ + } while (0) #define DREG32_SYS(sqf, rdev, reg) seq_printf((sqf), #reg " : 0x%08X\n", r100_mm_rreg((rdev), (reg), false)) #define RREG32_IO(reg) r100_io_rreg(rdev, (reg)) #define WREG32_IO(reg, v) r100_io_wreg(rdev, (reg), (v)) diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c index 8dbf508..4227b30 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.c +++ b/drivers/gpu/drm/radeon/radeon_asic.c @@ -1973,6 +1973,7 @@ static struct radeon_asic si_asic = { .set_pcie_lanes = &r600_set_pcie_lanes, .set_clock_gating = NULL, .set_uvd_clocks = &si_set_uvd_clocks, + .set_vce_clocks = &si_set_vce_clocks, .get_temperature = &si_get_temp, }, .dpm = { diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h index cb7787f..0469d44 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.h +++ b/drivers/gpu/drm/radeon/radeon_asic.h @@ -745,6 +745,7 @@ void si_dma_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring, u32 si_get_xclk(struct radeon_device *rdev); uint64_t si_get_gpu_clock_counter(struct radeon_device *rdev); int si_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk); +int si_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk); int si_get_temp(struct radeon_device *rdev); int si_get_allowed_info_register(struct radeon_device *rdev, u32 reg, u32 *val); diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c index b1d74bc..6ff78bc 100644 --- a/drivers/gpu/drm/radeon/si.c +++ b/drivers/gpu/drm/radeon/si.c @@ -7675,3 +7675,124 @@ static void si_program_aspm(struct radeon_device *rdev) } } } + +int si_vce_send_vcepll_ctlreq(struct radeon_device *rdev) +{ + unsigned i; + + /* make sure VCEPLL_CTLREQ is deasserted */ + WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~UPLL_CTLREQ_MASK); + + mdelay(10); + + /* assert UPLL_CTLREQ */ + WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, UPLL_CTLREQ_MASK, ~UPLL_CTLREQ_MASK); + + /* wait for CTLACK and CTLACK2 to get asserted */ + for (i = 0; i < 100; ++i) { + uint32_t mask = UPLL_CTLACK_MASK | UPLL_CTLACK2_MASK; + if ((RREG32_SMC(CG_VCEPLL_FUNC_CNTL) & mask) == mask) + break; + mdelay(10); + } + + /* deassert UPLL_CTLREQ */ + WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~UPLL_CTLREQ_MASK); + + if (i == 100) { + DRM_ERROR("Timeout setting UVD clocks!\n"); + return -ETIMEDOUT; + } + + return 0; +} + +int si_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk) +{ + unsigned fb_div = 0, evclk_div = 0, ecclk_div = 0; + int r; + + /* bypass evclk and ecclk with bclk */ + WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_2, + EVCLK_SRC_SEL(1) | ECCLK_SRC_SEL(1), + ~(EVCLK_SRC_SEL_MASK | ECCLK_SRC_SEL_MASK)); + + /* put PLL in bypass mode */ + WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_BYPASS_EN_MASK, + ~VCEPLL_BYPASS_EN_MASK); + + if (!evclk || !ecclk) { + /* keep the Bypass mode, put PLL to sleep */ + WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_SLEEP_MASK, + ~VCEPLL_SLEEP_MASK); + return 0; + } + + r = radeon_uvd_calc_upll_dividers(rdev, evclk, ecclk, 125000, 250000, + 16384, 0x03FFFFFF, 0, 128, 5, + &fb_div, &evclk_div, &ecclk_div); + if (r) + return r; + + /* set RESET_ANTI_MUX to 0 */ + WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK); + + /* set VCO_MODE to 1 */ + WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_VCO_MODE_MASK, + ~VCEPLL_VCO_MODE_MASK); + + /* toggle VCEPLL_SLEEP to 1 then back to 0 */ + WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_SLEEP_MASK, + ~VCEPLL_SLEEP_MASK); + WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_SLEEP_MASK); + + /* deassert VCEPLL_RESET */ + WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_RESET_MASK); + + mdelay(1); + + r = si_vce_send_vcepll_ctlreq(rdev); + if (r) + return r; + + /* assert VCEPLL_RESET again */ + WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_RESET_MASK, ~VCEPLL_RESET_MASK); + + /* disable spread spectrum. */ + WREG32_SMC_P(CG_VCEPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK); + + /* set feedback divider */ + WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_3, VCEPLL_FB_DIV(fb_div), ~VCEPLL_FB_DIV_MASK); + + /* set ref divider to 0 */ + WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_REF_DIV_MASK); + + /* set PDIV_A and PDIV_B */ + WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_2, + VCEPLL_PDIV_A(evclk_div) | VCEPLL_PDIV_B(ecclk_div), + ~(VCEPLL_PDIV_A_MASK | VCEPLL_PDIV_B_MASK)); + + /* give the PLL some time to settle */ + mdelay(15); + + /* deassert PLL_RESET */ + WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_RESET_MASK); + + mdelay(15); + + /* switch from bypass mode to normal mode */ + WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_BYPASS_EN_MASK); + + r = si_vce_send_vcepll_ctlreq(rdev); + if (r) + return r; + + /* switch VCLK and DCLK selection */ + WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_2, + EVCLK_SRC_SEL(16) | ECCLK_SRC_SEL(16), + ~(EVCLK_SRC_SEL_MASK | ECCLK_SRC_SEL_MASK)); + + mdelay(100); + + return 0; +} diff --git a/drivers/gpu/drm/radeon/sid.h b/drivers/gpu/drm/radeon/sid.h index 3afac30..1630440 100644 --- a/drivers/gpu/drm/radeon/sid.h +++ b/drivers/gpu/drm/radeon/sid.h @@ -1917,4 +1917,31 @@ #define VCE_CMD_IB_AUTO 0x00000005 #define VCE_CMD_SEMAPHORE 0x00000006
+/* discrete vce clocks */ +#define CG_VCEPLL_FUNC_CNTL 0xc0030600 +# define VCEPLL_RESET_MASK 0x00000001 +# define VCEPLL_SLEEP_MASK 0x00000002 +# define VCEPLL_BYPASS_EN_MASK 0x00000004 +# define VCEPLL_CTLREQ_MASK 0x00000008 +# define VCEPLL_VCO_MODE_MASK 0x00000600 +# define VCEPLL_REF_DIV_MASK 0x003F0000 +# define VCEPLL_CTLACK_MASK 0x40000000 +# define VCEPLL_CTLACK2_MASK 0x80000000 +#define CG_VCEPLL_FUNC_CNTL_2 0xc0030601 +# define VCEPLL_PDIV_A(x) ((x) << 0) +# define VCEPLL_PDIV_A_MASK 0x0000007F +# define VCEPLL_PDIV_B(x) ((x) << 8) +# define VCEPLL_PDIV_B_MASK 0x00007F00 +# define EVCLK_SRC_SEL(x) ((x) << 20) +# define EVCLK_SRC_SEL_MASK 0x01F00000 +# define ECCLK_SRC_SEL(x) ((x) << 25) +# define ECCLK_SRC_SEL_MASK 0x3E000000 +#define CG_VCEPLL_FUNC_CNTL_3 0xc0030602 +# define VCEPLL_FB_DIV(x) ((x) << 0) +# define VCEPLL_FB_DIV_MASK 0x01FFFFFF +#define CG_VCEPLL_FUNC_CNTL_4 0xc0030603 +#define CG_VCEPLL_FUNC_CNTL_5 0xc0030604 +#define CG_VCEPLL_SPREAD_SPECTRUM 0xc0030606 +# define VCEPLL_SSEN_MASK 0x00000001 + #endif
From: Alex Deucher alexander.deucher@amd.com
This implements the function to set the vce clocks on TN hardware.
Signed-off-by: Alex Deucher alexander.deucher@amd.com --- drivers/gpu/drm/radeon/ni.c | 31 +++++++++++++++++++++++++++++++ drivers/gpu/drm/radeon/nid.h | 7 +++++++ drivers/gpu/drm/radeon/radeon_asic.c | 1 + drivers/gpu/drm/radeon/radeon_asic.h | 1 + 4 files changed, 40 insertions(+)
diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c index e8a496f..32f5f03 100644 --- a/drivers/gpu/drm/radeon/ni.c +++ b/drivers/gpu/drm/radeon/ni.c @@ -2553,3 +2553,34 @@ void cayman_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring, radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0)); radeon_ring_write(ring, 0x0); } + +int tn_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk) +{ + struct atom_clock_dividers dividers; + int r, i; + + r = radeon_atom_get_clock_dividers(rdev, COMPUTE_ENGINE_PLL_PARAM, + ecclk, false, ÷rs); + if (r) + return r; + + for (i = 0; i < 100; i++) { + if (RREG32(CG_ECLK_STATUS) & ECLK_STATUS) + break; + mdelay(10); + } + if (i == 100) + return -ETIMEDOUT; + + WREG32_P(CG_ECLK_CNTL, dividers.post_div, ~(ECLK_DIR_CNTL_EN|ECLK_DIVIDER_MASK)); + + for (i = 0; i < 100; i++) { + if (RREG32(CG_ECLK_STATUS) & ECLK_STATUS) + break; + mdelay(10); + } + if (i == 100) + return -ETIMEDOUT; + + return 0; +} diff --git a/drivers/gpu/drm/radeon/nid.h b/drivers/gpu/drm/radeon/nid.h index 3b29083..47eb49b 100644 --- a/drivers/gpu/drm/radeon/nid.h +++ b/drivers/gpu/drm/radeon/nid.h @@ -46,6 +46,13 @@
#define DMIF_ADDR_CONFIG 0xBD4
+/* fusion vce clocks */ +#define CG_ECLK_CNTL 0x620 +# define ECLK_DIVIDER_MASK 0x7f +# define ECLK_DIR_CNTL_EN (1 << 8) +#define CG_ECLK_STATUS 0x624 +# define ECLK_STATUS (1 << 0) + /* DCE6 only */ #define DMIF_ADDR_CALC 0xC00
diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c index 4227b30..b37b22bd 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.c +++ b/drivers/gpu/drm/radeon/radeon_asic.c @@ -1838,6 +1838,7 @@ static struct radeon_asic trinity_asic = { .set_pcie_lanes = NULL, .set_clock_gating = NULL, .set_uvd_clocks = &sumo_set_uvd_clocks, + .set_vce_clocks = &tn_set_vce_clocks, .get_temperature = &tn_get_temp, }, .dpm = { diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h index 0469d44..629f291 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.h +++ b/drivers/gpu/drm/radeon/radeon_asic.h @@ -694,6 +694,7 @@ int trinity_dpm_force_performance_level(struct radeon_device *rdev, void trinity_dpm_enable_bapm(struct radeon_device *rdev, bool enable); u32 trinity_dpm_get_current_sclk(struct radeon_device *rdev); u32 trinity_dpm_get_current_mclk(struct radeon_device *rdev); +int tn_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk);
/* DCE6 - SI */ void dce6_bandwidth_update(struct radeon_device *rdev);
From: Alex Deucher alexander.deucher@amd.com
Signed-off-by: Alex Deucher alexander.deucher@amd.com --- drivers/gpu/drm/radeon/trinity_dpm.c | 76 ++++++++++++++++++++++++++++++++++++ 1 file changed, 76 insertions(+)
diff --git a/drivers/gpu/drm/radeon/trinity_dpm.c b/drivers/gpu/drm/radeon/trinity_dpm.c index a5b02c5..e0d0780 100644 --- a/drivers/gpu/drm/radeon/trinity_dpm.c +++ b/drivers/gpu/drm/radeon/trinity_dpm.c @@ -985,6 +985,15 @@ static void trinity_set_uvd_clock_after_set_eng_clock(struct radeon_device *rdev trinity_setup_uvd_clocks(rdev, new_rps, old_rps); }
+static void trinity_set_vce_clock(struct radeon_device *rdev, + struct radeon_ps *new_rps, + struct radeon_ps *old_rps) +{ + if ((old_rps->evclk != new_rps->evclk) || + (old_rps->ecclk != new_rps->ecclk)) + radeon_set_vce_clocks(rdev, new_rps->evclk, new_rps->ecclk); +} + static void trinity_program_ttt(struct radeon_device *rdev) { struct trinity_power_info *pi = trinity_get_pi(rdev); @@ -1246,6 +1255,7 @@ int trinity_dpm_set_power_state(struct radeon_device *rdev) trinity_force_level_0(rdev); trinity_unforce_levels(rdev); trinity_set_uvd_clock_after_set_eng_clock(rdev, new_ps, old_ps); + trinity_set_vce_clock(rdev, new_ps, old_ps); } trinity_release_mutex(rdev);
@@ -1483,7 +1493,35 @@ static void trinity_adjust_uvd_state(struct radeon_device *rdev, } }
+static int trinity_get_vce_clock_voltage(struct radeon_device *rdev, + u32 evclk, u32 ecclk, u16 *voltage) +{ + u32 i; + int ret = -EINVAL; + struct radeon_vce_clock_voltage_dependency_table *table = + &rdev->pm.dpm.dyn_state.vce_clock_voltage_dependency_table; + + if (((evclk == 0) && (ecclk == 0)) || + (table && (table->count == 0))) { + *voltage = 0; + return 0; + } + + for (i = 0; i < table->count; i++) { + if ((evclk <= table->entries[i].evclk) && + (ecclk <= table->entries[i].ecclk)) { + *voltage = table->entries[i].v; + ret = 0; + break; + } + } + + /* if no match return the highest voltage */ + if (ret) + *voltage = table->entries[table->count - 1].v;
+ return ret; +}
static void trinity_apply_state_adjust_rules(struct radeon_device *rdev, struct radeon_ps *new_rps, @@ -1496,6 +1534,7 @@ static void trinity_apply_state_adjust_rules(struct radeon_device *rdev, u32 min_sclk = pi->sys_info.min_sclk; /* XXX check against disp reqs */ u32 sclk_in_sr = pi->sys_info.min_sclk; /* ??? */ u32 i; + u16 min_vce_voltage; bool force_high; u32 num_active_displays = rdev->pm.dpm.new_active_crtc_count;
@@ -1504,6 +1543,14 @@ static void trinity_apply_state_adjust_rules(struct radeon_device *rdev,
trinity_adjust_uvd_state(rdev, new_rps);
+ if (new_rps->vce_active) { + new_rps->evclk = rdev->pm.dpm.vce_states[rdev->pm.dpm.vce_level].evclk; + new_rps->ecclk = rdev->pm.dpm.vce_states[rdev->pm.dpm.vce_level].ecclk; + } else { + new_rps->evclk = 0; + new_rps->ecclk = 0; + } + for (i = 0; i < ps->num_levels; i++) { if (ps->levels[i].vddc_index < min_voltage) ps->levels[i].vddc_index = min_voltage; @@ -1512,6 +1559,17 @@ static void trinity_apply_state_adjust_rules(struct radeon_device *rdev, ps->levels[i].sclk = trinity_get_valid_engine_clock(rdev, min_sclk);
+ /* patch in vce limits */ + if (new_rps->vce_active) { + /* sclk */ + if (ps->levels[i].sclk < rdev->pm.dpm.vce_states[rdev->pm.dpm.vce_level].sclk) + ps->levels[i].sclk = rdev->pm.dpm.vce_states[rdev->pm.dpm.vce_level].sclk; + /* vddc */ + trinity_get_vce_clock_voltage(rdev, new_rps->evclk, new_rps->ecclk, &min_vce_voltage); + if (ps->levels[i].vddc_index < min_vce_voltage) + ps->levels[i].vddc_index = min_vce_voltage; + } + ps->levels[i].ds_divider_index = sumo_get_sleep_divider_id_from_clock(rdev, ps->levels[i].sclk, sclk_in_sr);
@@ -1733,6 +1791,19 @@ static int trinity_parse_power_table(struct radeon_device *rdev) power_state_offset += 2 + power_state->v2.ucNumDPMLevels; } rdev->pm.dpm.num_ps = state_array->ucNumEntries; + + /* fill in the vce power states */ + for (i = 0; i < RADEON_MAX_VCE_LEVELS; i++) { + u32 sclk; + clock_array_index = rdev->pm.dpm.vce_states[i].clk_idx; + clock_info = (union pplib_clock_info *) + &clock_info_array->clockInfo[clock_array_index * clock_info_array->ucEntrySize]; + sclk = le16_to_cpu(clock_info->sumo.usEngineClockLow); + sclk |= clock_info->sumo.ucEngineClockHigh << 16; + rdev->pm.dpm.vce_states[i].sclk = sclk; + rdev->pm.dpm.vce_states[i].mclk = 0; + } + return 0; }
@@ -1914,6 +1985,10 @@ int trinity_dpm_init(struct radeon_device *rdev) if (ret) return ret;
+ ret = r600_parse_extended_power_table(rdev); + if (ret) + return ret; + ret = trinity_parse_power_table(rdev); if (ret) return ret; @@ -2000,6 +2075,7 @@ void trinity_dpm_fini(struct radeon_device *rdev) } kfree(rdev->pm.dpm.ps); kfree(rdev->pm.dpm.priv); + r600_free_extended_power_table(rdev); }
u32 trinity_dpm_get_sclk(struct radeon_device *rdev, bool low)
From: Alex Deucher alexander.deucher@amd.com
Signed-off-by: Alex Deucher alexander.deucher@amd.com --- drivers/gpu/drm/radeon/si_dpm.c | 102 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 98 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/radeon/si_dpm.c b/drivers/gpu/drm/radeon/si_dpm.c index ff8b83f..69cd4ca 100644 --- a/drivers/gpu/drm/radeon/si_dpm.c +++ b/drivers/gpu/drm/radeon/si_dpm.c @@ -2928,6 +2928,56 @@ static struct si_dpm_quirk si_dpm_quirk_list[] = { { 0, 0, 0, 0 }, };
+static u16 si_get_lower_of_leakage_and_vce_voltage(struct radeon_device *rdev, + u16 vce_voltage) +{ + u16 highest_leakage = 0; + struct si_power_info *si_pi = si_get_pi(rdev); + int i; + + for (i = 0; i < si_pi->leakage_voltage.count; i++){ + if (highest_leakage < si_pi->leakage_voltage.entries[i].voltage) + highest_leakage = si_pi->leakage_voltage.entries[i].voltage; + } + + if (si_pi->leakage_voltage.count && (highest_leakage < vce_voltage)) + return highest_leakage; + + return vce_voltage; +} + +static int si_get_vce_clock_voltage(struct radeon_device *rdev, + u32 evclk, u32 ecclk, u16 *voltage) +{ + u32 i; + int ret = -EINVAL; + struct radeon_vce_clock_voltage_dependency_table *table = + &rdev->pm.dpm.dyn_state.vce_clock_voltage_dependency_table; + + if (((evclk == 0) && (ecclk == 0)) || + (table && (table->count == 0))) { + *voltage = 0; + return 0; + } + + for (i = 0; i < table->count; i++) { + if ((evclk <= table->entries[i].evclk) && + (ecclk <= table->entries[i].ecclk)) { + *voltage = table->entries[i].v; + ret = 0; + break; + } + } + + /* if no match return the highest voltage */ + if (ret) + *voltage = table->entries[table->count - 1].v; + + *voltage = si_get_lower_of_leakage_and_vce_voltage(rdev, *voltage); + + return ret; +} + static void si_apply_state_adjust_rules(struct radeon_device *rdev, struct radeon_ps *rps) { @@ -2936,7 +2986,7 @@ static void si_apply_state_adjust_rules(struct radeon_device *rdev, bool disable_mclk_switching = false; bool disable_sclk_switching = false; u32 mclk, sclk; - u16 vddc, vddci; + u16 vddc, vddci, min_vce_voltage = 0; u32 max_sclk_vddc, max_mclk_vddci, max_mclk_vddc; u32 max_sclk = 0, max_mclk = 0; int i; @@ -2955,6 +3005,16 @@ static void si_apply_state_adjust_rules(struct radeon_device *rdev, ++p; }
+ if (rps->vce_active) { + rps->evclk = rdev->pm.dpm.vce_states[rdev->pm.dpm.vce_level].evclk; + rps->ecclk = rdev->pm.dpm.vce_states[rdev->pm.dpm.vce_level].ecclk; + si_get_vce_clock_voltage(rdev, rps->evclk, rps->ecclk, + &min_vce_voltage); + } else { + rps->evclk = 0; + rps->ecclk = 0; + } + if ((rdev->pm.dpm.new_active_crtc_count > 1) || ni_dpm_vblank_too_short(rdev)) disable_mclk_switching = true; @@ -3035,6 +3095,13 @@ static void si_apply_state_adjust_rules(struct radeon_device *rdev, vddc = ps->performance_levels[0].vddc; }
+ if (rps->vce_active) { + if (sclk < rdev->pm.dpm.vce_states[rdev->pm.dpm.vce_level].sclk) + sclk = rdev->pm.dpm.vce_states[rdev->pm.dpm.vce_level].sclk; + if (mclk < rdev->pm.dpm.vce_states[rdev->pm.dpm.vce_level].mclk) + mclk = rdev->pm.dpm.vce_states[rdev->pm.dpm.vce_level].mclk; + } + /* adjusted low state */ ps->performance_levels[0].sclk = sclk; ps->performance_levels[0].mclk = mclk; @@ -3084,6 +3151,8 @@ static void si_apply_state_adjust_rules(struct radeon_device *rdev, &ps->performance_levels[i]);
for (i = 0; i < ps->performance_level_count; i++) { + if (ps->performance_levels[i].vddc < min_vce_voltage) + ps->performance_levels[i].vddc = min_vce_voltage; btc_apply_voltage_dependency_rules(&rdev->pm.dpm.dyn_state.vddc_dependency_on_sclk, ps->performance_levels[i].sclk, max_limits->vddc, &ps->performance_levels[i].vddc); @@ -3110,7 +3179,6 @@ static void si_apply_state_adjust_rules(struct radeon_device *rdev, if (ps->performance_levels[i].vddc > rdev->pm.dpm.dyn_state.max_clock_voltage_on_dc.vddc) ps->dc_compatible = false; } - }
#if 0 @@ -5859,6 +5927,15 @@ static void si_set_pcie_lane_width_in_smc(struct radeon_device *rdev, } }
+static void si_set_vce_clock(struct radeon_device *rdev, + struct radeon_ps *new_rps, + struct radeon_ps *old_rps) +{ + if ((old_rps->evclk != new_rps->evclk) || + (old_rps->ecclk != new_rps->ecclk)) + radeon_set_vce_clocks(rdev, new_rps->evclk, new_rps->ecclk); +} + void si_dpm_setup_asic(struct radeon_device *rdev) { int r; @@ -6547,6 +6624,7 @@ int si_dpm_set_power_state(struct radeon_device *rdev) return ret; } ni_set_uvd_clock_after_set_eng_clock(rdev, new_ps, old_ps); + si_set_vce_clock(rdev, new_ps, old_ps); if (eg_pi->pcie_performance_request) si_notify_link_speed_change_after_state_change(rdev, new_ps, old_ps); ret = si_set_power_state_conditionally_enable_ulv(rdev, new_ps); @@ -6793,6 +6871,21 @@ static int si_parse_power_table(struct radeon_device *rdev) power_state_offset += 2 + power_state->v2.ucNumDPMLevels; } rdev->pm.dpm.num_ps = state_array->ucNumEntries; + + /* fill in the vce power states */ + for (i = 0; i < RADEON_MAX_VCE_LEVELS; i++) { + u32 sclk, mclk; + clock_array_index = rdev->pm.dpm.vce_states[i].clk_idx; + clock_info = (union pplib_clock_info *) + &clock_info_array->clockInfo[clock_array_index * clock_info_array->ucEntrySize]; + sclk = le16_to_cpu(clock_info->si.usEngineClockLow); + sclk |= clock_info->si.ucEngineClockHigh << 16; + mclk = le16_to_cpu(clock_info->si.usMemoryClockLow); + mclk |= clock_info->si.ucMemoryClockHigh << 16; + rdev->pm.dpm.vce_states[i].sclk = sclk; + rdev->pm.dpm.vce_states[i].mclk = mclk; + } + return 0; }
@@ -6837,10 +6930,11 @@ int si_dpm_init(struct radeon_device *rdev) if (ret) return ret;
- ret = si_parse_power_table(rdev); + ret = r600_parse_extended_power_table(rdev); if (ret) return ret; - ret = r600_parse_extended_power_table(rdev); + + ret = si_parse_power_table(rdev); if (ret) return ret;
From: Christian König christian.koenig@amd.com
Initial support for VCE 1.0 using newest firmware.
v2: rebased v3: fix for TN v4: fix FW size calculation
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/radeon/ni.c | 46 ++++++++++++ drivers/gpu/drm/radeon/radeon.h | 1 + drivers/gpu/drm/radeon/radeon_asic.c | 17 +++++ drivers/gpu/drm/radeon/radeon_asic.h | 3 + drivers/gpu/drm/radeon/radeon_vce.c | 23 +++++- drivers/gpu/drm/radeon/si.c | 46 ++++++++++++ drivers/gpu/drm/radeon/sid.h | 1 + drivers/gpu/drm/radeon/vce_v1_0.c | 140 +++++++++++++++++++++++++++++++++++ 8 files changed, 274 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c index 32f5f03..faffca3 100644 --- a/drivers/gpu/drm/radeon/ni.c +++ b/drivers/gpu/drm/radeon/ni.c @@ -2040,6 +2040,25 @@ static int cayman_startup(struct radeon_device *rdev) if (r) rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
+ if (rdev->family == CHIP_ARUBA) { + r = radeon_vce_resume(rdev); + if (!r) + r = vce_v1_0_resume(rdev); + + if (!r) + r = radeon_fence_driver_start_ring(rdev, + TN_RING_TYPE_VCE1_INDEX); + if (!r) + r = radeon_fence_driver_start_ring(rdev, + TN_RING_TYPE_VCE2_INDEX); + + if (r) { + dev_err(rdev->dev, "VCE init error (%d).\n", r); + rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0; + rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0; + } + } + r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX); if (r) { dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r); @@ -2117,6 +2136,19 @@ static int cayman_startup(struct radeon_device *rdev) DRM_ERROR("radeon: failed initializing UVD (%d).\n", r); }
+ ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX]; + if (ring->ring_size) + r = radeon_ring_init(rdev, ring, ring->ring_size, 0, 0x0); + + ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX]; + if (ring->ring_size) + r = radeon_ring_init(rdev, ring, ring->ring_size, 0, 0x0); + + if (!r) + r = vce_v1_0_init(rdev); + else if (r != -ENOENT) + DRM_ERROR("radeon: failed initializing VCE (%d).\n", r); + r = radeon_ib_pool_init(rdev); if (r) { dev_err(rdev->dev, "IB initialization failed (%d).\n", r); @@ -2272,6 +2304,19 @@ int cayman_init(struct radeon_device *rdev) r600_ring_init(rdev, ring, 4096); }
+ if (rdev->family == CHIP_ARUBA) { + r = radeon_vce_init(rdev); + if (!r) { + ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX]; + ring->ring_obj = NULL; + r600_ring_init(rdev, ring, 4096); + + ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX]; + ring->ring_obj = NULL; + r600_ring_init(rdev, ring, 4096); + } + } + rdev->ih.ring_obj = NULL; r600_ih_ring_init(rdev, 64 * 1024);
@@ -2325,6 +2370,7 @@ void cayman_fini(struct radeon_device *rdev) radeon_irq_kms_fini(rdev); uvd_v1_0_fini(rdev); radeon_uvd_fini(rdev); + radeon_vce_fini(rdev); cayman_pcie_gart_fini(rdev); r600_vram_scratch_fini(rdev); radeon_gem_fini(rdev); diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 38603b1..59480fd 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -1719,6 +1719,7 @@ struct radeon_vce { struct drm_file *filp[RADEON_MAX_VCE_HANDLES]; unsigned img_size[RADEON_MAX_VCE_HANDLES]; struct delayed_work idle_work; + uint32_t keyselect; };
int radeon_vce_init(struct radeon_device *rdev); diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c index b37b22bd..eaf909e 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.c +++ b/drivers/gpu/drm/radeon/radeon_asic.c @@ -1761,6 +1761,19 @@ static struct radeon_asic cayman_asic = { }, };
+static struct radeon_asic_ring trinity_vce_ring = { + .ib_execute = &radeon_vce_ib_execute, + .emit_fence = &radeon_vce_fence_emit, + .emit_semaphore = &radeon_vce_semaphore_emit, + .cs_parse = &radeon_vce_cs_parse, + .ring_test = &radeon_vce_ring_test, + .ib_test = &radeon_vce_ib_test, + .is_lockup = &radeon_ring_test_lockup, + .get_rptr = &vce_v1_0_get_rptr, + .get_wptr = &vce_v1_0_get_wptr, + .set_wptr = &vce_v1_0_set_wptr, +}; + static struct radeon_asic trinity_asic = { .init = &cayman_init, .fini = &cayman_fini, @@ -1794,6 +1807,8 @@ static struct radeon_asic trinity_asic = { [R600_RING_TYPE_DMA_INDEX] = &cayman_dma_ring, [CAYMAN_RING_TYPE_DMA1_INDEX] = &cayman_dma_ring, [R600_RING_TYPE_UVD_INDEX] = &cayman_uvd_ring, + [TN_RING_TYPE_VCE1_INDEX] = &trinity_vce_ring, + [TN_RING_TYPE_VCE2_INDEX] = &trinity_vce_ring, }, .irq = { .set = &evergreen_irq_set, @@ -1930,6 +1945,8 @@ static struct radeon_asic si_asic = { [R600_RING_TYPE_DMA_INDEX] = &si_dma_ring, [CAYMAN_RING_TYPE_DMA1_INDEX] = &si_dma_ring, [R600_RING_TYPE_UVD_INDEX] = &cayman_uvd_ring, + [TN_RING_TYPE_VCE1_INDEX] = &trinity_vce_ring, + [TN_RING_TYPE_VCE2_INDEX] = &trinity_vce_ring, }, .irq = { .set = &si_irq_set, diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h index 629f291..e0aa332 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.h +++ b/drivers/gpu/drm/radeon/radeon_asic.h @@ -972,6 +972,9 @@ uint32_t vce_v1_0_get_wptr(struct radeon_device *rdev, struct radeon_ring *ring); void vce_v1_0_set_wptr(struct radeon_device *rdev, struct radeon_ring *ring); +int vce_v1_0_load_fw(struct radeon_device *rdev, uint32_t *data); +unsigned vce_v1_0_bo_size(struct radeon_device *rdev); +int vce_v1_0_resume(struct radeon_device *rdev); int vce_v1_0_init(struct radeon_device *rdev); int vce_v1_0_start(struct radeon_device *rdev);
diff --git a/drivers/gpu/drm/radeon/radeon_vce.c b/drivers/gpu/drm/radeon/radeon_vce.c index aa5d9ba..574f62b 100644 --- a/drivers/gpu/drm/radeon/radeon_vce.c +++ b/drivers/gpu/drm/radeon/radeon_vce.c @@ -38,8 +38,10 @@ #define VCE_IDLE_TIMEOUT_MS 1000
/* Firmware Names */ +#define FIRMWARE_TAHITI "radeon/TAHITI_vce.bin" #define FIRMWARE_BONAIRE "radeon/BONAIRE_vce.bin"
+MODULE_FIRMWARE(FIRMWARE_TAHITI); MODULE_FIRMWARE(FIRMWARE_BONAIRE);
static void radeon_vce_idle_work_handler(struct work_struct *work); @@ -63,6 +65,14 @@ int radeon_vce_init(struct radeon_device *rdev) INIT_DELAYED_WORK(&rdev->vce.idle_work, radeon_vce_idle_work_handler);
switch (rdev->family) { + case CHIP_TAHITI: + case CHIP_PITCAIRN: + case CHIP_VERDE: + case CHIP_OLAND: + case CHIP_ARUBA: + fw_name = FIRMWARE_TAHITI; + break; + case CHIP_BONAIRE: case CHIP_KAVERI: case CHIP_KABINI: @@ -125,7 +135,10 @@ int radeon_vce_init(struct radeon_device *rdev)
/* allocate firmware, stack and heap BO */
- size = vce_v2_0_bo_size(rdev); + if (rdev->family < CHIP_BONAIRE) + size = vce_v1_0_bo_size(rdev); + else + size = vce_v2_0_bo_size(rdev); r = radeon_bo_create(rdev, size, PAGE_SIZE, true, RADEON_GEM_DOMAIN_VRAM, 0, NULL, NULL, &rdev->vce.vcpu_bo); @@ -226,13 +239,17 @@ int radeon_vce_resume(struct radeon_device *rdev) return r; }
- memcpy(cpu_addr, rdev->vce_fw->data, rdev->vce_fw->size); + memset(cpu_addr, 0, radeon_bo_size(rdev->vce.vcpu_bo)); + if (rdev->family < CHIP_BONAIRE) + r = vce_v1_0_load_fw(rdev, cpu_addr); + else + memcpy(cpu_addr, rdev->vce_fw->data, rdev->vce_fw->size);
radeon_bo_kunmap(rdev->vce.vcpu_bo);
radeon_bo_unreserve(rdev->vce.vcpu_bo);
- return 0; + return r; }
/** diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c index 6ff78bc..5ca08e3 100644 --- a/drivers/gpu/drm/radeon/si.c +++ b/drivers/gpu/drm/radeon/si.c @@ -6907,6 +6907,22 @@ static int si_startup(struct radeon_device *rdev) rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0; }
+ r = radeon_vce_resume(rdev); + if (!r) { + r = vce_v1_0_resume(rdev); + if (!r) + r = radeon_fence_driver_start_ring(rdev, + TN_RING_TYPE_VCE1_INDEX); + if (!r) + r = radeon_fence_driver_start_ring(rdev, + TN_RING_TYPE_VCE2_INDEX); + } + if (r) { + dev_err(rdev->dev, "VCE init error (%d).\n", r); + rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0; + rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0; + } + /* Enable IRQ */ if (!rdev->irq.installed) { r = radeon_irq_kms_init(rdev); @@ -6975,6 +6991,23 @@ static int si_startup(struct radeon_device *rdev) } }
+ r = -ENOENT; + + ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX]; + if (ring->ring_size) + r = radeon_ring_init(rdev, ring, ring->ring_size, 0, + VCE_CMD_NO_OP); + + ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX]; + if (ring->ring_size) + r = radeon_ring_init(rdev, ring, ring->ring_size, 0, + VCE_CMD_NO_OP); + + if (!r) + r = vce_v1_0_init(rdev); + else if (r != -ENOENT) + DRM_ERROR("radeon: failed initializing VCE (%d).\n", r); + r = radeon_ib_pool_init(rdev); if (r) { dev_err(rdev->dev, "IB initialization failed (%d).\n", r); @@ -7033,6 +7066,7 @@ int si_suspend(struct radeon_device *rdev) if (rdev->has_uvd) { uvd_v1_0_fini(rdev); radeon_uvd_suspend(rdev); + radeon_vce_suspend(rdev); } si_fini_pg(rdev); si_fini_cg(rdev); @@ -7140,6 +7174,17 @@ int si_init(struct radeon_device *rdev) } }
+ r = radeon_vce_init(rdev); + if (!r) { + ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX]; + ring->ring_obj = NULL; + r600_ring_init(rdev, ring, 4096); + + ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX]; + ring->ring_obj = NULL; + r600_ring_init(rdev, ring, 4096); + } + rdev->ih.ring_obj = NULL; r600_ih_ring_init(rdev, 64 * 1024);
@@ -7191,6 +7236,7 @@ void si_fini(struct radeon_device *rdev) if (rdev->has_uvd) { uvd_v1_0_fini(rdev); radeon_uvd_fini(rdev); + radeon_vce_fini(rdev); } si_pcie_gart_fini(rdev); r600_vram_scratch_fini(rdev); diff --git a/drivers/gpu/drm/radeon/sid.h b/drivers/gpu/drm/radeon/sid.h index 1630440..4823a07 100644 --- a/drivers/gpu/drm/radeon/sid.h +++ b/drivers/gpu/drm/radeon/sid.h @@ -1879,6 +1879,7 @@ #define VCE_VCPU_CACHE_SIZE1 0x20030 #define VCE_VCPU_CACHE_OFFSET2 0x20034 #define VCE_VCPU_CACHE_SIZE2 0x20038 +#define VCE_VCPU_SCRATCH7 0x200dc #define VCE_SOFT_RESET 0x20120 #define VCE_ECPU_SOFT_RESET (1 << 0) #define VCE_FME_SOFT_RESET (1 << 2) diff --git a/drivers/gpu/drm/radeon/vce_v1_0.c b/drivers/gpu/drm/radeon/vce_v1_0.c index b44d9c8..81dd39b 100644 --- a/drivers/gpu/drm/radeon/vce_v1_0.c +++ b/drivers/gpu/drm/radeon/vce_v1_0.c @@ -31,6 +31,23 @@ #include "radeon_asic.h" #include "sid.h"
+#define VCE_V1_0_FW_SIZE (256 * 1024) +#define VCE_V1_0_STACK_SIZE (64 * 1024) +#define VCE_V1_0_DATA_SIZE (7808 * (RADEON_MAX_VCE_HANDLES + 1)) + +struct vce_v1_0_fw_signature +{ + int32_t off; + uint32_t len; + int32_t num; + struct { + uint32_t chip_id; + uint32_t keyselect; + uint32_t nonce[4]; + uint32_t sigval[4]; + } val[8]; +}; + /** * vce_v1_0_get_rptr - get read pointer * @@ -82,6 +99,129 @@ void vce_v1_0_set_wptr(struct radeon_device *rdev, WREG32(VCE_RB_WPTR2, ring->wptr); }
+int vce_v1_0_load_fw(struct radeon_device *rdev, uint32_t *data) +{ + struct vce_v1_0_fw_signature *sign = (void*)rdev->vce_fw->data; + uint32_t chip_id; + int i; + + switch (rdev->family) { + case CHIP_TAHITI: + chip_id = 0x01000014; + break; + case CHIP_VERDE: + chip_id = 0x01000015; + break; + case CHIP_PITCAIRN: + case CHIP_OLAND: + chip_id = 0x01000016; + break; + case CHIP_ARUBA: + chip_id = 0x01000017; + break; + default: + return -EINVAL; + } + + for (i = 0; i < sign->num; ++i) { + if (sign->val[i].chip_id == chip_id) + break; + } + + if (i == sign->num) + return -EINVAL; + + data += (256 - 64) / 4; + data[0] = sign->val[i].nonce[0]; + data[1] = sign->val[i].nonce[1]; + data[2] = sign->val[i].nonce[2]; + data[3] = sign->val[i].nonce[3]; + data[4] = sign->len + 64; + + memset(&data[5], 0, 44); + memcpy(&data[16], &sign[1], rdev->vce_fw->size - sizeof(*sign)); + + data += data[4] / 4; + data[0] = sign->val[i].sigval[0]; + data[1] = sign->val[i].sigval[1]; + data[2] = sign->val[i].sigval[2]; + data[3] = sign->val[i].sigval[3]; + + rdev->vce.keyselect = sign->val[i].keyselect; + + return 0; +} + +unsigned vce_v1_0_bo_size(struct radeon_device *rdev) +{ + WARN_ON(VCE_V1_0_FW_SIZE < rdev->vce_fw->size); + return VCE_V1_0_FW_SIZE + VCE_V1_0_STACK_SIZE + VCE_V1_0_DATA_SIZE; +} + +int vce_v1_0_resume(struct radeon_device *rdev) +{ + uint64_t addr = rdev->vce.gpu_addr; + uint32_t size; + int i; + + WREG32_P(VCE_CLOCK_GATING_A, 0, ~(1 << 16)); + WREG32_P(VCE_UENC_CLOCK_GATING, 0x1FF000, ~0xFF9FF000); + WREG32_P(VCE_UENC_REG_CLOCK_GATING, 0x3F, ~0x3F); + WREG32(VCE_CLOCK_GATING_B, 0); + + WREG32_P(VCE_LMI_FW_PERIODIC_CTRL, 0x4, ~0x4); + + WREG32(VCE_LMI_CTRL, 0x00398000); + WREG32_P(VCE_LMI_CACHE_CTRL, 0x0, ~0x1); + WREG32(VCE_LMI_SWAP_CNTL, 0); + WREG32(VCE_LMI_SWAP_CNTL1, 0); + WREG32(VCE_LMI_VM_CTRL, 0); + + WREG32(VCE_VCPU_SCRATCH7, RADEON_MAX_VCE_HANDLES); + + addr += 256; + size = VCE_V1_0_FW_SIZE; + WREG32(VCE_VCPU_CACHE_OFFSET0, addr & 0x7fffffff); + WREG32(VCE_VCPU_CACHE_SIZE0, size); + + addr += size; + size = VCE_V1_0_STACK_SIZE; + WREG32(VCE_VCPU_CACHE_OFFSET1, addr & 0x7fffffff); + WREG32(VCE_VCPU_CACHE_SIZE1, size); + + addr += size; + size = VCE_V1_0_DATA_SIZE; + WREG32(VCE_VCPU_CACHE_OFFSET2, addr & 0x7fffffff); + WREG32(VCE_VCPU_CACHE_SIZE2, size); + + WREG32_P(VCE_LMI_CTRL2, 0x0, ~0x100); + + WREG32(VCE_LMI_FW_START_KEYSEL, rdev->vce.keyselect); + + for (i = 0; i < 10; ++i) { + mdelay(10); + if (RREG32(VCE_FW_REG_STATUS) & VCE_FW_REG_STATUS_DONE) + break; + } + + if (i == 10) + return -ETIMEDOUT; + + if (!(RREG32(VCE_FW_REG_STATUS) & VCE_FW_REG_STATUS_PASS)) + return -EINVAL; + + for (i = 0; i < 10; ++i) { + mdelay(10); + if (!(RREG32(VCE_FW_REG_STATUS) & VCE_FW_REG_STATUS_BUSY)) + break; + } + + if (i == 10) + return -ETIMEDOUT; + + return 0; +} + /** * vce_v1_0_start - start VCE block *
Hi Christian,
this commit is causing a boot regression with v4.2-rcX on my Richland APU (CHIP_ARUBA) based laptop. I didn't have time yet to track down where exactly it is going wrong, but I bisected it down to this single commit.
I don't have the VCE firmware installed on this system, so from a quick look at the code I would expect it to drop out pretty early and just leave the VCE unconfigured, but otherwise keep things working as before. This is unfortunately not the case.
Maybe you can take a look at what's wrong here. We are already pretty late in the release cycle and I'm unsure if I can find any more time to look into the issue before the final release.
Thanks, Lucas
Am Montag, den 11.05.2015, 22:01 +0200 schrieb Christian König:
From: Christian König christian.koenig@amd.com
Initial support for VCE 1.0 using newest firmware.
v2: rebased v3: fix for TN v4: fix FW size calculation
Signed-off-by: Christian König christian.koenig@amd.com
drivers/gpu/drm/radeon/ni.c | 46 ++++++++++++ drivers/gpu/drm/radeon/radeon.h | 1 + drivers/gpu/drm/radeon/radeon_asic.c | 17 +++++ drivers/gpu/drm/radeon/radeon_asic.h | 3 + drivers/gpu/drm/radeon/radeon_vce.c | 23 +++++- drivers/gpu/drm/radeon/si.c | 46 ++++++++++++ drivers/gpu/drm/radeon/sid.h | 1 + drivers/gpu/drm/radeon/vce_v1_0.c | 140 +++++++++++++++++++++++++++++++++++ 8 files changed, 274 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c index 32f5f03..faffca3 100644 --- a/drivers/gpu/drm/radeon/ni.c +++ b/drivers/gpu/drm/radeon/ni.c @@ -2040,6 +2040,25 @@ static int cayman_startup(struct radeon_device *rdev) if (r) rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
- if (rdev->family == CHIP_ARUBA) {
r = radeon_vce_resume(rdev);
if (!r)
r = vce_v1_0_resume(rdev);
if (!r)
r = radeon_fence_driver_start_ring(rdev,
TN_RING_TYPE_VCE1_INDEX);
if (!r)
r = radeon_fence_driver_start_ring(rdev,
TN_RING_TYPE_VCE2_INDEX);
if (r) {
dev_err(rdev->dev, "VCE init error (%d).\n", r);
rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
}
- }
- r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX); if (r) { dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
@@ -2117,6 +2136,19 @@ static int cayman_startup(struct radeon_device *rdev) DRM_ERROR("radeon: failed initializing UVD (%d).\n", r); }
- ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
- if (ring->ring_size)
r = radeon_ring_init(rdev, ring, ring->ring_size, 0, 0x0);
- ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
- if (ring->ring_size)
r = radeon_ring_init(rdev, ring, ring->ring_size, 0, 0x0);
- if (!r)
r = vce_v1_0_init(rdev);
- else if (r != -ENOENT)
DRM_ERROR("radeon: failed initializing VCE (%d).\n", r);
- r = radeon_ib_pool_init(rdev); if (r) { dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
@@ -2272,6 +2304,19 @@ int cayman_init(struct radeon_device *rdev) r600_ring_init(rdev, ring, 4096); }
- if (rdev->family == CHIP_ARUBA) {
r = radeon_vce_init(rdev);
if (!r) {
ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
ring->ring_obj = NULL;
r600_ring_init(rdev, ring, 4096);
ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
ring->ring_obj = NULL;
r600_ring_init(rdev, ring, 4096);
}
- }
- rdev->ih.ring_obj = NULL; r600_ih_ring_init(rdev, 64 * 1024);
@@ -2325,6 +2370,7 @@ void cayman_fini(struct radeon_device *rdev) radeon_irq_kms_fini(rdev); uvd_v1_0_fini(rdev); radeon_uvd_fini(rdev);
- radeon_vce_fini(rdev); cayman_pcie_gart_fini(rdev); r600_vram_scratch_fini(rdev); radeon_gem_fini(rdev);
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 38603b1..59480fd 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -1719,6 +1719,7 @@ struct radeon_vce { struct drm_file *filp[RADEON_MAX_VCE_HANDLES]; unsigned img_size[RADEON_MAX_VCE_HANDLES]; struct delayed_work idle_work;
- uint32_t keyselect;
};
int radeon_vce_init(struct radeon_device *rdev); diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c index b37b22bd..eaf909e 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.c +++ b/drivers/gpu/drm/radeon/radeon_asic.c @@ -1761,6 +1761,19 @@ static struct radeon_asic cayman_asic = { }, };
+static struct radeon_asic_ring trinity_vce_ring = {
- .ib_execute = &radeon_vce_ib_execute,
- .emit_fence = &radeon_vce_fence_emit,
- .emit_semaphore = &radeon_vce_semaphore_emit,
- .cs_parse = &radeon_vce_cs_parse,
- .ring_test = &radeon_vce_ring_test,
- .ib_test = &radeon_vce_ib_test,
- .is_lockup = &radeon_ring_test_lockup,
- .get_rptr = &vce_v1_0_get_rptr,
- .get_wptr = &vce_v1_0_get_wptr,
- .set_wptr = &vce_v1_0_set_wptr,
+};
static struct radeon_asic trinity_asic = { .init = &cayman_init, .fini = &cayman_fini, @@ -1794,6 +1807,8 @@ static struct radeon_asic trinity_asic = { [R600_RING_TYPE_DMA_INDEX] = &cayman_dma_ring, [CAYMAN_RING_TYPE_DMA1_INDEX] = &cayman_dma_ring, [R600_RING_TYPE_UVD_INDEX] = &cayman_uvd_ring,
[TN_RING_TYPE_VCE1_INDEX] = &trinity_vce_ring,
}, .irq = { .set = &evergreen_irq_set,[TN_RING_TYPE_VCE2_INDEX] = &trinity_vce_ring,
@@ -1930,6 +1945,8 @@ static struct radeon_asic si_asic = { [R600_RING_TYPE_DMA_INDEX] = &si_dma_ring, [CAYMAN_RING_TYPE_DMA1_INDEX] = &si_dma_ring, [R600_RING_TYPE_UVD_INDEX] = &cayman_uvd_ring,
[TN_RING_TYPE_VCE1_INDEX] = &trinity_vce_ring,
}, .irq = { .set = &si_irq_set,[TN_RING_TYPE_VCE2_INDEX] = &trinity_vce_ring,
diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h index 629f291..e0aa332 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.h +++ b/drivers/gpu/drm/radeon/radeon_asic.h @@ -972,6 +972,9 @@ uint32_t vce_v1_0_get_wptr(struct radeon_device *rdev, struct radeon_ring *ring); void vce_v1_0_set_wptr(struct radeon_device *rdev, struct radeon_ring *ring); +int vce_v1_0_load_fw(struct radeon_device *rdev, uint32_t *data); +unsigned vce_v1_0_bo_size(struct radeon_device *rdev); +int vce_v1_0_resume(struct radeon_device *rdev); int vce_v1_0_init(struct radeon_device *rdev); int vce_v1_0_start(struct radeon_device *rdev);
diff --git a/drivers/gpu/drm/radeon/radeon_vce.c b/drivers/gpu/drm/radeon/radeon_vce.c index aa5d9ba..574f62b 100644 --- a/drivers/gpu/drm/radeon/radeon_vce.c +++ b/drivers/gpu/drm/radeon/radeon_vce.c @@ -38,8 +38,10 @@ #define VCE_IDLE_TIMEOUT_MS 1000
/* Firmware Names */ +#define FIRMWARE_TAHITI "radeon/TAHITI_vce.bin" #define FIRMWARE_BONAIRE "radeon/BONAIRE_vce.bin"
+MODULE_FIRMWARE(FIRMWARE_TAHITI); MODULE_FIRMWARE(FIRMWARE_BONAIRE);
static void radeon_vce_idle_work_handler(struct work_struct *work); @@ -63,6 +65,14 @@ int radeon_vce_init(struct radeon_device *rdev) INIT_DELAYED_WORK(&rdev->vce.idle_work, radeon_vce_idle_work_handler);
switch (rdev->family) {
- case CHIP_TAHITI:
- case CHIP_PITCAIRN:
- case CHIP_VERDE:
- case CHIP_OLAND:
- case CHIP_ARUBA:
fw_name = FIRMWARE_TAHITI;
break;
- case CHIP_BONAIRE: case CHIP_KAVERI: case CHIP_KABINI:
@@ -125,7 +135,10 @@ int radeon_vce_init(struct radeon_device *rdev)
/* allocate firmware, stack and heap BO */
- size = vce_v2_0_bo_size(rdev);
- if (rdev->family < CHIP_BONAIRE)
size = vce_v1_0_bo_size(rdev);
- else
r = radeon_bo_create(rdev, size, PAGE_SIZE, true, RADEON_GEM_DOMAIN_VRAM, 0, NULL, NULL, &rdev->vce.vcpu_bo);size = vce_v2_0_bo_size(rdev);
@@ -226,13 +239,17 @@ int radeon_vce_resume(struct radeon_device *rdev) return r; }
- memcpy(cpu_addr, rdev->vce_fw->data, rdev->vce_fw->size);
memset(cpu_addr, 0, radeon_bo_size(rdev->vce.vcpu_bo));
if (rdev->family < CHIP_BONAIRE)
r = vce_v1_0_load_fw(rdev, cpu_addr);
else
memcpy(cpu_addr, rdev->vce_fw->data, rdev->vce_fw->size);
radeon_bo_kunmap(rdev->vce.vcpu_bo);
radeon_bo_unreserve(rdev->vce.vcpu_bo);
- return 0;
- return r;
}
/** diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c index 6ff78bc..5ca08e3 100644 --- a/drivers/gpu/drm/radeon/si.c +++ b/drivers/gpu/drm/radeon/si.c @@ -6907,6 +6907,22 @@ static int si_startup(struct radeon_device *rdev) rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0; }
- r = radeon_vce_resume(rdev);
- if (!r) {
r = vce_v1_0_resume(rdev);
if (!r)
r = radeon_fence_driver_start_ring(rdev,
TN_RING_TYPE_VCE1_INDEX);
if (!r)
r = radeon_fence_driver_start_ring(rdev,
TN_RING_TYPE_VCE2_INDEX);
- }
- if (r) {
dev_err(rdev->dev, "VCE init error (%d).\n", r);
rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
- }
- /* Enable IRQ */ if (!rdev->irq.installed) { r = radeon_irq_kms_init(rdev);
@@ -6975,6 +6991,23 @@ static int si_startup(struct radeon_device *rdev) } }
- r = -ENOENT;
- ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
- if (ring->ring_size)
r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
VCE_CMD_NO_OP);
- ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
- if (ring->ring_size)
r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
VCE_CMD_NO_OP);
- if (!r)
r = vce_v1_0_init(rdev);
- else if (r != -ENOENT)
DRM_ERROR("radeon: failed initializing VCE (%d).\n", r);
- r = radeon_ib_pool_init(rdev); if (r) { dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
@@ -7033,6 +7066,7 @@ int si_suspend(struct radeon_device *rdev) if (rdev->has_uvd) { uvd_v1_0_fini(rdev); radeon_uvd_suspend(rdev);
} si_fini_pg(rdev); si_fini_cg(rdev);radeon_vce_suspend(rdev);
@@ -7140,6 +7174,17 @@ int si_init(struct radeon_device *rdev) } }
- r = radeon_vce_init(rdev);
- if (!r) {
ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
ring->ring_obj = NULL;
r600_ring_init(rdev, ring, 4096);
ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
ring->ring_obj = NULL;
r600_ring_init(rdev, ring, 4096);
- }
- rdev->ih.ring_obj = NULL; r600_ih_ring_init(rdev, 64 * 1024);
@@ -7191,6 +7236,7 @@ void si_fini(struct radeon_device *rdev) if (rdev->has_uvd) { uvd_v1_0_fini(rdev); radeon_uvd_fini(rdev);
} si_pcie_gart_fini(rdev); r600_vram_scratch_fini(rdev);radeon_vce_fini(rdev);
diff --git a/drivers/gpu/drm/radeon/sid.h b/drivers/gpu/drm/radeon/sid.h index 1630440..4823a07 100644 --- a/drivers/gpu/drm/radeon/sid.h +++ b/drivers/gpu/drm/radeon/sid.h @@ -1879,6 +1879,7 @@ #define VCE_VCPU_CACHE_SIZE1 0x20030 #define VCE_VCPU_CACHE_OFFSET2 0x20034 #define VCE_VCPU_CACHE_SIZE2 0x20038 +#define VCE_VCPU_SCRATCH7 0x200dc #define VCE_SOFT_RESET 0x20120 #define VCE_ECPU_SOFT_RESET (1 << 0) #define VCE_FME_SOFT_RESET (1 << 2) diff --git a/drivers/gpu/drm/radeon/vce_v1_0.c b/drivers/gpu/drm/radeon/vce_v1_0.c index b44d9c8..81dd39b 100644 --- a/drivers/gpu/drm/radeon/vce_v1_0.c +++ b/drivers/gpu/drm/radeon/vce_v1_0.c @@ -31,6 +31,23 @@ #include "radeon_asic.h" #include "sid.h"
+#define VCE_V1_0_FW_SIZE (256 * 1024) +#define VCE_V1_0_STACK_SIZE (64 * 1024) +#define VCE_V1_0_DATA_SIZE (7808 * (RADEON_MAX_VCE_HANDLES + 1))
+struct vce_v1_0_fw_signature +{
- int32_t off;
- uint32_t len;
- int32_t num;
- struct {
uint32_t chip_id;
uint32_t keyselect;
uint32_t nonce[4];
uint32_t sigval[4];
- } val[8];
+};
/**
- vce_v1_0_get_rptr - get read pointer
@@ -82,6 +99,129 @@ void vce_v1_0_set_wptr(struct radeon_device *rdev, WREG32(VCE_RB_WPTR2, ring->wptr); }
+int vce_v1_0_load_fw(struct radeon_device *rdev, uint32_t *data) +{
- struct vce_v1_0_fw_signature *sign = (void*)rdev->vce_fw->data;
- uint32_t chip_id;
- int i;
- switch (rdev->family) {
- case CHIP_TAHITI:
chip_id = 0x01000014;
break;
- case CHIP_VERDE:
chip_id = 0x01000015;
break;
- case CHIP_PITCAIRN:
- case CHIP_OLAND:
chip_id = 0x01000016;
break;
- case CHIP_ARUBA:
chip_id = 0x01000017;
break;
- default:
return -EINVAL;
- }
- for (i = 0; i < sign->num; ++i) {
if (sign->val[i].chip_id == chip_id)
break;
- }
- if (i == sign->num)
return -EINVAL;
- data += (256 - 64) / 4;
- data[0] = sign->val[i].nonce[0];
- data[1] = sign->val[i].nonce[1];
- data[2] = sign->val[i].nonce[2];
- data[3] = sign->val[i].nonce[3];
- data[4] = sign->len + 64;
- memset(&data[5], 0, 44);
- memcpy(&data[16], &sign[1], rdev->vce_fw->size - sizeof(*sign));
- data += data[4] / 4;
- data[0] = sign->val[i].sigval[0];
- data[1] = sign->val[i].sigval[1];
- data[2] = sign->val[i].sigval[2];
- data[3] = sign->val[i].sigval[3];
- rdev->vce.keyselect = sign->val[i].keyselect;
- return 0;
+}
+unsigned vce_v1_0_bo_size(struct radeon_device *rdev) +{
- WARN_ON(VCE_V1_0_FW_SIZE < rdev->vce_fw->size);
- return VCE_V1_0_FW_SIZE + VCE_V1_0_STACK_SIZE + VCE_V1_0_DATA_SIZE;
+}
+int vce_v1_0_resume(struct radeon_device *rdev) +{
- uint64_t addr = rdev->vce.gpu_addr;
- uint32_t size;
- int i;
- WREG32_P(VCE_CLOCK_GATING_A, 0, ~(1 << 16));
- WREG32_P(VCE_UENC_CLOCK_GATING, 0x1FF000, ~0xFF9FF000);
- WREG32_P(VCE_UENC_REG_CLOCK_GATING, 0x3F, ~0x3F);
- WREG32(VCE_CLOCK_GATING_B, 0);
- WREG32_P(VCE_LMI_FW_PERIODIC_CTRL, 0x4, ~0x4);
- WREG32(VCE_LMI_CTRL, 0x00398000);
- WREG32_P(VCE_LMI_CACHE_CTRL, 0x0, ~0x1);
- WREG32(VCE_LMI_SWAP_CNTL, 0);
- WREG32(VCE_LMI_SWAP_CNTL1, 0);
- WREG32(VCE_LMI_VM_CTRL, 0);
- WREG32(VCE_VCPU_SCRATCH7, RADEON_MAX_VCE_HANDLES);
- addr += 256;
- size = VCE_V1_0_FW_SIZE;
- WREG32(VCE_VCPU_CACHE_OFFSET0, addr & 0x7fffffff);
- WREG32(VCE_VCPU_CACHE_SIZE0, size);
- addr += size;
- size = VCE_V1_0_STACK_SIZE;
- WREG32(VCE_VCPU_CACHE_OFFSET1, addr & 0x7fffffff);
- WREG32(VCE_VCPU_CACHE_SIZE1, size);
- addr += size;
- size = VCE_V1_0_DATA_SIZE;
- WREG32(VCE_VCPU_CACHE_OFFSET2, addr & 0x7fffffff);
- WREG32(VCE_VCPU_CACHE_SIZE2, size);
- WREG32_P(VCE_LMI_CTRL2, 0x0, ~0x100);
- WREG32(VCE_LMI_FW_START_KEYSEL, rdev->vce.keyselect);
- for (i = 0; i < 10; ++i) {
mdelay(10);
if (RREG32(VCE_FW_REG_STATUS) & VCE_FW_REG_STATUS_DONE)
break;
- }
- if (i == 10)
return -ETIMEDOUT;
- if (!(RREG32(VCE_FW_REG_STATUS) & VCE_FW_REG_STATUS_PASS))
return -EINVAL;
- for (i = 0; i < 10; ++i) {
mdelay(10);
if (!(RREG32(VCE_FW_REG_STATUS) & VCE_FW_REG_STATUS_BUSY))
break;
- }
- if (i == 10)
return -ETIMEDOUT;
- return 0;
+}
/**
- vce_v1_0_start - start VCE block
On 13.08.2015 15:03, Lucas Stach wrote:
Hi Christian,
this commit is causing a boot regression with v4.2-rcX on my Richland APU (CHIP_ARUBA) based laptop. I didn't have time yet to track down where exactly it is going wrong, but I bisected it down to this single commit.
I don't have the VCE firmware installed on this system, so from a quick look at the code I would expect it to drop out pretty early and just leave the VCE unconfigured, but otherwise keep things working as before. This is unfortunately not the case.
If the radeon driver is built into the kernel (or loaded from the initrd?), the attempt to load the firmware might take a long time to time out.
Please provide more information about the symptoms, e.g. any dmesg output etc.
Am Donnerstag, den 13.08.2015, 15:18 +0900 schrieb Michel Dänzer:
On 13.08.2015 15:03, Lucas Stach wrote:
Hi Christian,
this commit is causing a boot regression with v4.2-rcX on my Richland APU (CHIP_ARUBA) based laptop. I didn't have time yet to track down where exactly it is going wrong, but I bisected it down to this single commit.
I don't have the VCE firmware installed on this system, so from a quick look at the code I would expect it to drop out pretty early and just leave the VCE unconfigured, but otherwise keep things working as before. This is unfortunately not the case.
If the radeon driver is built into the kernel (or loaded from the initrd?), the attempt to load the firmware might take a long time to time out.
Gah. Thanks, I was too impatient to wait for the firmware loading to time out. In fact this is a standard Fedora kernel config, so radeon is a module, but it is built into the initrd.
So it's not really readeons fault, but one more iteration of the fact that anything involving firmware loading is just horribly inconvenient. Especially if it's firmware for an optional component.
Regards, Lucas
On 13.08.2015 08:36, Lucas Stach wrote:
Am Donnerstag, den 13.08.2015, 15:18 +0900 schrieb Michel Dänzer:
On 13.08.2015 15:03, Lucas Stach wrote:
Hi Christian,
this commit is causing a boot regression with v4.2-rcX on my Richland APU (CHIP_ARUBA) based laptop. I didn't have time yet to track down where exactly it is going wrong, but I bisected it down to this single commit.
I don't have the VCE firmware installed on this system, so from a quick look at the code I would expect it to drop out pretty early and just leave the VCE unconfigured, but otherwise keep things working as before. This is unfortunately not the case.
If the radeon driver is built into the kernel (or loaded from the initrd?), the attempt to load the firmware might take a long time to time out.
Gah. Thanks, I was too impatient to wait for the firmware loading to time out. In fact this is a standard Fedora kernel config, so radeon is a module, but it is built into the initrd.
So it's not really readeons fault, but one more iteration of the fact that anything involving firmware loading is just horribly inconvenient. Especially if it's firmware for an optional component.
Yeah, exactly. The timeout for loading the firmware is really long on both the standard Fedora as well as Ubuntu kernels. Not sure if that's the default or just their configuration.
While it doesn't have the highest priority for us I usually still do tests if working without firmware still works once or twice during the upstreaming process. So if you really find that the box doesn't work without the firmware leave me a note and I will fix it.
Best regards, Christian.
Regards, Lucas
From: Alex Deucher alexander.deucher@amd.com
Signed-off-by: Alex Deucher alexander.deucher@amd.com --- drivers/gpu/drm/radeon/radeon_asic.c | 2 ++ drivers/gpu/drm/radeon/sid.h | 1 + drivers/gpu/drm/radeon/vce_v1_0.c | 57 ++++++++++++++++++++++++++++++++++++ 3 files changed, 60 insertions(+)
diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c index eaf909e..f2421bc 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.c +++ b/drivers/gpu/drm/radeon/radeon_asic.c @@ -2455,6 +2455,8 @@ int radeon_asic_init(struct radeon_device *rdev) /* set num crtcs */ rdev->num_crtc = 4; rdev->has_uvd = true; + rdev->cg_flags = + RADEON_CG_SUPPORT_VCE_MGCG; break; case CHIP_TAHITI: case CHIP_PITCAIRN: diff --git a/drivers/gpu/drm/radeon/sid.h b/drivers/gpu/drm/radeon/sid.h index 4823a07..4c4a721 100644 --- a/drivers/gpu/drm/radeon/sid.h +++ b/drivers/gpu/drm/radeon/sid.h @@ -1894,6 +1894,7 @@ #define VCE_RB_RPTR 0x2018c #define VCE_RB_WPTR 0x20190 #define VCE_CLOCK_GATING_A 0x202f8 +# define CGC_DYN_CLOCK_MODE (1 << 16) #define VCE_CLOCK_GATING_B 0x202fc #define VCE_UENC_CLOCK_GATING 0x205bc #define VCE_UENC_REG_CLOCK_GATING 0x205c0 diff --git a/drivers/gpu/drm/radeon/vce_v1_0.c b/drivers/gpu/drm/radeon/vce_v1_0.c index 81dd39b..07a0d37 100644 --- a/drivers/gpu/drm/radeon/vce_v1_0.c +++ b/drivers/gpu/drm/radeon/vce_v1_0.c @@ -99,6 +99,61 @@ void vce_v1_0_set_wptr(struct radeon_device *rdev, WREG32(VCE_RB_WPTR2, ring->wptr); }
+void vce_v1_0_enable_mgcg(struct radeon_device *rdev, bool enable) +{ + u32 tmp; + + if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_VCE_MGCG)) { + tmp = RREG32(VCE_CLOCK_GATING_A); + tmp |= CGC_DYN_CLOCK_MODE; + WREG32(VCE_CLOCK_GATING_A, tmp); + + tmp = RREG32(VCE_UENC_CLOCK_GATING); + tmp &= ~0x1ff000; + tmp |= 0xff800000; + WREG32(VCE_UENC_CLOCK_GATING, tmp); + + tmp = RREG32(VCE_UENC_REG_CLOCK_GATING); + tmp &= ~0x3ff; + WREG32(VCE_UENC_REG_CLOCK_GATING, tmp); + } else { + tmp = RREG32(VCE_CLOCK_GATING_A); + tmp &= ~CGC_DYN_CLOCK_MODE; + WREG32(VCE_CLOCK_GATING_A, tmp); + + tmp = RREG32(VCE_UENC_CLOCK_GATING); + tmp |= 0x1ff000; + tmp &= ~0xff800000; + WREG32(VCE_UENC_CLOCK_GATING, tmp); + + tmp = RREG32(VCE_UENC_REG_CLOCK_GATING); + tmp |= 0x3ff; + WREG32(VCE_UENC_REG_CLOCK_GATING, tmp); + } +} + +static void vce_v1_0_init_cg(struct radeon_device *rdev) +{ + u32 tmp; + + tmp = RREG32(VCE_CLOCK_GATING_A); + tmp |= CGC_DYN_CLOCK_MODE; + WREG32(VCE_CLOCK_GATING_A, tmp); + + tmp = RREG32(VCE_CLOCK_GATING_B); + tmp |= 0x1e; + tmp &= ~0xe100e1; + WREG32(VCE_CLOCK_GATING_B, tmp); + + tmp = RREG32(VCE_UENC_CLOCK_GATING); + tmp &= ~0xff9ff000; + WREG32(VCE_UENC_CLOCK_GATING, tmp); + + tmp = RREG32(VCE_UENC_REG_CLOCK_GATING); + tmp &= ~0x3ff; + WREG32(VCE_UENC_REG_CLOCK_GATING, tmp); +} + int vce_v1_0_load_fw(struct radeon_device *rdev, uint32_t *data) { struct vce_v1_0_fw_signature *sign = (void*)rdev->vce_fw->data; @@ -219,6 +274,8 @@ int vce_v1_0_resume(struct radeon_device *rdev) if (i == 10) return -ETIMEDOUT;
+ vce_v1_0_init_cg(rdev); + return 0; }
From: Alex Deucher alexander.deucher@amd.com
Some of the vce clocks are automatic, others need to be manually enabled. For ease, just disable cg when vce is active.
v2: rebased, call vce_v1_0_enable_mgcg directly
Signed-off-by: Alex Deucher alexander.deucher@amd.com Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/radeon/si_dpm.c | 9 ++++++++- drivers/gpu/drm/radeon/trinity_dpm.c | 9 ++++++++- 2 files changed, 16 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/radeon/si_dpm.c b/drivers/gpu/drm/radeon/si_dpm.c index 69cd4ca..1dbdf32 100644 --- a/drivers/gpu/drm/radeon/si_dpm.c +++ b/drivers/gpu/drm/radeon/si_dpm.c @@ -1740,6 +1740,7 @@ struct ni_power_info *ni_get_pi(struct radeon_device *rdev); struct ni_ps *ni_get_ps(struct radeon_ps *rps);
extern int si_mc_load_microcode(struct radeon_device *rdev); +extern void vce_v1_0_enable_mgcg(struct radeon_device *rdev, bool enable);
static int si_populate_voltage_value(struct radeon_device *rdev, const struct atom_voltage_table *table, @@ -5932,8 +5933,14 @@ static void si_set_vce_clock(struct radeon_device *rdev, struct radeon_ps *old_rps) { if ((old_rps->evclk != new_rps->evclk) || - (old_rps->ecclk != new_rps->ecclk)) + (old_rps->ecclk != new_rps->ecclk)) { + /* turn the clocks on when encoding, off otherwise */ + if (new_rps->evclk || new_rps->ecclk) + vce_v1_0_enable_mgcg(rdev, false); + else + vce_v1_0_enable_mgcg(rdev, true); radeon_set_vce_clocks(rdev, new_rps->evclk, new_rps->ecclk); + } }
void si_dpm_setup_asic(struct radeon_device *rdev) diff --git a/drivers/gpu/drm/radeon/trinity_dpm.c b/drivers/gpu/drm/radeon/trinity_dpm.c index e0d0780..d34bfcda 100644 --- a/drivers/gpu/drm/radeon/trinity_dpm.c +++ b/drivers/gpu/drm/radeon/trinity_dpm.c @@ -336,6 +336,7 @@ static const u32 trinity_override_mgpg_sequences[] = 0x00000204, 0x00000000, };
+extern void vce_v1_0_enable_mgcg(struct radeon_device *rdev, bool enable); static void trinity_program_clk_gating_hw_sequence(struct radeon_device *rdev, const u32 *seq, u32 count); static void trinity_override_dynamic_mg_powergating(struct radeon_device *rdev); @@ -990,8 +991,14 @@ static void trinity_set_vce_clock(struct radeon_device *rdev, struct radeon_ps *old_rps) { if ((old_rps->evclk != new_rps->evclk) || - (old_rps->ecclk != new_rps->ecclk)) + (old_rps->ecclk != new_rps->ecclk)) { + /* turn the clocks on when encoding, off otherwise */ + if (new_rps->evclk || new_rps->ecclk) + vce_v1_0_enable_mgcg(rdev, false); + else + vce_v1_0_enable_mgcg(rdev, true); radeon_set_vce_clocks(rdev, new_rps->evclk, new_rps->ecclk); + } }
static void trinity_program_ttt(struct radeon_device *rdev)
On Mon, May 11, 2015 at 4:01 PM, Christian König deathsimple@vodafone.de wrote:
From: Christian König christian.koenig@amd.com
Previously we were completely over allocating, fix this by actually implementing the size calculation.
Signed-off-by: Christian König christian.koenig@amd.com
Applied the series to my next tree.
Alex
drivers/gpu/drm/radeon/radeon.h | 2 -- drivers/gpu/drm/radeon/radeon_asic.h | 1 + drivers/gpu/drm/radeon/radeon_vce.c | 3 +-- drivers/gpu/drm/radeon/vce_v2_0.c | 16 +++++++++++++--- 4 files changed, 15 insertions(+), 7 deletions(-)
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 46eb0fa..34c51a2 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -1709,8 +1709,6 @@ int radeon_uvd_send_upll_ctlreq(struct radeon_device *rdev,
- VCE
*/ #define RADEON_MAX_VCE_HANDLES 16 -#define RADEON_VCE_STACK_SIZE (1024*1024) -#define RADEON_VCE_HEAP_SIZE (4*1024*1024)
struct radeon_vce { struct radeon_bo *vcpu_bo; diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h index a3ca8cd..cb7787f 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.h +++ b/drivers/gpu/drm/radeon/radeon_asic.h @@ -974,6 +974,7 @@ int vce_v1_0_init(struct radeon_device *rdev); int vce_v1_0_start(struct radeon_device *rdev);
/* vce v2.0 */ +unsigned vce_v2_0_bo_size(struct radeon_device *rdev); int vce_v2_0_resume(struct radeon_device *rdev);
#endif diff --git a/drivers/gpu/drm/radeon/radeon_vce.c b/drivers/gpu/drm/radeon/radeon_vce.c index 0de5711..5b952bd 100644 --- a/drivers/gpu/drm/radeon/radeon_vce.c +++ b/drivers/gpu/drm/radeon/radeon_vce.c @@ -123,8 +123,7 @@ int radeon_vce_init(struct radeon_device *rdev)
/* allocate firmware, stack and heap BO */
size = RADEON_GPU_PAGE_ALIGN(rdev->vce_fw->size) +
RADEON_VCE_STACK_SIZE + RADEON_VCE_HEAP_SIZE;
size = vce_v2_0_bo_size(rdev); r = radeon_bo_create(rdev, size, PAGE_SIZE, true, RADEON_GEM_DOMAIN_VRAM, 0, NULL, NULL, &rdev->vce.vcpu_bo);
diff --git a/drivers/gpu/drm/radeon/vce_v2_0.c b/drivers/gpu/drm/radeon/vce_v2_0.c index fbbe78f..cdeaab7 100644 --- a/drivers/gpu/drm/radeon/vce_v2_0.c +++ b/drivers/gpu/drm/radeon/vce_v2_0.c @@ -31,6 +31,10 @@ #include "radeon_asic.h" #include "cikd.h"
+#define VCE_V2_0_FW_SIZE (256 * 1024) +#define VCE_V2_0_STACK_SIZE (64 * 1024) +#define VCE_V2_0_DATA_SIZE (23552 * RADEON_MAX_VCE_HANDLES)
static void vce_v2_0_set_sw_cg(struct radeon_device *rdev, bool gated) { u32 tmp; @@ -140,6 +144,12 @@ static void vce_v2_0_init_cg(struct radeon_device *rdev) WREG32(VCE_CLOCK_GATING_B, tmp); }
+unsigned vce_v2_0_bo_size(struct radeon_device *rdev) +{
WARN_ON(rdev->vce_fw->size > VCE_V2_0_FW_SIZE);
return VCE_V2_0_FW_SIZE + VCE_V2_0_STACK_SIZE + VCE_V2_0_DATA_SIZE;
+}
int vce_v2_0_resume(struct radeon_device *rdev) { uint64_t addr = rdev->vce.gpu_addr; @@ -159,17 +169,17 @@ int vce_v2_0_resume(struct radeon_device *rdev) WREG32(VCE_LMI_VCPU_CACHE_40BIT_BAR, addr >> 8);
addr &= 0xff;
size = RADEON_GPU_PAGE_ALIGN(rdev->vce_fw->size);
size = VCE_V2_0_FW_SIZE; WREG32(VCE_VCPU_CACHE_OFFSET0, addr & 0x7fffffff); WREG32(VCE_VCPU_CACHE_SIZE0, size); addr += size;
size = RADEON_VCE_STACK_SIZE;
size = VCE_V2_0_STACK_SIZE; WREG32(VCE_VCPU_CACHE_OFFSET1, addr & 0x7fffffff); WREG32(VCE_VCPU_CACHE_SIZE1, size); addr += size;
size = RADEON_VCE_HEAP_SIZE;
size = VCE_V2_0_DATA_SIZE; WREG32(VCE_VCPU_CACHE_OFFSET2, addr & 0x7fffffff); WREG32(VCE_VCPU_CACHE_SIZE2, size);
-- 1.9.1
dri-devel@lists.freedesktop.org