Hi everyone,
here is the next major code drop for the radeon driver. It adds support for VCE2 as found on the newest CIK hardware generation. VCE1 as found on SI and Trinity hardware generation is currently not supported, but might get added in the future.
VCE is basically a hardware H264 encoding engine. It's optimized for low latency encoding as found on wireless display, but can be used for general purpose encoding as well.
The kernel patches implement everything needed to get the hardware block up and running. The mesa patches then implement a basic H264 encoder on top of that. The support for encoding is currently exposed through the OpenMAX state tracker. Beside on the proper mailing lists the patches can be found here as well:
Kernel: http://cgit.freedesktop.org/~deathsimple/linux/log/?h=vce-release Mesa: http://cgit.freedesktop.org/~deathsimple/mesa/log/?h=vce-release
Additional to the new code a firmware file (BONAIRE_vce.bin) is needed which can be found here: http://people.freedesktop.org/~agd5f/radeon_ucode/
Please review and as always comments are very welcome, Christian.
From: Christian König christian.koenig@amd.com
Only VCE 2.0 support so far.
v2: squashing multiple patches into this one v3: add IRQ support for CIK, major cleanups, basic code documentation v4: remove HAINAN from chipset list
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/radeon/Makefile | 6 + drivers/gpu/drm/radeon/cik.c | 60 ++++ drivers/gpu/drm/radeon/cikd.h | 33 ++ drivers/gpu/drm/radeon/radeon.h | 56 +++- drivers/gpu/drm/radeon/radeon_asic.c | 17 + drivers/gpu/drm/radeon/radeon_asic.h | 13 + drivers/gpu/drm/radeon/radeon_cs.c | 4 + drivers/gpu/drm/radeon/radeon_kms.c | 1 + drivers/gpu/drm/radeon/radeon_ring.c | 4 + drivers/gpu/drm/radeon/radeon_test.c | 39 ++- drivers/gpu/drm/radeon/radeon_vce.c | 588 +++++++++++++++++++++++++++++++++++ drivers/gpu/drm/radeon/sid.h | 47 +++ drivers/gpu/drm/radeon/vce_v1_0.c | 187 +++++++++++ drivers/gpu/drm/radeon/vce_v2_0.c | 70 +++++ include/uapi/drm/radeon_drm.h | 1 + 15 files changed, 1117 insertions(+), 9 deletions(-) create mode 100644 drivers/gpu/drm/radeon/radeon_vce.c create mode 100644 drivers/gpu/drm/radeon/vce_v1_0.c create mode 100644 drivers/gpu/drm/radeon/vce_v2_0.c
diff --git a/drivers/gpu/drm/radeon/Makefile b/drivers/gpu/drm/radeon/Makefile index 306364a..ed60caa 100644 --- a/drivers/gpu/drm/radeon/Makefile +++ b/drivers/gpu/drm/radeon/Makefile @@ -99,6 +99,12 @@ radeon-y += \ uvd_v3_1.o \ uvd_v4_2.o
+# add VCE block +radeon-y += \ + radeon_vce.o \ + vce_v1_0.o \ + vce_v2_0.o \ + radeon-$(CONFIG_COMPAT) += radeon_ioc32.o radeon-$(CONFIG_VGA_SWITCHEROO) += radeon_atpx_handler.o radeon-$(CONFIG_ACPI) += radeon_acpi.o diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index e6419ca..be6eb4d 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -7490,6 +7490,20 @@ restart_ih: /* reset addr and status */ WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1); break; + case 167: /* VCE */ + DRM_DEBUG("IH: VCE int: 0x%08x\n", src_data); + switch (src_data) { + case 0: + radeon_fence_process(rdev, TN_RING_TYPE_VCE1_INDEX); + break; + case 1: + radeon_fence_process(rdev, TN_RING_TYPE_VCE2_INDEX); + break; + default: + DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data); + break; + } + break; case 176: /* GFX RB CP_INT */ case 177: /* GFX IB CP_INT */ radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX); @@ -7789,6 +7803,22 @@ static int cik_startup(struct radeon_device *rdev) if (r) rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
+ r = radeon_vce_resume(rdev); + if (!r) { + r = vce_v2_0_resume(rdev); + if (!r) + r = radeon_fence_driver_start_ring(rdev, + TN_RING_TYPE_VCE1_INDEX); + if (!r) + r = radeon_fence_driver_start_ring(rdev, + TN_RING_TYPE_VCE2_INDEX); + } + if (r) { + dev_err(rdev->dev, "VCE init error (%d).\n", r); + rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0; + rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0; + } + /* Enable IRQ */ if (!rdev->irq.installed) { r = radeon_irq_kms_init(rdev); @@ -7864,6 +7894,23 @@ static int cik_startup(struct radeon_device *rdev) DRM_ERROR("radeon: failed initializing UVD (%d).\n", r); }
+ r = -ENOENT; + + ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX]; + if (ring->ring_size) + r = radeon_ring_init(rdev, ring, ring->ring_size, 0, + VCE_CMD_NO_OP); + + ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX]; + if (ring->ring_size) + r = radeon_ring_init(rdev, ring, ring->ring_size, 0, + VCE_CMD_NO_OP); + + if (!r) + r = vce_v1_0_init(rdev); + else if (r != -ENOENT) + DRM_ERROR("radeon: failed initializing VCE (%d).\n", r); + r = radeon_ib_pool_init(rdev); if (r) { dev_err(rdev->dev, "IB initialization failed (%d).\n", r); @@ -7934,6 +7981,7 @@ int cik_suspend(struct radeon_device *rdev) cik_sdma_enable(rdev, false); uvd_v1_0_fini(rdev); radeon_uvd_suspend(rdev); + radeon_vce_suspend(rdev); cik_fini_pg(rdev); cik_fini_cg(rdev); cik_irq_suspend(rdev); @@ -8066,6 +8114,17 @@ int cik_init(struct radeon_device *rdev) r600_ring_init(rdev, ring, 4096); }
+ r = radeon_vce_init(rdev); + if (!r) { + ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX]; + ring->ring_obj = NULL; + r600_ring_init(rdev, ring, 4096); + + ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX]; + ring->ring_obj = NULL; + r600_ring_init(rdev, ring, 4096); + } + rdev->ih.ring_obj = NULL; r600_ih_ring_init(rdev, 64 * 1024);
@@ -8127,6 +8186,7 @@ void cik_fini(struct radeon_device *rdev) radeon_irq_kms_fini(rdev); uvd_v1_0_fini(rdev); radeon_uvd_fini(rdev); + radeon_vce_fini(rdev); cik_pcie_gart_fini(rdev); r600_vram_scratch_fini(rdev); radeon_gem_fini(rdev); diff --git a/drivers/gpu/drm/radeon/cikd.h b/drivers/gpu/drm/radeon/cikd.h index 98bae9d7..459ae02 100644 --- a/drivers/gpu/drm/radeon/cikd.h +++ b/drivers/gpu/drm/radeon/cikd.h @@ -2010,4 +2010,37 @@ /* UVD CTX indirect */ #define UVD_CGC_MEM_CTRL 0xC0
+/* VCE */ + +#define VCE_VCPU_CACHE_OFFSET0 0x20024 +#define VCE_VCPU_CACHE_SIZE0 0x20028 +#define VCE_VCPU_CACHE_OFFSET1 0x2002c +#define VCE_VCPU_CACHE_SIZE1 0x20030 +#define VCE_VCPU_CACHE_OFFSET2 0x20034 +#define VCE_VCPU_CACHE_SIZE2 0x20038 +#define VCE_RB_RPTR2 0x20178 +#define VCE_RB_WPTR2 0x2017c +#define VCE_RB_RPTR 0x2018c +#define VCE_RB_WPTR 0x20190 +#define VCE_CLOCK_GATING_A 0x202f8 +#define VCE_CLOCK_GATING_B 0x202fc +#define VCE_UENC_CLOCK_GATING 0x207bc +#define VCE_UENC_REG_CLOCK_GATING 0x207c0 +#define VCE_SYS_INT_EN 0x21300 +# define VCE_SYS_INT_TRAP_INTERRUPT_EN (1 << 3) +#define VCE_LMI_CTRL2 0x21474 +#define VCE_LMI_CTRL 0x21498 +#define VCE_LMI_VM_CTRL 0x214a0 +#define VCE_LMI_SWAP_CNTL 0x214b4 +#define VCE_LMI_SWAP_CNTL1 0x214b8 +#define VCE_LMI_CACHE_CTRL 0x214f4 + +#define VCE_CMD_NO_OP 0x00000000 +#define VCE_CMD_END 0x00000001 +#define VCE_CMD_IB 0x00000002 +#define VCE_CMD_FENCE 0x00000003 +#define VCE_CMD_TRAP 0x00000004 +#define VCE_CMD_IB_AUTO 0x00000005 +#define VCE_CMD_SEMAPHORE 0x00000006 + #endif diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 4a8ac1c..a9295b9 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -113,19 +113,16 @@ extern int radeon_hard_reset; #define RADEONFB_CONN_LIMIT 4 #define RADEON_BIOS_NUM_SCRATCH 8
-/* max number of rings */ -#define RADEON_NUM_RINGS 6 - /* fence seq are set to this number when signaled */ #define RADEON_FENCE_SIGNALED_SEQ 0LL
/* internal ring indices */ /* r1xx+ has gfx CP ring */ -#define RADEON_RING_TYPE_GFX_INDEX 0 +#define RADEON_RING_TYPE_GFX_INDEX 0
/* cayman has 2 compute CP rings */ -#define CAYMAN_RING_TYPE_CP1_INDEX 1 -#define CAYMAN_RING_TYPE_CP2_INDEX 2 +#define CAYMAN_RING_TYPE_CP1_INDEX 1 +#define CAYMAN_RING_TYPE_CP2_INDEX 2
/* R600+ has an async dma ring */ #define R600_RING_TYPE_DMA_INDEX 3 @@ -133,7 +130,14 @@ extern int radeon_hard_reset; #define CAYMAN_RING_TYPE_DMA1_INDEX 4
/* R600+ */ -#define R600_RING_TYPE_UVD_INDEX 5 +#define R600_RING_TYPE_UVD_INDEX 5 + +/* TN+ */ +#define TN_RING_TYPE_VCE1_INDEX 6 +#define TN_RING_TYPE_VCE2_INDEX 7 + +/* max number of rings */ +#define RADEON_NUM_RINGS 8
/* hardcode those limit for now */ #define RADEON_VA_IB_OFFSET (1 << 20) @@ -1589,6 +1593,42 @@ int radeon_uvd_calc_upll_dividers(struct radeon_device *rdev, int radeon_uvd_send_upll_ctlreq(struct radeon_device *rdev, unsigned cg_upll_func_cntl);
+/* + * VCE + */ +#define RADEON_MAX_VCE_HANDLES 16 +#define RADEON_VCE_STACK_SIZE (1024*1024) +#define RADEON_VCE_HEAP_SIZE (4*1024*1024) + +struct radeon_vce { + struct radeon_bo *vcpu_bo; + void *cpu_addr; + uint64_t gpu_addr; + atomic_t handles[RADEON_MAX_VCE_HANDLES]; + struct drm_file *filp[RADEON_MAX_VCE_HANDLES]; +}; + +int radeon_vce_init(struct radeon_device *rdev); +void radeon_vce_fini(struct radeon_device *rdev); +int radeon_vce_suspend(struct radeon_device *rdev); +int radeon_vce_resume(struct radeon_device *rdev); +int radeon_vce_get_create_msg(struct radeon_device *rdev, int ring, + uint32_t handle, struct radeon_fence **fence); +int radeon_vce_get_destroy_msg(struct radeon_device *rdev, int ring, + uint32_t handle, struct radeon_fence **fence); +void radeon_vce_free_handles(struct radeon_device *rdev, struct drm_file *filp); +int radeon_vce_cs_reloc(struct radeon_cs_parser *p, int lo, int hi); +int radeon_vce_cs_parse(struct radeon_cs_parser *p); +bool radeon_vce_semaphore_emit(struct radeon_device *rdev, + struct radeon_ring *ring, + struct radeon_semaphore *semaphore, + bool emit_wait); +void radeon_vce_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib); +void radeon_vce_fence_emit(struct radeon_device *rdev, + struct radeon_fence *fence); +int radeon_vce_ring_test(struct radeon_device *rdev, struct radeon_ring *ring); +int radeon_vce_ib_test(struct radeon_device *rdev, struct radeon_ring *ring); + struct r600_audio_pin { int channels; int rate; @@ -2184,6 +2224,7 @@ struct radeon_device { struct radeon_gem gem; struct radeon_pm pm; struct radeon_uvd uvd; + struct radeon_vce vce; uint32_t bios_scratch[RADEON_BIOS_NUM_SCRATCH]; struct radeon_wb wb; struct radeon_dummy_page dummy_page; @@ -2203,6 +2244,7 @@ struct radeon_device { const struct firmware *sdma_fw; /* CIK SDMA firmware */ const struct firmware *smc_fw; /* SMC firmware */ const struct firmware *uvd_fw; /* UVD firmware */ + const struct firmware *vce_fw; /* VCE firmware */ struct r600_vram_scratch vram_scratch; int msi_enabled; /* msi enabled */ struct r600_ih ih; /* r6/700 interrupt ring */ diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c index f74db43..e9eaba99 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.c +++ b/drivers/gpu/drm/radeon/radeon_asic.c @@ -1987,6 +1987,19 @@ static struct radeon_asic_ring ci_dma_ring = { .set_wptr = &cik_sdma_set_wptr, };
+static struct radeon_asic_ring ci_vce_ring = { + .ib_execute = &radeon_vce_ib_execute, + .emit_fence = &radeon_vce_fence_emit, + .emit_semaphore = &radeon_vce_semaphore_emit, + .cs_parse = &radeon_vce_cs_parse, + .ring_test = &radeon_vce_ring_test, + .ib_test = &radeon_vce_ib_test, + .is_lockup = &radeon_ring_test_lockup, + .get_rptr = &vce_v1_0_get_rptr, + .get_wptr = &vce_v1_0_get_wptr, + .set_wptr = &vce_v1_0_set_wptr, +}; + static struct radeon_asic ci_asic = { .init = &cik_init, .fini = &cik_fini, @@ -2015,6 +2028,8 @@ static struct radeon_asic ci_asic = { [R600_RING_TYPE_DMA_INDEX] = &ci_dma_ring, [CAYMAN_RING_TYPE_DMA1_INDEX] = &ci_dma_ring, [R600_RING_TYPE_UVD_INDEX] = &cayman_uvd_ring, + [TN_RING_TYPE_VCE1_INDEX] = &ci_vce_ring, + [TN_RING_TYPE_VCE2_INDEX] = &ci_vce_ring, }, .irq = { .set = &cik_irq_set, @@ -2117,6 +2132,8 @@ static struct radeon_asic kv_asic = { [R600_RING_TYPE_DMA_INDEX] = &ci_dma_ring, [CAYMAN_RING_TYPE_DMA1_INDEX] = &ci_dma_ring, [R600_RING_TYPE_UVD_INDEX] = &cayman_uvd_ring, + [TN_RING_TYPE_VCE1_INDEX] = &ci_vce_ring, + [TN_RING_TYPE_VCE2_INDEX] = &ci_vce_ring, }, .irq = { .set = &cik_irq_set, diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h index b3bc433..2017efd 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.h +++ b/drivers/gpu/drm/radeon/radeon_asic.h @@ -861,4 +861,17 @@ bool uvd_v3_1_semaphore_emit(struct radeon_device *rdev, /* uvd v4.2 */ int uvd_v4_2_resume(struct radeon_device *rdev);
+/* vce v1.0 */ +uint32_t vce_v1_0_get_rptr(struct radeon_device *rdev, + struct radeon_ring *ring); +uint32_t vce_v1_0_get_wptr(struct radeon_device *rdev, + struct radeon_ring *ring); +void vce_v1_0_set_wptr(struct radeon_device *rdev, + struct radeon_ring *ring); +int vce_v1_0_init(struct radeon_device *rdev); +int vce_v1_0_start(struct radeon_device *rdev); + +/* vce v2.0 */ +int vce_v2_0_resume(struct radeon_device *rdev); + #endif diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c index dfb5a1d..701ee79 100644 --- a/drivers/gpu/drm/radeon/radeon_cs.c +++ b/drivers/gpu/drm/radeon/radeon_cs.c @@ -147,6 +147,10 @@ static int radeon_cs_get_ring(struct radeon_cs_parser *p, u32 ring, s32 priority case RADEON_CS_RING_UVD: p->ring = R600_RING_TYPE_UVD_INDEX; break; + case RADEON_CS_RING_VCE: + /* TODO: only use the low priority ring for now */ + p->ring = TN_RING_TYPE_VCE1_INDEX; + break; } return 0; } diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c index 114d167..0e078af 100644 --- a/drivers/gpu/drm/radeon/radeon_kms.c +++ b/drivers/gpu/drm/radeon/radeon_kms.c @@ -610,6 +610,7 @@ void radeon_driver_preclose_kms(struct drm_device *dev, if (rdev->cmask_filp == file_priv) rdev->cmask_filp = NULL; radeon_uvd_free_handles(rdev, file_priv); + radeon_vce_free_handles(rdev, file_priv); }
/* diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c index 1b783f0..149cd63 100644 --- a/drivers/gpu/drm/radeon/radeon_ring.c +++ b/drivers/gpu/drm/radeon/radeon_ring.c @@ -814,6 +814,8 @@ static int cayman_cp2_index = CAYMAN_RING_TYPE_CP2_INDEX; static int radeon_dma1_index = R600_RING_TYPE_DMA_INDEX; static int radeon_dma2_index = CAYMAN_RING_TYPE_DMA1_INDEX; static int r600_uvd_index = R600_RING_TYPE_UVD_INDEX; +static int si_vce1_index = TN_RING_TYPE_VCE1_INDEX; +static int si_vce2_index = TN_RING_TYPE_VCE2_INDEX;
static struct drm_info_list radeon_debugfs_ring_info_list[] = { {"radeon_ring_gfx", radeon_debugfs_ring_info, 0, &radeon_gfx_index}, @@ -822,6 +824,8 @@ static struct drm_info_list radeon_debugfs_ring_info_list[] = { {"radeon_ring_dma1", radeon_debugfs_ring_info, 0, &radeon_dma1_index}, {"radeon_ring_dma2", radeon_debugfs_ring_info, 0, &radeon_dma2_index}, {"radeon_ring_uvd", radeon_debugfs_ring_info, 0, &r600_uvd_index}, + {"radeon_ring_vce1", radeon_debugfs_ring_info, 0, &si_vce1_index}, + {"radeon_ring_vce2", radeon_debugfs_ring_info, 0, &si_vce2_index}, };
static int radeon_debugfs_sa_info(struct seq_file *m, void *data) diff --git a/drivers/gpu/drm/radeon/radeon_test.c b/drivers/gpu/drm/radeon/radeon_test.c index 12e8099..3a13e0d 100644 --- a/drivers/gpu/drm/radeon/radeon_test.c +++ b/drivers/gpu/drm/radeon/radeon_test.c @@ -257,20 +257,36 @@ static int radeon_test_create_and_emit_fence(struct radeon_device *rdev, struct radeon_ring *ring, struct radeon_fence **fence) { + uint32_t handle = ring->idx ^ 0xdeafbeef; int r;
if (ring->idx == R600_RING_TYPE_UVD_INDEX) { - r = radeon_uvd_get_create_msg(rdev, ring->idx, 1, NULL); + r = radeon_uvd_get_create_msg(rdev, ring->idx, handle, NULL); if (r) { DRM_ERROR("Failed to get dummy create msg\n"); return r; }
- r = radeon_uvd_get_destroy_msg(rdev, ring->idx, 1, fence); + r = radeon_uvd_get_destroy_msg(rdev, ring->idx, handle, fence); if (r) { DRM_ERROR("Failed to get dummy destroy msg\n"); return r; } + + } else if (ring->idx == TN_RING_TYPE_VCE1_INDEX || + ring->idx == TN_RING_TYPE_VCE2_INDEX) { + r = radeon_vce_get_create_msg(rdev, ring->idx, handle, NULL); + if (r) { + DRM_ERROR("Failed to get dummy create msg\n"); + return r; + } + + r = radeon_vce_get_destroy_msg(rdev, ring->idx, handle, fence); + if (r) { + DRM_ERROR("Failed to get dummy destroy msg\n"); + return r; + } + } else { r = radeon_ring_lock(rdev, ring, 64); if (r) { @@ -486,6 +502,16 @@ out_cleanup: printk(KERN_WARNING "Error while testing ring sync (%d).\n", r); }
+static bool radeon_test_sync_possible(struct radeon_ring *ringA, + struct radeon_ring *ringB) +{ + if (ringA->idx == TN_RING_TYPE_VCE2_INDEX && + ringB->idx == TN_RING_TYPE_VCE1_INDEX) + return false; + + return true; +} + void radeon_test_syncing(struct radeon_device *rdev) { int i, j, k; @@ -500,6 +526,9 @@ void radeon_test_syncing(struct radeon_device *rdev) if (!ringB->ready) continue;
+ if (!radeon_test_sync_possible(ringA, ringB)) + continue; + DRM_INFO("Testing syncing between rings %d and %d...\n", i, j); radeon_test_ring_sync(rdev, ringA, ringB);
@@ -511,6 +540,12 @@ void radeon_test_syncing(struct radeon_device *rdev) if (!ringC->ready) continue;
+ if (!radeon_test_sync_possible(ringA, ringC)) + continue; + + if (!radeon_test_sync_possible(ringB, ringC)) + continue; + DRM_INFO("Testing syncing between rings %d, %d and %d...\n", i, j, k); radeon_test_ring_sync2(rdev, ringA, ringB, ringC);
diff --git a/drivers/gpu/drm/radeon/radeon_vce.c b/drivers/gpu/drm/radeon/radeon_vce.c new file mode 100644 index 0000000..2547d8e --- /dev/null +++ b/drivers/gpu/drm/radeon/radeon_vce.c @@ -0,0 +1,588 @@ +/* + * Copyright 2013 Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * Authors: Christian König christian.koenig@amd.com + */ + +#include <linux/firmware.h> +#include <linux/module.h> +#include <drm/drmP.h> +#include <drm/drm.h> + +#include "radeon.h" +#include "radeon_asic.h" +#include "sid.h" + +/* Firmware Names */ +#define FIRMWARE_BONAIRE "radeon/BONAIRE_vce.bin" + +MODULE_FIRMWARE(FIRMWARE_BONAIRE); + +/** + * radeon_vce_init - allocate memory, load vce firmware + * + * @rdev: radeon_device pointer + * + * First step to get VCE online, allocate memory and load the firmware + */ +int radeon_vce_init(struct radeon_device *rdev) +{ + unsigned long bo_size; + const char *fw_name; + int i, r; + + switch (rdev->family) { + case CHIP_BONAIRE: + case CHIP_KAVERI: + case CHIP_KABINI: + fw_name = FIRMWARE_BONAIRE; + break; + + default: + return -EINVAL; + } + + r = request_firmware(&rdev->vce_fw, fw_name, rdev->dev); + if (r) { + dev_err(rdev->dev, "radeon_vce: Can't load firmware "%s"\n", + fw_name); + return r; + } + + bo_size = RADEON_GPU_PAGE_ALIGN(rdev->vce_fw->size) + + RADEON_VCE_STACK_SIZE + RADEON_VCE_HEAP_SIZE; + r = radeon_bo_create(rdev, bo_size, PAGE_SIZE, true, + RADEON_GEM_DOMAIN_VRAM, NULL, &rdev->vce.vcpu_bo); + if (r) { + dev_err(rdev->dev, "(%d) failed to allocate VCE bo\n", r); + return r; + } + + r = radeon_vce_resume(rdev); + if (r) + return r; + + memset(rdev->vce.cpu_addr, 0, bo_size); + memcpy(rdev->vce.cpu_addr, rdev->vce_fw->data, rdev->vce_fw->size); + + r = radeon_vce_suspend(rdev); + if (r) + return r; + + for (i = 0; i < RADEON_MAX_VCE_HANDLES; ++i) { + atomic_set(&rdev->vce.handles[i], 0); + rdev->vce.filp[i] = NULL; + } + + return 0; +} + +/** + * radeon_vce_fini - free memory + * + * @rdev: radeon_device pointer + * + * Last step on VCE teardown, free firmware memory + */ +void radeon_vce_fini(struct radeon_device *rdev) +{ + radeon_vce_suspend(rdev); + radeon_bo_unref(&rdev->vce.vcpu_bo); +} + +/** + * radeon_vce_suspend - unpin VCE fw memory + * + * @rdev: radeon_device pointer + * + * TODO: Test VCE suspend/resume + */ +int radeon_vce_suspend(struct radeon_device *rdev) +{ + int r; + + if (rdev->vce.vcpu_bo == NULL) + return 0; + + r = radeon_bo_reserve(rdev->vce.vcpu_bo, false); + if (!r) { + radeon_bo_kunmap(rdev->vce.vcpu_bo); + radeon_bo_unpin(rdev->vce.vcpu_bo); + radeon_bo_unreserve(rdev->vce.vcpu_bo); + } + return r; +} + +/** + * radeon_vce_resume - pin VCE fw memory + * + * @rdev: radeon_device pointer + * + * TODO: Test VCE suspend/resume + */ +int radeon_vce_resume(struct radeon_device *rdev) +{ + int r; + + if (rdev->vce.vcpu_bo == NULL) + return -EINVAL; + + r = radeon_bo_reserve(rdev->vce.vcpu_bo, false); + if (r) { + radeon_bo_unref(&rdev->vce.vcpu_bo); + dev_err(rdev->dev, "(%d) failed to reserve VCE bo\n", r); + return r; + } + + r = radeon_bo_pin(rdev->vce.vcpu_bo, RADEON_GEM_DOMAIN_VRAM, + &rdev->vce.gpu_addr); + if (r) { + radeon_bo_unreserve(rdev->vce.vcpu_bo); + radeon_bo_unref(&rdev->vce.vcpu_bo); + dev_err(rdev->dev, "(%d) VCE bo pin failed\n", r); + return r; + } + + r = radeon_bo_kmap(rdev->vce.vcpu_bo, &rdev->vce.cpu_addr); + if (r) { + dev_err(rdev->dev, "(%d) VCE map failed\n", r); + return r; + } + + radeon_bo_unreserve(rdev->vce.vcpu_bo); + + return 0; +} + +/** + * radeon_vce_free_handles - free still open VCE handles + * + * @rdev: radeon_device pointer + * @filp: drm file pointer + * + * Close all VCE handles still open by this file pointer + */ +void radeon_vce_free_handles(struct radeon_device *rdev, struct drm_file *filp) +{ + int i, r; + for (i = 0; i < RADEON_MAX_VCE_HANDLES; ++i) { + uint32_t handle = atomic_read(&rdev->vce.handles[i]); + if (!handle || rdev->vce.filp[i] != filp) + continue; + + r = radeon_vce_get_destroy_msg(rdev, TN_RING_TYPE_VCE1_INDEX, + handle, NULL); + if (r) + DRM_ERROR("Error destroying VCE handle (%d)!\n", r); + + rdev->vce.filp[i] = NULL; + atomic_set(&rdev->vce.handles[i], 0); + } +} + +/** + * radeon_vce_get_create_msg - generate a VCE create msg + * + * @rdev: radeon_device pointer + * @ring: ring we should submit the msg to + * @handle: VCE session handle to use + * @fence: optional fence to return + * + * Open up a stream for HW test + */ +int radeon_vce_get_create_msg(struct radeon_device *rdev, int ring, + uint32_t handle, struct radeon_fence **fence) +{ + const unsigned ib_size_dw = 1024; + struct radeon_ib ib; + uint64_t dummy; + int i, r; + + r = radeon_ib_get(rdev, ring, &ib, NULL, ib_size_dw * 4); + if (r) { + DRM_ERROR("radeon: failed to get ib (%d).\n", r); + return r; + } + + dummy = ib.gpu_addr + 1024; + + /* stitch together an VCE create msg */ + ib.length_dw = 0; + ib.ptr[ib.length_dw++] = 0x0000000c; /* len */ + ib.ptr[ib.length_dw++] = 0x00000001; /* session cmd */ + ib.ptr[ib.length_dw++] = handle; + + ib.ptr[ib.length_dw++] = 0x00000030; /* len */ + ib.ptr[ib.length_dw++] = 0x01000001; /* create cmd */ + ib.ptr[ib.length_dw++] = 0x00000000; + ib.ptr[ib.length_dw++] = 0x00000042; + ib.ptr[ib.length_dw++] = 0x0000000a; + ib.ptr[ib.length_dw++] = 0x00000001; + ib.ptr[ib.length_dw++] = 0x00000080; + ib.ptr[ib.length_dw++] = 0x00000060; + ib.ptr[ib.length_dw++] = 0x00000100; + ib.ptr[ib.length_dw++] = 0x00000100; + ib.ptr[ib.length_dw++] = 0x0000000c; + ib.ptr[ib.length_dw++] = 0x00000000; + + ib.ptr[ib.length_dw++] = 0x00000014; /* len */ + ib.ptr[ib.length_dw++] = 0x05000005; /* feedback buffer */ + ib.ptr[ib.length_dw++] = upper_32_bits(dummy); + ib.ptr[ib.length_dw++] = dummy; + ib.ptr[ib.length_dw++] = 0x00000001; + + for (i = ib.length_dw; i < ib_size_dw; ++i) + ib.ptr[i] = 0x0; + + r = radeon_ib_schedule(rdev, &ib, NULL); + if (r) { + DRM_ERROR("radeon: failed to schedule ib (%d).\n", r); + } + + if (fence) + *fence = radeon_fence_ref(ib.fence); + + radeon_ib_free(rdev, &ib); + + return r; +} + +/** + * radeon_vce_get_destroy_msg - generate a VCE destroy msg + * + * @rdev: radeon_device pointer + * @ring: ring we should submit the msg to + * @handle: VCE session handle to use + * @fence: optional fence to return + * + * Close up a stream for HW test or if userspace failed to do so + */ +int radeon_vce_get_destroy_msg(struct radeon_device *rdev, int ring, + uint32_t handle, struct radeon_fence **fence) +{ + const unsigned ib_size_dw = 1024; + struct radeon_ib ib; + uint64_t dummy; + int i, r; + + r = radeon_ib_get(rdev, ring, &ib, NULL, ib_size_dw * 4); + if (r) { + DRM_ERROR("radeon: failed to get ib (%d).\n", r); + return r; + } + + dummy = ib.gpu_addr + 1024; + + /* stitch together an VCE destroy msg */ + ib.length_dw = 0; + ib.ptr[ib.length_dw++] = 0x0000000c; /* len */ + ib.ptr[ib.length_dw++] = 0x00000001; /* session cmd */ + ib.ptr[ib.length_dw++] = handle; + + ib.ptr[ib.length_dw++] = 0x00000014; /* len */ + ib.ptr[ib.length_dw++] = 0x05000005; /* feedback buffer */ + ib.ptr[ib.length_dw++] = upper_32_bits(dummy); + ib.ptr[ib.length_dw++] = dummy; + ib.ptr[ib.length_dw++] = 0x00000001; + + ib.ptr[ib.length_dw++] = 0x00000008; /* len */ + ib.ptr[ib.length_dw++] = 0x02000001; /* destroy cmd */ + + for (i = ib.length_dw; i < ib_size_dw; ++i) + ib.ptr[i] = 0x0; + + r = radeon_ib_schedule(rdev, &ib, NULL); + if (r) { + DRM_ERROR("radeon: failed to schedule ib (%d).\n", r); + } + + if (fence) + *fence = radeon_fence_ref(ib.fence); + + radeon_ib_free(rdev, &ib); + + return r; +} + +/** + * radeon_vce_cs_reloc - command submission relocation + * + * @p: parser context + * @lo: address of lower dword + * @hi: address of higher dword + * + * Patch relocation inside command stream with real buffer address + */ +int radeon_vce_cs_reloc(struct radeon_cs_parser *p, int lo, int hi) +{ + struct radeon_cs_chunk *relocs_chunk; + uint64_t offset; + unsigned idx; + + relocs_chunk = &p->chunks[p->chunk_relocs_idx]; + offset = radeon_get_ib_value(p, lo); + idx = radeon_get_ib_value(p, hi); + + if (idx >= relocs_chunk->length_dw) { + DRM_ERROR("Relocs at %d after relocations chunk end %d !\n", + idx, relocs_chunk->length_dw); + return -EINVAL; + } + + offset += p->relocs_ptr[(idx / 4)]->lobj.gpu_offset; + + p->ib.ptr[lo] = offset & 0xFFFFFFFF; + p->ib.ptr[hi] = offset >> 32; + + return 0; +} + +/** + * radeon_vce_cs_parse - parse and validate the command stream + * + * @p: parser context + * + */ +int radeon_vce_cs_parse(struct radeon_cs_parser *p) +{ + uint32_t handle = 0; + bool destroy = false; + int i, r; + + while (p->idx < p->chunks[p->chunk_ib_idx].length_dw) { + uint32_t len = radeon_get_ib_value(p, p->idx); + uint32_t cmd = radeon_get_ib_value(p, p->idx + 1); + + if ((len < 8) || (len & 3)) { + DRM_ERROR("invalid VCE command length (%d)!\n", len); + return -EINVAL; + } + + switch (cmd) { + case 0x00000001: // session + handle = radeon_get_ib_value(p, p->idx + 2); + break; + + case 0x00000002: // task info + case 0x01000001: // create + case 0x04000001: // config extension + case 0x04000002: // pic control + case 0x04000005: // rate control + case 0x04000007: // motion estimation + case 0x04000008: // rdo + break; + + case 0x03000001: // encode + r = radeon_vce_cs_reloc(p, p->idx + 10, p->idx + 9); + if (r) + return r; + + r = radeon_vce_cs_reloc(p, p->idx + 12, p->idx + 11); + if (r) + return r; + break; + + case 0x02000001: // destroy + destroy = true; + break; + + case 0x05000001: // context buffer + case 0x05000004: // video bitstream buffer + case 0x05000005: // feedback buffer + r = radeon_vce_cs_reloc(p, p->idx + 3, p->idx + 2); + if (r) + return r; + break; + + default: + DRM_ERROR("invalid VCE command (0x%x)!\n", cmd); + return -EINVAL; + } + + p->idx += len / 4; + } + + if (destroy) { + /* IB contains a destroy msg, free the handle */ + for (i = 0; i < RADEON_MAX_VCE_HANDLES; ++i) + atomic_cmpxchg(&p->rdev->vce.handles[i], handle, 0); + + return 0; + } + + /* create or encode, validate the handle */ + for (i = 0; i < RADEON_MAX_VCE_HANDLES; ++i) { + if (atomic_read(&p->rdev->vce.handles[i]) == handle) + return 0; + } + + /* handle not found try to alloc a new one */ + for (i = 0; i < RADEON_MAX_VCE_HANDLES; ++i) { + if (!atomic_cmpxchg(&p->rdev->vce.handles[i], 0, handle)) { + p->rdev->vce.filp[i] = p->filp; + return 0; + } + } + + DRM_ERROR("No more free VCE handles!\n"); + return -EINVAL; +} + +/** + * radeon_vce_semaphore_emit - emit a semaphore command + * + * @rdev: radeon_device pointer + * @ring: engine to use + * @semaphore: address of semaphore + * @emit_wait: true=emit wait, false=emit signal + * + */ +bool radeon_vce_semaphore_emit(struct radeon_device *rdev, + struct radeon_ring *ring, + struct radeon_semaphore *semaphore, + bool emit_wait) +{ + uint64_t addr = semaphore->gpu_addr; + + radeon_ring_write(ring, VCE_CMD_SEMAPHORE); + radeon_ring_write(ring, (addr >> 3) & 0x000FFFFF); + radeon_ring_write(ring, (addr >> 23) & 0x000FFFFF); + radeon_ring_write(ring, 0x01003000 | (emit_wait ? 1 : 0)); + if (!emit_wait) + radeon_ring_write(ring, VCE_CMD_END); + + return true; +} + +/** + * radeon_vce_ib_execute - execute indirect buffer + * + * @rdev: radeon_device pointer + * @ib: the IB to execute + * + */ +void radeon_vce_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) +{ + struct radeon_ring *ring = &rdev->ring[ib->ring]; + radeon_ring_write(ring, VCE_CMD_IB); + radeon_ring_write(ring, ib->gpu_addr); + radeon_ring_write(ring, upper_32_bits(ib->gpu_addr)); + radeon_ring_write(ring, ib->length_dw); +} + +/** + * radeon_vce_fence_emit - add a fence command to the ring + * + * @rdev: radeon_device pointer + * @fence: the fence + * + */ +void radeon_vce_fence_emit(struct radeon_device *rdev, + struct radeon_fence *fence) +{ + struct radeon_ring *ring = &rdev->ring[fence->ring]; + uint32_t addr = rdev->fence_drv[fence->ring].gpu_addr; + + radeon_ring_write(ring, VCE_CMD_FENCE); + radeon_ring_write(ring, addr); + radeon_ring_write(ring, upper_32_bits(addr)); + radeon_ring_write(ring, fence->seq); + radeon_ring_write(ring, VCE_CMD_TRAP); + radeon_ring_write(ring, VCE_CMD_END); +} + +/** + * radeon_vce_ring_test - test if VCE ring is working + * + * @rdev: radeon_device pointer + * @ring: the engine to test on + * + */ +int radeon_vce_ring_test(struct radeon_device *rdev, struct radeon_ring *ring) +{ + uint32_t rptr = vce_v1_0_get_rptr(rdev, ring); + unsigned i; + int r; + + r = radeon_ring_lock(rdev, ring, 16); + if (r) { + DRM_ERROR("radeon: vce failed to lock ring %d (%d).\n", + ring->idx, r); + return r; + } + radeon_ring_write(ring, VCE_CMD_END); + radeon_ring_unlock_commit(rdev, ring); + + for (i = 0; i < rdev->usec_timeout; i++) { + if (vce_v1_0_get_rptr(rdev, ring) != rptr) + break; + DRM_UDELAY(1); + } + + if (i < rdev->usec_timeout) { + DRM_INFO("ring test on %d succeeded in %d usecs\n", + ring->idx, i); + } else { + DRM_ERROR("radeon: ring %d test failed\n", + ring->idx); + r = -ETIMEDOUT; + } + + return r; +} + +/** + * radeon_vce_ib_test - test if VCE IBs are working + * + * @rdev: radeon_device pointer + * @ring: the engine to test on + * + */ +int radeon_vce_ib_test(struct radeon_device *rdev, struct radeon_ring *ring) +{ + struct radeon_fence *fence = NULL; + int r; + + r = radeon_vce_get_create_msg(rdev, ring->idx, 1, NULL); + if (r) { + DRM_ERROR("radeon: failed to get create msg (%d).\n", r); + goto error; + } + + r = radeon_vce_get_destroy_msg(rdev, ring->idx, 1, &fence); + if (r) { + DRM_ERROR("radeon: failed to get destroy ib (%d).\n", r); + goto error; + } + + r = radeon_fence_wait(fence, false); + if (r) { + DRM_ERROR("radeon: fence wait failed (%d).\n", r); + } else { + DRM_INFO("ib test on ring %d succeeded\n", ring->idx); + } +error: + radeon_fence_unref(&fence); + return r; +} diff --git a/drivers/gpu/drm/radeon/sid.h b/drivers/gpu/drm/radeon/sid.h index 9239a6d..683532f 100644 --- a/drivers/gpu/drm/radeon/sid.h +++ b/drivers/gpu/drm/radeon/sid.h @@ -1798,4 +1798,51 @@ #define DMA_PACKET_CONSTANT_FILL 0xd #define DMA_PACKET_NOP 0xf
+#define VCE_STATUS 0x20004 +#define VCE_VCPU_CNTL 0x20014 +#define VCE_CLK_EN (1 << 0) +#define VCE_VCPU_CACHE_OFFSET0 0x20024 +#define VCE_VCPU_CACHE_SIZE0 0x20028 +#define VCE_VCPU_CACHE_OFFSET1 0x2002c +#define VCE_VCPU_CACHE_SIZE1 0x20030 +#define VCE_VCPU_CACHE_OFFSET2 0x20034 +#define VCE_VCPU_CACHE_SIZE2 0x20038 +#define VCE_SOFT_RESET 0x20120 +#define VCE_ECPU_SOFT_RESET (1 << 0) +#define VCE_FME_SOFT_RESET (1 << 2) +#define VCE_RB_BASE_LO2 0x2016c +#define VCE_RB_BASE_HI2 0x20170 +#define VCE_RB_SIZE2 0x20174 +#define VCE_RB_RPTR2 0x20178 +#define VCE_RB_WPTR2 0x2017c +#define VCE_RB_BASE_LO 0x20180 +#define VCE_RB_BASE_HI 0x20184 +#define VCE_RB_SIZE 0x20188 +#define VCE_RB_RPTR 0x2018c +#define VCE_RB_WPTR 0x20190 +#define VCE_CLOCK_GATING_A 0x202f8 +#define VCE_CLOCK_GATING_B 0x202fc +#define VCE_UENC_CLOCK_GATING 0x205bc +#define VCE_UENC_REG_CLOCK_GATING 0x205c0 +#define VCE_FW_REG_STATUS 0x20e10 +# define VCE_FW_REG_STATUS_BUSY (1 << 0) +# define VCE_FW_REG_STATUS_PASS (1 << 3) +# define VCE_FW_REG_STATUS_DONE (1 << 11) +#define VCE_LMI_FW_START_KEYSEL 0x20e18 +#define VCE_LMI_FW_PERIODIC_CTRL 0x20e20 +#define VCE_LMI_CTRL2 0x20e74 +#define VCE_LMI_CTRL 0x20e98 +#define VCE_LMI_VM_CTRL 0x20ea0 +#define VCE_LMI_SWAP_CNTL 0x20eb4 +#define VCE_LMI_SWAP_CNTL1 0x20eb8 +#define VCE_LMI_CACHE_CTRL 0x20ef4 + +#define VCE_CMD_NO_OP 0x00000000 +#define VCE_CMD_END 0x00000001 +#define VCE_CMD_IB 0x00000002 +#define VCE_CMD_FENCE 0x00000003 +#define VCE_CMD_TRAP 0x00000004 +#define VCE_CMD_IB_AUTO 0x00000005 +#define VCE_CMD_SEMAPHORE 0x00000006 + #endif diff --git a/drivers/gpu/drm/radeon/vce_v1_0.c b/drivers/gpu/drm/radeon/vce_v1_0.c new file mode 100644 index 0000000..e0c3534 --- /dev/null +++ b/drivers/gpu/drm/radeon/vce_v1_0.c @@ -0,0 +1,187 @@ +/* + * Copyright 2013 Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * Authors: Christian König christian.koenig@amd.com + */ + +#include <linux/firmware.h> +#include <drm/drmP.h> +#include "radeon.h" +#include "radeon_asic.h" +#include "sid.h" + +/** + * vce_v1_0_get_rptr - get read pointer + * + * @rdev: radeon_device pointer + * @ring: radeon_ring pointer + * + * Returns the current hardware read pointer + */ +uint32_t vce_v1_0_get_rptr(struct radeon_device *rdev, + struct radeon_ring *ring) +{ + if (ring->idx == TN_RING_TYPE_VCE1_INDEX) + return RREG32(VCE_RB_RPTR); + else + return RREG32(VCE_RB_RPTR2); +} + +/** + * vce_v1_0_get_wptr - get write pointer + * + * @rdev: radeon_device pointer + * @ring: radeon_ring pointer + * + * Returns the current hardware write pointer + */ +uint32_t vce_v1_0_get_wptr(struct radeon_device *rdev, + struct radeon_ring *ring) +{ + if (ring->idx == TN_RING_TYPE_VCE1_INDEX) + return RREG32(VCE_RB_WPTR); + else + return RREG32(VCE_RB_WPTR2); +} + +/** + * vce_v1_0_set_wptr - set write pointer + * + * @rdev: radeon_device pointer + * @ring: radeon_ring pointer + * + * Commits the write pointer to the hardware + */ +void vce_v1_0_set_wptr(struct radeon_device *rdev, + struct radeon_ring *ring) +{ + if (ring->idx == TN_RING_TYPE_VCE1_INDEX) + WREG32(VCE_RB_WPTR, ring->wptr); + else + WREG32(VCE_RB_WPTR2, ring->wptr); +} + +/** + * vce_v1_0_start - start VCE block + * + * @rdev: radeon_device pointer + * + * Setup and start the VCE block + */ +int vce_v1_0_start(struct radeon_device *rdev) +{ + struct radeon_ring *ring; + int i, j, r; + + /* set BUSY flag */ + WREG32_P(VCE_STATUS, 1, ~1); + + ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX]; + WREG32(VCE_RB_RPTR, ring->rptr); + WREG32(VCE_RB_WPTR, ring->wptr); + WREG32(VCE_RB_BASE_LO, ring->gpu_addr); + WREG32(VCE_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); + WREG32(VCE_RB_SIZE, ring->ring_size / 4); + + ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX]; + WREG32(VCE_RB_RPTR2, ring->rptr); + WREG32(VCE_RB_WPTR2, ring->wptr); + WREG32(VCE_RB_BASE_LO2, ring->gpu_addr); + WREG32(VCE_RB_BASE_HI2, upper_32_bits(ring->gpu_addr)); + WREG32(VCE_RB_SIZE2, ring->ring_size / 4); + + WREG32_P(VCE_VCPU_CNTL, VCE_CLK_EN, ~VCE_CLK_EN); + + WREG32_P(VCE_SOFT_RESET, + VCE_ECPU_SOFT_RESET | + VCE_FME_SOFT_RESET, ~( + VCE_ECPU_SOFT_RESET | + VCE_FME_SOFT_RESET)); + + mdelay(100); + + WREG32_P(VCE_SOFT_RESET, 0, ~( + VCE_ECPU_SOFT_RESET | + VCE_FME_SOFT_RESET)); + + for (i = 0; i < 10; ++i) { + uint32_t status; + for (j = 0; j < 100; ++j) { + status = RREG32(VCE_STATUS); + if (status & 2) + break; + mdelay(10); + } + r = 0; + if (status & 2) + break; + + DRM_ERROR("VCE not responding, trying to reset the ECPU!!!\n"); + WREG32_P(VCE_SOFT_RESET, VCE_ECPU_SOFT_RESET, ~VCE_ECPU_SOFT_RESET); + mdelay(10); + WREG32_P(VCE_SOFT_RESET, 0, ~VCE_ECPU_SOFT_RESET); + mdelay(10); + r = -1; + } + + /* clear BUSY flag */ + WREG32_P(VCE_STATUS, 0, ~1); + + if (r) { + DRM_ERROR("VCE not responding, giving up!!!\n"); + return r; + } + + return 0; +} + +int vce_v1_0_init(struct radeon_device *rdev) +{ + struct radeon_ring *ring; + int r; + + r = vce_v1_0_start(rdev); + if (r) + return r; + + ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX]; + ring->ready = true; + r = radeon_ring_test(rdev, TN_RING_TYPE_VCE1_INDEX, ring); + if (r) { + ring->ready = false; + return r; + } + + ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX]; + ring->ready = true; + r = radeon_ring_test(rdev, TN_RING_TYPE_VCE2_INDEX, ring); + if (r) { + ring->ready = false; + return r; + } + + DRM_INFO("VCE initialized successfully.\n"); + + return 0; +} diff --git a/drivers/gpu/drm/radeon/vce_v2_0.c b/drivers/gpu/drm/radeon/vce_v2_0.c new file mode 100644 index 0000000..4911d1b --- /dev/null +++ b/drivers/gpu/drm/radeon/vce_v2_0.c @@ -0,0 +1,70 @@ +/* + * Copyright 2013 Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * Authors: Christian König christian.koenig@amd.com + */ + +#include <linux/firmware.h> +#include <drm/drmP.h> +#include "radeon.h" +#include "radeon_asic.h" +#include "cikd.h" + +int vce_v2_0_resume(struct radeon_device *rdev) +{ + uint64_t addr = rdev->vce.gpu_addr; + uint32_t size; + + WREG32_P(VCE_CLOCK_GATING_A, 0, ~(1 << 16)); + WREG32_P(VCE_UENC_CLOCK_GATING, 0x1FF000, ~0xFF9FF000); + WREG32_P(VCE_UENC_REG_CLOCK_GATING, 0x3F, ~0x3F); + WREG32(VCE_CLOCK_GATING_B, 0xf7); + + WREG32(VCE_LMI_CTRL, 0x00398000); + WREG32_P(VCE_LMI_CACHE_CTRL, 0x0, ~0x1); + WREG32(VCE_LMI_SWAP_CNTL, 0); + WREG32(VCE_LMI_SWAP_CNTL1, 0); + WREG32(VCE_LMI_VM_CTRL, 0); + + size = RADEON_GPU_PAGE_ALIGN(rdev->vce_fw->size); + WREG32(VCE_VCPU_CACHE_OFFSET0, addr & 0x7fffffff); + WREG32(VCE_VCPU_CACHE_SIZE0, size); + + addr += size; + size = RADEON_VCE_STACK_SIZE; + WREG32(VCE_VCPU_CACHE_OFFSET1, addr & 0x7fffffff); + WREG32(VCE_VCPU_CACHE_SIZE1, size); + + addr += size; + size = RADEON_VCE_HEAP_SIZE; + WREG32(VCE_VCPU_CACHE_OFFSET2, addr & 0x7fffffff); + WREG32(VCE_VCPU_CACHE_SIZE2, size); + + WREG32_P(VCE_LMI_CTRL2, 0x0, ~0x100); + + WREG32_P(VCE_SYS_INT_EN, VCE_SYS_INT_TRAP_INTERRUPT_EN, + ~VCE_SYS_INT_TRAP_INTERRUPT_EN); + + return 0; +} diff --git a/include/uapi/drm/radeon_drm.h b/include/uapi/drm/radeon_drm.h index d9ea3a7..6493ca5 100644 --- a/include/uapi/drm/radeon_drm.h +++ b/include/uapi/drm/radeon_drm.h @@ -919,6 +919,7 @@ struct drm_radeon_gem_va { #define RADEON_CS_RING_COMPUTE 1 #define RADEON_CS_RING_DMA 2 #define RADEON_CS_RING_UVD 3 +#define RADEON_CS_RING_VCE 4 /* The third dword of RADEON_CHUNK_ID_FLAGS is a sint32 that sets the priority */ /* 0 = normal, + = higher priority, - = lower priority */
On Tue, Feb 4, 2014 at 10:17 AM, Christian König deathsimple@vodafone.de wrote:
From: Christian König christian.koenig@amd.com
Only VCE 2.0 support so far.
v2: squashing multiple patches into this one v3: add IRQ support for CIK, major cleanups, basic code documentation v4: remove HAINAN from chipset list
Signed-off-by: Christian König christian.koenig@amd.com
drivers/gpu/drm/radeon/Makefile | 6 + drivers/gpu/drm/radeon/cik.c | 60 ++++ drivers/gpu/drm/radeon/cikd.h | 33 ++ drivers/gpu/drm/radeon/radeon.h | 56 +++- drivers/gpu/drm/radeon/radeon_asic.c | 17 + drivers/gpu/drm/radeon/radeon_asic.h | 13 + drivers/gpu/drm/radeon/radeon_cs.c | 4 + drivers/gpu/drm/radeon/radeon_kms.c | 1 + drivers/gpu/drm/radeon/radeon_ring.c | 4 + drivers/gpu/drm/radeon/radeon_test.c | 39 ++- drivers/gpu/drm/radeon/radeon_vce.c | 588 +++++++++++++++++++++++++++++++++++ drivers/gpu/drm/radeon/sid.h | 47 +++ drivers/gpu/drm/radeon/vce_v1_0.c | 187 +++++++++++ drivers/gpu/drm/radeon/vce_v2_0.c | 70 +++++ include/uapi/drm/radeon_drm.h | 1 + 15 files changed, 1117 insertions(+), 9 deletions(-) create mode 100644 drivers/gpu/drm/radeon/radeon_vce.c create mode 100644 drivers/gpu/drm/radeon/vce_v1_0.c create mode 100644 drivers/gpu/drm/radeon/vce_v2_0.c
diff --git a/drivers/gpu/drm/radeon/Makefile b/drivers/gpu/drm/radeon/Makefile index 306364a..ed60caa 100644 --- a/drivers/gpu/drm/radeon/Makefile +++ b/drivers/gpu/drm/radeon/Makefile @@ -99,6 +99,12 @@ radeon-y += \ uvd_v3_1.o \ uvd_v4_2.o
+# add VCE block +radeon-y += \
radeon_vce.o \
vce_v1_0.o \
vce_v2_0.o \
radeon-$(CONFIG_COMPAT) += radeon_ioc32.o radeon-$(CONFIG_VGA_SWITCHEROO) += radeon_atpx_handler.o radeon-$(CONFIG_ACPI) += radeon_acpi.o diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index e6419ca..be6eb4d 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -7490,6 +7490,20 @@ restart_ih: /* reset addr and status */ WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1); break;
case 167: /* VCE */
DRM_DEBUG("IH: VCE int: 0x%08x\n", src_data);
switch (src_data) {
case 0:
radeon_fence_process(rdev, TN_RING_TYPE_VCE1_INDEX);
break;
case 1:
radeon_fence_process(rdev, TN_RING_TYPE_VCE2_INDEX);
break;
default:
DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data);
break;
}
break; case 176: /* GFX RB CP_INT */ case 177: /* GFX IB CP_INT */ radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
@@ -7789,6 +7803,22 @@ static int cik_startup(struct radeon_device *rdev) if (r) rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
r = radeon_vce_resume(rdev);
if (!r) {
r = vce_v2_0_resume(rdev);
if (!r)
r = radeon_fence_driver_start_ring(rdev,
TN_RING_TYPE_VCE1_INDEX);
if (!r)
r = radeon_fence_driver_start_ring(rdev,
TN_RING_TYPE_VCE2_INDEX);
}
if (r) {
dev_err(rdev->dev, "VCE init error (%d).\n", r);
rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
}
/* Enable IRQ */ if (!rdev->irq.installed) { r = radeon_irq_kms_init(rdev);
@@ -7864,6 +7894,23 @@ static int cik_startup(struct radeon_device *rdev) DRM_ERROR("radeon: failed initializing UVD (%d).\n", r); }
r = -ENOENT;
ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
if (ring->ring_size)
r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
VCE_CMD_NO_OP);
ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
if (ring->ring_size)
r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
VCE_CMD_NO_OP);
if (!r)
r = vce_v1_0_init(rdev);
else if (r != -ENOENT)
DRM_ERROR("radeon: failed initializing VCE (%d).\n", r);
r = radeon_ib_pool_init(rdev); if (r) { dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
@@ -7934,6 +7981,7 @@ int cik_suspend(struct radeon_device *rdev) cik_sdma_enable(rdev, false); uvd_v1_0_fini(rdev); radeon_uvd_suspend(rdev);
radeon_vce_suspend(rdev); cik_fini_pg(rdev); cik_fini_cg(rdev); cik_irq_suspend(rdev);
@@ -8066,6 +8114,17 @@ int cik_init(struct radeon_device *rdev) r600_ring_init(rdev, ring, 4096); }
r = radeon_vce_init(rdev);
if (!r) {
ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
ring->ring_obj = NULL;
r600_ring_init(rdev, ring, 4096);
ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
ring->ring_obj = NULL;
r600_ring_init(rdev, ring, 4096);
}
rdev->ih.ring_obj = NULL; r600_ih_ring_init(rdev, 64 * 1024);
@@ -8127,6 +8186,7 @@ void cik_fini(struct radeon_device *rdev) radeon_irq_kms_fini(rdev); uvd_v1_0_fini(rdev); radeon_uvd_fini(rdev);
radeon_vce_fini(rdev); cik_pcie_gart_fini(rdev); r600_vram_scratch_fini(rdev); radeon_gem_fini(rdev);
diff --git a/drivers/gpu/drm/radeon/cikd.h b/drivers/gpu/drm/radeon/cikd.h index 98bae9d7..459ae02 100644 --- a/drivers/gpu/drm/radeon/cikd.h +++ b/drivers/gpu/drm/radeon/cikd.h @@ -2010,4 +2010,37 @@ /* UVD CTX indirect */ #define UVD_CGC_MEM_CTRL 0xC0
+/* VCE */
+#define VCE_VCPU_CACHE_OFFSET0 0x20024 +#define VCE_VCPU_CACHE_SIZE0 0x20028 +#define VCE_VCPU_CACHE_OFFSET1 0x2002c +#define VCE_VCPU_CACHE_SIZE1 0x20030 +#define VCE_VCPU_CACHE_OFFSET2 0x20034 +#define VCE_VCPU_CACHE_SIZE2 0x20038 +#define VCE_RB_RPTR2 0x20178 +#define VCE_RB_WPTR2 0x2017c +#define VCE_RB_RPTR 0x2018c +#define VCE_RB_WPTR 0x20190 +#define VCE_CLOCK_GATING_A 0x202f8 +#define VCE_CLOCK_GATING_B 0x202fc +#define VCE_UENC_CLOCK_GATING 0x207bc +#define VCE_UENC_REG_CLOCK_GATING 0x207c0 +#define VCE_SYS_INT_EN 0x21300 +# define VCE_SYS_INT_TRAP_INTERRUPT_EN (1 << 3) +#define VCE_LMI_CTRL2 0x21474 +#define VCE_LMI_CTRL 0x21498 +#define VCE_LMI_VM_CTRL 0x214a0 +#define VCE_LMI_SWAP_CNTL 0x214b4 +#define VCE_LMI_SWAP_CNTL1 0x214b8 +#define VCE_LMI_CACHE_CTRL 0x214f4
+#define VCE_CMD_NO_OP 0x00000000 +#define VCE_CMD_END 0x00000001 +#define VCE_CMD_IB 0x00000002 +#define VCE_CMD_FENCE 0x00000003 +#define VCE_CMD_TRAP 0x00000004 +#define VCE_CMD_IB_AUTO 0x00000005 +#define VCE_CMD_SEMAPHORE 0x00000006
#endif diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 4a8ac1c..a9295b9 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -113,19 +113,16 @@ extern int radeon_hard_reset; #define RADEONFB_CONN_LIMIT 4 #define RADEON_BIOS_NUM_SCRATCH 8
-/* max number of rings */ -#define RADEON_NUM_RINGS 6
/* fence seq are set to this number when signaled */ #define RADEON_FENCE_SIGNALED_SEQ 0LL
/* internal ring indices */ /* r1xx+ has gfx CP ring */ -#define RADEON_RING_TYPE_GFX_INDEX 0 +#define RADEON_RING_TYPE_GFX_INDEX 0
/* cayman has 2 compute CP rings */ -#define CAYMAN_RING_TYPE_CP1_INDEX 1 -#define CAYMAN_RING_TYPE_CP2_INDEX 2 +#define CAYMAN_RING_TYPE_CP1_INDEX 1 +#define CAYMAN_RING_TYPE_CP2_INDEX 2
/* R600+ has an async dma ring */ #define R600_RING_TYPE_DMA_INDEX 3 @@ -133,7 +130,14 @@ extern int radeon_hard_reset; #define CAYMAN_RING_TYPE_DMA1_INDEX 4
/* R600+ */ -#define R600_RING_TYPE_UVD_INDEX 5 +#define R600_RING_TYPE_UVD_INDEX 5
+/* TN+ */ +#define TN_RING_TYPE_VCE1_INDEX 6 +#define TN_RING_TYPE_VCE2_INDEX 7
+/* max number of rings */ +#define RADEON_NUM_RINGS 8
/* hardcode those limit for now */ #define RADEON_VA_IB_OFFSET (1 << 20) @@ -1589,6 +1593,42 @@ int radeon_uvd_calc_upll_dividers(struct radeon_device *rdev, int radeon_uvd_send_upll_ctlreq(struct radeon_device *rdev, unsigned cg_upll_func_cntl);
+/*
- VCE
- */
+#define RADEON_MAX_VCE_HANDLES 16 +#define RADEON_VCE_STACK_SIZE (1024*1024) +#define RADEON_VCE_HEAP_SIZE (4*1024*1024)
+struct radeon_vce {
struct radeon_bo *vcpu_bo;
void *cpu_addr;
uint64_t gpu_addr;
atomic_t handles[RADEON_MAX_VCE_HANDLES];
struct drm_file *filp[RADEON_MAX_VCE_HANDLES];
+};
+int radeon_vce_init(struct radeon_device *rdev); +void radeon_vce_fini(struct radeon_device *rdev); +int radeon_vce_suspend(struct radeon_device *rdev); +int radeon_vce_resume(struct radeon_device *rdev); +int radeon_vce_get_create_msg(struct radeon_device *rdev, int ring,
uint32_t handle, struct radeon_fence **fence);
+int radeon_vce_get_destroy_msg(struct radeon_device *rdev, int ring,
uint32_t handle, struct radeon_fence **fence);
+void radeon_vce_free_handles(struct radeon_device *rdev, struct drm_file *filp); +int radeon_vce_cs_reloc(struct radeon_cs_parser *p, int lo, int hi); +int radeon_vce_cs_parse(struct radeon_cs_parser *p); +bool radeon_vce_semaphore_emit(struct radeon_device *rdev,
struct radeon_ring *ring,
struct radeon_semaphore *semaphore,
bool emit_wait);
+void radeon_vce_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib); +void radeon_vce_fence_emit(struct radeon_device *rdev,
struct radeon_fence *fence);
+int radeon_vce_ring_test(struct radeon_device *rdev, struct radeon_ring *ring); +int radeon_vce_ib_test(struct radeon_device *rdev, struct radeon_ring *ring);
struct r600_audio_pin { int channels; int rate; @@ -2184,6 +2224,7 @@ struct radeon_device { struct radeon_gem gem; struct radeon_pm pm; struct radeon_uvd uvd;
struct radeon_vce vce; uint32_t bios_scratch[RADEON_BIOS_NUM_SCRATCH]; struct radeon_wb wb; struct radeon_dummy_page dummy_page;
@@ -2203,6 +2244,7 @@ struct radeon_device { const struct firmware *sdma_fw; /* CIK SDMA firmware */ const struct firmware *smc_fw; /* SMC firmware */ const struct firmware *uvd_fw; /* UVD firmware */
const struct firmware *vce_fw; /* VCE firmware */ struct r600_vram_scratch vram_scratch; int msi_enabled; /* msi enabled */ struct r600_ih ih; /* r6/700 interrupt ring */
diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c index f74db43..e9eaba99 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.c +++ b/drivers/gpu/drm/radeon/radeon_asic.c @@ -1987,6 +1987,19 @@ static struct radeon_asic_ring ci_dma_ring = { .set_wptr = &cik_sdma_set_wptr, };
+static struct radeon_asic_ring ci_vce_ring = {
.ib_execute = &radeon_vce_ib_execute,
.emit_fence = &radeon_vce_fence_emit,
.emit_semaphore = &radeon_vce_semaphore_emit,
.cs_parse = &radeon_vce_cs_parse,
.ring_test = &radeon_vce_ring_test,
.ib_test = &radeon_vce_ib_test,
.is_lockup = &radeon_ring_test_lockup,
.get_rptr = &vce_v1_0_get_rptr,
.get_wptr = &vce_v1_0_get_wptr,
.set_wptr = &vce_v1_0_set_wptr,
+};
static struct radeon_asic ci_asic = { .init = &cik_init, .fini = &cik_fini, @@ -2015,6 +2028,8 @@ static struct radeon_asic ci_asic = { [R600_RING_TYPE_DMA_INDEX] = &ci_dma_ring, [CAYMAN_RING_TYPE_DMA1_INDEX] = &ci_dma_ring, [R600_RING_TYPE_UVD_INDEX] = &cayman_uvd_ring,
[TN_RING_TYPE_VCE1_INDEX] = &ci_vce_ring,
[TN_RING_TYPE_VCE2_INDEX] = &ci_vce_ring, }, .irq = { .set = &cik_irq_set,
@@ -2117,6 +2132,8 @@ static struct radeon_asic kv_asic = { [R600_RING_TYPE_DMA_INDEX] = &ci_dma_ring, [CAYMAN_RING_TYPE_DMA1_INDEX] = &ci_dma_ring, [R600_RING_TYPE_UVD_INDEX] = &cayman_uvd_ring,
[TN_RING_TYPE_VCE1_INDEX] = &ci_vce_ring,
[TN_RING_TYPE_VCE2_INDEX] = &ci_vce_ring, }, .irq = { .set = &cik_irq_set,
diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h index b3bc433..2017efd 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.h +++ b/drivers/gpu/drm/radeon/radeon_asic.h @@ -861,4 +861,17 @@ bool uvd_v3_1_semaphore_emit(struct radeon_device *rdev, /* uvd v4.2 */ int uvd_v4_2_resume(struct radeon_device *rdev);
+/* vce v1.0 */ +uint32_t vce_v1_0_get_rptr(struct radeon_device *rdev,
struct radeon_ring *ring);
+uint32_t vce_v1_0_get_wptr(struct radeon_device *rdev,
struct radeon_ring *ring);
+void vce_v1_0_set_wptr(struct radeon_device *rdev,
struct radeon_ring *ring);
+int vce_v1_0_init(struct radeon_device *rdev); +int vce_v1_0_start(struct radeon_device *rdev);
+/* vce v2.0 */ +int vce_v2_0_resume(struct radeon_device *rdev);
#endif diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c index dfb5a1d..701ee79 100644 --- a/drivers/gpu/drm/radeon/radeon_cs.c +++ b/drivers/gpu/drm/radeon/radeon_cs.c @@ -147,6 +147,10 @@ static int radeon_cs_get_ring(struct radeon_cs_parser *p, u32 ring, s32 priority case RADEON_CS_RING_UVD: p->ring = R600_RING_TYPE_UVD_INDEX; break;
case RADEON_CS_RING_VCE:
/* TODO: only use the low priority ring for now */
p->ring = TN_RING_TYPE_VCE1_INDEX;
break; } return 0;
} diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c index 114d167..0e078af 100644 --- a/drivers/gpu/drm/radeon/radeon_kms.c +++ b/drivers/gpu/drm/radeon/radeon_kms.c @@ -610,6 +610,7 @@ void radeon_driver_preclose_kms(struct drm_device *dev, if (rdev->cmask_filp == file_priv) rdev->cmask_filp = NULL; radeon_uvd_free_handles(rdev, file_priv);
radeon_vce_free_handles(rdev, file_priv);
}
/* diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c index 1b783f0..149cd63 100644 --- a/drivers/gpu/drm/radeon/radeon_ring.c +++ b/drivers/gpu/drm/radeon/radeon_ring.c @@ -814,6 +814,8 @@ static int cayman_cp2_index = CAYMAN_RING_TYPE_CP2_INDEX; static int radeon_dma1_index = R600_RING_TYPE_DMA_INDEX; static int radeon_dma2_index = CAYMAN_RING_TYPE_DMA1_INDEX; static int r600_uvd_index = R600_RING_TYPE_UVD_INDEX; +static int si_vce1_index = TN_RING_TYPE_VCE1_INDEX; +static int si_vce2_index = TN_RING_TYPE_VCE2_INDEX;
static struct drm_info_list radeon_debugfs_ring_info_list[] = { {"radeon_ring_gfx", radeon_debugfs_ring_info, 0, &radeon_gfx_index}, @@ -822,6 +824,8 @@ static struct drm_info_list radeon_debugfs_ring_info_list[] = { {"radeon_ring_dma1", radeon_debugfs_ring_info, 0, &radeon_dma1_index}, {"radeon_ring_dma2", radeon_debugfs_ring_info, 0, &radeon_dma2_index}, {"radeon_ring_uvd", radeon_debugfs_ring_info, 0, &r600_uvd_index},
{"radeon_ring_vce1", radeon_debugfs_ring_info, 0, &si_vce1_index},
{"radeon_ring_vce2", radeon_debugfs_ring_info, 0, &si_vce2_index},
};
static int radeon_debugfs_sa_info(struct seq_file *m, void *data) diff --git a/drivers/gpu/drm/radeon/radeon_test.c b/drivers/gpu/drm/radeon/radeon_test.c index 12e8099..3a13e0d 100644 --- a/drivers/gpu/drm/radeon/radeon_test.c +++ b/drivers/gpu/drm/radeon/radeon_test.c @@ -257,20 +257,36 @@ static int radeon_test_create_and_emit_fence(struct radeon_device *rdev, struct radeon_ring *ring, struct radeon_fence **fence) {
uint32_t handle = ring->idx ^ 0xdeafbeef; int r; if (ring->idx == R600_RING_TYPE_UVD_INDEX) {
r = radeon_uvd_get_create_msg(rdev, ring->idx, 1, NULL);
r = radeon_uvd_get_create_msg(rdev, ring->idx, handle, NULL); if (r) { DRM_ERROR("Failed to get dummy create msg\n"); return r; }
r = radeon_uvd_get_destroy_msg(rdev, ring->idx, 1, fence);
r = radeon_uvd_get_destroy_msg(rdev, ring->idx, handle, fence); if (r) { DRM_ERROR("Failed to get dummy destroy msg\n"); return r; }
} else if (ring->idx == TN_RING_TYPE_VCE1_INDEX ||
ring->idx == TN_RING_TYPE_VCE2_INDEX) {
r = radeon_vce_get_create_msg(rdev, ring->idx, handle, NULL);
if (r) {
DRM_ERROR("Failed to get dummy create msg\n");
return r;
}
r = radeon_vce_get_destroy_msg(rdev, ring->idx, handle, fence);
if (r) {
DRM_ERROR("Failed to get dummy destroy msg\n");
return r;
}
} else { r = radeon_ring_lock(rdev, ring, 64); if (r) {
@@ -486,6 +502,16 @@ out_cleanup: printk(KERN_WARNING "Error while testing ring sync (%d).\n", r); }
+static bool radeon_test_sync_possible(struct radeon_ring *ringA,
struct radeon_ring *ringB)
+{
if (ringA->idx == TN_RING_TYPE_VCE2_INDEX &&
ringB->idx == TN_RING_TYPE_VCE1_INDEX)
return false;
return true;
+}
void radeon_test_syncing(struct radeon_device *rdev) { int i, j, k; @@ -500,6 +526,9 @@ void radeon_test_syncing(struct radeon_device *rdev) if (!ringB->ready) continue;
if (!radeon_test_sync_possible(ringA, ringB))
continue;
DRM_INFO("Testing syncing between rings %d and %d...\n", i, j); radeon_test_ring_sync(rdev, ringA, ringB);
@@ -511,6 +540,12 @@ void radeon_test_syncing(struct radeon_device *rdev) if (!ringC->ready) continue;
if (!radeon_test_sync_possible(ringA, ringC))
continue;
if (!radeon_test_sync_possible(ringB, ringC))
continue;
DRM_INFO("Testing syncing between rings %d, %d and %d...\n", i, j, k); radeon_test_ring_sync2(rdev, ringA, ringB, ringC);
diff --git a/drivers/gpu/drm/radeon/radeon_vce.c b/drivers/gpu/drm/radeon/radeon_vce.c new file mode 100644 index 0000000..2547d8e --- /dev/null +++ b/drivers/gpu/drm/radeon/radeon_vce.c @@ -0,0 +1,588 @@ +/*
- Copyright 2013 Advanced Micro Devices, Inc.
- All Rights Reserved.
- Permission is hereby granted, free of charge, to any person obtaining a
- copy of this software and associated documentation files (the
- "Software"), to deal in the Software without restriction, including
- without limitation the rights to use, copy, modify, merge, publish,
- distribute, sub license, and/or sell copies of the Software, and to
- permit persons to whom the Software is furnished to do so, subject to
- the following conditions:
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
- DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- USE OR OTHER DEALINGS IN THE SOFTWARE.
- The above copyright notice and this permission notice (including the
- next paragraph) shall be included in all copies or substantial portions
- of the Software.
- Authors: Christian König christian.koenig@amd.com
- */
+#include <linux/firmware.h> +#include <linux/module.h> +#include <drm/drmP.h> +#include <drm/drm.h>
+#include "radeon.h" +#include "radeon_asic.h" +#include "sid.h"
+/* Firmware Names */ +#define FIRMWARE_BONAIRE "radeon/BONAIRE_vce.bin"
+MODULE_FIRMWARE(FIRMWARE_BONAIRE);
+/**
- radeon_vce_init - allocate memory, load vce firmware
- @rdev: radeon_device pointer
- First step to get VCE online, allocate memory and load the firmware
- */
+int radeon_vce_init(struct radeon_device *rdev) +{
unsigned long bo_size;
const char *fw_name;
int i, r;
switch (rdev->family) {
case CHIP_BONAIRE:
case CHIP_KAVERI:
case CHIP_KABINI:
Missing a case for HAWAII here.
Alex
From: Christian König christian.koenig@amd.com
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/radeon/radeon_kms.c | 3 +++ 1 file changed, 3 insertions(+)
diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c index 0e078af..ea018d5 100644 --- a/drivers/gpu/drm/radeon/radeon_kms.c +++ b/drivers/gpu/drm/radeon/radeon_kms.c @@ -433,6 +433,9 @@ static int radeon_info_ioctl(struct drm_device *dev, void *data, struct drm_file case RADEON_CS_RING_UVD: *value = rdev->ring[R600_RING_TYPE_UVD_INDEX].ready; break; + case RADEON_CS_RING_VCE: + *value = rdev->ring[TN_RING_TYPE_VCE1_INDEX].ready; + break; default: return -EINVAL; }
From: Christian König christian.koenig@amd.com
Also make the result available to userspace.
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/radeon/radeon.h | 2 ++ drivers/gpu/drm/radeon/radeon_kms.c | 6 ++++ drivers/gpu/drm/radeon/radeon_vce.c | 56 +++++++++++++++++++++++++++++++++---- include/uapi/drm/radeon_drm.h | 4 +++ 4 files changed, 62 insertions(+), 6 deletions(-)
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index a9295b9..bd181ec 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -1604,6 +1604,8 @@ struct radeon_vce { struct radeon_bo *vcpu_bo; void *cpu_addr; uint64_t gpu_addr; + unsigned fw_version; + unsigned fb_version; atomic_t handles[RADEON_MAX_VCE_HANDLES]; struct drm_file *filp[RADEON_MAX_VCE_HANDLES]; }; diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c index ea018d5..baff98b 100644 --- a/drivers/gpu/drm/radeon/radeon_kms.c +++ b/drivers/gpu/drm/radeon/radeon_kms.c @@ -480,6 +480,12 @@ static int radeon_info_ioctl(struct drm_device *dev, void *data, struct drm_file else *value = rdev->pm.default_sclk * 10; break; + case RADEON_INFO_VCE_FW_VERSION: + *value = rdev->vce.fw_version; + break; + case RADEON_INFO_VCE_FB_VERSION: + *value = rdev->vce.fb_version; + break; default: DRM_DEBUG_KMS("Invalid request %d\n", info->request); return -EINVAL; diff --git a/drivers/gpu/drm/radeon/radeon_vce.c b/drivers/gpu/drm/radeon/radeon_vce.c index 2547d8e..f46563b 100644 --- a/drivers/gpu/drm/radeon/radeon_vce.c +++ b/drivers/gpu/drm/radeon/radeon_vce.c @@ -48,8 +48,11 @@ MODULE_FIRMWARE(FIRMWARE_BONAIRE); */ int radeon_vce_init(struct radeon_device *rdev) { - unsigned long bo_size; - const char *fw_name; + static const char *fw_version = "[ATI LIB=VCEFW,"; + static const char *fb_version = "[ATI LIB=VCEFWSTATS,"; + unsigned long size; + const char *fw_name, *c; + uint8_t start, mid, end; int i, r;
switch (rdev->family) { @@ -70,9 +73,50 @@ int radeon_vce_init(struct radeon_device *rdev) return r; }
- bo_size = RADEON_GPU_PAGE_ALIGN(rdev->vce_fw->size) + - RADEON_VCE_STACK_SIZE + RADEON_VCE_HEAP_SIZE; - r = radeon_bo_create(rdev, bo_size, PAGE_SIZE, true, + /* search for firmware version */ + + size = rdev->vce_fw->size - strlen(fw_version) - 9; + c = rdev->vce_fw->data; + for (;size > 0; --size, ++c) + if (strncmp(c, fw_version, strlen(fw_version)) == 0) + break; + + if (size == 0) + return -EINVAL; + + c += strlen(fw_version); + if (sscanf(c, "%2hhd.%2hhd.%2hhd]", &start, &mid, &end) != 3) + return -EINVAL; + + /* search for feedback version */ + + size = rdev->vce_fw->size - strlen(fb_version) - 3; + c = rdev->vce_fw->data; + for (;size > 0; --size, ++c) + if (strncmp(c, fb_version, strlen(fb_version)) == 0) + break; + + if (size == 0) + return -EINVAL; + + c += strlen(fb_version); + if (sscanf(c, "%2u]", &rdev->vce.fb_version) != 1) + return -EINVAL; + + DRM_INFO("Found VCE firmware/feedback version %hhd.%hhd.%hhd / %d!\n", + start, mid, end, rdev->vce.fb_version); + + rdev->vce.fw_version = (start << 24) | (mid << 16) | (end << 8); + + /* we can only work with this fw version for now */ + if (rdev->vce.fw_version != ((40 << 24) | (2 << 16) | (2 << 8))) + return -EINVAL; + + /* load firmware into VRAM */ + + size = RADEON_GPU_PAGE_ALIGN(rdev->vce_fw->size) + + RADEON_VCE_STACK_SIZE + RADEON_VCE_HEAP_SIZE; + r = radeon_bo_create(rdev, size, PAGE_SIZE, true, RADEON_GEM_DOMAIN_VRAM, NULL, &rdev->vce.vcpu_bo); if (r) { dev_err(rdev->dev, "(%d) failed to allocate VCE bo\n", r); @@ -83,7 +127,7 @@ int radeon_vce_init(struct radeon_device *rdev) if (r) return r;
- memset(rdev->vce.cpu_addr, 0, bo_size); + memset(rdev->vce.cpu_addr, 0, size); memcpy(rdev->vce.cpu_addr, rdev->vce_fw->data, rdev->vce_fw->size);
r = radeon_vce_suspend(rdev); diff --git a/include/uapi/drm/radeon_drm.h b/include/uapi/drm/radeon_drm.h index 6493ca5..1cf18b4 100644 --- a/include/uapi/drm/radeon_drm.h +++ b/include/uapi/drm/radeon_drm.h @@ -988,6 +988,10 @@ struct drm_radeon_cs { #define RADEON_INFO_SI_BACKEND_ENABLED_MASK 0x19 /* max engine clock - needed for OpenCL */ #define RADEON_INFO_MAX_SCLK 0x1a +/* version of VCE firmware */ +#define RADEON_INFO_VCE_FW_VERSION 0x1b +/* version of VCE feedback */ +#define RADEON_INFO_VCE_FB_VERSION 0x1c
struct drm_radeon_info {
From: Alex Deucher alexander.deucher@amd.com
Similar to uvd clock setting.
Signed-off-by: Alex Deucher alexander.deucher@amd.com --- drivers/gpu/drm/radeon/radeon.h | 2 ++ 1 file changed, 2 insertions(+)
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index bd181ec..05fa29e 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -1820,6 +1820,7 @@ struct radeon_asic { void (*set_pcie_lanes)(struct radeon_device *rdev, int lanes); void (*set_clock_gating)(struct radeon_device *rdev, int enable); int (*set_uvd_clocks)(struct radeon_device *rdev, u32 vclk, u32 dclk); + int (*set_vce_clocks)(struct radeon_device *rdev, u32 evclk, u32 ecclk); int (*get_temperature)(struct radeon_device *rdev); } pm; /* dynamic power management */ @@ -2681,6 +2682,7 @@ void radeon_ring_write(struct radeon_ring *ring, uint32_t v); #define radeon_set_pcie_lanes(rdev, l) (rdev)->asic->pm.set_pcie_lanes((rdev), (l)) #define radeon_set_clock_gating(rdev, e) (rdev)->asic->pm.set_clock_gating((rdev), (e)) #define radeon_set_uvd_clocks(rdev, v, d) (rdev)->asic->pm.set_uvd_clocks((rdev), (v), (d)) +#define radeon_set_vce_clocks(rdev, ev, ec) (rdev)->asic->pm.set_vce_clocks((rdev), (ev), (ec)) #define radeon_get_temperature(rdev) (rdev)->asic->pm.get_temperature((rdev)) #define radeon_set_surface_reg(rdev, r, f, p, o, s) ((rdev)->asic->surface.set_reg((rdev), (r), (f), (p), (o), (s))) #define radeon_clear_surface_reg(rdev, r) ((rdev)->asic->surface.clear_reg((rdev), (r)))
From: Alex Deucher alexander.deucher@amd.com
It's needed by by both the asic specific functions and the extended table parser.
Signed-off-by: Alex Deucher alexander.deucher@amd.com --- drivers/gpu/drm/radeon/btc_dpm.c | 4 ++++ drivers/gpu/drm/radeon/ci_dpm.c | 9 ++++++--- drivers/gpu/drm/radeon/cypress_dpm.c | 4 ++++ drivers/gpu/drm/radeon/kv_dpm.c | 7 ++++--- drivers/gpu/drm/radeon/ni_dpm.c | 7 ++++--- drivers/gpu/drm/radeon/r600_dpm.c | 20 ++++++++++++++++++++ drivers/gpu/drm/radeon/r600_dpm.h | 2 ++ drivers/gpu/drm/radeon/rs780_dpm.c | 7 ++++--- drivers/gpu/drm/radeon/rv6xx_dpm.c | 7 ++++--- drivers/gpu/drm/radeon/rv770_dpm.c | 7 ++++--- drivers/gpu/drm/radeon/si_dpm.c | 7 ++++--- drivers/gpu/drm/radeon/sumo_dpm.c | 7 ++++--- drivers/gpu/drm/radeon/trinity_dpm.c | 7 ++++--- 13 files changed, 68 insertions(+), 27 deletions(-)
diff --git a/drivers/gpu/drm/radeon/btc_dpm.c b/drivers/gpu/drm/radeon/btc_dpm.c index 0fbd36f..addf85e 100644 --- a/drivers/gpu/drm/radeon/btc_dpm.c +++ b/drivers/gpu/drm/radeon/btc_dpm.c @@ -2600,6 +2600,10 @@ int btc_dpm_init(struct radeon_device *rdev) pi->min_vddc_in_table = 0; pi->max_vddc_in_table = 0;
+ ret = r600_get_platform_caps(rdev); + if (ret) + return ret; + ret = rv7xx_parse_power_table(rdev); if (ret) return ret; diff --git a/drivers/gpu/drm/radeon/ci_dpm.c b/drivers/gpu/drm/radeon/ci_dpm.c index 8d49104..4a0c401 100644 --- a/drivers/gpu/drm/radeon/ci_dpm.c +++ b/drivers/gpu/drm/radeon/ci_dpm.c @@ -4959,9 +4959,6 @@ static int ci_parse_power_table(struct radeon_device *rdev) if (!rdev->pm.dpm.ps) return -ENOMEM; power_state_offset = (u8 *)state_array->states; - rdev->pm.dpm.platform_caps = le32_to_cpu(power_info->pplib.ulPlatformCaps); - rdev->pm.dpm.backbias_response_time = le16_to_cpu(power_info->pplib.usBackbiasTime); - rdev->pm.dpm.voltage_response_time = le16_to_cpu(power_info->pplib.usVoltageTime); for (i = 0; i < state_array->ucNumEntries; i++) { u8 *idx; power_state = (union pplib_power_state *)power_state_offset; @@ -5077,6 +5074,12 @@ int ci_dpm_init(struct radeon_device *rdev) ci_dpm_fini(rdev); return ret; } + + ret = r600_get_platform_caps(rdev); + if (ret) { + ci_dpm_fini(rdev); + return ret; + } ret = ci_parse_power_table(rdev); if (ret) { ci_dpm_fini(rdev); diff --git a/drivers/gpu/drm/radeon/cypress_dpm.c b/drivers/gpu/drm/radeon/cypress_dpm.c index cf783fc..5a9a5f4 100644 --- a/drivers/gpu/drm/radeon/cypress_dpm.c +++ b/drivers/gpu/drm/radeon/cypress_dpm.c @@ -2036,6 +2036,10 @@ int cypress_dpm_init(struct radeon_device *rdev) pi->min_vddc_in_table = 0; pi->max_vddc_in_table = 0;
+ ret = r600_get_platform_caps(rdev); + if (ret) + return ret; + ret = rv7xx_parse_power_table(rdev); if (ret) return ret; diff --git a/drivers/gpu/drm/radeon/kv_dpm.c b/drivers/gpu/drm/radeon/kv_dpm.c index b6e01d5..3d23c18 100644 --- a/drivers/gpu/drm/radeon/kv_dpm.c +++ b/drivers/gpu/drm/radeon/kv_dpm.c @@ -2538,9 +2538,6 @@ static int kv_parse_power_table(struct radeon_device *rdev) if (!rdev->pm.dpm.ps) return -ENOMEM; power_state_offset = (u8 *)state_array->states; - rdev->pm.dpm.platform_caps = le32_to_cpu(power_info->pplib.ulPlatformCaps); - rdev->pm.dpm.backbias_response_time = le16_to_cpu(power_info->pplib.usBackbiasTime); - rdev->pm.dpm.voltage_response_time = le16_to_cpu(power_info->pplib.usVoltageTime); for (i = 0; i < state_array->ucNumEntries; i++) { u8 *idx; power_state = (union pplib_power_state *)power_state_offset; @@ -2590,6 +2587,10 @@ int kv_dpm_init(struct radeon_device *rdev) return -ENOMEM; rdev->pm.dpm.priv = pi;
+ ret = r600_get_platform_caps(rdev); + if (ret) + return ret; + ret = r600_parse_extended_power_table(rdev); if (ret) return ret; diff --git a/drivers/gpu/drm/radeon/ni_dpm.c b/drivers/gpu/drm/radeon/ni_dpm.c index c351226..ac4e979 100644 --- a/drivers/gpu/drm/radeon/ni_dpm.c +++ b/drivers/gpu/drm/radeon/ni_dpm.c @@ -4026,9 +4026,6 @@ static int ni_parse_power_table(struct radeon_device *rdev) power_info->pplib.ucNumStates, GFP_KERNEL); if (!rdev->pm.dpm.ps) return -ENOMEM; - rdev->pm.dpm.platform_caps = le32_to_cpu(power_info->pplib.ulPlatformCaps); - rdev->pm.dpm.backbias_response_time = le16_to_cpu(power_info->pplib.usBackbiasTime); - rdev->pm.dpm.voltage_response_time = le16_to_cpu(power_info->pplib.usVoltageTime);
for (i = 0; i < power_info->pplib.ucNumStates; i++) { power_state = (union pplib_power_state *) @@ -4090,6 +4087,10 @@ int ni_dpm_init(struct radeon_device *rdev) pi->min_vddc_in_table = 0; pi->max_vddc_in_table = 0;
+ ret = r600_get_platform_caps(rdev); + if (ret) + return ret; + ret = ni_parse_power_table(rdev); if (ret) return ret; diff --git a/drivers/gpu/drm/radeon/r600_dpm.c b/drivers/gpu/drm/radeon/r600_dpm.c index e4cc9b3..e8b6e4a 100644 --- a/drivers/gpu/drm/radeon/r600_dpm.c +++ b/drivers/gpu/drm/radeon/r600_dpm.c @@ -834,6 +834,26 @@ static int r600_parse_clk_voltage_dep_table(struct radeon_clock_voltage_dependen return 0; }
+int r600_get_platform_caps(struct radeon_device *rdev) +{ + struct radeon_mode_info *mode_info = &rdev->mode_info; + union power_info *power_info; + int index = GetIndexIntoMasterTable(DATA, PowerPlayInfo); + u16 data_offset; + u8 frev, crev; + + if (!atom_parse_data_header(mode_info->atom_context, index, NULL, + &frev, &crev, &data_offset)) + return -EINVAL; + power_info = (union power_info *)(mode_info->atom_context->bios + data_offset); + + rdev->pm.dpm.platform_caps = le32_to_cpu(power_info->pplib.ulPlatformCaps); + rdev->pm.dpm.backbias_response_time = le16_to_cpu(power_info->pplib.usBackbiasTime); + rdev->pm.dpm.voltage_response_time = le16_to_cpu(power_info->pplib.usVoltageTime); + + return 0; +} + /* sizeof(ATOM_PPLIB_EXTENDEDHEADER) */ #define SIZE_OF_ATOM_PPLIB_EXTENDEDHEADER_V2 12 #define SIZE_OF_ATOM_PPLIB_EXTENDEDHEADER_V3 14 diff --git a/drivers/gpu/drm/radeon/r600_dpm.h b/drivers/gpu/drm/radeon/r600_dpm.h index 07eab2b..46b9d2a 100644 --- a/drivers/gpu/drm/radeon/r600_dpm.h +++ b/drivers/gpu/drm/radeon/r600_dpm.h @@ -215,6 +215,8 @@ void r600_stop_dpm(struct radeon_device *rdev);
bool r600_is_internal_thermal_sensor(enum radeon_int_thermal_type sensor);
+int r600_get_platform_caps(struct radeon_device *rdev); + int r600_parse_extended_power_table(struct radeon_device *rdev); void r600_free_extended_power_table(struct radeon_device *rdev);
diff --git a/drivers/gpu/drm/radeon/rs780_dpm.c b/drivers/gpu/drm/radeon/rs780_dpm.c index 8512085..02f7710 100644 --- a/drivers/gpu/drm/radeon/rs780_dpm.c +++ b/drivers/gpu/drm/radeon/rs780_dpm.c @@ -807,9 +807,6 @@ static int rs780_parse_power_table(struct radeon_device *rdev) power_info->pplib.ucNumStates, GFP_KERNEL); if (!rdev->pm.dpm.ps) return -ENOMEM; - rdev->pm.dpm.platform_caps = le32_to_cpu(power_info->pplib.ulPlatformCaps); - rdev->pm.dpm.backbias_response_time = le16_to_cpu(power_info->pplib.usBackbiasTime); - rdev->pm.dpm.voltage_response_time = le16_to_cpu(power_info->pplib.usVoltageTime);
for (i = 0; i < power_info->pplib.ucNumStates; i++) { power_state = (union pplib_power_state *) @@ -859,6 +856,10 @@ int rs780_dpm_init(struct radeon_device *rdev) return -ENOMEM; rdev->pm.dpm.priv = pi;
+ ret = r600_get_platform_caps(rdev); + if (ret) + return ret; + ret = rs780_parse_power_table(rdev); if (ret) return ret; diff --git a/drivers/gpu/drm/radeon/rv6xx_dpm.c b/drivers/gpu/drm/radeon/rv6xx_dpm.c index bebf31c..e7045b0 100644 --- a/drivers/gpu/drm/radeon/rv6xx_dpm.c +++ b/drivers/gpu/drm/radeon/rv6xx_dpm.c @@ -1891,9 +1891,6 @@ static int rv6xx_parse_power_table(struct radeon_device *rdev) power_info->pplib.ucNumStates, GFP_KERNEL); if (!rdev->pm.dpm.ps) return -ENOMEM; - rdev->pm.dpm.platform_caps = le32_to_cpu(power_info->pplib.ulPlatformCaps); - rdev->pm.dpm.backbias_response_time = le16_to_cpu(power_info->pplib.usBackbiasTime); - rdev->pm.dpm.voltage_response_time = le16_to_cpu(power_info->pplib.usVoltageTime);
for (i = 0; i < power_info->pplib.ucNumStates; i++) { power_state = (union pplib_power_state *) @@ -1943,6 +1940,10 @@ int rv6xx_dpm_init(struct radeon_device *rdev) return -ENOMEM; rdev->pm.dpm.priv = pi;
+ ret = r600_get_platform_caps(rdev); + if (ret) + return ret; + ret = rv6xx_parse_power_table(rdev); if (ret) return ret; diff --git a/drivers/gpu/drm/radeon/rv770_dpm.c b/drivers/gpu/drm/radeon/rv770_dpm.c index 80c595a..b76dab7 100644 --- a/drivers/gpu/drm/radeon/rv770_dpm.c +++ b/drivers/gpu/drm/radeon/rv770_dpm.c @@ -2282,9 +2282,6 @@ int rv7xx_parse_power_table(struct radeon_device *rdev) power_info->pplib.ucNumStates, GFP_KERNEL); if (!rdev->pm.dpm.ps) return -ENOMEM; - rdev->pm.dpm.platform_caps = le32_to_cpu(power_info->pplib.ulPlatformCaps); - rdev->pm.dpm.backbias_response_time = le16_to_cpu(power_info->pplib.usBackbiasTime); - rdev->pm.dpm.voltage_response_time = le16_to_cpu(power_info->pplib.usVoltageTime);
for (i = 0; i < power_info->pplib.ucNumStates; i++) { power_state = (union pplib_power_state *) @@ -2362,6 +2359,10 @@ int rv770_dpm_init(struct radeon_device *rdev) pi->min_vddc_in_table = 0; pi->max_vddc_in_table = 0;
+ ret = r600_get_platform_caps(rdev); + if (ret) + return ret; + ret = rv7xx_parse_power_table(rdev); if (ret) return ret; diff --git a/drivers/gpu/drm/radeon/si_dpm.c b/drivers/gpu/drm/radeon/si_dpm.c index 0471501..cefc79a 100644 --- a/drivers/gpu/drm/radeon/si_dpm.c +++ b/drivers/gpu/drm/radeon/si_dpm.c @@ -6271,9 +6271,6 @@ static int si_parse_power_table(struct radeon_device *rdev) if (!rdev->pm.dpm.ps) return -ENOMEM; power_state_offset = (u8 *)state_array->states; - rdev->pm.dpm.platform_caps = le32_to_cpu(power_info->pplib.ulPlatformCaps); - rdev->pm.dpm.backbias_response_time = le16_to_cpu(power_info->pplib.usBackbiasTime); - rdev->pm.dpm.voltage_response_time = le16_to_cpu(power_info->pplib.usVoltageTime); for (i = 0; i < state_array->ucNumEntries; i++) { u8 *idx; power_state = (union pplib_power_state *)power_state_offset; @@ -6350,6 +6347,10 @@ int si_dpm_init(struct radeon_device *rdev) pi->min_vddc_in_table = 0; pi->max_vddc_in_table = 0;
+ ret = r600_get_platform_caps(rdev); + if (ret) + return ret; + ret = si_parse_power_table(rdev); if (ret) return ret; diff --git a/drivers/gpu/drm/radeon/sumo_dpm.c b/drivers/gpu/drm/radeon/sumo_dpm.c index f121efe..af4dd6a 100644 --- a/drivers/gpu/drm/radeon/sumo_dpm.c +++ b/drivers/gpu/drm/radeon/sumo_dpm.c @@ -1484,9 +1484,6 @@ static int sumo_parse_power_table(struct radeon_device *rdev) if (!rdev->pm.dpm.ps) return -ENOMEM; power_state_offset = (u8 *)state_array->states; - rdev->pm.dpm.platform_caps = le32_to_cpu(power_info->pplib.ulPlatformCaps); - rdev->pm.dpm.backbias_response_time = le16_to_cpu(power_info->pplib.usBackbiasTime); - rdev->pm.dpm.voltage_response_time = le16_to_cpu(power_info->pplib.usVoltageTime); for (i = 0; i < state_array->ucNumEntries; i++) { u8 *idx; power_state = (union pplib_power_state *)power_state_offset; @@ -1772,6 +1769,10 @@ int sumo_dpm_init(struct radeon_device *rdev)
sumo_construct_boot_and_acpi_state(rdev);
+ ret = r600_get_platform_caps(rdev); + if (ret) + return ret; + ret = sumo_parse_power_table(rdev); if (ret) return ret; diff --git a/drivers/gpu/drm/radeon/trinity_dpm.c b/drivers/gpu/drm/radeon/trinity_dpm.c index 2d44719..ba5a47e 100644 --- a/drivers/gpu/drm/radeon/trinity_dpm.c +++ b/drivers/gpu/drm/radeon/trinity_dpm.c @@ -1694,9 +1694,6 @@ static int trinity_parse_power_table(struct radeon_device *rdev) if (!rdev->pm.dpm.ps) return -ENOMEM; power_state_offset = (u8 *)state_array->states; - rdev->pm.dpm.platform_caps = le32_to_cpu(power_info->pplib.ulPlatformCaps); - rdev->pm.dpm.backbias_response_time = le16_to_cpu(power_info->pplib.usBackbiasTime); - rdev->pm.dpm.voltage_response_time = le16_to_cpu(power_info->pplib.usVoltageTime); for (i = 0; i < state_array->ucNumEntries; i++) { u8 *idx; power_state = (union pplib_power_state *)power_state_offset; @@ -1895,6 +1892,10 @@ int trinity_dpm_init(struct radeon_device *rdev)
trinity_construct_boot_state(rdev);
+ ret = r600_get_platform_caps(rdev); + if (ret) + return ret; + ret = trinity_parse_power_table(rdev); if (ret) return ret;
From: Alex Deucher alexander.deucher@amd.com
Signed-off-by: Alex Deucher alexander.deucher@amd.com --- drivers/gpu/drm/radeon/radeon.h | 12 ++++++++++++ drivers/gpu/drm/radeon/radeon_pm.c | 8 ++++++++ 2 files changed, 20 insertions(+)
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 05fa29e..7edd13a 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -1257,6 +1257,15 @@ enum radeon_dpm_event_src { RADEON_DPM_EVENT_SRC_DIGIAL_OR_EXTERNAL = 4 };
+enum radeon_vce_level { + RADEON_VCE_LEVEL_AC_ALL = 0, /* AC, All cases */ + RADEON_VCE_LEVEL_DC_EE = 1, /* DC, entropy encoding */ + RADEON_VCE_LEVEL_DC_LL_LOW = 2, /* DC, low latency queue, res <= 720 */ + RADEON_VCE_LEVEL_DC_LL_HIGH = 3, /* DC, low latency queue, 1080 >= res > 720 */ + RADEON_VCE_LEVEL_DC_GP_LOW = 4, /* DC, general purpose queue, res <= 720 */ + RADEON_VCE_LEVEL_DC_GP_HIGH = 5, /* DC, general purpose queue, 1080 >= res > 720 */ +}; + struct radeon_ps { u32 caps; /* vbios flags */ u32 class; /* vbios flags */ @@ -1267,6 +1276,8 @@ struct radeon_ps { /* VCE clocks */ u32 evclk; u32 ecclk; + bool vce_active; + enum radeon_vce_level vce_level; /* asic priv */ void *ps_priv; }; @@ -1478,6 +1489,7 @@ struct radeon_dpm { /* special states active */ bool thermal_active; bool uvd_active; + bool vce_active; /* thermal handling */ struct radeon_dpm_thermal thermal; /* forced levels */ diff --git a/drivers/gpu/drm/radeon/radeon_pm.c b/drivers/gpu/drm/radeon/radeon_pm.c index 8e8153e..a4687e7 100644 --- a/drivers/gpu/drm/radeon/radeon_pm.c +++ b/drivers/gpu/drm/radeon/radeon_pm.c @@ -826,6 +826,9 @@ static void radeon_dpm_change_power_state_locked(struct radeon_device *rdev)
/* no need to reprogram if nothing changed unless we are on BTC+ */ if (rdev->pm.dpm.current_ps == rdev->pm.dpm.requested_ps) { + /* vce just modifies an existing state so force a change */ + if (ps->vce_active != rdev->pm.dpm.vce_active) + goto force; if ((rdev->family < CHIP_BARTS) || (rdev->flags & RADEON_IS_IGP)) { /* for pre-BTC and APUs if the num crtcs changed but state is the same, * all we need to do is update the display configuration. @@ -862,16 +865,21 @@ static void radeon_dpm_change_power_state_locked(struct radeon_device *rdev) } }
+force: if (radeon_dpm == 1) { printk("switching from power state:\n"); radeon_dpm_print_power_state(rdev, rdev->pm.dpm.current_ps); printk("switching to power state:\n"); radeon_dpm_print_power_state(rdev, rdev->pm.dpm.requested_ps); } + mutex_lock(&rdev->ddev->struct_mutex); down_write(&rdev->pm.mclk_lock); mutex_lock(&rdev->ring_lock);
+ /* update whether vce is active */ + ps->vce_active = rdev->pm.dpm.vce_active; + ret = radeon_dpm_pre_set_power_state(rdev); if (ret) goto done;
From: Alex Deucher alexander.deucher@amd.com
Signed-off-by: Alex Deucher alexander.deucher@amd.com --- drivers/gpu/drm/radeon/r600_dpm.c | 28 +++++++++++++++++++++++++++- drivers/gpu/drm/radeon/radeon.h | 16 ++++++++++++++++ 2 files changed, 43 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/radeon/r600_dpm.c b/drivers/gpu/drm/radeon/r600_dpm.c index e8b6e4a..cbf7e32 100644 --- a/drivers/gpu/drm/radeon/r600_dpm.c +++ b/drivers/gpu/drm/radeon/r600_dpm.c @@ -1063,7 +1063,15 @@ int r600_parse_extended_power_table(struct radeon_device *rdev) (mode_info->atom_context->bios + data_offset + le16_to_cpu(ext_hdr->usVCETableOffset) + 1 + 1 + array->ucNumEntries * sizeof(VCEClockInfo)); + ATOM_PPLIB_VCE_State_Table *states = + (ATOM_PPLIB_VCE_State_Table *) + (mode_info->atom_context->bios + data_offset + + le16_to_cpu(ext_hdr->usVCETableOffset) + 1 + + 1 + (array->ucNumEntries * sizeof (VCEClockInfo)) + + 1 + (limits->numEntries * sizeof(ATOM_PPLIB_VCE_Clock_Voltage_Limit_Record))); ATOM_PPLIB_VCE_Clock_Voltage_Limit_Record *entry; + ATOM_PPLIB_VCE_State_Record *state_entry; + VCEClockInfo *vce_clk; u32 size = limits->numEntries * sizeof(struct radeon_vce_clock_voltage_dependency_entry); rdev->pm.dpm.dyn_state.vce_clock_voltage_dependency_table.entries = @@ -1075,8 +1083,9 @@ int r600_parse_extended_power_table(struct radeon_device *rdev) rdev->pm.dpm.dyn_state.vce_clock_voltage_dependency_table.count = limits->numEntries; entry = &limits->entries[0]; + state_entry = &states->entries[0]; for (i = 0; i < limits->numEntries; i++) { - VCEClockInfo *vce_clk = (VCEClockInfo *) + vce_clk = (VCEClockInfo *) ((u8 *)&array->entries[0] + (entry->ucVCEClockInfoIndex * sizeof(VCEClockInfo))); rdev->pm.dpm.dyn_state.vce_clock_voltage_dependency_table.entries[i].evclk = @@ -1088,6 +1097,23 @@ int r600_parse_extended_power_table(struct radeon_device *rdev) entry = (ATOM_PPLIB_VCE_Clock_Voltage_Limit_Record *) ((u8 *)entry + sizeof(ATOM_PPLIB_VCE_Clock_Voltage_Limit_Record)); } + for (i = 0; i < states->numEntries; i++) { + if (i >= RADEON_MAX_VCE_LEVELS) + break; + vce_clk = (VCEClockInfo *) + ((u8 *)&array->entries[0] + + (state_entry->ucVCEClockInfoIndex * sizeof(VCEClockInfo))); + rdev->pm.dpm.vce_states[i].evclk = + le16_to_cpu(vce_clk->usEVClkLow) | (vce_clk->ucEVClkHigh << 16); + rdev->pm.dpm.vce_states[i].ecclk = + le16_to_cpu(vce_clk->usECClkLow) | (vce_clk->ucECClkHigh << 16); + rdev->pm.dpm.vce_states[i].clk_idx = + state_entry->ucClockInfoIndex & 0x3f; + rdev->pm.dpm.vce_states[i].pstate = + (state_entry->ucClockInfoIndex & 0xc0) >> 6; + state_entry = (ATOM_PPLIB_VCE_State_Record *) + ((u8 *)state_entry + sizeof(ATOM_PPLIB_VCE_State_Record)); + } } if ((le16_to_cpu(ext_hdr->usSize) >= SIZE_OF_ATOM_PPLIB_EXTENDEDHEADER_V3) && ext_hdr->usUVDTableOffset) { diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 7edd13a..7e34ecd 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -1257,6 +1257,8 @@ enum radeon_dpm_event_src { RADEON_DPM_EVENT_SRC_DIGIAL_OR_EXTERNAL = 4 };
+#define RADEON_MAX_VCE_LEVELS 6 + enum radeon_vce_level { RADEON_VCE_LEVEL_AC_ALL = 0, /* AC, All cases */ RADEON_VCE_LEVEL_DC_EE = 1, /* DC, entropy encoding */ @@ -1452,6 +1454,17 @@ enum radeon_dpm_forced_level { RADEON_DPM_FORCED_LEVEL_HIGH = 2, };
+struct radeon_vce_state { + /* vce clocks */ + u32 evclk; + u32 ecclk; + /* gpu clocks */ + u32 sclk; + u32 mclk; + u8 clk_idx; + u8 pstate; +}; + struct radeon_dpm { struct radeon_ps *ps; /* number of valid power states */ @@ -1464,6 +1477,9 @@ struct radeon_dpm { struct radeon_ps *boot_ps; /* default uvd power state */ struct radeon_ps *uvd_ps; + /* vce requirements */ + struct radeon_vce_state vce_states[RADEON_MAX_VCE_LEVELS]; + enum radeon_vce_level vce_level; enum radeon_pm_state_type state; enum radeon_pm_state_type user_state; u32 platform_caps;
From: Alex Deucher alexander.deucher@amd.com
Signed-off-by: Alex Deucher alexander.deucher@amd.com --- drivers/gpu/drm/radeon/cik.c | 35 +++++++++++++++++++++++++++++++++++ drivers/gpu/drm/radeon/cikd.h | 6 ++++++ drivers/gpu/drm/radeon/radeon_asic.c | 2 ++ drivers/gpu/drm/radeon/radeon_asic.h | 1 + 4 files changed, 44 insertions(+)
diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index be6eb4d..ecb16b1 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -8925,6 +8925,41 @@ int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk) return r; }
+int cik_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk) +{ + int r, i; + struct atom_clock_dividers dividers; + u32 tmp; + + r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK, + ecclk, false, ÷rs); + if (r) + return r; + + for (i = 0; i < 100; i++) { + if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS) + break; + mdelay(10); + } + if (i == 100) + return -ETIMEDOUT; + + tmp = RREG32_SMC(CG_ECLK_CNTL); + tmp &= ~(ECLK_DIR_CNTL_EN|ECLK_DIVIDER_MASK); + tmp |= dividers.post_divider; + WREG32_SMC(CG_ECLK_CNTL, tmp); + + for (i = 0; i < 100; i++) { + if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS) + break; + mdelay(10); + } + if (i == 100) + return -ETIMEDOUT; + + return 0; +} + static void cik_pcie_gen3_enable(struct radeon_device *rdev) { struct pci_dev *root = rdev->pdev->bus->self; diff --git a/drivers/gpu/drm/radeon/cikd.h b/drivers/gpu/drm/radeon/cikd.h index 459ae02..ee16380 100644 --- a/drivers/gpu/drm/radeon/cikd.h +++ b/drivers/gpu/drm/radeon/cikd.h @@ -203,6 +203,12 @@ #define CTF_TEMP_MASK 0x0003fe00 #define CTF_TEMP_SHIFT 9
+#define CG_ECLK_CNTL 0xC05000AC +# define ECLK_DIVIDER_MASK 0x7f +# define ECLK_DIR_CNTL_EN (1 << 8) +#define CG_ECLK_STATUS 0xC05000B0 +# define ECLK_STATUS (1 << 0) + #define CG_SPLL_FUNC_CNTL 0xC0500140 #define SPLL_RESET (1 << 0) #define SPLL_PWRON (1 << 1) diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c index e9eaba99..db1df9a 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.c +++ b/drivers/gpu/drm/radeon/radeon_asic.c @@ -2076,6 +2076,7 @@ static struct radeon_asic ci_asic = { .set_pcie_lanes = NULL, .set_clock_gating = NULL, .set_uvd_clocks = &cik_set_uvd_clocks, + .set_vce_clocks = &cik_set_vce_clocks, .get_temperature = &ci_get_temp, }, .dpm = { @@ -2180,6 +2181,7 @@ static struct radeon_asic kv_asic = { .set_pcie_lanes = NULL, .set_clock_gating = NULL, .set_uvd_clocks = &cik_set_uvd_clocks, + .set_vce_clocks = &cik_set_vce_clocks, .get_temperature = &kv_get_temp, }, .dpm = { diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h index 2017efd..385bb6c 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.h +++ b/drivers/gpu/drm/radeon/radeon_asic.h @@ -715,6 +715,7 @@ u32 cik_get_xclk(struct radeon_device *rdev); uint32_t cik_pciep_rreg(struct radeon_device *rdev, uint32_t reg); void cik_pciep_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v); int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk); +int cik_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk); void cik_sdma_fence_ring_emit(struct radeon_device *rdev, struct radeon_fence *fence); bool cik_sdma_semaphore_ring_emit(struct radeon_device *rdev,
From: Alex Deucher alexander.deucher@amd.com
Signed-off-by: Alex Deucher alexander.deucher@amd.com --- drivers/gpu/drm/radeon/ci_dpm.c | 50 ++++++++++++++++++++++++++++++++--------- 1 file changed, 40 insertions(+), 10 deletions(-)
diff --git a/drivers/gpu/drm/radeon/ci_dpm.c b/drivers/gpu/drm/radeon/ci_dpm.c index 4a0c401..c91d0ee 100644 --- a/drivers/gpu/drm/radeon/ci_dpm.c +++ b/drivers/gpu/drm/radeon/ci_dpm.c @@ -746,6 +746,14 @@ static void ci_apply_state_adjust_rules(struct radeon_device *rdev, u32 max_sclk_vddc, max_mclk_vddci, max_mclk_vddc; int i;
+ if (rps->vce_active) { + rps->evclk = rdev->pm.dpm.vce_states[rdev->pm.dpm.vce_level].evclk; + rps->ecclk = rdev->pm.dpm.vce_states[rdev->pm.dpm.vce_level].ecclk; + } else { + rps->evclk = 0; + rps->ecclk = 0; + } + if ((rdev->pm.dpm.new_active_crtc_count > 1) || ci_dpm_vblank_too_short(rdev)) disable_mclk_switching = true; @@ -804,6 +812,13 @@ static void ci_apply_state_adjust_rules(struct radeon_device *rdev, sclk = ps->performance_levels[0].sclk; }
+ if (rps->vce_active) { + if (sclk < rdev->pm.dpm.vce_states[rdev->pm.dpm.vce_level].sclk) + sclk = rdev->pm.dpm.vce_states[rdev->pm.dpm.vce_level].sclk; + if (mclk < rdev->pm.dpm.vce_states[rdev->pm.dpm.vce_level].mclk) + mclk = rdev->pm.dpm.vce_states[rdev->pm.dpm.vce_level].mclk; + } + ps->performance_levels[0].sclk = sclk; ps->performance_levels[0].mclk = mclk;
@@ -3468,7 +3483,6 @@ static int ci_enable_uvd_dpm(struct radeon_device *rdev, bool enable) 0 : -EINVAL; }
-#if 0 static int ci_enable_vce_dpm(struct radeon_device *rdev, bool enable) { struct ci_power_info *pi = ci_get_pi(rdev); @@ -3501,6 +3515,7 @@ static int ci_enable_vce_dpm(struct radeon_device *rdev, bool enable) 0 : -EINVAL; }
+#if 0 static int ci_enable_samu_dpm(struct radeon_device *rdev, bool enable) { struct ci_power_info *pi = ci_get_pi(rdev); @@ -3587,7 +3602,6 @@ static int ci_update_uvd_dpm(struct radeon_device *rdev, bool gate) return ci_enable_uvd_dpm(rdev, !gate); }
-#if 0 static u8 ci_get_vce_boot_level(struct radeon_device *rdev) { u8 i; @@ -3608,13 +3622,11 @@ static int ci_update_vce_dpm(struct radeon_device *rdev, struct radeon_ps *radeon_current_state) { struct ci_power_info *pi = ci_get_pi(rdev); - bool new_vce_clock_non_zero = (radeon_new_state->evclk != 0); - bool old_vce_clock_non_zero = (radeon_current_state->evclk != 0); int ret = 0; u32 tmp;
- if (new_vce_clock_non_zero != old_vce_clock_non_zero) { - if (new_vce_clock_non_zero) { + if (radeon_current_state->evclk != radeon_new_state->evclk) { + if (radeon_new_state->evclk) { pi->smc_state_table.VceBootLevel = ci_get_vce_boot_level(rdev);
tmp = RREG32_SMC(DPM_TABLE_475); @@ -3630,6 +3642,7 @@ static int ci_update_vce_dpm(struct radeon_device *rdev, return ret; }
+#if 0 static int ci_update_samu_dpm(struct radeon_device *rdev, bool gate) { return ci_enable_samu_dpm(rdev, gate); @@ -4752,13 +4765,13 @@ int ci_dpm_set_power_state(struct radeon_device *rdev) DRM_ERROR("ci_generate_dpm_level_enable_mask failed\n"); return ret; } -#if 0 + ret = ci_update_vce_dpm(rdev, new_ps, old_ps); if (ret) { DRM_ERROR("ci_update_vce_dpm failed\n"); return ret; } -#endif + ret = ci_update_sclk_t(rdev); if (ret) { DRM_ERROR("ci_update_sclk_t failed\n"); @@ -4995,6 +5008,21 @@ static int ci_parse_power_table(struct radeon_device *rdev) power_state_offset += 2 + power_state->v2.ucNumDPMLevels; } rdev->pm.dpm.num_ps = state_array->ucNumEntries; + + /* fill in the vce power states */ + for (i = 0; i < RADEON_MAX_VCE_LEVELS; i++) { + u32 sclk, mclk; + clock_array_index = rdev->pm.dpm.vce_states[i].clk_idx; + clock_info = (union pplib_clock_info *) + &clock_info_array->clockInfo[clock_array_index * clock_info_array->ucEntrySize]; + sclk = le16_to_cpu(clock_info->ci.usEngineClockLow); + sclk |= clock_info->ci.ucEngineClockHigh << 16; + mclk = le16_to_cpu(clock_info->ci.usMemoryClockLow); + mclk |= clock_info->ci.ucMemoryClockHigh << 16; + rdev->pm.dpm.vce_states[i].sclk = sclk; + rdev->pm.dpm.vce_states[i].mclk = mclk; + } + return 0; }
@@ -5080,12 +5108,14 @@ int ci_dpm_init(struct radeon_device *rdev) ci_dpm_fini(rdev); return ret; } - ret = ci_parse_power_table(rdev); + + ret = r600_parse_extended_power_table(rdev); if (ret) { ci_dpm_fini(rdev); return ret; } - ret = r600_parse_extended_power_table(rdev); + + ret = ci_parse_power_table(rdev); if (ret) { ci_dpm_fini(rdev); return ret;
From: Alex Deucher alexander.deucher@amd.com
VCE dpm dynamically adjusts the uvd clocks on demand.
Signed-off-by: Alex Deucher alexander.deucher@amd.com --- drivers/gpu/drm/radeon/ci_dpm.c | 1 + 1 file changed, 1 insertion(+)
diff --git a/drivers/gpu/drm/radeon/ci_dpm.c b/drivers/gpu/drm/radeon/ci_dpm.c index c91d0ee..6669d32 100644 --- a/drivers/gpu/drm/radeon/ci_dpm.c +++ b/drivers/gpu/drm/radeon/ci_dpm.c @@ -5153,6 +5153,7 @@ int ci_dpm_init(struct radeon_device *rdev) pi->caps_sclk_throttle_low_notification = false;
pi->caps_uvd_dpm = true; + pi->caps_vce_dpm = true;
ci_get_leakage_voltages(rdev); ci_patch_dependency_tables_with_leakage(rdev);
From: Alex Deucher alexander.deucher@amd.com
TODO: plug in cik_vce_suspend()/resume() so we can enable vce powergating. See XXX in code.
Signed-off-by: Alex Deucher alexander.deucher@amd.com --- drivers/gpu/drm/radeon/kv_dpm.c | 46 +++++++++++++++++++++++++++++++---------- 1 file changed, 35 insertions(+), 11 deletions(-)
diff --git a/drivers/gpu/drm/radeon/kv_dpm.c b/drivers/gpu/drm/radeon/kv_dpm.c index 3d23c18..a56398d 100644 --- a/drivers/gpu/drm/radeon/kv_dpm.c +++ b/drivers/gpu/drm/radeon/kv_dpm.c @@ -1338,13 +1338,11 @@ static int kv_enable_uvd_dpm(struct radeon_device *rdev, bool enable) PPSMC_MSG_UVDDPM_Enable : PPSMC_MSG_UVDDPM_Disable); }
-#if 0 static int kv_enable_vce_dpm(struct radeon_device *rdev, bool enable) { return kv_notify_message_to_smu(rdev, enable ? PPSMC_MSG_VCEDPM_Enable : PPSMC_MSG_VCEDPM_Disable); } -#endif
static int kv_enable_samu_dpm(struct radeon_device *rdev, bool enable) { @@ -1389,7 +1387,6 @@ static int kv_update_uvd_dpm(struct radeon_device *rdev, bool gate) return kv_enable_uvd_dpm(rdev, !gate); }
-#if 0 static u8 kv_get_vce_boot_level(struct radeon_device *rdev) { u8 i; @@ -1414,6 +1411,8 @@ static int kv_update_vce_dpm(struct radeon_device *rdev, int ret;
if (radeon_new_state->evclk > 0 && radeon_current_state->evclk == 0) { + kv_dpm_powergate_vce(rdev, false); + /* XXX cik_vce_resume(); */ if (pi->caps_stable_p_state) pi->vce_boot_level = table->count - 1; else @@ -1436,11 +1435,12 @@ static int kv_update_vce_dpm(struct radeon_device *rdev, kv_enable_vce_dpm(rdev, true); } else if (radeon_new_state->evclk == 0 && radeon_current_state->evclk > 0) { kv_enable_vce_dpm(rdev, false); + /* XXX cik_vce_suspend(); */ + kv_dpm_powergate_vce(rdev, true); }
return 0; } -#endif
static int kv_update_samu_dpm(struct radeon_device *rdev, bool gate) { @@ -1768,7 +1768,7 @@ int kv_dpm_set_power_state(struct radeon_device *rdev) { struct kv_power_info *pi = kv_get_pi(rdev); struct radeon_ps *new_ps = &pi->requested_rps; - /*struct radeon_ps *old_ps = &pi->current_rps;*/ + struct radeon_ps *old_ps = &pi->current_rps; int ret;
if (pi->bapm_enable) { @@ -1798,13 +1798,12 @@ int kv_dpm_set_power_state(struct radeon_device *rdev) kv_set_enabled_levels(rdev); kv_force_lowest_valid(rdev); kv_unforce_levels(rdev); -#if 0 + ret = kv_update_vce_dpm(rdev, new_ps, old_ps); if (ret) { DRM_ERROR("kv_update_vce_dpm failed\n"); return ret; } -#endif kv_update_sclk_t(rdev); } } else { @@ -1823,13 +1822,11 @@ int kv_dpm_set_power_state(struct radeon_device *rdev) kv_program_nbps_index_settings(rdev, new_ps); kv_freeze_sclk_dpm(rdev, false); kv_set_enabled_levels(rdev); -#if 0 ret = kv_update_vce_dpm(rdev, new_ps, old_ps); if (ret) { DRM_ERROR("kv_update_vce_dpm failed\n"); return ret; } -#endif kv_update_acp_boot_level(rdev); kv_update_sclk_t(rdev); kv_enable_nb_dpm(rdev); @@ -2037,6 +2034,14 @@ static void kv_apply_state_adjust_rules(struct radeon_device *rdev, struct radeon_clock_and_voltage_limits *max_limits = &rdev->pm.dpm.dyn_state.max_clock_voltage_on_ac;
+ if (new_rps->vce_active) { + new_rps->evclk = rdev->pm.dpm.vce_states[rdev->pm.dpm.vce_level].evclk; + new_rps->ecclk = rdev->pm.dpm.vce_states[rdev->pm.dpm.vce_level].ecclk; + } else { + new_rps->evclk = 0; + new_rps->ecclk = 0; + } + mclk = max_limits->mclk; sclk = min_sclk;
@@ -2056,6 +2061,11 @@ static void kv_apply_state_adjust_rules(struct radeon_device *rdev, sclk = stable_p_state_sclk; }
+ if (new_rps->vce_active) { + if (sclk < rdev->pm.dpm.vce_states[rdev->pm.dpm.vce_level].sclk) + sclk = rdev->pm.dpm.vce_states[rdev->pm.dpm.vce_level].sclk; + } + ps->need_dfs_bypass = true;
for (i = 0; i < ps->num_levels; i++) { @@ -2092,7 +2102,8 @@ static void kv_apply_state_adjust_rules(struct radeon_device *rdev, } }
- pi->video_start = new_rps->dclk || new_rps->vclk; + pi->video_start = new_rps->dclk || new_rps->vclk || + new_rps->evclk || new_rps->ecclk;
if ((new_rps->class & ATOM_PPLIB_CLASSIFICATION_UI_MASK) == ATOM_PPLIB_CLASSIFICATION_UI_BATTERY) @@ -2574,6 +2585,19 @@ static int kv_parse_power_table(struct radeon_device *rdev) power_state_offset += 2 + power_state->v2.ucNumDPMLevels; } rdev->pm.dpm.num_ps = state_array->ucNumEntries; + + /* fill in the vce power states */ + for (i = 0; i < RADEON_MAX_VCE_LEVELS; i++) { + u32 sclk; + clock_array_index = rdev->pm.dpm.vce_states[i].clk_idx; + clock_info = (union pplib_clock_info *) + &clock_info_array->clockInfo[clock_array_index * clock_info_array->ucEntrySize]; + sclk = le16_to_cpu(clock_info->sumo.usEngineClockLow); + sclk |= clock_info->sumo.ucEngineClockHigh << 16; + rdev->pm.dpm.vce_states[i].sclk = sclk; + rdev->pm.dpm.vce_states[i].mclk = 0; + } + return 0; }
@@ -2624,7 +2648,7 @@ int kv_dpm_init(struct radeon_device *rdev) pi->caps_fps = false; /* true? */ pi->caps_uvd_pg = true; pi->caps_uvd_dpm = true; - pi->caps_vce_pg = false; + pi->caps_vce_pg = false; /* XXX true */ pi->caps_samu_pg = false; pi->caps_acp_pg = false; pi->caps_stable_p_state = false;
From: Alex Deucher alexander.deucher@amd.com
enable vce states when vce is active. When vce is active, it adjusts the currently selected state (performance, battery, uvd, etc.)
v2: add code comments
Signed-off-by: Alex Deucher alexander.deucher@amd.com Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/radeon/radeon.h | 3 ++ drivers/gpu/drm/radeon/radeon_cs.c | 3 ++ drivers/gpu/drm/radeon/radeon_pm.c | 17 ++++++++++ drivers/gpu/drm/radeon/radeon_vce.c | 62 +++++++++++++++++++++++++++++++++++++ 4 files changed, 85 insertions(+)
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 7e34ecd..63777e4 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -1516,6 +1516,7 @@ struct radeon_dpm { };
void radeon_dpm_enable_uvd(struct radeon_device *rdev, bool enable); +void radeon_dpm_enable_vce(struct radeon_device *rdev, bool enable);
struct radeon_pm { struct mutex mutex; @@ -1636,6 +1637,7 @@ struct radeon_vce { unsigned fb_version; atomic_t handles[RADEON_MAX_VCE_HANDLES]; struct drm_file *filp[RADEON_MAX_VCE_HANDLES]; + struct delayed_work idle_work; };
int radeon_vce_init(struct radeon_device *rdev); @@ -1647,6 +1649,7 @@ int radeon_vce_get_create_msg(struct radeon_device *rdev, int ring, int radeon_vce_get_destroy_msg(struct radeon_device *rdev, int ring, uint32_t handle, struct radeon_fence **fence); void radeon_vce_free_handles(struct radeon_device *rdev, struct drm_file *filp); +void radeon_vce_note_usage(struct radeon_device *rdev); int radeon_vce_cs_reloc(struct radeon_cs_parser *p, int lo, int hi); int radeon_vce_cs_parse(struct radeon_cs_parser *p); bool radeon_vce_semaphore_emit(struct radeon_device *rdev, diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c index 701ee79..f28a8d8 100644 --- a/drivers/gpu/drm/radeon/radeon_cs.c +++ b/drivers/gpu/drm/radeon/radeon_cs.c @@ -347,6 +347,9 @@ static int radeon_cs_ib_chunk(struct radeon_device *rdev,
if (parser->ring == R600_RING_TYPE_UVD_INDEX) radeon_uvd_note_usage(rdev); + else if ((parser->ring == TN_RING_TYPE_VCE1_INDEX) || + (parser->ring == TN_RING_TYPE_VCE2_INDEX)) + radeon_vce_note_usage(rdev);
radeon_cs_sync_rings(parser); r = radeon_ib_schedule(rdev, &parser->ib, NULL); diff --git a/drivers/gpu/drm/radeon/radeon_pm.c b/drivers/gpu/drm/radeon/radeon_pm.c index a4687e7..4ad9af9 100644 --- a/drivers/gpu/drm/radeon/radeon_pm.c +++ b/drivers/gpu/drm/radeon/radeon_pm.c @@ -968,6 +968,23 @@ void radeon_dpm_enable_uvd(struct radeon_device *rdev, bool enable) } }
+void radeon_dpm_enable_vce(struct radeon_device *rdev, bool enable) +{ + if (enable) { + mutex_lock(&rdev->pm.mutex); + rdev->pm.dpm.vce_active = true; + /* XXX select vce level based on ring/task */ + rdev->pm.dpm.vce_level = RADEON_VCE_LEVEL_AC_ALL; + mutex_unlock(&rdev->pm.mutex); + } else { + mutex_lock(&rdev->pm.mutex); + rdev->pm.dpm.vce_active = false; + mutex_unlock(&rdev->pm.mutex); + } + + radeon_pm_compute_clocks(rdev); +} + static void radeon_pm_suspend_old(struct radeon_device *rdev) { mutex_lock(&rdev->pm.mutex); diff --git a/drivers/gpu/drm/radeon/radeon_vce.c b/drivers/gpu/drm/radeon/radeon_vce.c index f46563b..d130432 100644 --- a/drivers/gpu/drm/radeon/radeon_vce.c +++ b/drivers/gpu/drm/radeon/radeon_vce.c @@ -34,11 +34,16 @@ #include "radeon_asic.h" #include "sid.h"
+/* 1 second timeout */ +#define VCE_IDLE_TIMEOUT_MS 1000 + /* Firmware Names */ #define FIRMWARE_BONAIRE "radeon/BONAIRE_vce.bin"
MODULE_FIRMWARE(FIRMWARE_BONAIRE);
+static void radeon_vce_idle_work_handler(struct work_struct *work); + /** * radeon_vce_init - allocate memory, load vce firmware * @@ -55,6 +60,8 @@ int radeon_vce_init(struct radeon_device *rdev) uint8_t start, mid, end; int i, r;
+ INIT_DELAYED_WORK(&rdev->vce.idle_work, radeon_vce_idle_work_handler); + switch (rdev->family) { case CHIP_BONAIRE: case CHIP_KAVERI: @@ -220,6 +227,59 @@ int radeon_vce_resume(struct radeon_device *rdev) }
/** + * radeon_vce_idle_work_handler - power off VCE + * + * @work: pointer to work structure + * + * power of VCE when it's not used any more + */ +static void radeon_vce_idle_work_handler(struct work_struct *work) +{ + struct radeon_device *rdev = + container_of(work, struct radeon_device, vce.idle_work.work); + + if ((radeon_fence_count_emitted(rdev, TN_RING_TYPE_VCE1_INDEX) == 0) && + (radeon_fence_count_emitted(rdev, TN_RING_TYPE_VCE2_INDEX) == 0)) { + if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) { + radeon_dpm_enable_vce(rdev, false); + } else { + radeon_set_vce_clocks(rdev, 0, 0); + } + } else { + schedule_delayed_work(&rdev->vce.idle_work, + msecs_to_jiffies(VCE_IDLE_TIMEOUT_MS)); + } +} + +/** + * radeon_vce_note_usage - power up VCE + * + * @rdev: radeon_device pointer + * + * Make sure VCE is powerd up when we want to use it + */ +void radeon_vce_note_usage(struct radeon_device *rdev) +{ + bool streams_changed = false; + bool set_clocks = !cancel_delayed_work_sync(&rdev->vce.idle_work); + set_clocks &= schedule_delayed_work(&rdev->vce.idle_work, + msecs_to_jiffies(VCE_IDLE_TIMEOUT_MS)); + + if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) { + /* XXX figure out if the streams changed */ + streams_changed = false; + } + + if (set_clocks || streams_changed) { + if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) { + radeon_dpm_enable_vce(rdev, true); + } else { + radeon_set_vce_clocks(rdev, 53300, 40000); + } + } +} + +/** * radeon_vce_free_handles - free still open VCE handles * * @rdev: radeon_device pointer @@ -235,6 +295,8 @@ void radeon_vce_free_handles(struct radeon_device *rdev, struct drm_file *filp) if (!handle || rdev->vce.filp[i] != filp) continue;
+ radeon_vce_note_usage(rdev); + r = radeon_vce_get_destroy_msg(rdev, TN_RING_TYPE_VCE1_INDEX, handle, NULL); if (r)
From: Alex Deucher alexander.deucher@amd.com
The adds the appropriate function calls to properly re-init vce before it's used after it has been power gated.
Signed-off-by: Alex Deucher alexander.deucher@amd.com --- drivers/gpu/drm/radeon/kv_dpm.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/radeon/kv_dpm.c b/drivers/gpu/drm/radeon/kv_dpm.c index a56398d..035acc4 100644 --- a/drivers/gpu/drm/radeon/kv_dpm.c +++ b/drivers/gpu/drm/radeon/kv_dpm.c @@ -1412,7 +1412,6 @@ static int kv_update_vce_dpm(struct radeon_device *rdev,
if (radeon_new_state->evclk > 0 && radeon_current_state->evclk == 0) { kv_dpm_powergate_vce(rdev, false); - /* XXX cik_vce_resume(); */ if (pi->caps_stable_p_state) pi->vce_boot_level = table->count - 1; else @@ -1435,7 +1434,6 @@ static int kv_update_vce_dpm(struct radeon_device *rdev, kv_enable_vce_dpm(rdev, true); } else if (radeon_new_state->evclk == 0 && radeon_current_state->evclk > 0) { kv_enable_vce_dpm(rdev, false); - /* XXX cik_vce_suspend(); */ kv_dpm_powergate_vce(rdev, true); }
@@ -1575,11 +1573,16 @@ static void kv_dpm_powergate_vce(struct radeon_device *rdev, bool gate) pi->vce_power_gated = gate;
if (gate) { - if (pi->caps_vce_pg) + if (pi->caps_vce_pg) { + /* XXX do we need a vce_v1_0_stop() ? */ kv_notify_message_to_smu(rdev, PPSMC_MSG_VCEPowerOFF); + } } else { - if (pi->caps_vce_pg) + if (pi->caps_vce_pg) { kv_notify_message_to_smu(rdev, PPSMC_MSG_VCEPowerON); + vce_v2_0_resume(rdev); + vce_v1_0_start(rdev); + } } }
From: Alex Deucher alexander.deucher@amd.com
Signed-off-by: Alex Deucher alexander.deucher@amd.com --- drivers/gpu/drm/radeon/cikd.h | 10 ++++ drivers/gpu/drm/radeon/vce_v2_0.c | 111 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 121 insertions(+)
diff --git a/drivers/gpu/drm/radeon/cikd.h b/drivers/gpu/drm/radeon/cikd.h index ee16380..2138732 100644 --- a/drivers/gpu/drm/radeon/cikd.h +++ b/drivers/gpu/drm/radeon/cikd.h @@ -2029,8 +2029,18 @@ #define VCE_RB_RPTR 0x2018c #define VCE_RB_WPTR 0x20190 #define VCE_CLOCK_GATING_A 0x202f8 +# define CGC_CLK_GATE_DLY_TIMER_MASK (0xf << 0) +# define CGC_CLK_GATE_DLY_TIMER(x) ((x) << 0) +# define CGC_CLK_GATER_OFF_DLY_TIMER_MASK (0xff << 4) +# define CGC_CLK_GATER_OFF_DLY_TIMER(x) ((x) << 4) +# define CGC_UENC_WAIT_AWAKE (1 << 18) #define VCE_CLOCK_GATING_B 0x202fc +#define VCE_CGTT_CLK_OVERRIDE 0x207a0 #define VCE_UENC_CLOCK_GATING 0x207bc +# define CLOCK_ON_DELAY_MASK (0xf << 0) +# define CLOCK_ON_DELAY(x) ((x) << 0) +# define CLOCK_OFF_DELAY_MASK (0xff << 4) +# define CLOCK_OFF_DELAY(x) ((x) << 4) #define VCE_UENC_REG_CLOCK_GATING 0x207c0 #define VCE_SYS_INT_EN 0x21300 # define VCE_SYS_INT_TRAP_INTERRUPT_EN (1 << 3) diff --git a/drivers/gpu/drm/radeon/vce_v2_0.c b/drivers/gpu/drm/radeon/vce_v2_0.c index 4911d1b..1ac7bb8 100644 --- a/drivers/gpu/drm/radeon/vce_v2_0.c +++ b/drivers/gpu/drm/radeon/vce_v2_0.c @@ -31,6 +31,115 @@ #include "radeon_asic.h" #include "cikd.h"
+static void vce_v2_0_set_sw_cg(struct radeon_device *rdev, bool gated) +{ + u32 tmp; + + if (gated) { + tmp = RREG32(VCE_CLOCK_GATING_B); + tmp |= 0xe70000; + WREG32(VCE_CLOCK_GATING_B, tmp); + + tmp = RREG32(VCE_UENC_CLOCK_GATING); + tmp |= 0xff000000; + WREG32(VCE_UENC_CLOCK_GATING, tmp); + + tmp = RREG32(VCE_UENC_REG_CLOCK_GATING); + tmp &= ~0x3fc; + WREG32(VCE_UENC_REG_CLOCK_GATING, tmp); + + WREG32(VCE_CGTT_CLK_OVERRIDE, 0); + } else { + tmp = RREG32(VCE_CLOCK_GATING_B); + tmp |= 0xe7; + tmp &= ~0xe70000; + WREG32(VCE_CLOCK_GATING_B, tmp); + + tmp = RREG32(VCE_UENC_CLOCK_GATING); + tmp |= 0x1fe000; + tmp &= ~0xff000000; + WREG32(VCE_UENC_CLOCK_GATING, tmp); + + tmp = RREG32(VCE_UENC_REG_CLOCK_GATING); + tmp |= 0x3fc; + WREG32(VCE_UENC_REG_CLOCK_GATING, tmp); + } +} + +static void vce_v2_0_set_dyn_cg(struct radeon_device *rdev, bool gated) +{ + u32 orig, tmp; + + tmp = RREG32(VCE_CLOCK_GATING_B); + tmp &= ~0x00060006; + if (gated) { + tmp |= 0xe10000; + } else { + tmp |= 0xe1; + tmp &= ~0xe10000; + } + WREG32(VCE_CLOCK_GATING_B, tmp); + + orig = tmp = RREG32(VCE_UENC_CLOCK_GATING); + tmp &= ~0x1fe000; + tmp &= ~0xff000000; + if (tmp != orig) + WREG32(VCE_UENC_CLOCK_GATING, tmp); + + orig = tmp = RREG32(VCE_UENC_REG_CLOCK_GATING); + tmp &= ~0x3fc; + if (tmp != orig) + WREG32(VCE_UENC_REG_CLOCK_GATING, tmp); + + if (gated) + WREG32(VCE_CGTT_CLK_OVERRIDE, 0); +} + +static void vce_v2_0_disable_cg(struct radeon_device *rdev) +{ + WREG32(VCE_CGTT_CLK_OVERRIDE, 7); +} + +void vce_v2_0_enable_mgcg(struct radeon_device *rdev, bool enable) +{ + bool sw_cg = false; + + if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_VCE_MGCG)) { + if (sw_cg) + vce_v2_0_set_sw_cg(rdev, true); + else + vce_v2_0_set_dyn_cg(rdev, true); + } else { + vce_v2_0_disable_cg(rdev); + + if (sw_cg) + vce_v2_0_set_sw_cg(rdev, false); + else + vce_v2_0_set_dyn_cg(rdev, false); + } +} + +static void vce_v2_0_init_cg(struct radeon_device *rdev) +{ + u32 tmp; + + tmp = RREG32(VCE_CLOCK_GATING_A); + tmp &= ~(CGC_CLK_GATE_DLY_TIMER_MASK | CGC_CLK_GATER_OFF_DLY_TIMER_MASK); + tmp |= (CGC_CLK_GATE_DLY_TIMER(0) | CGC_CLK_GATER_OFF_DLY_TIMER(4)); + tmp |= CGC_UENC_WAIT_AWAKE; + WREG32(VCE_CLOCK_GATING_A, tmp); + + tmp = RREG32(VCE_UENC_CLOCK_GATING); + tmp &= ~(CLOCK_ON_DELAY_MASK | CLOCK_OFF_DELAY_MASK); + tmp |= (CLOCK_ON_DELAY(0) | CLOCK_OFF_DELAY(4)); + WREG32(VCE_UENC_CLOCK_GATING, tmp); + + tmp = RREG32(VCE_CLOCK_GATING_B); + tmp |= 0x10; + tmp &= ~0x100000; + WREG32(VCE_CLOCK_GATING_B, tmp); +} + int vce_v2_0_resume(struct radeon_device *rdev) { uint64_t addr = rdev->vce.gpu_addr; @@ -66,5 +175,7 @@ int vce_v2_0_resume(struct radeon_device *rdev) WREG32_P(VCE_SYS_INT_EN, VCE_SYS_INT_TRAP_INTERRUPT_EN, ~VCE_SYS_INT_TRAP_INTERRUPT_EN);
+ vce_v2_0_init_cg(rdev); + return 0; }
From: Alex Deucher alexander.deucher@amd.com
Some of the vce clocks are automatic, others need to be manually enabled. For ease, just disable cg when vce is active.
v2: rebased
Signed-off-by: Alex Deucher alexander.deucher@amd.com --- drivers/gpu/drm/radeon/ci_dpm.c | 9 ++++++++- drivers/gpu/drm/radeon/cik.c | 5 +++++ drivers/gpu/drm/radeon/kv_dpm.c | 4 ++++ 3 files changed, 17 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/radeon/ci_dpm.c b/drivers/gpu/drm/radeon/ci_dpm.c index 6669d32..cad89a9 100644 --- a/drivers/gpu/drm/radeon/ci_dpm.c +++ b/drivers/gpu/drm/radeon/ci_dpm.c @@ -172,6 +172,8 @@ extern void si_trim_voltage_table_to_fit_state_table(struct radeon_device *rdev, extern void cik_enter_rlc_safe_mode(struct radeon_device *rdev); extern void cik_exit_rlc_safe_mode(struct radeon_device *rdev); extern int ci_mc_load_microcode(struct radeon_device *rdev); +extern void cik_update_cg(struct radeon_device *rdev, + u32 block, bool enable);
static int ci_get_std_voltage_value_sidd(struct radeon_device *rdev, struct atom_voltage_table_entry *voltage_table, @@ -3627,8 +3629,10 @@ static int ci_update_vce_dpm(struct radeon_device *rdev,
if (radeon_current_state->evclk != radeon_new_state->evclk) { if (radeon_new_state->evclk) { - pi->smc_state_table.VceBootLevel = ci_get_vce_boot_level(rdev); + /* turn the clocks on when encoding */ + cik_update_cg(rdev, RADEON_CG_BLOCK_VCE, false);
+ pi->smc_state_table.VceBootLevel = ci_get_vce_boot_level(rdev); tmp = RREG32_SMC(DPM_TABLE_475); tmp &= ~VceBootLevel_MASK; tmp |= VceBootLevel(pi->smc_state_table.VceBootLevel); @@ -3636,6 +3640,9 @@ static int ci_update_vce_dpm(struct radeon_device *rdev,
ret = ci_enable_vce_dpm(rdev, true); } else { + /* turn the clocks off when not encoding */ + cik_update_cg(rdev, RADEON_CG_BLOCK_VCE, true); + ret = ci_enable_vce_dpm(rdev, false); } } diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index ecb16b1..2b31c32 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -75,6 +75,7 @@ extern void si_init_uvd_internal_cg(struct radeon_device *rdev); extern int cik_sdma_resume(struct radeon_device *rdev); extern void cik_sdma_enable(struct radeon_device *rdev, bool enable); extern void cik_sdma_fini(struct radeon_device *rdev); +extern void vce_v2_0_enable_mgcg(struct radeon_device *rdev, bool enable); static void cik_rlc_stop(struct radeon_device *rdev); static void cik_pcie_gen3_enable(struct radeon_device *rdev); static void cik_program_aspm(struct radeon_device *rdev); @@ -6141,6 +6142,10 @@ void cik_update_cg(struct radeon_device *rdev, cik_enable_hdp_mgcg(rdev, enable); cik_enable_hdp_ls(rdev, enable); } + + if (block & RADEON_CG_BLOCK_VCE) { + vce_v2_0_enable_mgcg(rdev, enable); + } }
static void cik_init_cg(struct radeon_device *rdev) diff --git a/drivers/gpu/drm/radeon/kv_dpm.c b/drivers/gpu/drm/radeon/kv_dpm.c index 035acc4..19c5e6a 100644 --- a/drivers/gpu/drm/radeon/kv_dpm.c +++ b/drivers/gpu/drm/radeon/kv_dpm.c @@ -1412,6 +1412,8 @@ static int kv_update_vce_dpm(struct radeon_device *rdev,
if (radeon_new_state->evclk > 0 && radeon_current_state->evclk == 0) { kv_dpm_powergate_vce(rdev, false); + /* turn the clocks on when encoding */ + cik_update_cg(rdev, RADEON_CG_BLOCK_VCE, false); if (pi->caps_stable_p_state) pi->vce_boot_level = table->count - 1; else @@ -1434,6 +1436,8 @@ static int kv_update_vce_dpm(struct radeon_device *rdev, kv_enable_vce_dpm(rdev, true); } else if (radeon_new_state->evclk == 0 && radeon_current_state->evclk > 0) { kv_enable_vce_dpm(rdev, false); + /* turn the clocks off when not encoding */ + cik_update_cg(rdev, RADEON_CG_BLOCK_VCE, true); kv_dpm_powergate_vce(rdev, true); }
dri-devel@lists.freedesktop.org