On Wed, Jun 26, 2013 at 09:22:12AM -0400, alexdeucher@gmail.com wrote:
From: Alex Deucher alexander.deucher@amd.com
On CIK, the compute rings work slightly differently than on previous asics, however the basic concepts are the same.
The main differences:
- New MEC engines for compute queues
- Multiple queues per MEC:
- CI/KB: 1 MEC, 4 pipes per MEC, 8 queues per pipe = 32 queues
- KV: 2 MEC, 4 pipes per MEC, 8 queues per pipe = 64 queues
- Queues can be allocated and scheduled by another queue
- New doorbell aperture allows you to assign space in the aperture for the wptr which allows for userspace access to queues
v2: add wptr shadow, fix eop setup
Signed-off-by: Alex Deucher alexander.deucher@amd.com
Minor nitpick below otherwise
Reviewed-by: Jerome Glisse jglisse@redhat.com
drivers/gpu/drm/radeon/cik.c | 528 +++++++++++++++++++++++++++++++++++- drivers/gpu/drm/radeon/cikd.h | 62 +++++ drivers/gpu/drm/radeon/radeon.h | 19 ++ drivers/gpu/drm/radeon/radeon_cs.c | 4 +- 4 files changed, 601 insertions(+), 12 deletions(-)
diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index 5c28fa5..9d2d6bb 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -1687,6 +1687,7 @@ int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring) radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2)); radeon_ring_write(ring, 0xDEADBEEF); radeon_ring_unlock_commit(rdev, ring);
- for (i = 0; i < rdev->usec_timeout; i++) { tmp = RREG32(scratch); if (tmp == 0xDEADBEEF)
@@ -2112,6 +2113,51 @@ static int cik_cp_gfx_resume(struct radeon_device *rdev) return 0; }
+static u32 cik_compute_ring_get_rptr(struct radeon_device *rdev,
struct radeon_ring *ring)
+{
- u32 rptr;
- if (rdev->wb.enabled) {
rptr = le32_to_cpu(rdev->wb.wb[ring->rptr_offs/4]);
- } else {
cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
rptr = RREG32(CP_HQD_PQ_RPTR);
cik_srbm_select(rdev, 0, 0, 0, 0);
- }
- rptr = (rptr & ring->ptr_reg_mask) >> ring->ptr_reg_shift;
- return rptr;
+}
+static u32 cik_compute_ring_get_wptr(struct radeon_device *rdev,
struct radeon_ring *ring)
+{
- u32 wptr;
- if (rdev->wb.enabled) {
wptr = le32_to_cpu(rdev->wb.wb[ring->wptr_offs/4]);
- } else {
cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
wptr = RREG32(CP_HQD_PQ_WPTR);
cik_srbm_select(rdev, 0, 0, 0, 0);
- }
- wptr = (wptr & ring->ptr_reg_mask) >> ring->ptr_reg_shift;
- return wptr;
+}
+static void cik_compute_ring_set_wptr(struct radeon_device *rdev,
struct radeon_ring *ring)
+{
- u32 wptr = (ring->wptr << ring->ptr_reg_shift) & ring->ptr_reg_mask;
- rdev->wb.wb[ring->wptr_offs/4] = cpu_to_le32(wptr);
- WDOORBELL32(ring->doorbell_offset, wptr);
+}
/**
- cik_cp_compute_enable - enable/disable the compute CP MEs
@@ -2176,7 +2222,8 @@ static int cik_cp_compute_load_microcode(struct radeon_device *rdev) */ static int cik_cp_compute_start(struct radeon_device *rdev) {
- //todo
- cik_cp_compute_enable(rdev, true);
- return 0;
}
@@ -2190,10 +2237,171 @@ static int cik_cp_compute_start(struct radeon_device *rdev) */ static void cik_cp_compute_fini(struct radeon_device *rdev) {
- int i, idx, r;
- cik_cp_compute_enable(rdev, false);
- //todo
- for (i = 0; i < 2; i++) {
if (i == 0)
idx = CAYMAN_RING_TYPE_CP1_INDEX;
else
idx = CAYMAN_RING_TYPE_CP2_INDEX;
if (rdev->ring[idx].mqd_obj) {
r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
if (unlikely(r != 0))
dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
radeon_bo_unpin(rdev->ring[idx].mqd_obj);
radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
radeon_bo_unref(&rdev->ring[idx].mqd_obj);
rdev->ring[idx].mqd_obj = NULL;
}
- }
+}
+static void cik_mec_fini(struct radeon_device *rdev) +{
- int r;
- if (rdev->mec.hpd_eop_obj) {
r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
if (unlikely(r != 0))
dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
radeon_bo_unpin(rdev->mec.hpd_eop_obj);
radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
radeon_bo_unref(&rdev->mec.hpd_eop_obj);
rdev->mec.hpd_eop_obj = NULL;
- }
+}
+#define MEC_HPD_SIZE 2048
+static int cik_mec_init(struct radeon_device *rdev) +{
- int r;
- u32 *hpd;
- /*
* KV: 2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
* CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
*/
- if (rdev->family == CHIP_KAVERI)
rdev->mec.num_mec = 2;
- else
rdev->mec.num_mec = 1;
- rdev->mec.num_pipe = 4;
- rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
- if (rdev->mec.hpd_eop_obj == NULL) {
r = radeon_bo_create(rdev,
rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
PAGE_SIZE, true,
RADEON_GEM_DOMAIN_GTT, NULL,
&rdev->mec.hpd_eop_obj);
if (r) {
dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);
return r;
}
- }
- r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
- if (unlikely(r != 0)) {
cik_mec_fini(rdev);
return r;
- }
- r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
&rdev->mec.hpd_eop_gpu_addr);
- if (r) {
dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r);
cik_mec_fini(rdev);
return r;
- }
- r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
- if (r) {
dev_warn(rdev->dev, "(%d) map HDP EOP bo failed\n", r);
cik_mec_fini(rdev);
return r;
- }
- /* clear memory. Not sure if this is required or not */
- memset(hpd, 0, rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
- radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
- radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
- return 0;
}
+struct hqd_registers +{
- u32 cp_mqd_base_addr;
- u32 cp_mqd_base_addr_hi;
- u32 cp_hqd_active;
- u32 cp_hqd_vmid;
- u32 cp_hqd_persistent_state;
- u32 cp_hqd_pipe_priority;
- u32 cp_hqd_queue_priority;
- u32 cp_hqd_quantum;
- u32 cp_hqd_pq_base;
- u32 cp_hqd_pq_base_hi;
- u32 cp_hqd_pq_rptr;
- u32 cp_hqd_pq_rptr_report_addr;
- u32 cp_hqd_pq_rptr_report_addr_hi;
- u32 cp_hqd_pq_wptr_poll_addr;
- u32 cp_hqd_pq_wptr_poll_addr_hi;
- u32 cp_hqd_pq_doorbell_control;
- u32 cp_hqd_pq_wptr;
- u32 cp_hqd_pq_control;
- u32 cp_hqd_ib_base_addr;
- u32 cp_hqd_ib_base_addr_hi;
- u32 cp_hqd_ib_rptr;
- u32 cp_hqd_ib_control;
- u32 cp_hqd_iq_timer;
- u32 cp_hqd_iq_rptr;
- u32 cp_hqd_dequeue_request;
- u32 cp_hqd_dma_offload;
- u32 cp_hqd_sema_cmd;
- u32 cp_hqd_msg_type;
- u32 cp_hqd_atomic0_preop_lo;
- u32 cp_hqd_atomic0_preop_hi;
- u32 cp_hqd_atomic1_preop_lo;
- u32 cp_hqd_atomic1_preop_hi;
- u32 cp_hqd_hq_scheduler0;
- u32 cp_hqd_hq_scheduler1;
- u32 cp_mqd_control;
+};
+struct bonaire_mqd +{
- u32 header;
- u32 dispatch_initiator;
- u32 dimensions[3];
- u32 start_idx[3];
- u32 num_threads[3];
- u32 pipeline_stat_enable;
- u32 perf_counter_enable;
- u32 pgm[2];
- u32 tba[2];
- u32 tma[2];
- u32 pgm_rsrc[2];
- u32 vmid;
- u32 resource_limits;
- u32 static_thread_mgmt01[2];
- u32 tmp_ring_size;
- u32 static_thread_mgmt23[2];
- u32 restart[3];
- u32 thread_trace_enable;
- u32 reserved1;
- u32 user_data[16];
- u32 vgtcs_invoke_count[2];
- struct hqd_registers queue_state;
- u32 dequeue_cntr;
- u32 interrupt_queue[64];
+};
/**
- cik_cp_compute_resume - setup the compute queue registers
@@ -2205,24 +2413,247 @@ static void cik_cp_compute_fini(struct radeon_device *rdev) */ static int cik_cp_compute_resume(struct radeon_device *rdev) {
- int r;
- int r, i, idx;
- u32 tmp;
- bool use_doorbell = true;
- u64 hqd_gpu_addr;
- u64 mqd_gpu_addr;
- u64 eop_gpu_addr;
- u64 wb_gpu_addr;
- u32 *buf;
- struct bonaire_mqd *mqd;
- //todo r = cik_cp_compute_start(rdev); if (r) return r;
- /* fix up chicken bits */
- tmp = RREG32(CP_CPF_DEBUG);
- tmp |= (1 << 23);
What is this chicken bit ? Is it tasty ?
- WREG32(CP_CPF_DEBUG, tmp);
- /* init the pipes */
- for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); i++) {
int me = (i < 4) ? 1 : 2;
int pipe = (i < 4) ? i : (i - 4);
eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2);
cik_srbm_select(rdev, me, pipe, 0, 0);
/* write the EOP addr */
WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
/* set the VMID assigned */
WREG32(CP_HPD_EOP_VMID, 0);
/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
tmp = RREG32(CP_HPD_EOP_CONTROL);
tmp &= ~EOP_SIZE_MASK;
tmp |= drm_order(MEC_HPD_SIZE / 8);
WREG32(CP_HPD_EOP_CONTROL, tmp);
- }
- cik_srbm_select(rdev, 0, 0, 0, 0);
- /* init the queues. Just two for now. */
- for (i = 0; i < 2; i++) {
if (i == 0)
idx = CAYMAN_RING_TYPE_CP1_INDEX;
else
idx = CAYMAN_RING_TYPE_CP2_INDEX;
if (rdev->ring[idx].mqd_obj == NULL) {
r = radeon_bo_create(rdev,
sizeof(struct bonaire_mqd),
PAGE_SIZE, true,
RADEON_GEM_DOMAIN_GTT, NULL,
&rdev->ring[idx].mqd_obj);
if (r) {
dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
return r;
}
}
r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
if (unlikely(r != 0)) {
cik_cp_compute_fini(rdev);
return r;
}
r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
&mqd_gpu_addr);
if (r) {
dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
cik_cp_compute_fini(rdev);
return r;
}
r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
if (r) {
dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
cik_cp_compute_fini(rdev);
return r;
}
/* doorbell offset */
rdev->ring[idx].doorbell_offset =
(rdev->ring[idx].doorbell_page_num * PAGE_SIZE) + 0;
/* init the mqd struct */
memset(buf, 0, sizeof(struct bonaire_mqd));
mqd = (struct bonaire_mqd *)buf;
mqd->header = 0xC0310800;
mqd->static_thread_mgmt01[0] = 0xffffffff;
mqd->static_thread_mgmt01[1] = 0xffffffff;
mqd->static_thread_mgmt23[0] = 0xffffffff;
mqd->static_thread_mgmt23[1] = 0xffffffff;
cik_srbm_select(rdev, rdev->ring[idx].me,
rdev->ring[idx].pipe,
rdev->ring[idx].queue, 0);
/* disable wptr polling */
tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
tmp &= ~WPTR_POLL_EN;
WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
/* enable doorbell? */
mqd->queue_state.cp_hqd_pq_doorbell_control =
RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
if (use_doorbell)
mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
else
mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
mqd->queue_state.cp_hqd_pq_doorbell_control);
/* disable the queue if it's active */
mqd->queue_state.cp_hqd_dequeue_request = 0;
mqd->queue_state.cp_hqd_pq_rptr = 0;
mqd->queue_state.cp_hqd_pq_wptr= 0;
if (RREG32(CP_HQD_ACTIVE) & 1) {
WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
for (i = 0; i < rdev->usec_timeout; i++) {
if (!(RREG32(CP_HQD_ACTIVE) & 1))
break;
udelay(1);
}
WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
}
/* set the pointer to the MQD */
mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
/* set MQD vmid to 0 */
mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
/* set up the HQD, this is similar to CP_RB0_CNTL */
mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
mqd->queue_state.cp_hqd_pq_control &=
~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
mqd->queue_state.cp_hqd_pq_control |=
drm_order(rdev->ring[idx].ring_size / 8);
mqd->queue_state.cp_hqd_pq_control |=
(drm_order(RADEON_GPU_PAGE_SIZE/8) << 8);
+#ifdef __BIG_ENDIAN
mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
+#endif
mqd->queue_state.cp_hqd_pq_control &=
~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
mqd->queue_state.cp_hqd_pq_control |=
PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
/* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
if (i == 0)
wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
else
wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
/* set the wb address wether it's enabled or not */
if (i == 0)
wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
else
wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
upper_32_bits(wb_gpu_addr) & 0xffff;
WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
mqd->queue_state.cp_hqd_pq_rptr_report_addr);
WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
/* enable the doorbell if requested */
if (use_doorbell) {
mqd->queue_state.cp_hqd_pq_doorbell_control =
RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
mqd->queue_state.cp_hqd_pq_doorbell_control |=
DOORBELL_OFFSET(rdev->ring[idx].doorbell_offset / 4);
mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
mqd->queue_state.cp_hqd_pq_doorbell_control &=
~(DOORBELL_SOURCE | DOORBELL_HIT);
} else {
mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
}
WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
mqd->queue_state.cp_hqd_pq_doorbell_control);
/* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
rdev->ring[idx].wptr = 0;
mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
rdev->ring[idx].rptr = RREG32(CP_HQD_PQ_RPTR);
mqd->queue_state.cp_hqd_pq_rptr = rdev->ring[idx].rptr;
/* set the vmid for the queue */
mqd->queue_state.cp_hqd_vmid = 0;
WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
/* activate the queue */
mqd->queue_state.cp_hqd_active = 1;
WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
cik_srbm_select(rdev, 0, 0, 0, 0);
radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
rdev->ring[idx].ready = true;
r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
if (r)
rdev->ring[idx].ready = false;
- }
- return 0;
}
-/* XXX temporary wrappers to handle both compute and gfx */ -/* XXX */ static void cik_cp_enable(struct radeon_device *rdev, bool enable) { cik_cp_gfx_enable(rdev, enable); cik_cp_compute_enable(rdev, enable); }
-/* XXX */ static int cik_cp_load_microcode(struct radeon_device *rdev) { int r; @@ -2237,14 +2668,12 @@ static int cik_cp_load_microcode(struct radeon_device *rdev) return 0; }
-/* XXX */ static void cik_cp_fini(struct radeon_device *rdev) { cik_cp_gfx_fini(rdev); cik_cp_compute_fini(rdev); }
-/* XXX */ static int cik_cp_resume(struct radeon_device *rdev) { int r; @@ -2865,6 +3294,22 @@ static void cik_print_gpu_status_regs(struct radeon_device *rdev) RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET)); dev_info(rdev->dev, " SDMA1_STATUS_REG = 0x%08X\n", RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
- dev_info(rdev->dev, " CP_STAT = 0x%08x\n", RREG32(CP_STAT));
- dev_info(rdev->dev, " CP_STALLED_STAT1 = 0x%08x\n",
RREG32(CP_STALLED_STAT1));
- dev_info(rdev->dev, " CP_STALLED_STAT2 = 0x%08x\n",
RREG32(CP_STALLED_STAT2));
- dev_info(rdev->dev, " CP_STALLED_STAT3 = 0x%08x\n",
RREG32(CP_STALLED_STAT3));
- dev_info(rdev->dev, " CP_CPF_BUSY_STAT = 0x%08x\n",
RREG32(CP_CPF_BUSY_STAT));
- dev_info(rdev->dev, " CP_CPF_STALLED_STAT1 = 0x%08x\n",
RREG32(CP_CPF_STALLED_STAT1));
- dev_info(rdev->dev, " CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
- dev_info(rdev->dev, " CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
- dev_info(rdev->dev, " CP_CPC_STALLED_STAT1 = 0x%08x\n",
RREG32(CP_CPC_STALLED_STAT1));
- dev_info(rdev->dev, " CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
}
/** @@ -4952,12 +5397,31 @@ static int cik_startup(struct radeon_device *rdev) if (r) return r;
- /* allocate rlc buffers */
Init mec not allocate rlc ...
r = cik_mec_init(rdev);
if (r) {
DRM_ERROR("Failed to init MEC BOs!\n");
return r;
}
r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX); if (r) { dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r); return r; }
r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
if (r) {
dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
return r;
}
r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
if (r) {
dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
return r;
}
r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX); if (r) { dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
@@ -5002,6 +5466,36 @@ static int cik_startup(struct radeon_device *rdev) if (r) return r;
- /* set up the compute queues */
- ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
- r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
0, 0xfffff, RADEON_CP_PACKET2);
- if (r)
return r;
- ring->me = 1; /* first MEC */
- ring->pipe = 0; /* first pipe */
- ring->queue = 0; /* first queue */
- ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
- ring->funcs.get_rptr = &cik_compute_ring_get_rptr;
- ring->funcs.get_wptr = &cik_compute_ring_get_wptr;
- ring->funcs.set_wptr = &cik_compute_ring_set_wptr;
- ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
- r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
0, 0xffffffff, RADEON_CP_PACKET2);
- if (r)
return r;
- /* dGPU only have 1 MEC */
- ring->me = 1; /* first MEC */
- ring->pipe = 0; /* first pipe */
- ring->queue = 1; /* second queue */
- ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
- ring->funcs.get_rptr = &cik_compute_ring_get_rptr;
- ring->funcs.get_wptr = &cik_compute_ring_get_wptr;
- ring->funcs.set_wptr = &cik_compute_ring_set_wptr;
- ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX]; r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET, SDMA0_GFX_RB_RPTR + SDMA0_REGISTER_OFFSET,
@@ -5176,6 +5670,20 @@ int cik_init(struct radeon_device *rdev) ring->ring_obj = NULL; r600_ring_init(rdev, ring, 1024 * 1024);
- ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
- ring->ring_obj = NULL;
- r600_ring_init(rdev, ring, 1024 * 1024);
- r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
- if (r)
return r;
- ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
- ring->ring_obj = NULL;
- r600_ring_init(rdev, ring, 1024 * 1024);
- r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
- if (r)
return r;
- ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX]; ring->ring_obj = NULL; r600_ring_init(rdev, ring, 256 * 1024);
@@ -5206,6 +5714,7 @@ int cik_init(struct radeon_device *rdev) cik_sdma_fini(rdev); cik_irq_fini(rdev); si_rlc_fini(rdev);
radeon_wb_fini(rdev); radeon_ib_pool_fini(rdev); radeon_vm_manager_fini(rdev);cik_mec_fini(rdev);
@@ -5241,6 +5750,7 @@ void cik_fini(struct radeon_device *rdev) cik_sdma_fini(rdev); cik_irq_fini(rdev); si_rlc_fini(rdev);
- cik_mec_fini(rdev); radeon_wb_fini(rdev); radeon_vm_manager_fini(rdev); radeon_ib_pool_fini(rdev);
diff --git a/drivers/gpu/drm/radeon/cikd.h b/drivers/gpu/drm/radeon/cikd.h index 79be39e..63514b9 100644 --- a/drivers/gpu/drm/radeon/cikd.h +++ b/drivers/gpu/drm/radeon/cikd.h @@ -460,6 +460,13 @@ # define RDERR_INT_ENABLE (1 << 0) # define GUI_IDLE_INT_ENABLE (1 << 19)
+#define CP_CPC_STATUS 0x8210 +#define CP_CPC_BUSY_STAT 0x8214 +#define CP_CPC_STALLED_STAT1 0x8218 +#define CP_CPF_STATUS 0x821c +#define CP_CPF_BUSY_STAT 0x8220 +#define CP_CPF_STALLED_STAT1 0x8224
#define CP_MEC_CNTL 0x8234 #define MEC_ME2_HALT (1 << 28) #define MEC_ME1_HALT (1 << 30) @@ -468,6 +475,12 @@ #define MEC_ME2_HALT (1 << 28) #define MEC_ME1_HALT (1 << 30)
+#define CP_STALLED_STAT3 0x8670 +#define CP_STALLED_STAT1 0x8674 +#define CP_STALLED_STAT2 0x8678
+#define CP_STAT 0x8680
#define CP_ME_CNTL 0x86D8 #define CP_CE_HALT (1 << 24) #define CP_PFP_HALT (1 << 26) @@ -701,6 +714,11 @@ # define CP_RINGID1_INT_STAT (1 << 30) # define CP_RINGID0_INT_STAT (1 << 31)
+#define CP_CPF_DEBUG 0xC200
+#define CP_PQ_WPTR_POLL_CNTL 0xC20C +#define WPTR_POLL_EN (1 << 31)
#define CP_ME1_PIPE0_INT_CNTL 0xC214 #define CP_ME1_PIPE1_INT_CNTL 0xC218 #define CP_ME1_PIPE2_INT_CNTL 0xC21C @@ -773,6 +791,50 @@ #define RLC_GPM_SCRATCH_ADDR 0xC4B0 #define RLC_GPM_SCRATCH_DATA 0xC4B4
+#define CP_HPD_EOP_BASE_ADDR 0xC904 +#define CP_HPD_EOP_BASE_ADDR_HI 0xC908 +#define CP_HPD_EOP_VMID 0xC90C +#define CP_HPD_EOP_CONTROL 0xC910 +#define EOP_SIZE(x) ((x) << 0) +#define EOP_SIZE_MASK (0x3f << 0) +#define CP_MQD_BASE_ADDR 0xC914 +#define CP_MQD_BASE_ADDR_HI 0xC918 +#define CP_HQD_ACTIVE 0xC91C +#define CP_HQD_VMID 0xC920
+#define CP_HQD_PQ_BASE 0xC934 +#define CP_HQD_PQ_BASE_HI 0xC938 +#define CP_HQD_PQ_RPTR 0xC93C +#define CP_HQD_PQ_RPTR_REPORT_ADDR 0xC940 +#define CP_HQD_PQ_RPTR_REPORT_ADDR_HI 0xC944 +#define CP_HQD_PQ_WPTR_POLL_ADDR 0xC948 +#define CP_HQD_PQ_WPTR_POLL_ADDR_HI 0xC94C +#define CP_HQD_PQ_DOORBELL_CONTROL 0xC950 +#define DOORBELL_OFFSET(x) ((x) << 2) +#define DOORBELL_OFFSET_MASK (0x1fffff << 2) +#define DOORBELL_SOURCE (1 << 28) +#define DOORBELL_SCHD_HIT (1 << 29) +#define DOORBELL_EN (1 << 30) +#define DOORBELL_HIT (1 << 31) +#define CP_HQD_PQ_WPTR 0xC954 +#define CP_HQD_PQ_CONTROL 0xC958 +#define QUEUE_SIZE(x) ((x) << 0) +#define QUEUE_SIZE_MASK (0x3f << 0) +#define RPTR_BLOCK_SIZE(x) ((x) << 8) +#define RPTR_BLOCK_SIZE_MASK (0x3f << 8) +#define PQ_VOLATILE (1 << 26) +#define NO_UPDATE_RPTR (1 << 27) +#define UNORD_DISPATCH (1 << 28) +#define ROQ_PQ_IB_FLIP (1 << 29) +#define PRIV_STATE (1 << 30) +#define KMD_QUEUE (1 << 31)
+#define CP_HQD_DEQUEUE_REQUEST 0xC974
+#define CP_MQD_CONTROL 0xC99C +#define MQD_VMID(x) ((x) << 0) +#define MQD_VMID_MASK (0xf << 0)
#define PA_SC_RASTER_CONFIG 0x28350 # define RASTER_CONFIG_RB_MAP_0 0 # define RASTER_CONFIG_RB_MAP_1 1 diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index a2a3430..d40d506 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -714,6 +714,22 @@ struct radeon_ring { u32 (*get_wptr)(struct radeon_device *rdev, struct radeon_ring *ring); void (*set_wptr)(struct radeon_device *rdev, struct radeon_ring *ring); } funcs;
- /* for CIK queues */
- u32 me;
- u32 pipe;
- u32 queue;
- struct radeon_bo *mqd_obj;
- u32 doorbell_page_num;
- u32 doorbell_offset;
- unsigned wptr_offs;
+};
+struct radeon_mec {
- struct radeon_bo *hpd_eop_obj;
- u64 hpd_eop_gpu_addr;
- u32 num_pipe;
- u32 num_mec;
- u32 num_queue;
};
/* @@ -971,6 +987,8 @@ struct radeon_wb { #define CAYMAN_WB_DMA1_RPTR_OFFSET 2304 #define R600_WB_UVD_RPTR_OFFSET 2560 #define R600_WB_EVENT_OFFSET 3072 +#define CIK_WB_CP1_WPTR_OFFSET 3328 +#define CIK_WB_CP2_WPTR_OFFSET 3584
/**
- struct radeon_pm - power management datas
@@ -1760,6 +1778,7 @@ struct radeon_device { int msi_enabled; /* msi enabled */ struct r600_ih ih; /* r6/700 interrupt ring */ struct si_rlc rlc;
- struct radeon_mec mec; struct work_struct hotplug_work; struct work_struct audio_work; struct work_struct reset_work;
diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c index cf71734..7e265a5 100644 --- a/drivers/gpu/drm/radeon/radeon_cs.c +++ b/drivers/gpu/drm/radeon/radeon_cs.c @@ -121,9 +121,7 @@ static int radeon_cs_get_ring(struct radeon_cs_parser *p, u32 ring, s32 priority p->ring = RADEON_RING_TYPE_GFX_INDEX; break; case RADEON_CS_RING_COMPUTE:
if (p->rdev->family >= CHIP_BONAIRE)
p->ring = RADEON_RING_TYPE_GFX_INDEX;
else if (p->rdev->family >= CHIP_TAHITI) {
if (p->rdev->family >= CHIP_TAHITI) { if (p->priority > 0) p->ring = CAYMAN_RING_TYPE_CP1_INDEX; else
-- 1.7.7.5
dri-devel mailing list dri-devel@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/dri-devel