Hi
Here are a couple of patches that enable support for RV730 on big-endian architectures. The following items work: KMS (correct color in framebuffer, interrupts, KMS blit, power management) and DDX (KMS enabled, EXA, video textures, hardware cursor).
For the BE modifications, the AMD Linux Engineering team advised me to focus on the following: make sure all the atombios data table accesses are endian safe; set the display controller swappers properly (http://lists.freedesktop.org/archives/dri-devel/2011-January/007486.html); make sure the appropriate endian swapper is enabled in the texture and vertex fetch constants (see SQ_TEX_RESOURCE_WORD* and SQ_VTX_RESOURCE_WORD* regs); make sure the shaders are stored in LE order; make sure shader constant buffers are stored in LE order; endian swap IH (interrupt handler) packets; endian swap the CP command buffers; endian swap WB (write back) buffers.
That's what I tried to do and so far, I have the KMS and DDX (with EXA) drivers working. The drivers have been tested on a custom MPC8640 design, with a custom graphics board based on the E4690 (RV730). I'm using a custom Linux distribution with Xorg 7.6 (xf86-video-ati 6.13.2). The latest kernel available for my board is 2.6.35.6, so I haven't tested with an upstream kernel.
I'd also like to test the DDX driver more thoroughly, in order to check that the EXA functions render correctly. Is there a driver testing framework, or some kind of unit test, that you DDX driver writers are using?
Regards, Cedric Cano
Signed-off-by: Cedric Cano <ccano at interfaceconcept.com> --- diff -Naur linux-2.6.35.6/drivers/gpu/drm/radeon/atombios_crtc.c linux-2.6.35.6/drivers/gpu/drm/radeon/atombios_crtc.c --- linux-2.6.35.6/drivers/gpu/drm/radeon/atombios_crtc.c 2010-09-27 02:19:16.000000000 +0200 +++ linux-2.6.35.6/drivers/gpu/drm/radeon/atombios_crtc.c 2011-01-27 15:03:46.000000000 +0100 @@ -808,6 +808,7 @@ struct radeon_bo *rbo; uint64_t fb_location; uint32_t fb_format, fb_pitch_pixels, tiling_flags; + u32 fb_swap = EVERGREEN_GRPH_ENDIAN_SWAP(EVERGREEN_GRPH_ENDIAN_NONE); int r;
/* no fb bound */ @@ -844,11 +845,17 @@ case 16: fb_format = (EVERGREEN_GRPH_DEPTH(EVERGREEN_GRPH_DEPTH_16BPP) | EVERGREEN_GRPH_FORMAT(EVERGREEN_GRPH_FORMAT_ARGB565)); +#ifdef __BIG_ENDIAN + fb_swap = EVERGREEN_GRPH_ENDIAN_SWAP(EVERGREEN_GRPH_ENDIAN_8IN16); +#endif break; case 24: case 32: fb_format = (EVERGREEN_GRPH_DEPTH(EVERGREEN_GRPH_DEPTH_32BPP) | EVERGREEN_GRPH_FORMAT(EVERGREEN_GRPH_FORMAT_ARGB8888)); +#ifdef __BIG_ENDIAN + fb_swap = EVERGREEN_GRPH_ENDIAN_SWAP(EVERGREEN_GRPH_ENDIAN_8IN32); +#endif break; default: DRM_ERROR("Unsupported screen depth %d\n", @@ -888,6 +895,7 @@ WREG32(EVERGREEN_GRPH_SECONDARY_SURFACE_ADDRESS + radeon_crtc->crtc_offset, (u32) fb_location & EVERGREEN_GRPH_SURFACE_ADDRESS_MASK); WREG32(EVERGREEN_GRPH_CONTROL + radeon_crtc->crtc_offset, fb_format); + WREG32(EVERGREEN_GRPH_SWAP_CONTROL + radeon_crtc->crtc_offset, fb_swap);
WREG32(EVERGREEN_GRPH_SURFACE_OFFSET_X + radeon_crtc->crtc_offset, 0); WREG32(EVERGREEN_GRPH_SURFACE_OFFSET_Y + radeon_crtc->crtc_offset, 0); @@ -942,6 +950,7 @@ struct radeon_bo *rbo; uint64_t fb_location; uint32_t fb_format, fb_pitch_pixels, tiling_flags; + u32 fb_swap = R600_D1GRPH_SWAP_ENDIAN_NONE; int r;
/* no fb bound */ @@ -981,12 +990,18 @@ fb_format = AVIVO_D1GRPH_CONTROL_DEPTH_16BPP | AVIVO_D1GRPH_CONTROL_16BPP_RGB565; +#ifdef __BIG_ENDIAN + fb_swap = R600_D1GRPH_SWAP_ENDIAN_16BIT; +#endif break; case 24: case 32: fb_format = AVIVO_D1GRPH_CONTROL_DEPTH_32BPP | AVIVO_D1GRPH_CONTROL_32BPP_ARGB8888; +#ifdef __BIG_ENDIAN + fb_swap = R600_D1GRPH_SWAP_ENDIAN_32BIT; +#endif break; default: DRM_ERROR("Unsupported screen depth %d\n", @@ -1019,6 +1034,8 @@ WREG32(AVIVO_D1GRPH_SECONDARY_SURFACE_ADDRESS + radeon_crtc->crtc_offset, (u32) fb_location); WREG32(AVIVO_D1GRPH_CONTROL + radeon_crtc->crtc_offset, fb_format); + if (rdev->family >= CHIP_R600) + WREG32(R600_D1GRPH_SWAP_CONTROL + radeon_crtc->crtc_offset, fb_swap);
WREG32(AVIVO_D1GRPH_SURFACE_OFFSET_X + radeon_crtc->crtc_offset, 0); WREG32(AVIVO_D1GRPH_SURFACE_OFFSET_Y + radeon_crtc->crtc_offset, 0); diff -Naur linux-2.6.35.6/drivers/gpu/drm/radeon/r600_blit.c linux-2.6.35.6/drivers/gpu/drm/radeon/r600_blit.c --- linux-2.6.35.6/drivers/gpu/drm/radeon/r600_blit.c 2010-09-27 02:19:16.000000000 +0200 +++ linux-2.6.35.6/drivers/gpu/drm/radeon/r600_blit.c 2011-02-10 10:25:32.000000000 +0100 @@ -53,7 +53,9 @@ if (h < 8) h = 8;
- cb_color_info = ((format << 2) | (1 << 27)); + cb_color_info = (0 | + (format << 2) | + (1 << 27)); pitch = (w / 8) - 1; slice = ((w * h) / 64) - 1;
@@ -137,9 +139,9 @@ ps = (u32 *) ((char *)dev->agp_buffer_map->handle + dev_priv->blit_vb->offset + 256);
for (i = 0; i < r6xx_vs_size; i++) - vs[i] = r6xx_vs[i]; + vs[i] = cpu_to_le32(r6xx_vs[i]); for (i = 0; i < r6xx_ps_size; i++) - ps[i] = r6xx_ps[i]; + ps[i] = cpu_to_le32(r6xx_ps[i]);
dev_priv->blit_vb->used = 512;
@@ -191,7 +193,12 @@ RING_LOCALS; DRM_DEBUG("\n");
- sq_vtx_constant_word2 = (((gpu_addr >> 32) & 0xff) | (16 << 8)); + sq_vtx_constant_word2 = (0 | +#ifdef __BIG_ENDIAN + (2 << 30) | +#endif + ((gpu_addr >> 32) & 0xff) | + (16 << 8));
BEGIN_RING(9); OUT_RING(CP_PACKET3(R600_IT_SET_RESOURCE, 7)); @@ -235,7 +242,8 @@ sq_tex_resource_word1 = (format << 26); sq_tex_resource_word1 |= ((h - 1) << 0);
- sq_tex_resource_word4 = ((1 << 14) | + sq_tex_resource_word4 = (0 | + (1 << 14) | (0 << 16) | (1 << 19) | (2 << 22) | @@ -291,7 +299,11 @@ OUT_RING(DI_PT_RECTLIST);
OUT_RING(CP_PACKET3(R600_IT_INDEX_TYPE, 0)); +#ifdef __BIG_ENDIAN + OUT_RING((2 << 2) | DI_INDEX_SIZE_16_BIT); +#else OUT_RING(DI_INDEX_SIZE_16_BIT); +#endif
OUT_RING(CP_PACKET3(R600_IT_NUM_INSTANCES, 0)); OUT_RING(1); diff -Naur linux-2.6.35.6/drivers/gpu/drm/radeon/r600_blit_kms.c linux-2.6.35.6/drivers/gpu/drm/radeon/r600_blit_kms.c --- linux-2.6.35.6/drivers/gpu/drm/radeon/r600_blit_kms.c 2010-09-27 02:19:16.000000000 +0200 +++ linux-2.6.35.6/drivers/gpu/drm/radeon/r600_blit_kms.c 2011-02-10 10:25:43.000000000 +0100 @@ -29,7 +29,9 @@ if (h < 8) h = 8;
- cb_color_info = ((format << 2) | (1 << 27)); + cb_color_info = (0 | + (format << 2) | + (1 << 27)); pitch = (w / 8) - 1; slice = ((w * h) / 64) - 1;
@@ -139,7 +141,12 @@ { u32 sq_vtx_constant_word2;
- sq_vtx_constant_word2 = ((upper_32_bits(gpu_addr) & 0xff) | (16 << 8)); + sq_vtx_constant_word2 = (0 | +#ifdef __BIG_ENDIAN + (2 << 30) | +#endif + (upper_32_bits(gpu_addr) & 0xff) | + (16 << 8));
radeon_ring_write(rdev, PACKET3(PACKET3_SET_RESOURCE, 7)); radeon_ring_write(rdev, 0x460); @@ -181,7 +188,8 @@ sq_tex_resource_word1 = (format << 26); sq_tex_resource_word1 |= ((h - 1) << 0);
- sq_tex_resource_word4 = ((1 << 14) | + sq_tex_resource_word4 = (0 | + (1 << 14) | (0 << 16) | (1 << 19) | (2 << 22) | @@ -228,7 +236,11 @@ radeon_ring_write(rdev, DI_PT_RECTLIST);
radeon_ring_write(rdev, PACKET3(PACKET3_INDEX_TYPE, 0)); +#ifdef __BIG_ENDIAN + radeon_ring_write(rdev, (2 << 2) | DI_INDEX_SIZE_16_BIT); +#else radeon_ring_write(rdev, DI_INDEX_SIZE_16_BIT); +#endif
radeon_ring_write(rdev, PACKET3(PACKET3_NUM_INSTANCES, 0)); radeon_ring_write(rdev, 1); @@ -399,7 +411,11 @@ dwords = ALIGN(rdev->r600_blit.state_len, 0x10); gpu_addr = rdev->r600_blit.shader_gpu_addr + rdev->r600_blit.state_offset; radeon_ring_write(rdev, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); +#ifdef __BIG_ENDIAN + radeon_ring_write(rdev, (gpu_addr & 0xFFFFFFFC) | (2 << 0)); +#else radeon_ring_write(rdev, gpu_addr & 0xFFFFFFFC); +#endif radeon_ring_write(rdev, upper_32_bits(gpu_addr) & 0xFF); radeon_ring_write(rdev, dwords);
@@ -442,7 +458,7 @@ int r600_blit_init(struct radeon_device *rdev) { u32 obj_size; - int r, dwords; + int i, r, dwords; void *ptr; u32 packet2s[16]; int num_packet2s = 0; @@ -460,7 +476,7 @@
dwords = rdev->r600_blit.state_len; while (dwords & 0xf) { - packet2s[num_packet2s++] = PACKET2(0); + packet2s[num_packet2s++] = cpu_to_le32(PACKET2(0)); dwords++; }
@@ -503,8 +519,12 @@ if (num_packet2s) memcpy_toio(ptr + rdev->r600_blit.state_offset + (rdev->r600_blit.state_len * 4), packet2s, num_packet2s * 4); - memcpy(ptr + rdev->r600_blit.vs_offset, r6xx_vs, r6xx_vs_size * 4); - memcpy(ptr + rdev->r600_blit.ps_offset, r6xx_ps, r6xx_ps_size * 4); + for(i = 0; i < r6xx_vs_size; i++) { + *(u32 *)((unsigned long)ptr + rdev->r600_blit.vs_offset + i * 4) = cpu_to_le32(r6xx_vs[i]); + } + for(i = 0; i < r6xx_ps_size; i++) { + *(u32 *)((unsigned long)ptr + rdev->r600_blit.ps_offset + i * 4) = cpu_to_le32(r6xx_ps[i]); + } radeon_bo_kunmap(rdev->r600_blit.shader_obj); radeon_bo_unreserve(rdev->r600_blit.shader_obj); return 0; diff -Naur linux-2.6.35.6/drivers/gpu/drm/radeon/r600_blit_shaders.c linux-2.6.35.6/drivers/gpu/drm/radeon/r600_blit_shaders.c --- linux-2.6.35.6/drivers/gpu/drm/radeon/r600_blit_shaders.c 2010-09-27 02:19:16.000000000 +0200 +++ linux-2.6.35.6/drivers/gpu/drm/radeon/r600_blit_shaders.c 2011-01-27 15:09:59.000000000 +0100 @@ -1075,7 +1075,11 @@ 0x00000000, 0x3c000000, 0x68cd1000, +#ifdef __BIG_ENDIAN + 0x000a0000, +#else 0x00080000, +#endif 0x00000000, };
diff -Naur linux-2.6.35.6/drivers/gpu/drm/radeon/r600.c linux-2.6.35.6/drivers/gpu/drm/radeon/r600.c --- linux-2.6.35.6/drivers/gpu/drm/radeon/r600.c 2010-09-27 02:19:16.000000000 +0200 +++ linux-2.6.35.6/drivers/gpu/drm/radeon/r600.c 2011-02-09 11:31:24.000000000 +0100 @@ -2064,7 +2064,11 @@
r600_cp_stop(rdev);
- WREG32(CP_RB_CNTL, RB_NO_UPDATE | RB_BLKSZ(15) | RB_BUFSZ(3)); + WREG32(CP_RB_CNTL, +#ifdef __BIG_ENDIAN + BUF_SWAP_32BIT | +#endif + RB_NO_UPDATE | RB_BLKSZ(15) | RB_BUFSZ(3));
/* Reset cp */ WREG32(GRBM_SOFT_RESET, SOFT_RESET_CP); @@ -2149,7 +2153,11 @@ WREG32(CP_RB_CNTL, tmp | RB_RPTR_WR_ENA); WREG32(CP_RB_RPTR_WR, 0); WREG32(CP_RB_WPTR, 0); - WREG32(CP_RB_RPTR_ADDR, rdev->cp.gpu_addr & 0xFFFFFFFF); +#ifdef __BIG_ENDIAN + WREG32(CP_RB_RPTR_ADDR, (rdev->cp.gpu_addr & 0xFFFFFFFC) | (2 << 0)); +#else + WREG32(CP_RB_RPTR_ADDR, rdev->cp.gpu_addr & 0xFFFFFFFC); +#endif WREG32(CP_RB_RPTR_ADDR_HI, upper_32_bits(rdev->cp.gpu_addr)); mdelay(1); WREG32(CP_RB_CNTL, tmp); @@ -2306,7 +2314,11 @@ } } WREG32(SCRATCH_ADDR, (rdev->wb.gpu_addr >> 8) & 0xFFFFFFFF); +#ifdef __BIG_ENDIAN + WREG32(CP_RB_RPTR_ADDR, ((rdev->wb.gpu_addr + 1024) & 0xFFFFFFFC) | (2 << 0)); +#else WREG32(CP_RB_RPTR_ADDR, (rdev->wb.gpu_addr + 1024) & 0xFFFFFFFC); +#endif WREG32(CP_RB_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + 1024) & 0xFF); WREG32(SCRATCH_UMSK, 0xff); return 0; @@ -2661,7 +2673,11 @@ { /* FIXME: implement */ radeon_ring_write(rdev, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); +#ifdef __BIG_ENDIAN + radeon_ring_write(rdev, (ib->gpu_addr & 0xFFFFFFFC) | (2 << 0)); +#else radeon_ring_write(rdev, ib->gpu_addr & 0xFFFFFFFC); +#endif radeon_ring_write(rdev, upper_32_bits(ib->gpu_addr) & 0xFF); radeon_ring_write(rdev, ib->length_dw); } @@ -3316,8 +3332,8 @@ while (rptr != wptr) { /* wptr/rptr are in bytes! */ ring_index = rptr / 4; - src_id = rdev->ih.ring[ring_index] & 0xff; - src_data = rdev->ih.ring[ring_index + 1] & 0xfffffff; + src_id = readl(rdev->ih.ring + ring_index) & 0xff; + src_data = readl(rdev->ih.ring + ring_index + 1) & 0xfffffff;
switch (src_id) { case 1: /* D1 vblank/vline */ diff -Naur linux-2.6.35.6/drivers/gpu/drm/radeon/r600_cp.c linux-2.6.35.6/drivers/gpu/drm/radeon/r600_cp.c --- linux-2.6.35.6/drivers/gpu/drm/radeon/r600_cp.c 2010-09-27 02:19:16.000000000 +0200 +++ linux-2.6.35.6/drivers/gpu/drm/radeon/r600_cp.c 2011-01-27 14:29:01.000000000 +0100 @@ -396,6 +396,9 @@ r600_do_cp_stop(dev_priv);
RADEON_WRITE(R600_CP_RB_CNTL, +#ifdef __BIG_ENDIAN + RADEON_BUF_SWAP_32BIT | +#endif R600_RB_NO_UPDATE | R600_RB_BLKSZ(15) | R600_RB_BUFSZ(3)); @@ -486,6 +489,9 @@ r600_do_cp_stop(dev_priv);
RADEON_WRITE(R600_CP_RB_CNTL, +#ifdef __BIG_ENDIAN + RADEON_BUF_SWAP_32BIT | +#endif R600_RB_NO_UPDATE | (15 << 8) | (3 << 0)); @@ -550,7 +556,11 @@
if (!dev_priv->writeback_works) { /* Disable writeback to avoid unnecessary bus master transfer */ - RADEON_WRITE(R600_CP_RB_CNTL, RADEON_READ(R600_CP_RB_CNTL) | + RADEON_WRITE(R600_CP_RB_CNTL, +#ifdef __BIG_ENDIAN + RADEON_BUF_SWAP_32BIT | +#endif + RADEON_READ(R600_CP_RB_CNTL) | RADEON_RB_NO_UPDATE); RADEON_WRITE(R600_SCRATCH_UMSK, 0); } @@ -575,7 +585,11 @@
RADEON_WRITE(R600_CP_RB_WPTR_DELAY, 0); cp_rb_cntl = RADEON_READ(R600_CP_RB_CNTL); - RADEON_WRITE(R600_CP_RB_CNTL, R600_RB_RPTR_WR_ENA); + RADEON_WRITE(R600_CP_RB_CNTL, +#ifdef __BIG_ENDIAN + RADEON_BUF_SWAP_32BIT | +#endif + R600_RB_RPTR_WR_ENA);
RADEON_WRITE(R600_CP_RB_RPTR_WR, cp_ptr); RADEON_WRITE(R600_CP_RB_WPTR, cp_ptr); @@ -1838,7 +1852,10 @@ + dev_priv->gart_vm_start; } RADEON_WRITE(R600_CP_RB_RPTR_ADDR, - rptr_addr & 0xffffffff); +#ifdef __BIG_ENDIAN + (2 << 0) | +#endif + (rptr_addr & 0xfffffffc)); RADEON_WRITE(R600_CP_RB_RPTR_ADDR_HI, upper_32_bits(rptr_addr));
@@ -1889,7 +1906,7 @@ { u64 scratch_addr;
- scratch_addr = RADEON_READ(R600_CP_RB_RPTR_ADDR); + scratch_addr = RADEON_READ(R600_CP_RB_RPTR_ADDR) & 0xFFFFFFFC; scratch_addr |= ((u64)RADEON_READ(R600_CP_RB_RPTR_ADDR_HI)) << 32; scratch_addr += R600_SCRATCH_REG_OFFSET; scratch_addr >>= 8; diff -Naur linux-2.6.35.6/drivers/gpu/drm/radeon/r600_reg.h linux-2.6.35.6/drivers/gpu/drm/radeon/r600_reg.h --- linux-2.6.35.6/drivers/gpu/drm/radeon/r600_reg.h 2010-09-27 02:19:16.000000000 +0200 +++ linux-2.6.35.6/drivers/gpu/drm/radeon/r600_reg.h 2011-01-27 15:05:50.000000000 +0100 @@ -81,7 +81,11 @@ #define R600_MEDIUM_VID_LOWER_GPIO_CNTL 0x720 #define R600_LOW_VID_LOWER_GPIO_CNTL 0x724
- +#define R600_D1GRPH_SWAP_CONTROL 0x610C +# define R600_D1GRPH_SWAP_ENDIAN_NONE (0 << 0) +# define R600_D1GRPH_SWAP_ENDIAN_16BIT (1 << 0) +# define R600_D1GRPH_SWAP_ENDIAN_32BIT (2 << 0) +# define R600_D1GRPH_SWAP_ENDIAN_64BIT (3 << 0)
#define R600_HDP_NONSURFACE_BASE 0x2c04
diff -Naur linux-2.6.35.6/drivers/gpu/drm/radeon/radeon_atombios.c linux-2.6.35.6/drivers/gpu/drm/radeon/radeon_atombios.c --- linux-2.6.35.6/drivers/gpu/drm/radeon/radeon_atombios.c 2010-09-27 02:19:16.000000000 +0200 +++ linux-2.6.35.6/drivers/gpu/drm/radeon/radeon_atombios.c 2011-01-27 16:08:53.000000000 +0100 @@ -147,7 +147,7 @@ pin = &gpio_info->asGPIO_Pin[i]; if (id == pin->ucGPIO_ID) { gpio.id = pin->ucGPIO_ID; - gpio.reg = pin->usGpioPin_AIndex * 4; + gpio.reg = le16_to_cpu(pin->usGpioPin_AIndex) * 4; gpio.mask = (1 << pin->ucGpioPinBitShift); gpio.valid = true; break; @@ -1795,7 +1795,7 @@ firmware_info = (union firmware_info *)(mode_info->atom_context->bios + fw_data_offset); - vddc = firmware_info->info_14.usBootUpVDDCVoltage; + vddc = le16_to_cpu(firmware_info->info_14.usBootUpVDDCVoltage); }
/* add the i2c bus for thermal/fan chip */ @@ -1882,7 +1882,7 @@
rdev->pm.power_state[state_index].clock_info[mode_index].voltage.type = VOLTAGE_SW;
rdev->pm.power_state[state_index].clock_info[mode_index].voltage.voltage = - clock_info->usVDDC; + le16_to_cpu(clock_info->usVDDC); /* XXX usVDDCI */ mode_index++; } else { @@ -1906,7 +1906,7 @@
rdev->pm.power_state[state_index].clock_info[mode_index].voltage.type = VOLTAGE_SW;
rdev->pm.power_state[state_index].clock_info[mode_index].voltage.voltage = - clock_info->usVDDC; + le16_to_cpu(clock_info->usVDDC); mode_index++; } } @@ -2011,7 +2011,7 @@ int index = GetIndexIntoMasterTable(COMMAND, GetEngineClock);
atom_execute_table(rdev->mode_info.atom_context, index, (uint32_t *)&args); - return args.ulReturnEngineClock; + return le32_to_cpu(args.ulReturnEngineClock); }
uint32_t radeon_atom_get_memory_clock(struct radeon_device *rdev) @@ -2020,7 +2020,7 @@ int index = GetIndexIntoMasterTable(COMMAND, GetMemoryClock);
atom_execute_table(rdev->mode_info.atom_context, index, (uint32_t *)&args); - return args.ulReturnMemoryClock; + return le32_to_cpu(args.ulReturnMemoryClock); }
void radeon_atom_set_engine_clock(struct radeon_device *rdev, @@ -2029,7 +2029,7 @@ SET_ENGINE_CLOCK_PS_ALLOCATION args; int index = GetIndexIntoMasterTable(COMMAND, SetEngineClock);
- args.ulTargetEngineClock = eng_clock; /* 10 khz */ + args.ulTargetEngineClock = cpu_to_le32(eng_clock); /* 10 khz */
atom_execute_table(rdev->mode_info.atom_context, index, (uint32_t *)&args); } @@ -2043,7 +2043,7 @@ if (rdev->flags & RADEON_IS_IGP) return;
- args.ulTargetMemoryClock = mem_clock; /* 10 khz */ + args.ulTargetMemoryClock = cpu_to_le32(mem_clock); /* 10 khz */
atom_execute_table(rdev->mode_info.atom_context, index, (uint32_t *)&args); } diff -Naur linux-2.6.35.6/drivers/gpu/drm/radeon/radeon_cp.c linux-2.6.35.6/drivers/gpu/drm/radeon/radeon_cp.c --- linux-2.6.35.6/drivers/gpu/drm/radeon/radeon_cp.c 2010-09-27 02:19:16.000000000 +0200 +++ linux-2.6.35.6/drivers/gpu/drm/radeon/radeon_cp.c 2011-01-27 14:20:06.000000000 +0100 @@ -911,8 +911,11 @@
if (!dev_priv->writeback_works) { /* Disable writeback to avoid unnecessary bus master transfer */ - RADEON_WRITE(RADEON_CP_RB_CNTL, RADEON_READ(RADEON_CP_RB_CNTL) | - RADEON_RB_NO_UPDATE); + RADEON_WRITE(RADEON_CP_RB_CNTL, +#ifdef __BIG_ENDIAN + RADEON_BUF_SWAP_32BIT | +#endif + RADEON_READ(RADEON_CP_RB_CNTL) | RADEON_RB_NO_UPDATE); RADEON_WRITE(RADEON_SCRATCH_UMSK, 0); } } diff -Naur linux-2.6.35.6/drivers/gpu/drm/radeon/rv770.c linux-2.6.35.6/drivers/gpu/drm/radeon/rv770.c --- linux-2.6.35.6/drivers/gpu/drm/radeon/rv770.c 2010-09-27 02:19:16.000000000 +0200 +++ linux-2.6.35.6/drivers/gpu/drm/radeon/rv770.c 2011-01-27 14:52:04.000000000 +0100 @@ -264,7 +264,11 @@ return -EINVAL;
r700_cp_stop(rdev); - WREG32(CP_RB_CNTL, RB_NO_UPDATE | (15 << 8) | (3 << 0)); + WREG32(CP_RB_CNTL, +#ifdef __BIG_ENDIAN + BUF_SWAP_32BIT | +#endif + RB_NO_UPDATE | (15 << 8) | (3 << 0));
/* Reset cp */ WREG32(GRBM_SOFT_RESET, SOFT_RESET_CP); @@ -1114,6 +1118,8 @@ * should also allow to remove a bunch of callback function * like vram_info. */ +extern int r600_debugfs_mc_info_init(struct radeon_device *rdev); + int rv770_init(struct radeon_device *rdev) { int r; @@ -1121,6 +1127,9 @@ r = radeon_dummy_page_init(rdev); if (r) return r; + if (r600_debugfs_mc_info_init(rdev)) { + DRM_ERROR("Failed to register debugfs file for mc !\n"); + } /* This don't do much */ r = radeon_gem_init(rdev); if (r)
Signed-off-by: Cedric Cano <ccano at interfaceconcept.com> --- diff -Naur xf86-video-ati-6.13.2/src/drmmode_display.c xf86-video-ati-6.13.2/src/drmmode_display.c --- xf86-video-ati-6.13.2/src/drmmode_display.c 2010-09-28 00:20:53.000000000 +0200 +++ xf86-video-ati-6.13.2/src/drmmode_display.c 2011-02-10 14:27:56.000000000 +0100 @@ -385,12 +385,15 @@ drmmode_load_cursor_argb (xf86CrtcPtr crtc, CARD32 *image) { drmmode_crtc_private_ptr drmmode_crtc = crtc->driver_private; - void *ptr; + int i; + uint32_t *ptr;
/* cursor should be mapped already */ - ptr = drmmode_crtc->cursor_bo->ptr; + ptr = (uint32_t *)(drmmode_crtc->cursor_bo->ptr);
- memcpy (ptr, image, 64 * 64 * 4); + for(i = 0; i < 64 * 64; i++) { + ptr[i] = cpu_to_le32(image[i]); + }
return; } diff -Naur xf86-video-ati-6.13.2/src/r600_exa.c xf86-video-ati-6.13.2/src/r600_exa.c --- xf86-video-ati-6.13.2/src/r600_exa.c 2010-09-28 00:20:53.000000000 +0200 +++ xf86-video-ati-6.13.2/src/r600_exa.c 2011-02-10 14:29:03.000000000 +0100 @@ -247,9 +247,15 @@ } else if (accel_state->dst_obj.bpp == 16) { cb_conf.format = COLOR_5_6_5; cb_conf.comp_swap = 2; /* RGB */ +#if X_BYTE_ORDER == X_BIG_ENDIAN + cb_conf.endian = ENDIAN_8IN16; +#endif } else { cb_conf.format = COLOR_8_8_8_8; cb_conf.comp_swap = 1; /* ARGB */ +#if X_BYTE_ORDER == X_BIG_ENDIAN + cb_conf.endian = ENDIAN_8IN32; +#endif } cb_conf.source_format = 1; cb_conf.blend_clamp = 1; @@ -942,7 +948,18 @@ tex_res.bo = accel_state->src_obj[unit].bo; tex_res.mip_bo = accel_state->src_obj[unit].bo; tex_res.request_size = 1; - +#if X_BYTE_ORDER == X_BIG_ENDIAN + switch(accel_state->src_obj[unit].bpp) { + case 16: + tex_res.endian = SQ_ENDIAN_8IN16; + break; + case 32: + tex_res.endian = SQ_ENDIAN_8IN32; + break; + default : + break; + } +#endif /* component swizzles */ switch (pPict->format) { case PICT_a1r5g5b5: @@ -1405,6 +1422,18 @@ } cb_conf.source_format = 1; cb_conf.blend_clamp = 1; +#if X_BYTE_ORDER == X_BIG_ENDIAN + switch(dst_obj.bpp) { + case 16: + cb_conf.endian = ENDIAN_8IN16; + break; + case 32: + cb_conf.endian = ENDIAN_8IN32; + break; + default: + break; + } +#endif set_render_target(pScrn, accel_state->ib, &cb_conf, accel_state->dst_obj.domain);
BEGIN_BATCH(24); @@ -2116,6 +2145,15 @@ accel_state->xv_ps_offset = 3584; R600_xv_ps(ChipSet, shader + accel_state->xv_ps_offset / 4);
+#if X_BYTE_ORDER == X_BIG_ENDIAN + { + int i; + for(i = 0; i < (4096 / 4); i++) { + shader[i] = cpu_to_le32(shader[i]); + } + } +#endif + #ifdef XF86DRM_MODE #if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 4) if (info->cs) { diff -Naur xf86-video-ati-6.13.2/src/r600_shader.c xf86-video-ati-6.13.2/src/r600_shader.c --- xf86-video-ati-6.13.2/src/r600_shader.c 2010-09-28 00:20:53.000000000 +0200 +++ xf86-video-ati-6.13.2/src/r600_shader.c 2011-02-10 14:30:10.000000000 +0100 @@ -111,7 +111,11 @@ FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); shader[i++] = VTX_DWORD2(OFFSET(0), - ENDIAN_SWAP(ENDIAN_NONE), +#if X_BYTE_ORDER == X_BIG_ENDIAN + ENDIAN_SWAP(SQ_ENDIAN_8IN32), +#else + ENDIAN_SWAP(SQ_ENDIAN_NONE), +#endif CONST_BUF_NO_STRIDE(0), MEGA_FETCH(1)); shader[i++] = VTX_DWORD_PAD; @@ -341,7 +345,11 @@ FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); shader[i++] = VTX_DWORD2(OFFSET(0), - ENDIAN_SWAP(ENDIAN_NONE), +#if X_BYTE_ORDER == X_BIG_ENDIAN + ENDIAN_SWAP(SQ_ENDIAN_8IN32), +#else + ENDIAN_SWAP(SQ_ENDIAN_NONE), +#endif CONST_BUF_NO_STRIDE(0), MEGA_FETCH(1)); shader[i++] = VTX_DWORD_PAD; @@ -366,7 +374,11 @@ FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); shader[i++] = VTX_DWORD2(OFFSET(8), - ENDIAN_SWAP(ENDIAN_NONE), +#if X_BYTE_ORDER == X_BIG_ENDIAN + ENDIAN_SWAP(SQ_ENDIAN_8IN32), +#else + ENDIAN_SWAP(SQ_ENDIAN_NONE), +#endif CONST_BUF_NO_STRIDE(0), MEGA_FETCH(0)); shader[i++] = VTX_DWORD_PAD; @@ -596,7 +608,11 @@ FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); shader[i++] = VTX_DWORD2(OFFSET(0), - ENDIAN_SWAP(ENDIAN_NONE), +#if X_BYTE_ORDER == X_BIG_ENDIAN + ENDIAN_SWAP(SQ_ENDIAN_8IN32), +#else + ENDIAN_SWAP(SQ_ENDIAN_NONE), +#endif CONST_BUF_NO_STRIDE(0), MEGA_FETCH(1)); shader[i++] = VTX_DWORD_PAD; @@ -621,7 +637,11 @@ FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); shader[i++] = VTX_DWORD2(OFFSET(8), - ENDIAN_SWAP(ENDIAN_NONE), +#if X_BYTE_ORDER == X_BIG_ENDIAN + ENDIAN_SWAP(SQ_ENDIAN_8IN32), +#else + ENDIAN_SWAP(SQ_ENDIAN_NONE), +#endif CONST_BUF_NO_STRIDE(0), MEGA_FETCH(0)); shader[i++] = VTX_DWORD_PAD; @@ -1813,7 +1833,11 @@ FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); shader[i++] = VTX_DWORD2(OFFSET(0), - ENDIAN_SWAP(ENDIAN_NONE), +#if X_BYTE_ORDER == X_BIG_ENDIAN + ENDIAN_SWAP(SQ_ENDIAN_8IN32), +#else + ENDIAN_SWAP(SQ_ENDIAN_NONE), +#endif CONST_BUF_NO_STRIDE(0), MEGA_FETCH(1)); shader[i++] = VTX_DWORD_PAD; @@ -1838,7 +1862,11 @@ FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); shader[i++] = VTX_DWORD2(OFFSET(8), - ENDIAN_SWAP(ENDIAN_NONE), +#if X_BYTE_ORDER == X_BIG_ENDIAN + ENDIAN_SWAP(SQ_ENDIAN_8IN32), +#else + ENDIAN_SWAP(SQ_ENDIAN_NONE), +#endif CONST_BUF_NO_STRIDE(0), MEGA_FETCH(0)); shader[i++] = VTX_DWORD_PAD; @@ -1863,7 +1891,11 @@ FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); shader[i++] = VTX_DWORD2(OFFSET(16), - ENDIAN_SWAP(ENDIAN_NONE), +#if X_BYTE_ORDER == X_BIG_ENDIAN + ENDIAN_SWAP(SQ_ENDIAN_8IN32), +#else + ENDIAN_SWAP(SQ_ENDIAN_NONE), +#endif CONST_BUF_NO_STRIDE(0), MEGA_FETCH(0)); shader[i++] = VTX_DWORD_PAD; @@ -1889,7 +1921,11 @@ FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); shader[i++] = VTX_DWORD2(OFFSET(0), - ENDIAN_SWAP(ENDIAN_NONE), +#if X_BYTE_ORDER == X_BIG_ENDIAN + ENDIAN_SWAP(SQ_ENDIAN_8IN32), +#else + ENDIAN_SWAP(SQ_ENDIAN_NONE), +#endif CONST_BUF_NO_STRIDE(0), MEGA_FETCH(1)); shader[i++] = VTX_DWORD_PAD; @@ -1914,7 +1950,11 @@ FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE)); shader[i++] = VTX_DWORD2(OFFSET(8), - ENDIAN_SWAP(ENDIAN_NONE), +#if X_BYTE_ORDER == X_BIG_ENDIAN + ENDIAN_SWAP(SQ_ENDIAN_8IN32), +#else + ENDIAN_SWAP(SQ_ENDIAN_NONE), +#endif 
CONST_BUF_NO_STRIDE(0), MEGA_FETCH(0)); shader[i++] = VTX_DWORD_PAD; diff -Naur xf86-video-ati-6.13.2/src/r600_textured_videofuncs.c xf86-video-ati-6.13.2/src/r600_textured_videofuncs.c --- xf86-video-ati-6.13.2/src/r600_textured_videofuncs.c 2010-09-28 00:20:53.000000000 +0200 +++ xf86-video-ati-6.13.2/src/r600_textured_videofuncs.c 2011-02-10 14:30:25.000000000 +0100 @@ -429,10 +429,16 @@ cb_conf.format = COLOR_5_6_5; cb_conf.comp_swap = 2; /* RGB */ } +#if X_BYTE_ORDER == X_BIG_ENDIAN + cb_conf.endian = ENDIAN_8IN16; +#endif break; case 32: cb_conf.format = COLOR_8_8_8_8; cb_conf.comp_swap = 1; /* ARGB */ +#if X_BYTE_ORDER == X_BIG_ENDIAN + cb_conf.endian = ENDIAN_8IN32; +#endif break; default: return; diff -Naur xf86-video-ati-6.13.2/src/r6xx_accel.c xf86-video-ati-6.13.2/src/r6xx_accel.c --- xf86-video-ati-6.13.2/src/r6xx_accel.c 2010-09-28 00:20:53.000000000 +0200 +++ xf86-video-ati-6.13.2/src/r6xx_accel.c 2011-02-08 11:46:29.000000000 +0100 @@ -1117,7 +1117,11 @@ BEGIN_BATCH(8 + count); EREG(ib, VGT_PRIMITIVE_TYPE, draw_conf->prim_type); PACK3(ib, IT_INDEX_TYPE, 1); +#if X_BYTE_ORDER == X_BIG_ENDIAN + E32(ib, (2 << 2) | draw_conf->index_type); +#else E32(ib, draw_conf->index_type); +#endif PACK3(ib, IT_NUM_INSTANCES, 1); E32(ib, draw_conf->num_instances);
@@ -1147,7 +1151,11 @@ BEGIN_BATCH(10); EREG(ib, VGT_PRIMITIVE_TYPE, draw_conf->prim_type); PACK3(ib, IT_INDEX_TYPE, 1); +#if X_BYTE_ORDER == X_BIG_ENDIAN + E32(ib, (2 << 2) | draw_conf->index_type); +#else E32(ib, draw_conf->index_type); +#endif PACK3(ib, IT_NUM_INSTANCES, 1); E32(ib, draw_conf->num_instances); PACK3(ib, IT_DRAW_INDEX_AUTO, 2); @@ -1183,6 +1191,9 @@ vtx_res.mem_req_size = 1; vtx_res.vb_addr = accel_state->vb_mc_addr + accel_state->vb_start_op; vtx_res.bo = accel_state->vb_bo; +#if X_BYTE_ORDER == X_BIG_ENDIAN + vtx_res.endian = SQ_ENDIAN_8IN32; +#endif set_vtx_resource (pScrn, accel_state->ib, &vtx_res, RADEON_GEM_DOMAIN_GTT);
/* Draw */ diff -Naur xf86-video-ati-6.13.2/src/radeon_atombios.c xf86-video-ati-6.13.2/src/radeon_atombios.c --- xf86-video-ati-6.13.2/src/radeon_atombios.c 2010-09-28 00:20:53.000000000 +0200 +++ xf86-video-ati-6.13.2/src/radeon_atombios.c 2011-02-08 11:47:15.000000000 +0100 @@ -782,15 +782,15 @@
mode->CrtcHDisplay = mode->HDisplay = le16_to_cpu(dtd->usHActive); mode->CrtcVDisplay = mode->VDisplay = le16_to_cpu(dtd->usVActive); - mode->CrtcHBlankStart = dtd->usHActive + dtd->ucHBorder; + mode->CrtcHBlankStart = le16_to_cpu(dtd->usHActive) + dtd->ucHBorder; mode->CrtcHBlankEnd = mode->CrtcHBlankStart + le16_to_cpu(dtd->usHBlanking_Time); mode->CrtcHTotal = mode->HTotal = mode->CrtcHBlankEnd + dtd->ucHBorder; - mode->CrtcVBlankStart = dtd->usVActive + dtd->ucVBorder; + mode->CrtcVBlankStart = le16_to_cpu(dtd->usVActive) + dtd->ucVBorder; mode->CrtcVBlankEnd = mode->CrtcVBlankStart + le16_to_cpu(dtd->usVBlanking_Time); mode->CrtcVTotal = mode->VTotal = mode->CrtcVBlankEnd + dtd->ucVBorder; mode->CrtcHSyncStart = mode->HSyncStart = dtd->usHActive + le16_to_cpu(dtd->usHSyncOffset); mode->CrtcHSyncEnd = mode->HSyncEnd = mode->HSyncStart + le16_to_cpu(dtd->usHSyncWidth); - mode->CrtcVSyncStart = mode->VSyncStart = dtd->usVActive + le16_to_cpu(dtd->usVSyncOffset); + mode->CrtcVSyncStart = mode->VSyncStart = le16_to_cpu(dtd->usVActive) + le16_to_cpu(dtd->usVSyncOffset); mode->CrtcVSyncEnd = mode->VSyncEnd = mode->VSyncStart + le16_to_cpu(dtd->usVSyncWidth);
mode->SynthClock = mode->Clock = le16_to_cpu(dtd->usPixClk) * 10; @@ -1541,7 +1541,7 @@
if (IS_DCE4_VARIANT) { if ((i == 7) && - (gpio->usClkMaskRegisterIndex == 0x1936) && + (le16_to_cpu(gpio->usClkMaskRegisterIndex) == 0x1936) && (gpio->sucI2cId.ucAccess == 0)) { gpio->sucI2cId.ucAccess = 0x97; gpio->ucDataMaskShift = 8; @@ -1646,7 +1646,7 @@ for (i = 0; i < num_indices; i++) { pin = &gpio_info->asGPIO_Pin[i]; if (record->ucHPDIntGPIOID == pin->ucGPIO_ID) { - if ((pin->usGpioPin_AIndex * 4) == reg) { + if ((le16_to_cpu(pin->usGpioPin_AIndex) * 4) == reg) { switch (pin->ucGpioPinBitShift) { case 0: default:
On Fri, Feb 11, 2011 at 4:52 AM, Cédric Cano ccano@interfaceconcept.com wrote:
Hi
Here are a couple of patches that enable support for RV730 on big endian architectures. The following items will work :
- KMS
- correct color in framebuffer - interrupts - kms blit - power management
- DDX
- kms enabled - exa - video textures - hardware cursor
For the BE modifications, the AMD Linux Engineering team advised me to focus on :
- make sure all the atombios data table accesses are endian safe
- set the display controller swappers properly (http://lists.freedesktop.org/archives/dri-devel/2011-January/007486.html)
- make sure the appropriate endian swapper is enabled in the texture and vertex fetch constants (see SQ_TEX_RESOURCE_WORD* and SQ_VTX_RESOURCE_WORD* regs)
- make sure the shaders are stored in LE order
- make sure shader constant buffers are stored in LE order
- endian swap IH (interrupt handler) packets
- endian swap the CP command buffers
- endian swap WB (write back) buffers
That was me :)
That's what I tried to do and so far, I have KMS and DDX (with EXA) drivers working. Drivers have been tested on a MPC8640 custom design, with a custom graphics board based on E4690 (RV730). I'm using a custom linux distribution with Xorg 7.6 (xf86-video-ati 6.13.2). The latest kernel available for my board is 2.6.35.6 so I haven't tested with upstream kernel.
I'll review and commit your patches this weekend. I'll probably break them up into smaller commits if that's ok with you.
I'd also like to test more the ddx driver in order to check the EXA functions do a correct rendering. Is there a driver framework testing, or some kind of unit test you, ddx driver writers, are using ?
cairo has some tests. Also, rendercheck.
Alex
Regards, Cedric Cano
dri-devel mailing list dri-devel@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/dri-devel
dri-devel@lists.freedesktop.org