On Thu, Apr 10, 2014 at 9:08 AM, Lauri Kasanen cand@gmx.com wrote:
This was originally un-inlined by Andi Kleen in 2011 citing size concerns. Indeed, inlining it grows radeon.ko by 7%.
However, 2% of cpu is spent in this function. Inlining it gives 1% more fps in Urban Terror.
Signed-off-by: Lauri Kasanen cand@gmx.com
drivers/gpu/drm/radeon/r100.c | 18 ------------------ drivers/gpu/drm/radeon/radeon.h | 20 ++++++++++++++++++-- 2 files changed, 18 insertions(+), 20 deletions(-)
diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c index b6c3264..8169e82 100644 --- a/drivers/gpu/drm/radeon/r100.c +++ b/drivers/gpu/drm/radeon/r100.c @@ -4086,24 +4086,6 @@ int r100_init(struct radeon_device *rdev) return 0; }
-uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg,
bool always_indirect)
-{
if (reg < rdev->rmmio_size && !always_indirect)
return readl(((void __iomem *)rdev->rmmio) + reg);
else {
unsigned long flags;
uint32_t ret;
spin_lock_irqsave(&rdev->mmio_idx_lock, flags);
writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX);
ret = readl(((void __iomem *)rdev->rmmio) + RADEON_MM_DATA);
spin_unlock_irqrestore(&rdev->mmio_idx_lock, flags);
return ret;
}
-}
void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v, bool always_indirect) { diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 5cf10a7..9231100 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -2330,8 +2330,24 @@ int radeon_device_init(struct radeon_device *rdev, void radeon_device_fini(struct radeon_device *rdev); int radeon_gpu_wait_for_idle(struct radeon_device *rdev);
-uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg,
bool always_indirect);
+static inline uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg,
bool always_indirect)
+{
if (reg < rdev->rmmio_size && !always_indirect)
return readl(((void __iomem *)rdev->rmmio) + reg);
Quick thought from someone entirely unfamiliar with the hardware: perhaps you can get the performance benefit without the size increase by moving the else portion into a non-inline function? I'm guessing that most accesses happen in the "if" branch.
else {
unsigned long flags;
uint32_t ret;
spin_lock_irqsave(&rdev->mmio_idx_lock, flags);
writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX);
ret = readl(((void __iomem *)rdev->rmmio) + RADEON_MM_DATA);
spin_unlock_irqrestore(&rdev->mmio_idx_lock, flags);
return ret;
}
+}
void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v, bool always_indirect); u32 r100_io_rreg(struct radeon_device *rdev, u32 reg); -- 1.8.3.1
dri-devel mailing list dri-devel@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/dri-devel