From: Andi Kleen ak@linux.intel.com
Fixes
evergreen_cs_parse 4080 23124 +19044
and others compared to a non force inline kernel.
Cc: David Airlie airlied@linux.ie
Signed-off-by: Andi Kleen ak@linux.intel.com --- drivers/gpu/drm/radeon/evergreen_cs.c | 8 ++++---- drivers/gpu/drm/radeon/r600_cs.c | 24 ++++++++++++------------ 2 files changed, 16 insertions(+), 16 deletions(-)
diff --git a/drivers/gpu/drm/radeon/evergreen_cs.c b/drivers/gpu/drm/radeon/evergreen_cs.c index a134790..35dce99 100644 --- a/drivers/gpu/drm/radeon/evergreen_cs.c +++ b/drivers/gpu/drm/radeon/evergreen_cs.c @@ -122,7 +122,7 @@ static void evergreen_cs_track_init(struct evergreen_cs_track *track) track->db_s_write_bo = NULL; }
-static inline int evergreen_cs_track_validate_cb(struct radeon_cs_parser *p, int i) +static int evergreen_cs_track_validate_cb(struct radeon_cs_parser *p, int i) { /* XXX fill in */ return 0; @@ -242,7 +242,7 @@ static int evergreen_cs_packet_next_reloc(struct radeon_cs_parser *p, * Check next packet is relocation packet3, do bo validation and compute * GPU offset using the provided start. **/ -static inline int evergreen_cs_packet_next_is_pkt3_nop(struct radeon_cs_parser *p) +static int evergreen_cs_packet_next_is_pkt3_nop(struct radeon_cs_parser *p) { struct radeon_cs_packet p3reloc; int r; @@ -414,7 +414,7 @@ static int evergreen_cs_parse_packet0(struct radeon_cs_parser *p, * if register is safe. If register is not flag as safe this function * will test it against a list of register needind special handling. */ -static inline int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) +static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) { struct evergreen_cs_track *track = (struct evergreen_cs_track *)p->track; struct radeon_cs_reloc *reloc; @@ -990,7 +990,7 @@ static inline int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u3 * This function will check that the resource has valid field and that * the texture and mipmap bo object are big enough to cover this resource. */ -static inline int evergreen_check_texture_resource(struct radeon_cs_parser *p, u32 idx, +static int evergreen_check_texture_resource(struct radeon_cs_parser *p, u32 idx, struct radeon_bo *texture, struct radeon_bo *mipmap) { diff --git a/drivers/gpu/drm/radeon/r600_cs.c b/drivers/gpu/drm/radeon/r600_cs.c index cf83aa0..7339c0b 100644 --- a/drivers/gpu/drm/radeon/r600_cs.c +++ b/drivers/gpu/drm/radeon/r600_cs.c @@ -162,7 +162,7 @@ static const struct gpu_formats color_formats_table[] = { [V_038004_FMT_32_AS_32_32_32_32] = { 1, 1, 4, 0, CHIP_CEDAR}, };
-static inline bool fmt_is_valid_color(u32 format) +static bool fmt_is_valid_color(u32 format) { if (format >= ARRAY_SIZE(color_formats_table)) return false; @@ -173,7 +173,7 @@ static inline bool fmt_is_valid_color(u32 format) return false; }
-static inline bool fmt_is_valid_texture(u32 format, enum radeon_family family) +static bool fmt_is_valid_texture(u32 format, enum radeon_family family) { if (format >= ARRAY_SIZE(color_formats_table)) return false; @@ -187,7 +187,7 @@ static inline bool fmt_is_valid_texture(u32 format, enum radeon_family family) return false; }
-static inline int fmt_get_blocksize(u32 format) +static int fmt_get_blocksize(u32 format) { if (format >= ARRAY_SIZE(color_formats_table)) return 0; @@ -195,7 +195,7 @@ static inline int fmt_get_blocksize(u32 format) return color_formats_table[format].blocksize; }
-static inline int fmt_get_nblocksx(u32 format, u32 w) +static int fmt_get_nblocksx(u32 format, u32 w) { unsigned bw;
@@ -209,7 +209,7 @@ static inline int fmt_get_nblocksx(u32 format, u32 w) return (w + bw - 1) / bw; }
-static inline int fmt_get_nblocksy(u32 format, u32 h) +static int fmt_get_nblocksy(u32 format, u32 h) { unsigned bh;
@@ -223,7 +223,7 @@ static inline int fmt_get_nblocksy(u32 format, u32 h) return (h + bh - 1) / bh; }
-static inline int r600_bpe_from_format(u32 *bpe, u32 format) +static int r600_bpe_from_format(u32 *bpe, u32 format) { unsigned res;
@@ -252,7 +252,7 @@ struct array_mode_checker { };
/* returns alignment in pixels for pitch/height/depth and bytes for base */ -static inline int r600_get_array_mode_alignment(struct array_mode_checker *values, +static int r600_get_array_mode_alignment(struct array_mode_checker *values, u32 *pitch_align, u32 *height_align, u32 *depth_align, @@ -331,7 +331,7 @@ static void r600_cs_track_init(struct r600_cs_track *track) track->db_depth_control = 0xFFFFFFFF; }
-static inline int r600_cs_track_validate_cb(struct radeon_cs_parser *p, int i) +static int r600_cs_track_validate_cb(struct radeon_cs_parser *p, int i) { struct r600_cs_track *track = p->track; u32 slice_tile_max, size, tmp; @@ -737,7 +737,7 @@ static int r600_cs_packet_next_reloc_nomm(struct radeon_cs_parser *p, * Check next packet is relocation packet3, do bo validation and compute * GPU offset using the provided start. **/ -static inline int r600_cs_packet_next_is_pkt3_nop(struct radeon_cs_parser *p) +static int r600_cs_packet_next_is_pkt3_nop(struct radeon_cs_parser *p) { struct radeon_cs_packet p3reloc; int r; @@ -911,7 +911,7 @@ static int r600_cs_parse_packet0(struct radeon_cs_parser *p, * if register is safe. If register is not flag as safe this function * will test it against a list of register needind special handling. */ -static inline int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) +static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) { struct r600_cs_track *track = (struct r600_cs_track *)p->track; struct radeon_cs_reloc *reloc; @@ -1215,7 +1215,7 @@ static inline int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx return 0; }
-static inline unsigned mip_minify(unsigned size, unsigned level) +static unsigned mip_minify(unsigned size, unsigned level) { unsigned val;
@@ -1285,7 +1285,7 @@ static void r600_texture_size(unsigned nfaces, unsigned blevel, unsigned llevel, * This function will check that the resource has valid field and that * the texture and mipmap bo object are big enough to cover this resource. */ -static inline int r600_check_texture_resource(struct radeon_cs_parser *p, u32 idx, +static int r600_check_texture_resource(struct radeon_cs_parser *p, u32 idx, struct radeon_bo *texture, struct radeon_bo *mipmap, u64 base_offset,
From: Andi Kleen ak@linux.intel.com
This shrinks the sizes of a lot of functions in the radeon driver dramatically.
With a non force inline + -Os kernel this is default anyways.
Cc: David Airlie airlied@linux.ie
Signed-off-by: Andi Kleen ak@linux.intel.com --- drivers/gpu/drm/radeon/r100.c | 40 ++++++++++++++++++++++++++++++++++++ drivers/gpu/drm/radeon/radeon.h | 43 +++----------------------------------- 2 files changed, 44 insertions(+), 39 deletions(-)
diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c index 7fcdbbb..9a1efac 100644 --- a/drivers/gpu/drm/radeon/r100.c +++ b/drivers/gpu/drm/radeon/r100.c @@ -3965,3 +3965,43 @@ int r100_init(struct radeon_device *rdev) } return 0; } + +uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg) +{ + if (reg < rdev->rmmio_size) + return readl(((void __iomem *)rdev->rmmio) + reg); + else { + writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX); + return readl(((void __iomem *)rdev->rmmio) + RADEON_MM_DATA); + } +} + +void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v) +{ + if (reg < rdev->rmmio_size) + writel(v, ((void __iomem *)rdev->rmmio) + reg); + else { + writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX); + writel(v, ((void __iomem *)rdev->rmmio) + RADEON_MM_DATA); + } +} + +u32 r100_io_rreg(struct radeon_device *rdev, u32 reg) +{ + if (reg < rdev->rio_mem_size) + return ioread32(rdev->rio_mem + reg); + else { + iowrite32(reg, rdev->rio_mem + RADEON_MM_INDEX); + return ioread32(rdev->rio_mem + RADEON_MM_DATA); + } +} + +void r100_io_wreg(struct radeon_device *rdev, u32 reg, u32 v) +{ + if (reg < rdev->rio_mem_size) + iowrite32(v, rdev->rio_mem + reg); + else { + iowrite32(reg, rdev->rio_mem + RADEON_MM_INDEX); + iowrite32(v, rdev->rio_mem + RADEON_MM_DATA); + } +} diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index c1e056b..8ac6cba 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -1252,45 +1252,10 @@ int radeon_device_init(struct radeon_device *rdev, void radeon_device_fini(struct radeon_device *rdev); int radeon_gpu_wait_for_idle(struct radeon_device *rdev);
-static inline uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg) -{ - if (reg < rdev->rmmio_size) - return readl((rdev->rmmio) + reg); - else { - writel(reg, (rdev->rmmio) + RADEON_MM_INDEX); - return readl((rdev->rmmio) + RADEON_MM_DATA); - } -} - -static inline void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v) -{ - if (reg < rdev->rmmio_size) - writel(v, (rdev->rmmio) + reg); - else { - writel(reg, (rdev->rmmio) + RADEON_MM_INDEX); - writel(v, (rdev->rmmio) + RADEON_MM_DATA); - } -} - -static inline u32 r100_io_rreg(struct radeon_device *rdev, u32 reg) -{ - if (reg < rdev->rio_mem_size) - return ioread32(rdev->rio_mem + reg); - else { - iowrite32(reg, rdev->rio_mem + RADEON_MM_INDEX); - return ioread32(rdev->rio_mem + RADEON_MM_DATA); - } -} - -static inline void r100_io_wreg(struct radeon_device *rdev, u32 reg, u32 v) -{ - if (reg < rdev->rio_mem_size) - iowrite32(v, rdev->rio_mem + reg); - else { - iowrite32(reg, rdev->rio_mem + RADEON_MM_INDEX); - iowrite32(v, rdev->rio_mem + RADEON_MM_DATA); - } -} +uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg); +void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v); +u32 r100_io_rreg(struct radeon_device *rdev, u32 reg); +void r100_io_wreg(struct radeon_device *rdev, u32 reg, u32 v);
/* * Cast helper
From: Andi Kleen ak@linux.intel.com
Cc: David Airlie airlied@linux.ie
Signed-off-by: Andi Kleen ak@linux.intel.com --- drivers/gpu/drm/radeon/r600_blit.c | 24 ++++++++++++------------ 1 files changed, 12 insertions(+), 12 deletions(-)
diff --git a/drivers/gpu/drm/radeon/r600_blit.c b/drivers/gpu/drm/radeon/r600_blit.c index 7f10434..3c031a4 100644 --- a/drivers/gpu/drm/radeon/r600_blit.c +++ b/drivers/gpu/drm/radeon/r600_blit.c @@ -41,7 +41,7 @@ #define COLOR_5_6_5 0x8 #define COLOR_8_8_8_8 0x1a
-static inline void +static void set_render_target(drm_radeon_private_t *dev_priv, int format, int w, int h, u64 gpu_addr) { u32 cb_color_info; @@ -99,7 +99,7 @@ set_render_target(drm_radeon_private_t *dev_priv, int format, int w, int h, u64 ADVANCE_RING(); }
-static inline void +static void cp_set_surface_sync(drm_radeon_private_t *dev_priv, u32 sync_type, u32 size, u64 mc_addr) { @@ -121,7 +121,7 @@ cp_set_surface_sync(drm_radeon_private_t *dev_priv, ADVANCE_RING(); }
-static inline void +static void set_shaders(struct drm_device *dev) { drm_radeon_private_t *dev_priv = dev->dev_private; @@ -184,7 +184,7 @@ set_shaders(struct drm_device *dev) R600_SH_ACTION_ENA, 512, gpu_addr); }
-static inline void +static void set_vtx_resource(drm_radeon_private_t *dev_priv, u64 gpu_addr) { uint32_t sq_vtx_constant_word2; @@ -220,7 +220,7 @@ set_vtx_resource(drm_radeon_private_t *dev_priv, u64 gpu_addr) R600_VC_ACTION_ENA, 48, gpu_addr); }
-static inline void +static void set_tex_resource(drm_radeon_private_t *dev_priv, int format, int w, int h, int pitch, u64 gpu_addr) { @@ -258,7 +258,7 @@ set_tex_resource(drm_radeon_private_t *dev_priv,
}
-static inline void +static void set_scissors(drm_radeon_private_t *dev_priv, int x1, int y1, int x2, int y2) { RING_LOCALS; @@ -282,7 +282,7 @@ set_scissors(drm_radeon_private_t *dev_priv, int x1, int y1, int x2, int y2) ADVANCE_RING(); }
-static inline void +static void draw_auto(drm_radeon_private_t *dev_priv) { RING_LOCALS; @@ -311,7 +311,7 @@ draw_auto(drm_radeon_private_t *dev_priv) COMMIT_RING(); }
-static inline void +static void set_default_state(drm_radeon_private_t *dev_priv) { int i; @@ -489,7 +489,7 @@ set_default_state(drm_radeon_private_t *dev_priv) ADVANCE_RING(); }
-static inline uint32_t i2f(uint32_t input) +static uint32_t i2f(uint32_t input) { u32 result, i, exponent, fraction;
@@ -515,7 +515,7 @@ static inline uint32_t i2f(uint32_t input) }
-static inline int r600_nomm_get_vb(struct drm_device *dev) +static int r600_nomm_get_vb(struct drm_device *dev) { drm_radeon_private_t *dev_priv = dev->dev_private; dev_priv->blit_vb = radeon_freelist_get(dev); @@ -526,7 +526,7 @@ static inline int r600_nomm_get_vb(struct drm_device *dev) return 0; }
-static inline void r600_nomm_put_vb(struct drm_device *dev) +static void r600_nomm_put_vb(struct drm_device *dev) { drm_radeon_private_t *dev_priv = dev->dev_private;
@@ -534,7 +534,7 @@ static inline void r600_nomm_put_vb(struct drm_device *dev) radeon_cp_discard_buffer(dev, dev_priv->blit_vb->file_priv->master, dev_priv->blit_vb); }
-static inline void *r600_nomm_get_vb_ptr(struct drm_device *dev) +static void *r600_nomm_get_vb_ptr(struct drm_device *dev) { drm_radeon_private_t *dev_priv = dev->dev_private; return (((char *)dev->agp_buffer_map->handle +
From: Andi Kleen ak@linux.intel.com
With the dropped inlines gccs starts warning about genuinely unused functions. Remove r600_bpe_from_format, evergreen_cs_track_validate_cb, evergreen-cs_packet_next_is_pkt3_nop which are all unused.
Cc: David Airlie airlied@linux.ie
Signed-off-by: Andi Kleen ak@linux.intel.com --- drivers/gpu/drm/radeon/evergreen_cs.c | 28 ---------------------------- drivers/gpu/drm/radeon/r600_cs.c | 19 ------------------- 2 files changed, 0 insertions(+), 47 deletions(-)
diff --git a/drivers/gpu/drm/radeon/evergreen_cs.c b/drivers/gpu/drm/radeon/evergreen_cs.c index 35dce99..7fdfa8e 100644 --- a/drivers/gpu/drm/radeon/evergreen_cs.c +++ b/drivers/gpu/drm/radeon/evergreen_cs.c @@ -122,12 +122,6 @@ static void evergreen_cs_track_init(struct evergreen_cs_track *track) track->db_s_write_bo = NULL; }
-static int evergreen_cs_track_validate_cb(struct radeon_cs_parser *p, int i) -{ - /* XXX fill in */ - return 0; -} - static int evergreen_cs_track_check(struct radeon_cs_parser *p) { struct evergreen_cs_track *track = p->track; @@ -236,28 +230,6 @@ static int evergreen_cs_packet_next_reloc(struct radeon_cs_parser *p, }
/** - * evergreen_cs_packet_next_is_pkt3_nop() - test if next packet is packet3 nop for reloc - * @parser: parser structure holding parsing context. - * - * Check next packet is relocation packet3, do bo validation and compute - * GPU offset using the provided start. - **/ -static int evergreen_cs_packet_next_is_pkt3_nop(struct radeon_cs_parser *p) -{ - struct radeon_cs_packet p3reloc; - int r; - - r = evergreen_cs_packet_parse(p, &p3reloc, p->idx); - if (r) { - return 0; - } - if (p3reloc.type != PACKET_TYPE3 || p3reloc.opcode != PACKET3_NOP) { - return 0; - } - return 1; -} - -/** * evergreen_cs_packet_next_vline() - parse userspace VLINE packet * @parser: parser structure holding parsing context. * diff --git a/drivers/gpu/drm/radeon/r600_cs.c b/drivers/gpu/drm/radeon/r600_cs.c index 7339c0b..0a2e023 100644 --- a/drivers/gpu/drm/radeon/r600_cs.c +++ b/drivers/gpu/drm/radeon/r600_cs.c @@ -223,25 +223,6 @@ static int fmt_get_nblocksy(u32 format, u32 h) return (h + bh - 1) / bh; }
-static int r600_bpe_from_format(u32 *bpe, u32 format) -{ - unsigned res; - - if (format >= ARRAY_SIZE(color_formats_table)) - goto fail; - - res = color_formats_table[format].blocksize; - if (res == 0) - goto fail; - - *bpe = res; - return 0; - -fail: - *bpe = 16; - return -EINVAL; -} - struct array_mode_checker { int array_mode; u32 group_size;
From: Andi Kleen ak@linux.intel.com
This fixes size regressions like
radeon_set_suspend 1724 7873 +6149 radeon_reinitialize_M10 3974 9285 +5311 radeon_pm_disable_dynamic_mode 868 6125 +5257 radeon_pm_enable_dynamic_mode 985 6065 +5080 radeon_pm_start_mclk_sclk - 4614 +4614 radeon_write_mode 1252 5377 +4125
among others compared to a non force inline kernel.
Cc: David Airlie airlied@linux.ie Cc: benh@kernel.crashing.org Signed-off-by: Andi Kleen ak@linux.intel.com --- drivers/video/aty/radeon_accel.c | 88 ++++++++++++++++++++++++++++++++++ drivers/video/aty/radeonfb.h | 96 +++----------------------------------- 2 files changed, 95 insertions(+), 89 deletions(-)
diff --git a/drivers/video/aty/radeon_accel.c b/drivers/video/aty/radeon_accel.c index a469a3d..0f1e367 100644 --- a/drivers/video/aty/radeon_accel.c +++ b/drivers/video/aty/radeon_accel.c @@ -326,3 +326,91 @@ void radeonfb_engine_init (struct radeonfb_info *rinfo)
radeon_engine_idle (); } + + +void _OUTREGP(struct radeonfb_info *rinfo, u32 addr, u32 val, u32 mask) +{ + unsigned long flags; + unsigned int tmp; + + spin_lock_irqsave(&rinfo->reg_lock, flags); + tmp = INREG(addr); + tmp &= (mask); + tmp |= (val); + OUTREG(addr, tmp); + spin_unlock_irqrestore(&rinfo->reg_lock, flags); +} + + +/* + * Note about PLL register accesses: + * + * I have removed the spinlock on them on purpose. The driver now + * expects that it will only manipulate the PLL registers in normal + * task environment, where radeon_msleep() will be called, protected + * by a semaphore (currently the console semaphore) so that no conflict + * will happen on the PLL register index. + * + * With the latest changes to the VT layer, this is guaranteed for all + * calls except the actual drawing/blits which aren't supposed to use + * the PLL registers anyway + * + * This is very important for the workarounds to work properly. The only + * possible exception to this rule is the call to unblank(), which may + * be done at irq time if an oops is in progress. + */ +void radeon_pll_errata_after_index(struct radeonfb_info *rinfo) +{ + if (!(rinfo->errata & CHIP_ERRATA_PLL_DUMMYREADS)) + return; + + (void)INREG(CLOCK_CNTL_DATA); + (void)INREG(CRTC_GEN_CNTL); +} + +void radeon_pll_errata_after_data(struct radeonfb_info *rinfo) +{ + if (rinfo->errata & CHIP_ERRATA_PLL_DELAY) { + /* we can't deal with posted writes here ... */ + _radeon_msleep(rinfo, 5); + } + if (rinfo->errata & CHIP_ERRATA_R300_CG) { + u32 save, tmp; + save = INREG(CLOCK_CNTL_INDEX); + tmp = save & ~(0x3f | PLL_WR_EN); + OUTREG(CLOCK_CNTL_INDEX, tmp); + tmp = INREG(CLOCK_CNTL_DATA); + OUTREG(CLOCK_CNTL_INDEX, save); + } +} + +u32 __INPLL(struct radeonfb_info *rinfo, u32 addr) +{ + u32 data; + + OUTREG8(CLOCK_CNTL_INDEX, addr & 0x0000003f); + radeon_pll_errata_after_index(rinfo); + data = INREG(CLOCK_CNTL_DATA); + radeon_pll_errata_after_data(rinfo); + return data; +} + +void __OUTPLL(struct radeonfb_info *rinfo, unsigned int index, u32 val) +{ + + OUTREG8(CLOCK_CNTL_INDEX, (index & 0x0000003f) | 0x00000080); + radeon_pll_errata_after_index(rinfo); + OUTREG(CLOCK_CNTL_DATA, val); + radeon_pll_errata_after_data(rinfo); +} + +void __OUTPLLP(struct radeonfb_info *rinfo, unsigned int index, u32 val, u32 mask) +{ + unsigned int tmp; + + tmp = __INPLL(rinfo, index); + tmp &= (mask); + tmp |= (val); + __OUTPLL(rinfo, index, tmp); +} + diff --git a/drivers/video/aty/radeonfb.h b/drivers/video/aty/radeonfb.h index 7351e66..cde9c2e 100644 --- a/drivers/video/aty/radeonfb.h +++ b/drivers/video/aty/radeonfb.h @@ -393,98 +393,16 @@ static inline void _radeon_msleep(struct radeonfb_info *rinfo, unsigned long ms) #define INREG(addr) readl((rinfo->mmio_base)+addr) #define OUTREG(addr,val) writel(val, (rinfo->mmio_base)+addr)
-static inline void _OUTREGP(struct radeonfb_info *rinfo, u32 addr, - u32 val, u32 mask) -{ - unsigned long flags; - unsigned int tmp; - - spin_lock_irqsave(&rinfo->reg_lock, flags); - tmp = INREG(addr); - tmp &= (mask); - tmp |= (val); - OUTREG(addr, tmp); - spin_unlock_irqrestore(&rinfo->reg_lock, flags); -} +void _OUTREGP(struct radeonfb_info *rinfo, u32 addr, u32 val, u32 mask); +void __OUTPLLP(struct radeonfb_info *rinfo, unsigned int index, + u32 val, u32 mask); +void __OUTPLL(struct radeonfb_info *rinfo, unsigned int index, u32 val); +u32 __INPLL(struct radeonfb_info *rinfo, u32 addr); +void radeon_pll_errata_after_data(struct radeonfb_info *rinfo); +void radeon_pll_errata_after_index(struct radeonfb_info *rinfo);
#define OUTREGP(addr,val,mask) _OUTREGP(rinfo, addr, val,mask)
-/* - * Note about PLL register accesses: - * - * I have removed the spinlock on them on purpose. The driver now - * expects that it will only manipulate the PLL registers in normal - * task environment, where radeon_msleep() will be called, protected - * by a semaphore (currently the console semaphore) so that no conflict - * will happen on the PLL register index. - * - * With the latest changes to the VT layer, this is guaranteed for all - * calls except the actual drawing/blits which aren't supposed to use - * the PLL registers anyway - * - * This is very important for the workarounds to work properly. The only - * possible exception to this rule is the call to unblank(), which may - * be done at irq time if an oops is in progress. - */ -static inline void radeon_pll_errata_after_index(struct radeonfb_info *rinfo) -{ - if (!(rinfo->errata & CHIP_ERRATA_PLL_DUMMYREADS)) - return; - - (void)INREG(CLOCK_CNTL_DATA); - (void)INREG(CRTC_GEN_CNTL); -} - -static inline void radeon_pll_errata_after_data(struct radeonfb_info *rinfo) -{ - if (rinfo->errata & CHIP_ERRATA_PLL_DELAY) { - /* we can't deal with posted writes here ... */ - _radeon_msleep(rinfo, 5); - } - if (rinfo->errata & CHIP_ERRATA_R300_CG) { - u32 save, tmp; - save = INREG(CLOCK_CNTL_INDEX); - tmp = save & ~(0x3f | PLL_WR_EN); - OUTREG(CLOCK_CNTL_INDEX, tmp); - tmp = INREG(CLOCK_CNTL_DATA); - OUTREG(CLOCK_CNTL_INDEX, save); - } -} - -static inline u32 __INPLL(struct radeonfb_info *rinfo, u32 addr) -{ - u32 data; - - OUTREG8(CLOCK_CNTL_INDEX, addr & 0x0000003f); - radeon_pll_errata_after_index(rinfo); - data = INREG(CLOCK_CNTL_DATA); - radeon_pll_errata_after_data(rinfo); - return data; -} - -static inline void __OUTPLL(struct radeonfb_info *rinfo, unsigned int index, - u32 val) -{ - - OUTREG8(CLOCK_CNTL_INDEX, (index & 0x0000003f) | 0x00000080); - radeon_pll_errata_after_index(rinfo); - OUTREG(CLOCK_CNTL_DATA, val); - radeon_pll_errata_after_data(rinfo); -} - - -static inline void __OUTPLLP(struct radeonfb_info *rinfo, unsigned int index, - u32 val, u32 mask) -{ - unsigned int tmp; - - tmp = __INPLL(rinfo, index); - tmp &= (mask); - tmp |= (val); - __OUTPLL(rinfo, index, tmp); -} - - #define INPLL(addr) __INPLL(rinfo, addr) #define OUTPLL(index, val) __OUTPLL(rinfo, index, val) #define OUTPLLP(index, val, mask) __OUTPLLP(rinfo, index, val, mask)
From: Andi Kleen ak@linux.intel.com
Remove bogus inlines in evergreen and r100.
Cc: airlied@linux.ie Signed-off-by: Andi Kleen ak@linux.intel.com --- drivers/gpu/drm/radeon/evergreen.c | 4 +- drivers/gpu/drm/radeon/evergreen_blit_kms.c | 2 +- drivers/gpu/drm/radeon/r100.c | 106 +++++++++++++++++++++++++- drivers/gpu/drm/radeon/r100_track.h | 110 ++------------------------- 4 files changed, 114 insertions(+), 108 deletions(-)
diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c index c4ffa14f..1c5cd09 100644 --- a/drivers/gpu/drm/radeon/evergreen.c +++ b/drivers/gpu/drm/radeon/evergreen.c @@ -2586,7 +2586,7 @@ int evergreen_irq_set(struct radeon_device *rdev) return 0; }
-static inline void evergreen_irq_ack(struct radeon_device *rdev) +static void evergreen_irq_ack(struct radeon_device *rdev) { u32 tmp;
@@ -2697,7 +2697,7 @@ void evergreen_irq_suspend(struct radeon_device *rdev) r600_rlc_stop(rdev); }
-static inline u32 evergreen_get_ih_wptr(struct radeon_device *rdev) +static u32 evergreen_get_ih_wptr(struct radeon_device *rdev) { u32 wptr, tmp;
diff --git a/drivers/gpu/drm/radeon/evergreen_blit_kms.c b/drivers/gpu/drm/radeon/evergreen_blit_kms.c index 2eb2518..7eb78b3 100644 --- a/drivers/gpu/drm/radeon/evergreen_blit_kms.c +++ b/drivers/gpu/drm/radeon/evergreen_blit_kms.c @@ -584,7 +584,7 @@ set_default_state(struct radeon_device *rdev)
}
-static inline uint32_t i2f(uint32_t input) +static uint32_t i2f(uint32_t input) { u32 result, i, exponent, fraction;
diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c index 9a1efac..6720929 100644 --- a/drivers/gpu/drm/radeon/r100.c +++ b/drivers/gpu/drm/radeon/r100.c @@ -68,6 +68,108 @@ MODULE_FIRMWARE(FIRMWARE_R520); * r100,rv100,rs100,rv200,rs200,r200,rv250,rs300,rv280 */
+int r100_reloc_pitch_offset(struct radeon_cs_parser *p, + struct radeon_cs_packet *pkt, + unsigned idx, + unsigned reg) +{ + int r; + u32 tile_flags = 0; + u32 tmp; + struct radeon_cs_reloc *reloc; + u32 value; + + r = r100_cs_packet_next_reloc(p, &reloc); + if (r) { + DRM_ERROR("No reloc for ib[%d]=0x%04X\n", + idx, reg); + r100_cs_dump_packet(p, pkt); + return r; + } + value = radeon_get_ib_value(p, idx); + tmp = value & 0x003fffff; + tmp += (((u32)reloc->lobj.gpu_offset) >> 10); + + if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) + tile_flags |= RADEON_DST_TILE_MACRO; + if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) { + if (reg == RADEON_SRC_PITCH_OFFSET) { + DRM_ERROR("Cannot src blit from microtiled surface\n"); + r100_cs_dump_packet(p, pkt); + return -EINVAL; + } + tile_flags |= RADEON_DST_TILE_MICRO; + } + + tmp |= tile_flags; + p->ib->ptr[idx] = (value & 0x3fc00000) | tmp; + return 0; +} + +int r100_packet3_load_vbpntr(struct radeon_cs_parser *p, + struct radeon_cs_packet *pkt, + int idx) +{ + unsigned c, i; + struct radeon_cs_reloc *reloc; + struct r100_cs_track *track; + int r = 0; + volatile uint32_t *ib; + u32 idx_value; + + ib = p->ib->ptr; + track = (struct r100_cs_track *)p->track; + c = radeon_get_ib_value(p, idx++) & 0x1F; + if (c > 16) { + DRM_ERROR("Only 16 vertex buffers are allowed %d\n", + pkt->opcode); + r100_cs_dump_packet(p, pkt); + return -EINVAL; + } + track->num_arrays = c; + for (i = 0; i < (c - 1); i+=2, idx+=3) { + r = r100_cs_packet_next_reloc(p, &reloc); + if (r) { + DRM_ERROR("No reloc for packet3 %d\n", + pkt->opcode); + r100_cs_dump_packet(p, pkt); + return r; + } + idx_value = radeon_get_ib_value(p, idx); + ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->lobj.gpu_offset); + + track->arrays[i + 0].esize = idx_value >> 8; + track->arrays[i + 0].robj = reloc->robj; + track->arrays[i + 0].esize &= 0x7F; + r = r100_cs_packet_next_reloc(p, &reloc); + if (r) { + DRM_ERROR("No reloc for packet3 %d\n", + pkt->opcode); + r100_cs_dump_packet(p, pkt); + return r; + } + ib[idx+2] = radeon_get_ib_value(p, idx + 2) + ((u32)reloc->lobj.gpu_offset); + track->arrays[i + 1].robj = reloc->robj; + track->arrays[i + 1].esize = idx_value >> 24; + track->arrays[i + 1].esize &= 0x7F; + } + if (c & 1) { + r = r100_cs_packet_next_reloc(p, &reloc); + if (r) { + DRM_ERROR("No reloc for packet3 %d\n", + pkt->opcode); + r100_cs_dump_packet(p, pkt); + return r; + } + idx_value = radeon_get_ib_value(p, idx); + ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->lobj.gpu_offset); + track->arrays[i + 0].robj = reloc->robj; + track->arrays[i + 0].esize = idx_value >> 8; + track->arrays[i + 0].esize &= 0x7F; + } + return r; +} + void r100_pre_page_flip(struct radeon_device *rdev, int crtc) { /* enable the pflip int */ @@ -588,7 +690,7 @@ void r100_irq_disable(struct radeon_device *rdev) WREG32(R_000044_GEN_INT_STATUS, tmp); }
-static inline uint32_t r100_irq_ack(struct radeon_device *rdev) +static uint32_t r100_irq_ack(struct radeon_device *rdev) { uint32_t irqs = RREG32(RADEON_GEN_INT_STATUS); uint32_t irq_mask = RADEON_SW_INT_TEST | @@ -3147,7 +3249,7 @@ void r100_bandwidth_update(struct radeon_device *rdev) } }
-static inline void r100_cs_track_texture_print(struct r100_cs_track_texture *t) +static void r100_cs_track_texture_print(struct r100_cs_track_texture *t) { DRM_ERROR("pitch %d\n", t->pitch); DRM_ERROR("use_pitch %d\n", t->use_pitch); diff --git a/drivers/gpu/drm/radeon/r100_track.h b/drivers/gpu/drm/radeon/r100_track.h index 686f9dc..6a603b3 100644 --- a/drivers/gpu/drm/radeon/r100_track.h +++ b/drivers/gpu/drm/radeon/r100_track.h @@ -92,106 +92,10 @@ int r200_packet0_check(struct radeon_cs_parser *p, struct radeon_cs_packet *pkt, unsigned idx, unsigned reg);
- - -static inline int r100_reloc_pitch_offset(struct radeon_cs_parser *p, - struct radeon_cs_packet *pkt, - unsigned idx, - unsigned reg) -{ - int r; - u32 tile_flags = 0; - u32 tmp; - struct radeon_cs_reloc *reloc; - u32 value; - - r = r100_cs_packet_next_reloc(p, &reloc); - if (r) { - DRM_ERROR("No reloc for ib[%d]=0x%04X\n", - idx, reg); - r100_cs_dump_packet(p, pkt); - return r; - } - value = radeon_get_ib_value(p, idx); - tmp = value & 0x003fffff; - tmp += (((u32)reloc->lobj.gpu_offset) >> 10); - - if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) - tile_flags |= RADEON_DST_TILE_MACRO; - if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) { - if (reg == RADEON_SRC_PITCH_OFFSET) { - DRM_ERROR("Cannot src blit from microtiled surface\n"); - r100_cs_dump_packet(p, pkt); - return -EINVAL; - } - tile_flags |= RADEON_DST_TILE_MICRO; - } - - tmp |= tile_flags; - p->ib->ptr[idx] = (value & 0x3fc00000) | tmp; - return 0; -} - -static inline int r100_packet3_load_vbpntr(struct radeon_cs_parser *p, - struct radeon_cs_packet *pkt, - int idx) -{ - unsigned c, i; - struct radeon_cs_reloc *reloc; - struct r100_cs_track *track; - int r = 0; - volatile uint32_t *ib; - u32 idx_value; - - ib = p->ib->ptr; - track = (struct r100_cs_track *)p->track; - c = radeon_get_ib_value(p, idx++) & 0x1F; - if (c > 16) { - DRM_ERROR("Only 16 vertex buffers are allowed %d\n", - pkt->opcode); - r100_cs_dump_packet(p, pkt); - return -EINVAL; - } - track->num_arrays = c; - for (i = 0; i < (c - 1); i+=2, idx+=3) { - r = r100_cs_packet_next_reloc(p, &reloc); - if (r) { - DRM_ERROR("No reloc for packet3 %d\n", - pkt->opcode); - r100_cs_dump_packet(p, pkt); - return r; - } - idx_value = radeon_get_ib_value(p, idx); - ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->lobj.gpu_offset); - - track->arrays[i + 0].esize = idx_value >> 8; - track->arrays[i + 0].robj = reloc->robj; - track->arrays[i + 0].esize &= 0x7F; - r = r100_cs_packet_next_reloc(p, &reloc); - if (r) { - DRM_ERROR("No reloc for packet3 %d\n", - pkt->opcode); - r100_cs_dump_packet(p, pkt); - return r; - } - ib[idx+2] = radeon_get_ib_value(p, idx + 2) + ((u32)reloc->lobj.gpu_offset); - track->arrays[i + 1].robj = reloc->robj; - track->arrays[i + 1].esize = idx_value >> 24; - track->arrays[i + 1].esize &= 0x7F; - } - if (c & 1) { - r = r100_cs_packet_next_reloc(p, &reloc); - if (r) { - DRM_ERROR("No reloc for packet3 %d\n", - pkt->opcode); - r100_cs_dump_packet(p, pkt); - return r; - } - idx_value = radeon_get_ib_value(p, idx); - ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->lobj.gpu_offset); - track->arrays[i + 0].robj = reloc->robj; - track->arrays[i + 0].esize = idx_value >> 8; - track->arrays[i + 0].esize &= 0x7F; - } - return r; -} +int r100_reloc_pitch_offset(struct radeon_cs_parser *p, + struct radeon_cs_packet *pkt, + unsigned idx, + unsigned reg); +int r100_packet3_load_vbpntr(struct radeon_cs_parser *p, + struct radeon_cs_packet *pkt, + int idx);
From: Andi Kleen ak@linux.intel.com
With this patch I'm only about 50k larger with DRM debugging enables (why is that enabled by default?!?), and slightly smaller without.
Cc: airlied@linux.ie Signed-off-by: Andi Kleen ak@linux.intel.com --- drivers/gpu/drm/radeon/r100.c | 39 ++++++++++++++++++++++++++ drivers/gpu/drm/radeon/r300_cmdbuf.c | 2 +- drivers/gpu/drm/radeon/r600.c | 4 +- drivers/gpu/drm/radeon/r600_blit_kms.c | 2 +- drivers/gpu/drm/radeon/radeon.h | 39 +++++---------------------- drivers/gpu/drm/radeon/radeon_atombios.c | 4 +- drivers/gpu/drm/radeon/radeon_irq.c | 2 +- drivers/gpu/drm/radeon/radeon_legacy_tv.c | 2 +- drivers/gpu/drm/radeon/radeon_object.c | 41 ++++++++++++++++++++++++++++ drivers/gpu/drm/radeon/radeon_object.h | 42 ++-------------------------- drivers/gpu/drm/radeon/radeon_state.c | 16 +++++----- 11 files changed, 106 insertions(+), 87 deletions(-)
diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c index 6720929..1bbed1f 100644 --- a/drivers/gpu/drm/radeon/r100.c +++ b/drivers/gpu/drm/radeon/r100.c @@ -4107,3 +4107,42 @@ void r100_io_wreg(struct radeon_device *rdev, u32 reg, u32 v) iowrite32(v, rdev->rio_mem + RADEON_MM_DATA); } } + +/* Better place? */ +u32 radeon_get_ib_value(struct radeon_cs_parser *p, int idx) +{ + struct radeon_cs_chunk *ibc = &p->chunks[p->chunk_ib_idx]; + u32 pg_idx, pg_offset; + u32 idx_value = 0; + int new_page; + + pg_idx = (idx * 4) / PAGE_SIZE; + pg_offset = (idx * 4) % PAGE_SIZE; + + if (ibc->kpage_idx[0] == pg_idx) + return ibc->kpage[0][pg_offset/4]; + if (ibc->kpage_idx[1] == pg_idx) + return ibc->kpage[1][pg_offset/4]; + + new_page = radeon_cs_update_pages(p, pg_idx); + if (new_page < 0) { + p->parser_error = new_page; + return 0; + } + + idx_value = ibc->kpage[new_page][pg_offset/4]; + return idx_value; +} + +void radeon_ring_write(struct radeon_device *rdev, uint32_t v) +{ +#if DRM_DEBUG_CODE + if (rdev->cp.count_dw <= 0) { + DRM_ERROR("radeon: writting more dword to ring than expected !\n"); + } +#endif + rdev->cp.ring[rdev->cp.wptr++] = v; + rdev->cp.wptr &= rdev->cp.ptr_mask; + rdev->cp.count_dw--; + rdev->cp.ring_free_dw--; +} diff --git a/drivers/gpu/drm/radeon/r300_cmdbuf.c b/drivers/gpu/drm/radeon/r300_cmdbuf.c index c5c2742..1fe98b4 100644 --- a/drivers/gpu/drm/radeon/r300_cmdbuf.c +++ b/drivers/gpu/drm/radeon/r300_cmdbuf.c @@ -791,7 +791,7 @@ static __inline__ int r300_emit_packet3(drm_radeon_private_t *dev_priv, /** * Emit the sequence to pacify R300. */ -static __inline__ void r300_pacify(drm_radeon_private_t *dev_priv) +static void r300_pacify(drm_radeon_private_t *dev_priv) { uint32_t cache_z, cache_3d, cache_2d; RING_LOCALS; diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index 720dd99..c85047f 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -3137,7 +3137,7 @@ int r600_irq_set(struct radeon_device *rdev) return 0; }
-static inline void r600_irq_ack(struct radeon_device *rdev) +static void r600_irq_ack(struct radeon_device *rdev) { u32 tmp;
@@ -3238,7 +3238,7 @@ void r600_irq_disable(struct radeon_device *rdev) r600_disable_interrupt_state(rdev); }
-static inline u32 r600_get_ih_wptr(struct radeon_device *rdev) +static u32 r600_get_ih_wptr(struct radeon_device *rdev) { u32 wptr, tmp;
diff --git a/drivers/gpu/drm/radeon/r600_blit_kms.c b/drivers/gpu/drm/radeon/r600_blit_kms.c index 9aa74c3..bbbafe6 100644 --- a/drivers/gpu/drm/radeon/r600_blit_kms.c +++ b/drivers/gpu/drm/radeon/r600_blit_kms.c @@ -450,7 +450,7 @@ set_default_state(struct radeon_device *rdev) radeon_ring_write(rdev, sq_stack_resource_mgmt_2); }
-static inline uint32_t i2f(uint32_t input) +static uint32_t i2f(uint32_t input) { u32 result, i, exponent, fraction;
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 8ac6cba..5ed7ef7 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -601,32 +601,7 @@ struct radeon_cs_parser {
extern int radeon_cs_update_pages(struct radeon_cs_parser *p, int pg_idx); extern int radeon_cs_finish_pages(struct radeon_cs_parser *p); - - -static inline u32 radeon_get_ib_value(struct radeon_cs_parser *p, int idx) -{ - struct radeon_cs_chunk *ibc = &p->chunks[p->chunk_ib_idx]; - u32 pg_idx, pg_offset; - u32 idx_value = 0; - int new_page; - - pg_idx = (idx * 4) / PAGE_SIZE; - pg_offset = (idx * 4) % PAGE_SIZE; - - if (ibc->kpage_idx[0] == pg_idx) - return ibc->kpage[0][pg_offset/4]; - if (ibc->kpage_idx[1] == pg_idx) - return ibc->kpage[1][pg_offset/4]; - - new_page = radeon_cs_update_pages(p, pg_idx); - if (new_page < 0) { - p->parser_error = new_page; - return 0; - } - - idx_value = ibc->kpage[new_page][pg_offset/4]; - return idx_value; -} +extern u32 radeon_get_ib_value(struct radeon_cs_parser *p, int idx);
struct radeon_cs_packet { unsigned idx; @@ -1378,19 +1353,19 @@ void radeon_atombios_fini(struct radeon_device *rdev); /* * RING helpers. */ + +#if DRM_DEBUG_CODE == 0 static inline void radeon_ring_write(struct radeon_device *rdev, uint32_t v) { -#if DRM_DEBUG_CODE - if (rdev->cp.count_dw <= 0) { - DRM_ERROR("radeon: writting more dword to ring than expected !\n"); - } -#endif rdev->cp.ring[rdev->cp.wptr++] = v; rdev->cp.wptr &= rdev->cp.ptr_mask; rdev->cp.count_dw--; rdev->cp.ring_free_dw--; } - +#else +/* With debugging this is just too big to inline */ +void radeon_ring_write(struct radeon_device *rdev, uint32_t v); +#endif
/* * ASICs macro. diff --git a/drivers/gpu/drm/radeon/radeon_atombios.c b/drivers/gpu/drm/radeon/radeon_atombios.c index bf2b615..08d0b94 100644 --- a/drivers/gpu/drm/radeon/radeon_atombios.c +++ b/drivers/gpu/drm/radeon/radeon_atombios.c @@ -62,7 +62,7 @@ union atom_supported_devices { struct _ATOM_SUPPORTED_DEVICES_INFO_2d1 info_2d1; };
-static inline struct radeon_i2c_bus_rec radeon_lookup_i2c_gpio(struct radeon_device *rdev, +static struct radeon_i2c_bus_rec radeon_lookup_i2c_gpio(struct radeon_device *rdev, uint8_t id) { struct atom_context *ctx = rdev->mode_info.atom_context; @@ -228,7 +228,7 @@ void radeon_atombios_i2c_init(struct radeon_device *rdev) } }
-static inline struct radeon_gpio_rec radeon_lookup_gpio(struct radeon_device *rdev, +static struct radeon_gpio_rec radeon_lookup_gpio(struct radeon_device *rdev, u8 id) { struct atom_context *ctx = rdev->mode_info.atom_context; diff --git a/drivers/gpu/drm/radeon/radeon_irq.c b/drivers/gpu/drm/radeon/radeon_irq.c index 465746b..00da384 100644 --- a/drivers/gpu/drm/radeon/radeon_irq.c +++ b/drivers/gpu/drm/radeon/radeon_irq.c @@ -129,7 +129,7 @@ void radeon_disable_vblank(struct drm_device *dev, int crtc) } }
-static inline u32 radeon_acknowledge_irqs(drm_radeon_private_t *dev_priv, u32 *r500_disp_int) +static u32 radeon_acknowledge_irqs(drm_radeon_private_t *dev_priv, u32 *r500_disp_int) { u32 irqs = RADEON_READ(RADEON_GEN_INT_STATUS); u32 irq_mask = RADEON_SW_INT_TEST; diff --git a/drivers/gpu/drm/radeon/radeon_legacy_tv.c b/drivers/gpu/drm/radeon/radeon_legacy_tv.c index c7b6cb4..b37ec0f 100644 --- a/drivers/gpu/drm/radeon/radeon_legacy_tv.c +++ b/drivers/gpu/drm/radeon/radeon_legacy_tv.c @@ -864,7 +864,7 @@ void radeon_legacy_tv_adjust_crtc_reg(struct drm_encoder *encoder, *v_sync_strt_wid = tmp; }
-static inline int get_post_div(int value) +static int get_post_div(int value) { int post_div; switch (value) { diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c index 976c3b1..1c85152 100644 --- a/drivers/gpu/drm/radeon/radeon_object.c +++ b/drivers/gpu/drm/radeon/radeon_object.c @@ -515,3 +515,44 @@ int radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo) } return 0; } + +int radeon_bo_wait(struct radeon_bo *bo, u32 *mem_type, bool no_wait) +{ + int r; + + r = ttm_bo_reserve(&bo->tbo, true, no_wait, false, 0); + if (unlikely(r != 0)) + return r; + spin_lock(&bo->tbo.bdev->fence_lock); + if (mem_type) + *mem_type = bo->tbo.mem.mem_type; + if (bo->tbo.sync_obj) + r = ttm_bo_wait(&bo->tbo, true, true, no_wait); + spin_unlock(&bo->tbo.bdev->fence_lock); + ttm_bo_unreserve(&bo->tbo); + return r; +} + + +/** + * radeon_bo_reserve - reserve bo + * @bo: bo structure + * @no_wait: don't sleep while trying to reserve (return -EBUSY) + * + * Returns: + * -EBUSY: buffer is busy and @no_wait is true + * -ERESTARTSYS: A wait for the buffer to become unreserved was interrupted by + * a signal. Release all buffer reservations and return to user-space. + */ +int radeon_bo_reserve(struct radeon_bo *bo, bool no_wait) +{ + int r; + + r = ttm_bo_reserve(&bo->tbo, true, no_wait, false, 0); + if (unlikely(r != 0)) { + if (r != -ERESTARTSYS) + dev_err(bo->rdev->dev, "%p reserve failed\n", bo); + return r; + } + return 0; +} diff --git a/drivers/gpu/drm/radeon/radeon_object.h b/drivers/gpu/drm/radeon/radeon_object.h index ede6c13..b07f0f9 100644 --- a/drivers/gpu/drm/radeon/radeon_object.h +++ b/drivers/gpu/drm/radeon/radeon_object.h @@ -52,28 +52,7 @@ static inline unsigned radeon_mem_type_to_domain(u32 mem_type) return 0; }
-/** - * radeon_bo_reserve - reserve bo - * @bo: bo structure - * @no_wait: don't sleep while trying to reserve (return -EBUSY) - * - * Returns: - * -EBUSY: buffer is busy and @no_wait is true - * -ERESTARTSYS: A wait for the buffer to become unreserved was interrupted by - * a signal. Release all buffer reservations and return to user-space. - */ -static inline int radeon_bo_reserve(struct radeon_bo *bo, bool no_wait) -{ - int r; - - r = ttm_bo_reserve(&bo->tbo, true, no_wait, false, 0); - if (unlikely(r != 0)) { - if (r != -ERESTARTSYS) - dev_err(bo->rdev->dev, "%p reserve failed\n", bo); - return r; - } - return 0; -} +int radeon_bo_reserve(struct radeon_bo *bo, bool no_wait);
static inline void radeon_bo_unreserve(struct radeon_bo *bo) { @@ -118,23 +97,8 @@ static inline u64 radeon_bo_mmap_offset(struct radeon_bo *bo) return bo->tbo.addr_space_offset; }
-static inline int radeon_bo_wait(struct radeon_bo *bo, u32 *mem_type, - bool no_wait) -{ - int r; - - r = ttm_bo_reserve(&bo->tbo, true, no_wait, false, 0); - if (unlikely(r != 0)) - return r; - spin_lock(&bo->tbo.bdev->fence_lock); - if (mem_type) - *mem_type = bo->tbo.mem.mem_type; - if (bo->tbo.sync_obj) - r = ttm_bo_wait(&bo->tbo, true, true, no_wait); - spin_unlock(&bo->tbo.bdev->fence_lock); - ttm_bo_unreserve(&bo->tbo); - return r; -} +extern int radeon_bo_wait(struct radeon_bo *bo, u32 *mem_type, + bool no_wait);
extern int radeon_bo_create(struct radeon_device *rdev, unsigned long size, int byte_align, diff --git a/drivers/gpu/drm/radeon/radeon_state.c b/drivers/gpu/drm/radeon/radeon_state.c index 92e7ea7..e8422ae 100644 --- a/drivers/gpu/drm/radeon/radeon_state.c +++ b/drivers/gpu/drm/radeon/radeon_state.c @@ -272,12 +272,12 @@ static __inline__ int radeon_check_and_fixup_packets(drm_radeon_private_t * return 0; }
-static __inline__ int radeon_check_and_fixup_packet3(drm_radeon_private_t * - dev_priv, - struct drm_file *file_priv, - drm_radeon_kcmd_buffer_t * - cmdbuf, - unsigned int *cmdsz) +static int radeon_check_and_fixup_packet3(drm_radeon_private_t * + dev_priv, + struct drm_file *file_priv, + drm_radeon_kcmd_buffer_t * + cmdbuf, + unsigned int *cmdsz) { u32 *cmd = drm_buffer_pointer_to_dword(cmdbuf->buffer, 0); u32 offset, narrays; @@ -446,8 +446,8 @@ static __inline__ int radeon_check_and_fixup_packet3(drm_radeon_private_t * * CP hardware state programming functions */
-static __inline__ void radeon_emit_clip_rect(drm_radeon_private_t * dev_priv, - struct drm_clip_rect * box) +static void radeon_emit_clip_rect(drm_radeon_private_t * dev_priv, + struct drm_clip_rect * box) { RING_LOCALS;
From: Andi Kleen ak@linux.intel.com
This saves about 2.5k text on a non force inline kernel.
Cc: x86@kernel.org Signed-off-by: Andi Kleen ak@linux.intel.com --- arch/x86/include/asm/desc.h | 17 +---------------- arch/x86/kernel/irqinit.c | 20 ++++++++++++++++++++ 2 files changed, 21 insertions(+), 16 deletions(-)
diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h index 41935fa..f7183e1 100644 --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h @@ -336,22 +336,7 @@ extern int first_system_vector; /* used_vectors is BITMAP for irq is not managed by percpu vector_irq */ extern unsigned long used_vectors[];
-static inline void alloc_system_vector(int vector) -{ - if (!test_bit(vector, used_vectors)) { - set_bit(vector, used_vectors); - if (first_system_vector > vector) - first_system_vector = vector; - } else { - BUG(); - } -} - -static inline void alloc_intr_gate(unsigned int n, void *addr) -{ - alloc_system_vector(n); - set_intr_gate(n, addr); -} +void alloc_intr_gate(unsigned int n, void *addr);
/* * This routine sets up an interrupt gate at directory privilege level 3. diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index b3300e6..b6e769b 100644 --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c @@ -162,6 +162,26 @@ void setup_vector_irq(int cpu) __setup_vector_irq(cpu); }
+static void alloc_system_vector(int vector) +{ + if (!test_bit(vector, used_vectors)) { + set_bit(vector, used_vectors); + if (first_system_vector > vector) + first_system_vector = vector; + } else + BUG(); +} + +/* + * This could be made __init if xen didn't abuse it in its + * suspend path! + */ +void alloc_intr_gate(unsigned int n, void *addr) +{ + alloc_system_vector(n); + set_intr_gate(n, addr); +} + static void __init smp_intr_init(void) { #ifdef CONFIG_SMP
From: Andi Kleen ak@linux.intel.com
These are not time critical, and using an out of line function saves about 2.5k text on a non force inline kernel.
I left the main hotpath user -- readahead -- inline for now.
Signed-off-by: Andi Kleen ak@linux.intel.com --- drivers/base/node.c | 72 +++++++++++++++++++++++++++----------------------- 1 files changed, 39 insertions(+), 33 deletions(-)
diff --git a/drivers/base/node.c b/drivers/base/node.c index 793f796..0b8e7a2 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -58,6 +58,12 @@ static inline ssize_t node_read_cpulist(struct sys_device *dev, static SYSDEV_ATTR(cpumap, S_IRUGO, node_read_cpumask, NULL); static SYSDEV_ATTR(cpulist, S_IRUGO, node_read_cpulist, NULL);
+/* Don't inline */ +static unsigned long my_node_page_state(int node, enum zone_stat_item item) +{ + return node_page_state(node, item); +} + #define K(x) ((x) << (PAGE_SHIFT - 10)) static ssize_t node_read_meminfo(struct sys_device * dev, struct sysdev_attribute *attr, char * buf) @@ -82,16 +88,16 @@ static ssize_t node_read_meminfo(struct sys_device * dev, nid, K(i.totalram), nid, K(i.freeram), nid, K(i.totalram - i.freeram), - nid, K(node_page_state(nid, NR_ACTIVE_ANON) + - node_page_state(nid, NR_ACTIVE_FILE)), - nid, K(node_page_state(nid, NR_INACTIVE_ANON) + - node_page_state(nid, NR_INACTIVE_FILE)), - nid, K(node_page_state(nid, NR_ACTIVE_ANON)), - nid, K(node_page_state(nid, NR_INACTIVE_ANON)), - nid, K(node_page_state(nid, NR_ACTIVE_FILE)), - nid, K(node_page_state(nid, NR_INACTIVE_FILE)), - nid, K(node_page_state(nid, NR_UNEVICTABLE)), - nid, K(node_page_state(nid, NR_MLOCK))); + nid, K(my_node_page_state(nid, NR_ACTIVE_ANON) + + my_node_page_state(nid, NR_ACTIVE_FILE)), + nid, K(my_node_page_state(nid, NR_INACTIVE_ANON) + + my_node_page_state(nid, NR_INACTIVE_FILE)), + nid, K(my_node_page_state(nid, NR_ACTIVE_ANON)), + nid, K(my_node_page_state(nid, NR_INACTIVE_ANON)), + nid, K(my_node_page_state(nid, NR_ACTIVE_FILE)), + nid, K(my_node_page_state(nid, NR_INACTIVE_FILE)), + nid, K(my_node_page_state(nid, NR_UNEVICTABLE)), + nid, K(my_node_page_state(nid, NR_MLOCK)));
#ifdef CONFIG_HIGHMEM n += sprintf(buf + n, @@ -123,30 +129,30 @@ static ssize_t node_read_meminfo(struct sys_device * dev, "Node %d AnonHugePages: %8lu kB\n" #endif , - nid, K(node_page_state(nid, NR_FILE_DIRTY)), - nid, K(node_page_state(nid, NR_WRITEBACK)), - nid, K(node_page_state(nid, NR_FILE_PAGES)), - nid, K(node_page_state(nid, NR_FILE_MAPPED)), - nid, K(node_page_state(nid, NR_ANON_PAGES) + nid, K(my_node_page_state(nid, NR_FILE_DIRTY)), + nid, K(my_node_page_state(nid, NR_WRITEBACK)), + nid, K(my_node_page_state(nid, NR_FILE_PAGES)), + nid, K(my_node_page_state(nid, NR_FILE_MAPPED)), + nid, K(my_node_page_state(nid, NR_ANON_PAGES) #ifdef CONFIG_TRANSPARENT_HUGEPAGE - + node_page_state(nid, NR_ANON_TRANSPARENT_HUGEPAGES) * + + my_node_page_state(nid, NR_ANON_TRANSPARENT_HUGEPAGES) * HPAGE_PMD_NR #endif ), - nid, K(node_page_state(nid, NR_SHMEM)), - nid, node_page_state(nid, NR_KERNEL_STACK) * + nid, K(my_node_page_state(nid, NR_SHMEM)), + nid, my_node_page_state(nid, NR_KERNEL_STACK) * THREAD_SIZE / 1024, - nid, K(node_page_state(nid, NR_PAGETABLE)), - nid, K(node_page_state(nid, NR_UNSTABLE_NFS)), - nid, K(node_page_state(nid, NR_BOUNCE)), - nid, K(node_page_state(nid, NR_WRITEBACK_TEMP)), - nid, K(node_page_state(nid, NR_SLAB_RECLAIMABLE) + - node_page_state(nid, NR_SLAB_UNRECLAIMABLE)), - nid, K(node_page_state(nid, NR_SLAB_RECLAIMABLE)), - nid, K(node_page_state(nid, NR_SLAB_UNRECLAIMABLE)) + nid, K(my_node_page_state(nid, NR_PAGETABLE)), + nid, K(my_node_page_state(nid, NR_UNSTABLE_NFS)), + nid, K(my_node_page_state(nid, NR_BOUNCE)), + nid, K(my_node_page_state(nid, NR_WRITEBACK_TEMP)), + nid, K(my_node_page_state(nid, NR_SLAB_RECLAIMABLE) + + my_node_page_state(nid, NR_SLAB_UNRECLAIMABLE)), + nid, K(my_node_page_state(nid, NR_SLAB_RECLAIMABLE)), + nid, K(my_node_page_state(nid, NR_SLAB_UNRECLAIMABLE)) #ifdef CONFIG_TRANSPARENT_HUGEPAGE , nid, - K(node_page_state(nid, NR_ANON_TRANSPARENT_HUGEPAGES) * + K(my_node_page_state(nid, NR_ANON_TRANSPARENT_HUGEPAGES) * HPAGE_PMD_NR) #endif ); @@ -167,12 +173,12 @@ static ssize_t node_read_numastat(struct sys_device * dev, "interleave_hit %lu\n" "local_node %lu\n" "other_node %lu\n", - node_page_state(dev->id, NUMA_HIT), - node_page_state(dev->id, NUMA_MISS), - node_page_state(dev->id, NUMA_FOREIGN), - node_page_state(dev->id, NUMA_INTERLEAVE_HIT), - node_page_state(dev->id, NUMA_LOCAL), - node_page_state(dev->id, NUMA_OTHER)); + my_node_page_state(dev->id, NUMA_HIT), + my_node_page_state(dev->id, NUMA_MISS), + my_node_page_state(dev->id, NUMA_FOREIGN), + my_node_page_state(dev->id, NUMA_INTERLEAVE_HIT), + my_node_page_state(dev->id, NUMA_LOCAL), + my_node_page_state(dev->id, NUMA_OTHER)); } static SYSDEV_ATTR(numastat, S_IRUGO, node_read_numastat, NULL);
From: Andi Kleen ak@linux.intel.com
Drop some inlines to shrink code size with force inline
Still some unfixed growth in:
balance_leaf 7190 8766 +1576 search_by_key 1963 3317 +1354
Cc: viro@zeniv.linux.org.uk Cc: fweisbec@gmail.com Signed-off-by: Andi Kleen ak@linux.intel.com --- fs/reiserfs/do_balan.c | 14 +++++++------- 1 files changed, 7 insertions(+), 7 deletions(-)
diff --git a/fs/reiserfs/do_balan.c b/fs/reiserfs/do_balan.c index 60c0804..8b3c44c 100644 --- a/fs/reiserfs/do_balan.c +++ b/fs/reiserfs/do_balan.c @@ -21,7 +21,7 @@ #include <linux/buffer_head.h> #include <linux/kernel.h>
-static inline void buffer_info_init_left(struct tree_balance *tb, +static void buffer_info_init_left(struct tree_balance *tb, struct buffer_info *bi) { bi->tb = tb; @@ -30,7 +30,7 @@ static inline void buffer_info_init_left(struct tree_balance *tb, bi->bi_position = get_left_neighbor_position(tb, 0); }
-static inline void buffer_info_init_right(struct tree_balance *tb, +static void buffer_info_init_right(struct tree_balance *tb, struct buffer_info *bi) { bi->tb = tb; @@ -39,7 +39,7 @@ static inline void buffer_info_init_right(struct tree_balance *tb, bi->bi_position = get_right_neighbor_position(tb, 0); }
-static inline void buffer_info_init_tbS0(struct tree_balance *tb, +static void buffer_info_init_tbS0(struct tree_balance *tb, struct buffer_info *bi) { bi->tb = tb; @@ -48,7 +48,7 @@ static inline void buffer_info_init_tbS0(struct tree_balance *tb, bi->bi_position = PATH_H_POSITION(tb->tb_path, 1); }
-static inline void buffer_info_init_bh(struct tree_balance *tb, +static void buffer_info_init_bh(struct tree_balance *tb, struct buffer_info *bi, struct buffer_head *bh) { @@ -58,7 +58,7 @@ static inline void buffer_info_init_bh(struct tree_balance *tb, bi->bi_position = 0; }
-inline void do_balance_mark_leaf_dirty(struct tree_balance *tb, +void do_balance_mark_leaf_dirty(struct tree_balance *tb, struct buffer_head *bh, int flag) { journal_mark_dirty(tb->transaction_handle, @@ -1967,7 +1967,7 @@ static void check_internal_levels(struct tree_balance *tb)
*/
-static inline void do_balance_starts(struct tree_balance *tb) +static void do_balance_starts(struct tree_balance *tb) { /* use print_cur_tb() to see initial state of struct tree_balance */ @@ -1983,7 +1983,7 @@ static inline void do_balance_starts(struct tree_balance *tb) #endif }
-static inline void do_balance_completed(struct tree_balance *tb) +static void do_balance_completed(struct tree_balance *tb) {
#ifdef CONFIG_REISERFS_CHECK
From: Andi Kleen ak@linux.intel.com
With the tracing code in there they are far too big to inline.
.text savings compared to a non force inline kernel:
i915_restore_display 4393 12036 +7643 i915_save_display 4295 11459 +7164 i915_handle_error 2979 6666 +3687 i915_driver_irq_handler 2923 5086 +2163 i915_ringbuffer_info 458 1661 +1203 i915_save_vga - 1200 +1200 i915_driver_irq_uninstall 453 1624 +1171 i915_driver_irq_postinstall 913 2078 +1165 ironlake_enable_drps 719 1872 +1153 i915_restore_vga - 1142 +1142 intel_display_capture_error_state 784 2030 +1246 intel_init_emon 719 2016 +1297
and more ...
[AK: these are older numbers, with the new SNB forcewake checks it will be even worse]
Cc: keithp@keithp.com Signed-off-by: Andi Kleen ak@linux.intel.com --- drivers/gpu/drm/i915/i915_drv.c | 40 +++++++++++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/i915_drv.h | 22 ++------------------ 2 files changed, 43 insertions(+), 19 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index f07e425..c2de142 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -895,3 +895,43 @@ module_exit(i915_exit); MODULE_AUTHOR(DRIVER_AUTHOR); MODULE_DESCRIPTION(DRIVER_DESC); MODULE_LICENSE("GPL and additional rights"); + +/* We give fast paths for the really cool registers */ +#define NEEDS_FORCE_WAKE(dev_priv, reg) \ + (((dev_priv)->info->gen >= 6) && \ + ((reg) < 0x40000) && \ + ((reg) != FORCEWAKE)) + +#define __i915_read(x, y) \ +u##x i915_read##x(struct drm_i915_private *dev_priv, u32 reg) { \ + u##x val = 0; \ + if (NEEDS_FORCE_WAKE((dev_priv), (reg))) { \ + gen6_gt_force_wake_get(dev_priv); \ + val = read##y(dev_priv->regs + reg); \ + gen6_gt_force_wake_put(dev_priv); \ + } else { \ + val = read##y(dev_priv->regs + reg); \ + } \ + trace_i915_reg_rw(false, reg, val, sizeof(val)); \ + return val; \ +} + +__i915_read(8, b) +__i915_read(16, w) +__i915_read(32, l) +__i915_read(64, q) +#undef __i915_read + +#define __i915_write(x, y) \ +void i915_write##x(struct drm_i915_private *dev_priv, u32 reg, u##x val) { \ + trace_i915_reg_rw(true, reg, val, sizeof(val)); \ + if (NEEDS_FORCE_WAKE((dev_priv), (reg))) { \ + __gen6_gt_wait_for_fifo(dev_priv); \ + } \ + write##y(val, dev_priv->regs + reg); \ +} +__i915_write(8, b) +__i915_write(16, w) +__i915_write(32, l) +__i915_write(64, q) +#undef __i915_write diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 7916bd9..7d171ea 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1354,18 +1354,7 @@ void __gen6_gt_wait_for_fifo(struct drm_i915_private *dev_priv); ((reg) != FORCEWAKE))
#define __i915_read(x, y) \ -static inline u##x i915_read##x(struct drm_i915_private *dev_priv, u32 reg) { \ - u##x val = 0; \ - if (NEEDS_FORCE_WAKE((dev_priv), (reg))) { \ - gen6_gt_force_wake_get(dev_priv); \ - val = read##y(dev_priv->regs + reg); \ - gen6_gt_force_wake_put(dev_priv); \ - } else { \ - val = read##y(dev_priv->regs + reg); \ - } \ - trace_i915_reg_rw(false, reg, val, sizeof(val)); \ - return val; \ -} + u##x i915_read##x(struct drm_i915_private *dev_priv, u32 reg);
__i915_read(8, b) __i915_read(16, w) @@ -1374,13 +1363,8 @@ __i915_read(64, q) #undef __i915_read
#define __i915_write(x, y) \ -static inline void i915_write##x(struct drm_i915_private *dev_priv, u32 reg, u##x val) { \ - trace_i915_reg_rw(true, reg, val, sizeof(val)); \ - if (NEEDS_FORCE_WAKE((dev_priv), (reg))) { \ - __gen6_gt_wait_for_fifo(dev_priv); \ - } \ - write##y(val, dev_priv->regs + reg); \ -} + void i915_write##x(struct drm_i915_private *dev_priv, u32 reg, u##x val); + __i915_write(8, b) __i915_write(16, w) __i915_write(32, l)
On Thu, Oct 13, 2011 at 04:08:51PM -0700, Andi Kleen wrote:
From: Andi Kleen ak@linux.intel.com
With the tracing code in there they are far too big to inline.
.text savings compared to a non force inline kernel:
i915_restore_display 4393 12036 +7643 i915_save_display 4295 11459 +7164 i915_handle_error 2979 6666 +3687 i915_driver_irq_handler 2923 5086 +2163 i915_ringbuffer_info 458 1661 +1203 i915_save_vga - 1200 +1200 i915_driver_irq_uninstall 453 1624 +1171 i915_driver_irq_postinstall 913 2078 +1165 ironlake_enable_drps 719 1872 +1153 i915_restore_vga - 1142 +1142 intel_display_capture_error_state 784 2030 +1246 intel_init_emon 719 2016 +1297
and more ...
[AK: these are older numbers, with the new SNB forcewake checks it will be even worse]
Cc: keithp@keithp.com Signed-off-by: Andi Kleen ak@linux.intel.com
Reviewed-by: Daniel Vetter daniel.vetter@ffwll.ch
On Thu, 13 Oct 2011 16:08:51 -0700 Andi Kleen andi@firstfloor.org wrote:
From: Andi Kleen ak@linux.intel.com
With the tracing code in there they are far too big to inline.
.text savings compared to a non force inline kernel:
i915_restore_display 4393 12036 +7643 i915_save_display 4295 11459 +7164 i915_handle_error 2979 6666 +3687 i915_driver_irq_handler 2923 5086 +2163 i915_ringbuffer_info 458 1661 +1203 i915_save_vga - 1200 +1200 i915_driver_irq_uninstall 453 1624 +1171 i915_driver_irq_postinstall 913 2078 +1165 ironlake_enable_drps 719 1872 +1153 i915_restore_vga - 1142 +1142 intel_display_capture_error_state 784 2030 +1246 intel_init_emon 719 2016 +1297
and more ...
[AK: these are older numbers, with the new SNB forcewake checks it will be even worse]
Cc: keithp@keithp.com Signed-off-by: Andi Kleen ak@linux.intel.com
drivers/gpu/drm/i915/i915_drv.c | 40 +++++++++++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/i915_drv.h | 22 ++------------------ 2 files changed, 43 insertions(+), 19 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index f07e425..c2de142 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -895,3 +895,43 @@ module_exit(i915_exit); MODULE_AUTHOR(DRIVER_AUTHOR); MODULE_DESCRIPTION(DRIVER_DESC); MODULE_LICENSE("GPL and additional rights");
+/* We give fast paths for the really cool registers */ +#define NEEDS_FORCE_WAKE(dev_priv, reg) \
- (((dev_priv)->info->gen >= 6) && \
- ((reg) < 0x40000) && \
- ((reg) != FORCEWAKE))
+#define __i915_read(x, y) \ +u##x i915_read##x(struct drm_i915_private *dev_priv, u32 reg) { \
- u##x val = 0; \
- if (NEEDS_FORCE_WAKE((dev_priv), (reg))) { \
gen6_gt_force_wake_get(dev_priv); \
val = read##y(dev_priv->regs + reg); \
gen6_gt_force_wake_put(dev_priv); \
- } else { \
val = read##y(dev_priv->regs + reg); \
- } \
- trace_i915_reg_rw(false, reg, val, sizeof(val)); \
- return val; \
+}
+__i915_read(8, b) +__i915_read(16, w) +__i915_read(32, l) +__i915_read(64, q) +#undef __i915_read
+#define __i915_write(x, y) \ +void i915_write##x(struct drm_i915_private *dev_priv, u32 reg, u##x val) { \
- trace_i915_reg_rw(true, reg, val, sizeof(val)); \
- if (NEEDS_FORCE_WAKE((dev_priv), (reg))) { \
__gen6_gt_wait_for_fifo(dev_priv); \
- } \
- write##y(val, dev_priv->regs + reg); \
+} +__i915_write(8, b) +__i915_write(16, w) +__i915_write(32, l) +__i915_write(64, q) +#undef __i915_write diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 7916bd9..7d171ea 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1354,18 +1354,7 @@ void __gen6_gt_wait_for_fifo(struct drm_i915_private *dev_priv); ((reg) != FORCEWAKE))
#define __i915_read(x, y) \ -static inline u##x i915_read##x(struct drm_i915_private *dev_priv, u32 reg) { \
- u##x val = 0; \
- if (NEEDS_FORCE_WAKE((dev_priv), (reg))) { \
gen6_gt_force_wake_get(dev_priv); \
val = read##y(dev_priv->regs + reg); \
gen6_gt_force_wake_put(dev_priv); \
- } else { \
val = read##y(dev_priv->regs + reg); \
- } \
- trace_i915_reg_rw(false, reg, val, sizeof(val)); \
- return val; \
-}
- u##x i915_read##x(struct drm_i915_private *dev_priv, u32 reg);
__i915_read(8, b) __i915_read(16, w) @@ -1374,13 +1363,8 @@ __i915_read(64, q) #undef __i915_read
#define __i915_write(x, y) \ -static inline void i915_write##x(struct drm_i915_private *dev_priv, u32 reg, u##x val) { \
- trace_i915_reg_rw(true, reg, val, sizeof(val)); \
- if (NEEDS_FORCE_WAKE((dev_priv), (reg))) { \
__gen6_gt_wait_for_fifo(dev_priv); \
- } \
- write##y(val, dev_priv->regs + reg); \
-}
- void i915_write##x(struct drm_i915_private *dev_priv, u32 reg, u##x val);
__i915_write(8, b) __i915_write(16, w) __i915_write(32, l)
Acked-by: Ben Widawsky ben@bwidawsk.net
The forcewake increased size should have been fixed a bit with the forcewake struct encapsulation patch I posted to intel-gfx mailing list. Keith, if you take this, could you also look into that patch? 1315951648-5380-1-git-send-email-ben@bwidawsk.net
From: Andi Kleen ak@linux.intel.com
I found that gcc 4.5 didn't inline a lot of inlines with CONFIG_OPTIMIZE_INLINING and CONFIG_CC_OPTIMIZE_FOR_SIZE. It was quite common to have very small inlines to be out of line, or worse inline statics in include files to be out of line with a copy for every file using it too.
This is handily visible in a function graph trace for might_fault:
10) | might_fault() { 10) | _cond_resched() { 10) | should_resched() { 10) | need_resched() { 10) 0.063 us | test_ti_thread_flag(); 10) 0.643 us | } 10) 1.238 us | } 10) 1.845 us | } 10) 2.438 us | }
Note all of these functions are very small and should be definitely inlined in each other. In many cases even copy_from_user ends up out of line now which is really bad!
If I switch to -O2 it is also not quite as bad, but since a lot of people use -Os I was trying to fix it up.
So this patch forces inlining with gcc 4.5 with -Os.
Unfortunately it costs some code size with just this patch.
text data bss dec hex filename 11507035 1940276 1191936 14639247 df608f vmlinux-O2 10189858 1908124 1187840 13285822 cab9be vmlinux-Os-force 9808525 1940204 1187840 12936569 c56579 vmlinux-Os-orig
It turned out most of this was because of unnecessary inlines. With a lot of inlines removed I now get:
11175824 1977200 1191936 14344960 dae300 vmlinux-inlines-removed-no-optimize 11642530 2018416 1191936 14852882 e2a312 vmlinux-master-no-optimize 11530439 2001264 1191936 14723639 e0aa37 vmlinux-master-optimize
which is significantly smaller.
I haven't tested earlier gcc 4.x versions, but they may need the same treatment.
Signed-off-by: Andi Kleen ak@linux.intel.com --- include/linux/compiler-gcc.h | 5 ++++- 1 files changed, 4 insertions(+), 1 deletions(-)
diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 59e4028..e477a7c 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -44,9 +44,12 @@ /* * Force always-inline if the user requests it so via the .config, * or if gcc is too old: + * When optimizing for size on gcc 4.5 always force inlining too. */ #if !defined(CONFIG_ARCH_SUPPORTS_OPTIMIZED_INLINING) || \ - !defined(CONFIG_OPTIMIZE_INLINING) || (__GNUC__ < 4) + !defined(CONFIG_OPTIMIZE_INLINING) || (__GNUC__ < 4) || \ + (defined(CONFIG_CC_OPTIMIZE_FOR_SIZE) && \ + (__GNUC__ == 4 && __GNUC_MINOR__ == 5)) # define inline inline __attribute__((always_inline)) # define __inline__ __inline__ __attribute__((always_inline)) # define __inline __inline __attribute__((always_inline))
From: Andi Kleen ak@linux.intel.com
Hi Andi,
I've merged all the RADEON: patches to drm-next locally, with one minor change (moving some of the out-of-lines to a more appropriate place).
Thanks, Dave.
dri-devel@lists.freedesktop.org