From: Ville Syrjälä ville.syrjala@linux.intel.com
Some polishings and at least one minor fix for the gamma stuff.
Ville Syrjälä (12): drm: Inline drm_color_lut_extract() drm/i915: Polish CHV .load_luts() a bit drm/i915: Polish CHV CGM CSC loading drm/i915: Add i9xx_lut_8() drm/i915: Clean up i9xx_load_luts_internal() drm/i915: Split i9xx_read_lut_8() to gmch vs. ilk variants drm/i915: s/blob_data/lut/ drm/i915: s/chv_read_cgm_lut/chv_read_cgm_gamma/ drm/i915: Clean up integer types in color code drm/i915: Refactor LUT read functions drm/i915: Fix readout of PIPEGCMAX drm/i915: Pass the crtc to the low level read_lut() funcs
drivers/gpu/drm/drm_color_mgmt.c | 24 -- drivers/gpu/drm/i915/display/intel_color.c | 435 +++++++++++---------- drivers/gpu/drm/i915/i915_reg.h | 1 - include/drm/drm_color_mgmt.h | 23 +- 4 files changed, 258 insertions(+), 225 deletions(-)
From: Ville Syrjälä ville.syrjala@linux.intel.com
This thing can get called several thousand times per LUT so seems like we want to inline it to: - avoid the function call overhead - allow constant folding
A quick synthetic test (w/o any hardware interaction) with a ridiculously large LUT size shows about 50% reduction in runtime on my HSW and BSW boxes. Slightly less with more reasonable LUT size but still easily measurable in tens of microseconds.
Signed-off-by: Ville Syrjälä ville.syrjala@linux.intel.com --- drivers/gpu/drm/drm_color_mgmt.c | 24 ------------------------ include/drm/drm_color_mgmt.h | 23 ++++++++++++++++++++++- 2 files changed, 22 insertions(+), 25 deletions(-)
diff --git a/drivers/gpu/drm/drm_color_mgmt.c b/drivers/gpu/drm/drm_color_mgmt.c index 4ce5c6d8de99..19c5f635992a 100644 --- a/drivers/gpu/drm/drm_color_mgmt.c +++ b/drivers/gpu/drm/drm_color_mgmt.c @@ -108,30 +108,6 @@ * standard enum values supported by the DRM plane. */
-/** - * drm_color_lut_extract - clamp and round LUT entries - * @user_input: input value - * @bit_precision: number of bits the hw LUT supports - * - * Extract a degamma/gamma LUT value provided by user (in the form of - * &drm_color_lut entries) and round it to the precision supported by the - * hardware. - */ -uint32_t drm_color_lut_extract(uint32_t user_input, uint32_t bit_precision) -{ - uint32_t val = user_input; - uint32_t max = 0xffff >> (16 - bit_precision); - - /* Round only if we're not using full precision. */ - if (bit_precision < 16) { - val += 1UL << (16 - bit_precision - 1); - val >>= 16 - bit_precision; - } - - return clamp_val(val, 0, max); -} -EXPORT_SYMBOL(drm_color_lut_extract); - /** * drm_crtc_enable_color_mgmt - enable color management properties * @crtc: DRM CRTC diff --git a/include/drm/drm_color_mgmt.h b/include/drm/drm_color_mgmt.h index d1c662d92ab7..069b21d61871 100644 --- a/include/drm/drm_color_mgmt.h +++ b/include/drm/drm_color_mgmt.h @@ -29,7 +29,28 @@ struct drm_crtc; struct drm_plane;
-uint32_t drm_color_lut_extract(uint32_t user_input, uint32_t bit_precision); +/** + * drm_color_lut_extract - clamp and round LUT entries + * @user_input: input value + * @bit_precision: number of bits the hw LUT supports + * + * Extract a degamma/gamma LUT value provided by user (in the form of + * &drm_color_lut entries) and round it to the precision supported by the + * hardware. + */ +static inline u32 drm_color_lut_extract(u32 user_input, int bit_precision) +{ + u32 val = user_input; + u32 max = 0xffff >> (16 - bit_precision); + + /* Round only if we're not using full precision. */ + if (bit_precision < 16) { + val += 1UL << (16 - bit_precision - 1); + val >>= 16 - bit_precision; + } + + return clamp_val(val, 0, max); +}
void drm_crtc_enable_color_mgmt(struct drm_crtc *crtc, uint degamma_lut_size,
On 2019-11-07 10:17 a.m., Ville Syrjala wrote:
From: Ville Syrjälä ville.syrjala@linux.intel.com
This thing can get called several thousand times per LUT so seems like we want to inline it to:
- avoid the function call overhead
- allow constant folding
A quick synthetic test (w/o any hardware interaction) with a ridiculously large LUT size shows about 50% reduction in runtime on my HSW and BSW boxes. Slightly less with more reasonable LUT size but still easily measurable in tens of microseconds.
Signed-off-by: Ville Syrjälä ville.syrjala@linux.intel.com
Reviewed-by: Nicholas Kazlauskas nicholas.kazlauskas@amd.com
Seems reasonable to me. It would probably make sense to even split this further into two functions, one for high precision and one for low precision so it's purely a calculation and not hitting any branches.
Nicholas Kazlauskas
drivers/gpu/drm/drm_color_mgmt.c | 24 ------------------------ include/drm/drm_color_mgmt.h | 23 ++++++++++++++++++++++- 2 files changed, 22 insertions(+), 25 deletions(-)
diff --git a/drivers/gpu/drm/drm_color_mgmt.c b/drivers/gpu/drm/drm_color_mgmt.c index 4ce5c6d8de99..19c5f635992a 100644 --- a/drivers/gpu/drm/drm_color_mgmt.c +++ b/drivers/gpu/drm/drm_color_mgmt.c @@ -108,30 +108,6 @@
- standard enum values supported by the DRM plane.
*/
-/**
- drm_color_lut_extract - clamp and round LUT entries
- @user_input: input value
- @bit_precision: number of bits the hw LUT supports
- Extract a degamma/gamma LUT value provided by user (in the form of
- &drm_color_lut entries) and round it to the precision supported by the
- hardware.
- */
-uint32_t drm_color_lut_extract(uint32_t user_input, uint32_t bit_precision) -{
- uint32_t val = user_input;
- uint32_t max = 0xffff >> (16 - bit_precision);
- /* Round only if we're not using full precision. */
- if (bit_precision < 16) {
val += 1UL << (16 - bit_precision - 1);
val >>= 16 - bit_precision;
- }
- return clamp_val(val, 0, max);
-} -EXPORT_SYMBOL(drm_color_lut_extract);
- /**
- drm_crtc_enable_color_mgmt - enable color management properties
- @crtc: DRM CRTC
diff --git a/include/drm/drm_color_mgmt.h b/include/drm/drm_color_mgmt.h index d1c662d92ab7..069b21d61871 100644 --- a/include/drm/drm_color_mgmt.h +++ b/include/drm/drm_color_mgmt.h @@ -29,7 +29,28 @@ struct drm_crtc; struct drm_plane;
-uint32_t drm_color_lut_extract(uint32_t user_input, uint32_t bit_precision);
+/**
+ * drm_color_lut_extract - clamp and round LUT entries
+ * @user_input: input value
+ * @bit_precision: number of bits the hw LUT supports
+ *
+ * Extract a degamma/gamma LUT value provided by user (in the form of
+ * &drm_color_lut entries) and round it to the precision supported by the
+ * hardware.
+ */
+static inline u32 drm_color_lut_extract(u32 user_input, int bit_precision)
+{
+	u32 val = user_input;
+	u32 max = 0xffff >> (16 - bit_precision);
+
+	/* Round only if we're not using full precision. */
+	if (bit_precision < 16) {
+		val += 1UL << (16 - bit_precision - 1);
+		val >>= 16 - bit_precision;
+	}
+
+	return clamp_val(val, 0, max);
+}
void drm_crtc_enable_color_mgmt(struct drm_crtc *crtc, uint degamma_lut_size,
On Thu, Nov 07, 2019 at 03:31:28PM +0000, Kazlauskas, Nicholas wrote:
On 2019-11-07 10:17 a.m., Ville Syrjala wrote:
From: Ville Syrjälä ville.syrjala@linux.intel.com
This thing can get called several thousand times per LUT so seems like we want to inline it to:
- avoid the function call overhead
- allow constant folding
A quick synthetic test (w/o any hardware interaction) with a ridiculously large LUT size shows about 50% reduction in runtime on my HSW and BSW boxes. Slightly less with more reasonable LUT size but still easily measurable in tens of microseconds.
Signed-off-by: Ville Syrjälä ville.syrjala@linux.intel.com
Reviewed-by: Nicholas Kazlauskas nicholas.kazlauskas@amd.com
Seems reasonable to me. It would probably make sense to even split this further into two functions, one for high precision and one for low precision so it's purely a calculation and not hitting any branches.
Constant folding gets rid of it.
Nicholas Kazlauskas
drivers/gpu/drm/drm_color_mgmt.c | 24 ------------------------ include/drm/drm_color_mgmt.h | 23 ++++++++++++++++++++++- 2 files changed, 22 insertions(+), 25 deletions(-)
diff --git a/drivers/gpu/drm/drm_color_mgmt.c b/drivers/gpu/drm/drm_color_mgmt.c index 4ce5c6d8de99..19c5f635992a 100644 --- a/drivers/gpu/drm/drm_color_mgmt.c +++ b/drivers/gpu/drm/drm_color_mgmt.c @@ -108,30 +108,6 @@
- standard enum values supported by the DRM plane.
*/
-/**
- drm_color_lut_extract - clamp and round LUT entries
- @user_input: input value
- @bit_precision: number of bits the hw LUT supports
- Extract a degamma/gamma LUT value provided by user (in the form of
- &drm_color_lut entries) and round it to the precision supported by the
- hardware.
- */
-uint32_t drm_color_lut_extract(uint32_t user_input, uint32_t bit_precision) -{
- uint32_t val = user_input;
- uint32_t max = 0xffff >> (16 - bit_precision);
- /* Round only if we're not using full precision. */
- if (bit_precision < 16) {
val += 1UL << (16 - bit_precision - 1);
val >>= 16 - bit_precision;
- }
- return clamp_val(val, 0, max);
-} -EXPORT_SYMBOL(drm_color_lut_extract);
- /**
- drm_crtc_enable_color_mgmt - enable color management properties
- @crtc: DRM CRTC
diff --git a/include/drm/drm_color_mgmt.h b/include/drm/drm_color_mgmt.h index d1c662d92ab7..069b21d61871 100644 --- a/include/drm/drm_color_mgmt.h +++ b/include/drm/drm_color_mgmt.h @@ -29,7 +29,28 @@ struct drm_crtc; struct drm_plane;
-uint32_t drm_color_lut_extract(uint32_t user_input, uint32_t bit_precision);
+/**
+ * drm_color_lut_extract - clamp and round LUT entries
+ * @user_input: input value
+ * @bit_precision: number of bits the hw LUT supports
+ *
+ * Extract a degamma/gamma LUT value provided by user (in the form of
+ * &drm_color_lut entries) and round it to the precision supported by the
+ * hardware.
+ */
+static inline u32 drm_color_lut_extract(u32 user_input, int bit_precision)
+{
+	u32 val = user_input;
+	u32 max = 0xffff >> (16 - bit_precision);
+
+	/* Round only if we're not using full precision. */
+	if (bit_precision < 16) {
+		val += 1UL << (16 - bit_precision - 1);
+		val >>= 16 - bit_precision;
+	}
+
+	return clamp_val(val, 0, max);
+}
void drm_crtc_enable_color_mgmt(struct drm_crtc *crtc, uint degamma_lut_size,
On 2019-11-07 10:43 a.m., Ville Syrjälä wrote:
On Thu, Nov 07, 2019 at 03:31:28PM +0000, Kazlauskas, Nicholas wrote:
On 2019-11-07 10:17 a.m., Ville Syrjala wrote:
From: Ville Syrjälä ville.syrjala@linux.intel.com
This thing can get called several thousand times per LUT so seems like we want to inline it to:
- avoid the function call overhead
- allow constant folding
A quick synthetic test (w/o any hardware interaction) with a ridiculously large LUT size shows about 50% reduction in runtime on my HSW and BSW boxes. Slightly less with more reasonable LUT size but still easily measurable in tens of microseconds.
Signed-off-by: Ville Syrjälä ville.syrjala@linux.intel.com
Reviewed-by: Nicholas Kazlauskas nicholas.kazlauskas@amd.com
Seems reasonable to me. It would probably make sense to even split this further into two functions, one for high precision and one for low precision so it's purely a calculation and not hitting any branches.
Constant folding gets rid of it.
I realized after sending that email that moving this to inline is probably allowing the compiler to optimize this out and give you that large speedup in the first place. Though branch prediction probably helped cut down on the cost even when it wasn't inline.
This is fine as is then, thanks.
Nicholas Kazlauskas
Nicholas Kazlauskas
drivers/gpu/drm/drm_color_mgmt.c | 24 ------------------------ include/drm/drm_color_mgmt.h | 23 ++++++++++++++++++++++- 2 files changed, 22 insertions(+), 25 deletions(-)
diff --git a/drivers/gpu/drm/drm_color_mgmt.c b/drivers/gpu/drm/drm_color_mgmt.c index 4ce5c6d8de99..19c5f635992a 100644 --- a/drivers/gpu/drm/drm_color_mgmt.c +++ b/drivers/gpu/drm/drm_color_mgmt.c @@ -108,30 +108,6 @@ * standard enum values supported by the DRM plane. */
-/**
- drm_color_lut_extract - clamp and round LUT entries
- @user_input: input value
- @bit_precision: number of bits the hw LUT supports
- Extract a degamma/gamma LUT value provided by user (in the form of
- &drm_color_lut entries) and round it to the precision supported by the
- hardware.
- */
-uint32_t drm_color_lut_extract(uint32_t user_input, uint32_t bit_precision) -{
- uint32_t val = user_input;
- uint32_t max = 0xffff >> (16 - bit_precision);
- /* Round only if we're not using full precision. */
- if (bit_precision < 16) {
val += 1UL << (16 - bit_precision - 1);
val >>= 16 - bit_precision;
- }
- return clamp_val(val, 0, max);
-} -EXPORT_SYMBOL(drm_color_lut_extract);
- /**
- drm_crtc_enable_color_mgmt - enable color management properties
- @crtc: DRM CRTC
diff --git a/include/drm/drm_color_mgmt.h b/include/drm/drm_color_mgmt.h index d1c662d92ab7..069b21d61871 100644 --- a/include/drm/drm_color_mgmt.h +++ b/include/drm/drm_color_mgmt.h @@ -29,7 +29,28 @@ struct drm_crtc; struct drm_plane;
-uint32_t drm_color_lut_extract(uint32_t user_input, uint32_t bit_precision);
+/**
+ * drm_color_lut_extract - clamp and round LUT entries
+ * @user_input: input value
+ * @bit_precision: number of bits the hw LUT supports
+ *
+ * Extract a degamma/gamma LUT value provided by user (in the form of
+ * &drm_color_lut entries) and round it to the precision supported by the
+ * hardware.
+ */
+static inline u32 drm_color_lut_extract(u32 user_input, int bit_precision)
+{
+	u32 val = user_input;
+	u32 max = 0xffff >> (16 - bit_precision);
+
+	/* Round only if we're not using full precision. */
+	if (bit_precision < 16) {
+		val += 1UL << (16 - bit_precision - 1);
+		val >>= 16 - bit_precision;
+	}
+
+	return clamp_val(val, 0, max);
+}
void drm_crtc_enable_color_mgmt(struct drm_crtc *crtc, uint degamma_lut_size,
On Thu, Nov 07, 2019 at 05:17:14PM +0200, Ville Syrjala wrote:
From: Ville Syrjälä ville.syrjala@linux.intel.com
This thing can get called several thousand times per LUT so seems like we want to inline it to:
- avoid the function call overhead
- allow constant folding
A quick synthetic test (w/o any hardware interaction) with a ridiculously large LUT size shows about 50% reduction in runtime on my HSW and BSW boxes. Slightly less with more reasonable LUT size but still easily measurable in tens of microseconds.
Signed-off-by: Ville Syrjälä ville.syrjala@linux.intel.com
drivers/gpu/drm/drm_color_mgmt.c | 24 ------------------------ include/drm/drm_color_mgmt.h | 23 ++++++++++++++++++++++-
You forgot to add the include stanza in the kerneldoc .rst files, which means this is now lost from the output. Please fix. -Daniel
2 files changed, 22 insertions(+), 25 deletions(-)
diff --git a/drivers/gpu/drm/drm_color_mgmt.c b/drivers/gpu/drm/drm_color_mgmt.c index 4ce5c6d8de99..19c5f635992a 100644 --- a/drivers/gpu/drm/drm_color_mgmt.c +++ b/drivers/gpu/drm/drm_color_mgmt.c @@ -108,30 +108,6 @@
- standard enum values supported by the DRM plane.
*/
-/**
- drm_color_lut_extract - clamp and round LUT entries
- @user_input: input value
- @bit_precision: number of bits the hw LUT supports
- Extract a degamma/gamma LUT value provided by user (in the form of
- &drm_color_lut entries) and round it to the precision supported by the
- hardware.
- */
-uint32_t drm_color_lut_extract(uint32_t user_input, uint32_t bit_precision) -{
- uint32_t val = user_input;
- uint32_t max = 0xffff >> (16 - bit_precision);
- /* Round only if we're not using full precision. */
- if (bit_precision < 16) {
val += 1UL << (16 - bit_precision - 1);
val >>= 16 - bit_precision;
- }
- return clamp_val(val, 0, max);
-} -EXPORT_SYMBOL(drm_color_lut_extract);
/**
- drm_crtc_enable_color_mgmt - enable color management properties
- @crtc: DRM CRTC
diff --git a/include/drm/drm_color_mgmt.h b/include/drm/drm_color_mgmt.h index d1c662d92ab7..069b21d61871 100644 --- a/include/drm/drm_color_mgmt.h +++ b/include/drm/drm_color_mgmt.h @@ -29,7 +29,28 @@ struct drm_crtc; struct drm_plane;
-uint32_t drm_color_lut_extract(uint32_t user_input, uint32_t bit_precision);
+/**
+ * drm_color_lut_extract - clamp and round LUT entries
+ * @user_input: input value
+ * @bit_precision: number of bits the hw LUT supports
+ *
+ * Extract a degamma/gamma LUT value provided by user (in the form of
+ * &drm_color_lut entries) and round it to the precision supported by the
+ * hardware.
+ */
+static inline u32 drm_color_lut_extract(u32 user_input, int bit_precision)
+{
+	u32 val = user_input;
+	u32 max = 0xffff >> (16 - bit_precision);
+
+	/* Round only if we're not using full precision. */
+	if (bit_precision < 16) {
+		val += 1UL << (16 - bit_precision - 1);
+		val >>= 16 - bit_precision;
+	}
+
+	return clamp_val(val, 0, max);
+}
void drm_crtc_enable_color_mgmt(struct drm_crtc *crtc, uint degamma_lut_size, -- 2.23.0
Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
On Thu, Nov 07, 2019 at 06:40:14PM +0100, Daniel Vetter wrote:
On Thu, Nov 07, 2019 at 05:17:14PM +0200, Ville Syrjala wrote:
From: Ville Syrjälä ville.syrjala@linux.intel.com
This thing can get called several thousand times per LUT so seems like we want to inline it to:
- avoid the function call overhead
- allow constant folding
A quick synthetic test (w/o any hardware interaction) with a ridiculously large LUT size shows about 50% reduction in runtime on my HSW and BSW boxes. Slightly less with more reasonable LUT size but still easily measurable in tens of microseconds.
Signed-off-by: Ville Syrjälä ville.syrjala@linux.intel.com
drivers/gpu/drm/drm_color_mgmt.c | 24 ------------------------ include/drm/drm_color_mgmt.h | 23 ++++++++++++++++++++++-
You forgot to add the include stanza in the kerneldoc .rst files, which means this is now lost from the output. Please fix.
Aye. A bit funny that we already have a bunch of other kerneldocs in that header but it's not included in the .rst.
-Daniel
2 files changed, 22 insertions(+), 25 deletions(-)
diff --git a/drivers/gpu/drm/drm_color_mgmt.c b/drivers/gpu/drm/drm_color_mgmt.c index 4ce5c6d8de99..19c5f635992a 100644 --- a/drivers/gpu/drm/drm_color_mgmt.c +++ b/drivers/gpu/drm/drm_color_mgmt.c @@ -108,30 +108,6 @@
- standard enum values supported by the DRM plane.
*/
-/**
- drm_color_lut_extract - clamp and round LUT entries
- @user_input: input value
- @bit_precision: number of bits the hw LUT supports
- Extract a degamma/gamma LUT value provided by user (in the form of
- &drm_color_lut entries) and round it to the precision supported by the
- hardware.
- */
-uint32_t drm_color_lut_extract(uint32_t user_input, uint32_t bit_precision) -{
- uint32_t val = user_input;
- uint32_t max = 0xffff >> (16 - bit_precision);
- /* Round only if we're not using full precision. */
- if (bit_precision < 16) {
val += 1UL << (16 - bit_precision - 1);
val >>= 16 - bit_precision;
- }
- return clamp_val(val, 0, max);
-} -EXPORT_SYMBOL(drm_color_lut_extract);
/**
- drm_crtc_enable_color_mgmt - enable color management properties
- @crtc: DRM CRTC
diff --git a/include/drm/drm_color_mgmt.h b/include/drm/drm_color_mgmt.h index d1c662d92ab7..069b21d61871 100644 --- a/include/drm/drm_color_mgmt.h +++ b/include/drm/drm_color_mgmt.h @@ -29,7 +29,28 @@ struct drm_crtc; struct drm_plane;
-uint32_t drm_color_lut_extract(uint32_t user_input, uint32_t bit_precision);
+/**
+ * drm_color_lut_extract - clamp and round LUT entries
+ * @user_input: input value
+ * @bit_precision: number of bits the hw LUT supports
+ *
+ * Extract a degamma/gamma LUT value provided by user (in the form of
+ * &drm_color_lut entries) and round it to the precision supported by the
+ * hardware.
+ */
+static inline u32 drm_color_lut_extract(u32 user_input, int bit_precision)
+{
+	u32 val = user_input;
+	u32 max = 0xffff >> (16 - bit_precision);
+
+	/* Round only if we're not using full precision. */
+	if (bit_precision < 16) {
+		val += 1UL << (16 - bit_precision - 1);
+		val >>= 16 - bit_precision;
+	}
+
+	return clamp_val(val, 0, max);
+}
void drm_crtc_enable_color_mgmt(struct drm_crtc *crtc, uint degamma_lut_size, -- 2.23.0
Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
-- Daniel Vetter Software Engineer, Intel Corporation http://blog.ffwll.ch
On Fri, Nov 08, 2019 at 03:36:57PM +0200, Ville Syrjälä wrote:
On Thu, Nov 07, 2019 at 06:40:14PM +0100, Daniel Vetter wrote:
On Thu, Nov 07, 2019 at 05:17:14PM +0200, Ville Syrjala wrote:
From: Ville Syrjälä ville.syrjala@linux.intel.com
This thing can get called several thousand times per LUT so seems like we want to inline it to:
- avoid the function call overhead
- allow constant folding
A quick synthetic test (w/o any hardware interaction) with a ridiculously large LUT size shows about 50% reduction in runtime on my HSW and BSW boxes. Slightly less with more reasonable LUT size but still easily measurable in tens of microseconds.
Signed-off-by: Ville Syrjälä ville.syrjala@linux.intel.com
drivers/gpu/drm/drm_color_mgmt.c | 24 ------------------------ include/drm/drm_color_mgmt.h | 23 ++++++++++++++++++++++-
You forgot to add the include stanza in the kerneldoc .rst files, which means this is now lost from the output. Please fix.
Aye. A bit funny that we already have a bunch of other kerneldocs in that header but it's not included in the .rst.
kerneldoc complains if there's no kerneldoc, which is often the case for headers. So we start out without the header included, and then someone misses that when adding the first/second/... kerneldoc.
It's rather annoying unfortunately :-/ -Daniel
-Daniel
2 files changed, 22 insertions(+), 25 deletions(-)
diff --git a/drivers/gpu/drm/drm_color_mgmt.c b/drivers/gpu/drm/drm_color_mgmt.c index 4ce5c6d8de99..19c5f635992a 100644 --- a/drivers/gpu/drm/drm_color_mgmt.c +++ b/drivers/gpu/drm/drm_color_mgmt.c @@ -108,30 +108,6 @@
- standard enum values supported by the DRM plane.
*/
-/**
- drm_color_lut_extract - clamp and round LUT entries
- @user_input: input value
- @bit_precision: number of bits the hw LUT supports
- Extract a degamma/gamma LUT value provided by user (in the form of
- &drm_color_lut entries) and round it to the precision supported by the
- hardware.
- */
-uint32_t drm_color_lut_extract(uint32_t user_input, uint32_t bit_precision) -{
- uint32_t val = user_input;
- uint32_t max = 0xffff >> (16 - bit_precision);
- /* Round only if we're not using full precision. */
- if (bit_precision < 16) {
val += 1UL << (16 - bit_precision - 1);
val >>= 16 - bit_precision;
- }
- return clamp_val(val, 0, max);
-} -EXPORT_SYMBOL(drm_color_lut_extract);
/**
- drm_crtc_enable_color_mgmt - enable color management properties
- @crtc: DRM CRTC
diff --git a/include/drm/drm_color_mgmt.h b/include/drm/drm_color_mgmt.h index d1c662d92ab7..069b21d61871 100644 --- a/include/drm/drm_color_mgmt.h +++ b/include/drm/drm_color_mgmt.h @@ -29,7 +29,28 @@ struct drm_crtc; struct drm_plane;
-uint32_t drm_color_lut_extract(uint32_t user_input, uint32_t bit_precision);
+/**
+ * drm_color_lut_extract - clamp and round LUT entries
+ * @user_input: input value
+ * @bit_precision: number of bits the hw LUT supports
+ *
+ * Extract a degamma/gamma LUT value provided by user (in the form of
+ * &drm_color_lut entries) and round it to the precision supported by the
+ * hardware.
+ */
+static inline u32 drm_color_lut_extract(u32 user_input, int bit_precision)
+{
+	u32 val = user_input;
+	u32 max = 0xffff >> (16 - bit_precision);
+
+	/* Round only if we're not using full precision. */
+	if (bit_precision < 16) {
+		val += 1UL << (16 - bit_precision - 1);
+		val >>= 16 - bit_precision;
+	}
+
+	return clamp_val(val, 0, max);
+}
void drm_crtc_enable_color_mgmt(struct drm_crtc *crtc, uint degamma_lut_size, -- 2.23.0
Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
-- Daniel Vetter Software Engineer, Intel Corporation http://blog.ffwll.ch
-- Ville Syrjälä Intel
From: Ville Syrjälä ville.syrjala@linux.intel.com
This thing can get called several thousand times per LUT so seems like we want to inline it to: - avoid the function call overhead - allow constant folding
A quick synthetic test (w/o any hardware interaction) with a ridiculously large LUT size shows about 50% reduction in runtime on my HSW and BSW boxes. Slightly less with more reasonable LUT size but still easily measurable in tens of microseconds.
v2: Include drm_color_mgmt.h in the .rst (Daniel)
Cc: Daniel Vetter daniel@ffwll.ch Signed-off-by: Ville Syrjälä ville.syrjala@linux.intel.com Reviewed-by: Nicholas Kazlauskas nicholas.kazlauskas@amd.com --- Documentation/gpu/drm-kms.rst | 3 +++ drivers/gpu/drm/drm_color_mgmt.c | 24 ------------------------ include/drm/drm_color_mgmt.h | 23 ++++++++++++++++++++++- 3 files changed, 25 insertions(+), 25 deletions(-)
diff --git a/Documentation/gpu/drm-kms.rst b/Documentation/gpu/drm-kms.rst index 23a3c986ef6d..c68588ce4090 100644 --- a/Documentation/gpu/drm-kms.rst +++ b/Documentation/gpu/drm-kms.rst @@ -479,6 +479,9 @@ Color Management Properties .. kernel-doc:: drivers/gpu/drm/drm_color_mgmt.c :export:
+.. kernel-doc:: include/drm/drm_color_mgmt.h + :internal: + Tile Group Property -------------------
diff --git a/drivers/gpu/drm/drm_color_mgmt.c b/drivers/gpu/drm/drm_color_mgmt.c index 4ce5c6d8de99..19c5f635992a 100644 --- a/drivers/gpu/drm/drm_color_mgmt.c +++ b/drivers/gpu/drm/drm_color_mgmt.c @@ -108,30 +108,6 @@ * standard enum values supported by the DRM plane. */
-/** - * drm_color_lut_extract - clamp and round LUT entries - * @user_input: input value - * @bit_precision: number of bits the hw LUT supports - * - * Extract a degamma/gamma LUT value provided by user (in the form of - * &drm_color_lut entries) and round it to the precision supported by the - * hardware. - */ -uint32_t drm_color_lut_extract(uint32_t user_input, uint32_t bit_precision) -{ - uint32_t val = user_input; - uint32_t max = 0xffff >> (16 - bit_precision); - - /* Round only if we're not using full precision. */ - if (bit_precision < 16) { - val += 1UL << (16 - bit_precision - 1); - val >>= 16 - bit_precision; - } - - return clamp_val(val, 0, max); -} -EXPORT_SYMBOL(drm_color_lut_extract); - /** * drm_crtc_enable_color_mgmt - enable color management properties * @crtc: DRM CRTC diff --git a/include/drm/drm_color_mgmt.h b/include/drm/drm_color_mgmt.h index d1c662d92ab7..069b21d61871 100644 --- a/include/drm/drm_color_mgmt.h +++ b/include/drm/drm_color_mgmt.h @@ -29,7 +29,28 @@ struct drm_crtc; struct drm_plane;
-uint32_t drm_color_lut_extract(uint32_t user_input, uint32_t bit_precision); +/** + * drm_color_lut_extract - clamp and round LUT entries + * @user_input: input value + * @bit_precision: number of bits the hw LUT supports + * + * Extract a degamma/gamma LUT value provided by user (in the form of + * &drm_color_lut entries) and round it to the precision supported by the + * hardware. + */ +static inline u32 drm_color_lut_extract(u32 user_input, int bit_precision) +{ + u32 val = user_input; + u32 max = 0xffff >> (16 - bit_precision); + + /* Round only if we're not using full precision. */ + if (bit_precision < 16) { + val += 1UL << (16 - bit_precision - 1); + val >>= 16 - bit_precision; + } + + return clamp_val(val, 0, max); +}
void drm_crtc_enable_color_mgmt(struct drm_crtc *crtc, uint degamma_lut_size,
From: Ville Syrjälä ville.syrjala@linux.intel.com
It irks me to use crtc_state_is_legacy_gamma() inside the guts of the CHV color management code. Let's get rid of it and instead just consult cgm_mode to figure out if we want to enable the pipe gamma or the CGM gamma.
Also CHV display engine is based on i965/g4x so we should fall back to the i965 path when the CGM gamma is not used.
Signed-off-by: Ville Syrjälä ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/display/intel_color.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-)
diff --git a/drivers/gpu/drm/i915/display/intel_color.c b/drivers/gpu/drm/i915/display/intel_color.c index 3980e8b50c28..d8ee90b7774a 100644 --- a/drivers/gpu/drm/i915/display/intel_color.c +++ b/drivers/gpu/drm/i915/display/intel_color.c @@ -996,16 +996,13 @@ static void chv_load_luts(const struct intel_crtc_state *crtc_state)
cherryview_load_csc_matrix(crtc_state);
- if (crtc_state_is_legacy_gamma(crtc_state)) { - i9xx_load_luts(crtc_state); - return; - } - - if (degamma_lut) + if (crtc_state->cgm_mode & CGM_PIPE_MODE_DEGAMMA) chv_load_cgm_degamma(crtc, degamma_lut);
- if (gamma_lut) + if (crtc_state->cgm_mode & CGM_PIPE_MODE_GAMMA) chv_load_cgm_gamma(crtc, gamma_lut); + else + i965_load_luts(crtc_state); }
void intel_color_load_luts(const struct intel_crtc_state *crtc_state)
Hi Ville, Can you please rebase the series? There are intel_de_write() changes in existing code.
On 07-Nov-19 8:47 PM, Ville Syrjala wrote:
From: Ville Syrjälä ville.syrjala@linux.intel.com
It irks me to use crtc_state_is_legacy_gamma() inside the guts of the CHV color management code. Let's get rid of it and instead just consult cgm_mode to figure out if we want to enable the pipe gamma or the CGM gamma.
Also CHV display engine is based on i965/g4x so we should fall back to the i965 path when the CGM gamma is not used.
Signed-off-by: Ville Syrjälä ville.syrjala@linux.intel.com
drivers/gpu/drm/i915/display/intel_color.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-)
diff --git a/drivers/gpu/drm/i915/display/intel_color.c b/drivers/gpu/drm/i915/display/intel_color.c index 3980e8b50c28..d8ee90b7774a 100644 --- a/drivers/gpu/drm/i915/display/intel_color.c +++ b/drivers/gpu/drm/i915/display/intel_color.c @@ -996,16 +996,13 @@ static void chv_load_luts(const struct intel_crtc_state *crtc_state)
cherryview_load_csc_matrix(crtc_state);
-	if (crtc_state_is_legacy_gamma(crtc_state)) {
-		i9xx_load_luts(crtc_state);
-		return;
-	}
-
-	if (degamma_lut)
+	if (crtc_state->cgm_mode & CGM_PIPE_MODE_DEGAMMA)
 		chv_load_cgm_degamma(crtc, degamma_lut);
 
-	if (gamma_lut)
+	if (crtc_state->cgm_mode & CGM_PIPE_MODE_GAMMA)
 		chv_load_cgm_gamma(crtc, gamma_lut);
+	else
+		i965_load_luts(crtc_state);
 }
void intel_color_load_luts(const struct intel_crtc_state *crtc_state)
Reviewed-by: Swati Sharma swati2.sharma@intel.com
From: Ville Syrjälä ville.syrjala@linux.intel.com
Only load the CGM CSC based on the cgm_mode bit like we do with the gamma/degamma LUTs. And make the function naming and arguments consistent as well.
TODO: the code to convert the coefficients looks totally bogus. IIRC CHV uses two's complement format but the code certainly doesn't generate that, so probably negative coefficients are totally busted.
Signed-off-by: Ville Syrjälä ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/display/intel_color.c | 69 ++++++++++------------ 1 file changed, 32 insertions(+), 37 deletions(-)
diff --git a/drivers/gpu/drm/i915/display/intel_color.c b/drivers/gpu/drm/i915/display/intel_color.c index d8ee90b7774a..f20809d91f85 100644 --- a/drivers/gpu/drm/i915/display/intel_color.c +++ b/drivers/gpu/drm/i915/display/intel_color.c @@ -333,48 +333,38 @@ static void icl_load_csc_matrix(const struct intel_crtc_state *crtc_state) I915_WRITE(PIPE_CSC_MODE(crtc->pipe), crtc_state->csc_mode); }
-/* - * Set up the pipe CSC unit on CherryView. - */ -static void cherryview_load_csc_matrix(const struct intel_crtc_state *crtc_state) +static void chv_load_cgm_csc(struct intel_crtc *crtc, + const struct drm_property_blob *blob) { - struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); + const struct drm_color_ctm *ctm = blob->data; struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); enum pipe pipe = crtc->pipe; + u16 coeffs[9]; + int i;
- if (crtc_state->hw.ctm) { - const struct drm_color_ctm *ctm = crtc_state->hw.ctm->data; - u16 coeffs[9] = {}; - int i; - - for (i = 0; i < ARRAY_SIZE(coeffs); i++) { - u64 abs_coeff = - ((1ULL << 63) - 1) & ctm->matrix[i]; - - /* Round coefficient. */ - abs_coeff += 1 << (32 - 13); - /* Clamp to hardware limits. */ - abs_coeff = clamp_val(abs_coeff, 0, CTM_COEFF_8_0 - 1); - - /* Write coefficients in S3.12 format. */ - if (ctm->matrix[i] & (1ULL << 63)) - coeffs[i] = 1 << 15; - coeffs[i] |= ((abs_coeff >> 32) & 7) << 12; - coeffs[i] |= (abs_coeff >> 20) & 0xfff; - } + for (i = 0; i < ARRAY_SIZE(coeffs); i++) { + u64 abs_coeff = ((1ULL << 63) - 1) & ctm->matrix[i];
- I915_WRITE(CGM_PIPE_CSC_COEFF01(pipe), - coeffs[1] << 16 | coeffs[0]); - I915_WRITE(CGM_PIPE_CSC_COEFF23(pipe), - coeffs[3] << 16 | coeffs[2]); - I915_WRITE(CGM_PIPE_CSC_COEFF45(pipe), - coeffs[5] << 16 | coeffs[4]); - I915_WRITE(CGM_PIPE_CSC_COEFF67(pipe), - coeffs[7] << 16 | coeffs[6]); - I915_WRITE(CGM_PIPE_CSC_COEFF8(pipe), coeffs[8]); + /* Round coefficient. */ + abs_coeff += 1 << (32 - 13); + /* Clamp to hardware limits. */ + abs_coeff = clamp_val(abs_coeff, 0, CTM_COEFF_8_0 - 1); + + coeffs[i] = 0; + + /* Write coefficients in S3.12 format. */ + if (ctm->matrix[i] & (1ULL << 63)) + coeffs[i] |= 1 << 15; + + coeffs[i] |= ((abs_coeff >> 32) & 7) << 12; + coeffs[i] |= (abs_coeff >> 20) & 0xfff; }
- I915_WRITE(CGM_PIPE_MODE(pipe), crtc_state->cgm_mode); + I915_WRITE(CGM_PIPE_CSC_COEFF01(pipe), coeffs[1] << 16 | coeffs[0]); + I915_WRITE(CGM_PIPE_CSC_COEFF23(pipe), coeffs[3] << 16 | coeffs[2]); + I915_WRITE(CGM_PIPE_CSC_COEFF45(pipe), coeffs[5] << 16 | coeffs[4]); + I915_WRITE(CGM_PIPE_CSC_COEFF67(pipe), coeffs[7] << 16 | coeffs[6]); + I915_WRITE(CGM_PIPE_CSC_COEFF8(pipe), coeffs[8]); }
/* i965+ "10.6" bit interpolated format "even DW" (low 8 bits) */ @@ -991,10 +981,13 @@ static void chv_load_cgm_gamma(struct intel_crtc *crtc, static void chv_load_luts(const struct intel_crtc_state *crtc_state) { struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); - const struct drm_property_blob *gamma_lut = crtc_state->hw.gamma_lut; + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); const struct drm_property_blob *degamma_lut = crtc_state->hw.degamma_lut; + const struct drm_property_blob *gamma_lut = crtc_state->hw.gamma_lut; + const struct drm_property_blob *ctm = crtc_state->hw.ctm;
- cherryview_load_csc_matrix(crtc_state); + if (crtc_state->cgm_mode & CGM_PIPE_MODE_CSC) + chv_load_cgm_csc(crtc, ctm);
if (crtc_state->cgm_mode & CGM_PIPE_MODE_DEGAMMA) chv_load_cgm_degamma(crtc, degamma_lut); @@ -1003,6 +996,8 @@ static void chv_load_luts(const struct intel_crtc_state *crtc_state) chv_load_cgm_gamma(crtc, gamma_lut); else i965_load_luts(crtc_state); + + I915_WRITE(CGM_PIPE_MODE(crtc->pipe), crtc_state->cgm_mode); }
void intel_color_load_luts(const struct intel_crtc_state *crtc_state)
From: Ville Syrjälä ville.syrjala@linux.intel.com
We have a nice little helper to compute a single LUT entry for everything except the 8bpc legacy gamma mode. Let's complete the set.
Signed-off-by: Ville Syrjälä ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/display/intel_color.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/i915/display/intel_color.c b/drivers/gpu/drm/i915/display/intel_color.c index f20809d91f85..5443b8ec0a4c 100644 --- a/drivers/gpu/drm/i915/display/intel_color.c +++ b/drivers/gpu/drm/i915/display/intel_color.c @@ -367,6 +367,13 @@ static void chv_load_cgm_csc(struct intel_crtc *crtc, I915_WRITE(CGM_PIPE_CSC_COEFF8(pipe), coeffs[8]); }
+static u32 i9xx_lut_8(const struct drm_color_lut *color) +{ + return drm_color_lut_extract(color->red, 8) << 16 | + drm_color_lut_extract(color->green, 8) << 8 | + drm_color_lut_extract(color->blue, 8); +} + /* i965+ "10.6" bit interpolated format "even DW" (low 8 bits) */ static u32 i965_lut_10p6_ldw(const struct drm_color_lut *color) { @@ -410,10 +417,7 @@ static void i9xx_load_luts_internal(const struct intel_crtc_state *crtc_state, const struct drm_color_lut *lut = blob->data;
for (i = 0; i < 256; i++) { - u32 word = - (drm_color_lut_extract(lut[i].red, 8) << 16) | - (drm_color_lut_extract(lut[i].green, 8) << 8) | - drm_color_lut_extract(lut[i].blue, 8); + u32 word = i9xx_lut_8(&lut[i]);
if (HAS_GMCH(dev_priv)) I915_WRITE(PALETTE(pipe, i), word);
On Thu, 7 Nov 2019 at 15:17, Ville Syrjala ville.syrjala@linux.intel.com wrote:
From: Ville Syrjälä ville.syrjala@linux.intel.com
We have a nice little helper to compute a single LUT entry for everything except the 8bpc legacy gamma mode. Let's complete the set.
At a later stage one could rename this & the 10bit one, moving them to include/drm/. There are other drivers doing the same thing... not sure if that's worth it though.
Reviewed-by: Emil Velikov emil.velikov@collabora.com
-Emil
On Thu, Feb 20, 2020 at 11:20:05AM +0000, Emil Velikov wrote:
On Thu, 7 Nov 2019 at 15:17, Ville Syrjala ville.syrjala@linux.intel.com wrote:
From: Ville Syrjälä ville.syrjala@linux.intel.com
We have a nice little helper to compute a single LUT entry for everything except the 8bpc legacy gamma mode. Let's complete the set.
At a later stage one could rename this & the 10bit one, moving them to include/drm/. There are other drivers doing the same thing... not sure if that's worth it though.
I'd say no. These are specifically about formatting the LUT entry for the hw register. I don't really see much benefit from sharing code to compute hw register values across totally different hardware, even if the bits happen to match by accident.
The only good exceptions I can think of are cases where said register value comes more or less straight from some cross-vendor spec.
From: Ville Syrjälä ville.syrjala@linux.intel.com
Split i9xx_load_luts_internal() into neat gmch vs. ilk+ chunks. Avoids at least one branch in the inner loop, and makes life a bit less confusing.
Signed-off-by: Ville Syrjälä ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/display/intel_color.c | 98 +++++++++++++--------- 1 file changed, 57 insertions(+), 41 deletions(-)
diff --git a/drivers/gpu/drm/i915/display/intel_color.c b/drivers/gpu/drm/i915/display/intel_color.c index 5443b8ec0a4c..992290a07086 100644 --- a/drivers/gpu/drm/i915/display/intel_color.c +++ b/drivers/gpu/drm/i915/display/intel_color.c @@ -397,41 +397,6 @@ static u32 ilk_lut_10(const struct drm_color_lut *color) drm_color_lut_extract(color->blue, 10); }
-/* Loads the legacy palette/gamma unit for the CRTC. */ -static void i9xx_load_luts_internal(const struct intel_crtc_state *crtc_state, - const struct drm_property_blob *blob) -{ - struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); - struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - enum pipe pipe = crtc->pipe; - int i; - - if (HAS_GMCH(dev_priv)) { - if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_DSI)) - assert_dsi_pll_enabled(dev_priv); - else - assert_pll_enabled(dev_priv, pipe); - } - - if (blob) { - const struct drm_color_lut *lut = blob->data; - - for (i = 0; i < 256; i++) { - u32 word = i9xx_lut_8(&lut[i]); - - if (HAS_GMCH(dev_priv)) - I915_WRITE(PALETTE(pipe, i), word); - else - I915_WRITE(LGC_PALETTE(pipe, i), word); - } - } -} - -static void i9xx_load_luts(const struct intel_crtc_state *crtc_state) -{ - i9xx_load_luts_internal(crtc_state, crtc_state->hw.gamma_lut); -} - static void i9xx_color_commit(const struct intel_crtc_state *crtc_state) { struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); @@ -496,6 +461,34 @@ static void skl_color_commit(const struct intel_crtc_state *crtc_state) ilk_load_csc_matrix(crtc_state); }
+static void i9xx_load_lut_8(struct intel_crtc *crtc, + const struct drm_property_blob *blob) +{ + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + const struct drm_color_lut *lut; + enum pipe pipe = crtc->pipe; + int i; + + if (!blob) + return; + + lut = blob->data; + + for (i = 0; i < 256; i++) + I915_WRITE(PALETTE(pipe, i), i9xx_lut_8(&lut[i])); +} + +static void i9xx_load_luts(const struct intel_crtc_state *crtc_state) +{ + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + const struct drm_property_blob *gamma_lut = crtc_state->hw.gamma_lut; + + assert_pll_enabled(dev_priv, crtc->pipe); + + i9xx_load_lut_8(crtc, gamma_lut); +} + static void i965_load_lut_10p6(struct intel_crtc *crtc, const struct drm_property_blob *blob) { @@ -519,14 +512,37 @@ static void i965_load_lut_10p6(struct intel_crtc *crtc, static void i965_load_luts(const struct intel_crtc_state *crtc_state) { struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); const struct drm_property_blob *gamma_lut = crtc_state->hw.gamma_lut;
+ if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_DSI)) + assert_dsi_pll_enabled(dev_priv); + else + assert_pll_enabled(dev_priv, crtc->pipe); + if (crtc_state->gamma_mode == GAMMA_MODE_MODE_8BIT) - i9xx_load_luts(crtc_state); + i9xx_load_lut_8(crtc, gamma_lut); else i965_load_lut_10p6(crtc, gamma_lut); }
+static void ilk_load_lut_8(struct intel_crtc *crtc, + const struct drm_property_blob *blob) +{ + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + const struct drm_color_lut *lut; + enum pipe pipe = crtc->pipe; + int i; + + if (!blob) + return; + + lut = blob->data; + + for (i = 0; i < 256; i++) + I915_WRITE(LGC_PALETTE(pipe, i), i9xx_lut_8(&lut[i])); +} + static void ilk_load_lut_10(struct intel_crtc *crtc, const struct drm_property_blob *blob) { @@ -545,7 +561,7 @@ static void ilk_load_luts(const struct intel_crtc_state *crtc_state) const struct drm_property_blob *gamma_lut = crtc_state->hw.gamma_lut;
if (crtc_state->gamma_mode == GAMMA_MODE_MODE_8BIT) - i9xx_load_luts(crtc_state); + ilk_load_lut_8(crtc, gamma_lut); else ilk_load_lut_10(crtc, gamma_lut); } @@ -653,7 +669,7 @@ static void ivb_load_luts(const struct intel_crtc_state *crtc_state) const struct drm_property_blob *degamma_lut = crtc_state->hw.degamma_lut;
if (crtc_state->gamma_mode == GAMMA_MODE_MODE_8BIT) { - i9xx_load_luts(crtc_state); + ilk_load_lut_8(crtc, gamma_lut); } else if (crtc_state->gamma_mode == GAMMA_MODE_MODE_SPLIT) { ivb_load_lut_10(crtc, degamma_lut, PAL_PREC_SPLIT_MODE | PAL_PREC_INDEX_VALUE(0)); @@ -676,7 +692,7 @@ static void bdw_load_luts(const struct intel_crtc_state *crtc_state) const struct drm_property_blob *degamma_lut = crtc_state->hw.degamma_lut;
if (crtc_state->gamma_mode == GAMMA_MODE_MODE_8BIT) { - i9xx_load_luts(crtc_state); + ilk_load_lut_8(crtc, gamma_lut); } else if (crtc_state->gamma_mode == GAMMA_MODE_MODE_SPLIT) { bdw_load_lut_10(crtc, degamma_lut, PAL_PREC_SPLIT_MODE | PAL_PREC_INDEX_VALUE(0)); @@ -777,7 +793,7 @@ static void glk_load_luts(const struct intel_crtc_state *crtc_state) glk_load_degamma_lut_linear(crtc_state);
if (crtc_state->gamma_mode == GAMMA_MODE_MODE_8BIT) { - i9xx_load_luts(crtc_state); + ilk_load_lut_8(crtc, gamma_lut); } else { bdw_load_lut_10(crtc, gamma_lut, PAL_PREC_INDEX_VALUE(0)); ivb_load_lut_ext_max(crtc); @@ -913,7 +929,7 @@ static void icl_load_luts(const struct intel_crtc_state *crtc_state)
switch (crtc_state->gamma_mode & GAMMA_MODE_MODE_MASK) { case GAMMA_MODE_MODE_8BIT: - i9xx_load_luts(crtc_state); + ilk_load_lut_8(crtc, gamma_lut); break; case GAMMA_MODE_MODE_12BIT_MULTI_SEGMENTED: icl_program_gamma_superfine_segment(crtc_state);
From: Ville Syrjälä ville.syrjala@linux.intel.com
To mirror the load_luts path let's clone an ilk+ version from i9xx_read_lut_8(). I guess the extra branch isn't a huge issue but feels better to make a clean split.
Signed-off-by: Ville Syrjälä ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/display/intel_color.c | 41 ++++++++++++++++++---- 1 file changed, 35 insertions(+), 6 deletions(-)
diff --git a/drivers/gpu/drm/i915/display/intel_color.c b/drivers/gpu/drm/i915/display/intel_color.c index 992290a07086..5890e3896f8d 100644 --- a/drivers/gpu/drm/i915/display/intel_color.c +++ b/drivers/gpu/drm/i915/display/intel_color.c @@ -1674,10 +1674,7 @@ i9xx_read_lut_8(const struct intel_crtc_state *crtc_state) blob_data = blob->data;
for (i = 0; i < LEGACY_LUT_LENGTH; i++) { - if (HAS_GMCH(dev_priv)) - val = I915_READ(PALETTE(pipe, i)); - else - val = I915_READ(LGC_PALETTE(pipe, i)); + val = I915_READ(PALETTE(pipe, i));
blob_data[i].red = intel_color_lut_pack(REG_FIELD_GET( LGC_PALETTE_RED_MASK, val), 8); @@ -1792,6 +1789,38 @@ static void chv_read_luts(struct intel_crtc_state *crtc_state) i965_read_luts(crtc_state); }
+static struct drm_property_blob * +ilk_read_lut_8(const struct intel_crtc_state *crtc_state) +{ + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + enum pipe pipe = crtc->pipe; + struct drm_property_blob *blob; + struct drm_color_lut *blob_data; + u32 i, val; + + blob = drm_property_create_blob(&dev_priv->drm, + sizeof(struct drm_color_lut) * LEGACY_LUT_LENGTH, + NULL); + if (IS_ERR(blob)) + return NULL; + + blob_data = blob->data; + + for (i = 0; i < LEGACY_LUT_LENGTH; i++) { + val = I915_READ(LGC_PALETTE(pipe, i)); + + blob_data[i].red = intel_color_lut_pack(REG_FIELD_GET( + LGC_PALETTE_RED_MASK, val), 8); + blob_data[i].green = intel_color_lut_pack(REG_FIELD_GET( + LGC_PALETTE_GREEN_MASK, val), 8); + blob_data[i].blue = intel_color_lut_pack(REG_FIELD_GET( + LGC_PALETTE_BLUE_MASK, val), 8); + } + + return blob; +} + static struct drm_property_blob * ilk_read_lut_10(const struct intel_crtc_state *crtc_state) { @@ -1834,7 +1863,7 @@ static void ilk_read_luts(struct intel_crtc_state *crtc_state) return;
if (crtc_state->gamma_mode == GAMMA_MODE_MODE_8BIT) - crtc_state->hw.gamma_lut = i9xx_read_lut_8(crtc_state); + crtc_state->hw.gamma_lut = ilk_read_lut_8(crtc_state); else crtc_state->hw.gamma_lut = ilk_read_lut_10(crtc_state); } @@ -1883,7 +1912,7 @@ static void glk_read_luts(struct intel_crtc_state *crtc_state) return;
if (crtc_state->gamma_mode == GAMMA_MODE_MODE_8BIT) - crtc_state->hw.gamma_lut = i9xx_read_lut_8(crtc_state); + crtc_state->hw.gamma_lut = ilk_read_lut_8(crtc_state); else crtc_state->hw.gamma_lut = glk_read_lut_10(crtc_state, PAL_PREC_INDEX_VALUE(0)); }
From: Ville Syrjälä ville.syrjala@linux.intel.com
We're talking about LUT contents here so let's call the thing 'lut' rather than 'blob_data'. This is the name the load_lut() code used before already.
Signed-off-by: Ville Syrjälä ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/display/intel_color.c | 66 +++++++++++----------- 1 file changed, 33 insertions(+), 33 deletions(-)
diff --git a/drivers/gpu/drm/i915/display/intel_color.c b/drivers/gpu/drm/i915/display/intel_color.c index 5890e3896f8d..43435ed343f2 100644 --- a/drivers/gpu/drm/i915/display/intel_color.c +++ b/drivers/gpu/drm/i915/display/intel_color.c @@ -1662,7 +1662,7 @@ i9xx_read_lut_8(const struct intel_crtc_state *crtc_state) struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); enum pipe pipe = crtc->pipe; struct drm_property_blob *blob; - struct drm_color_lut *blob_data; + struct drm_color_lut *lut; u32 i, val;
blob = drm_property_create_blob(&dev_priv->drm, @@ -1671,16 +1671,16 @@ i9xx_read_lut_8(const struct intel_crtc_state *crtc_state) if (IS_ERR(blob)) return NULL;
- blob_data = blob->data; + lut = blob->data;
for (i = 0; i < LEGACY_LUT_LENGTH; i++) { val = I915_READ(PALETTE(pipe, i));
- blob_data[i].red = intel_color_lut_pack(REG_FIELD_GET( + lut[i].red = intel_color_lut_pack(REG_FIELD_GET( LGC_PALETTE_RED_MASK, val), 8); - blob_data[i].green = intel_color_lut_pack(REG_FIELD_GET( + lut[i].green = intel_color_lut_pack(REG_FIELD_GET( LGC_PALETTE_GREEN_MASK, val), 8); - blob_data[i].blue = intel_color_lut_pack(REG_FIELD_GET( + lut[i].blue = intel_color_lut_pack(REG_FIELD_GET( LGC_PALETTE_BLUE_MASK, val), 8); }
@@ -1703,7 +1703,7 @@ i965_read_lut_10p6(const struct intel_crtc_state *crtc_state) u32 lut_size = INTEL_INFO(dev_priv)->color.gamma_lut_size; enum pipe pipe = crtc->pipe; struct drm_property_blob *blob; - struct drm_color_lut *blob_data; + struct drm_color_lut *lut; u32 i, val1, val2;
blob = drm_property_create_blob(&dev_priv->drm, @@ -1712,25 +1712,25 @@ i965_read_lut_10p6(const struct intel_crtc_state *crtc_state) if (IS_ERR(blob)) return NULL;
- blob_data = blob->data; + lut = blob->data;
for (i = 0; i < lut_size - 1; i++) { val1 = I915_READ(PALETTE(pipe, 2 * i + 0)); val2 = I915_READ(PALETTE(pipe, 2 * i + 1));
- blob_data[i].red = REG_FIELD_GET(PALETTE_RED_MASK, val2) << 8 | + lut[i].red = REG_FIELD_GET(PALETTE_RED_MASK, val2) << 8 | REG_FIELD_GET(PALETTE_RED_MASK, val1); - blob_data[i].green = REG_FIELD_GET(PALETTE_GREEN_MASK, val2) << 8 | + lut[i].green = REG_FIELD_GET(PALETTE_GREEN_MASK, val2) << 8 | REG_FIELD_GET(PALETTE_GREEN_MASK, val1); - blob_data[i].blue = REG_FIELD_GET(PALETTE_BLUE_MASK, val2) << 8 | + lut[i].blue = REG_FIELD_GET(PALETTE_BLUE_MASK, val2) << 8 | REG_FIELD_GET(PALETTE_BLUE_MASK, val1); }
- blob_data[i].red = REG_FIELD_GET(PIPEGCMAX_RGB_MASK, + lut[i].red = REG_FIELD_GET(PIPEGCMAX_RGB_MASK, I915_READ(PIPEGCMAX(pipe, 0))); - blob_data[i].green = REG_FIELD_GET(PIPEGCMAX_RGB_MASK, + lut[i].green = REG_FIELD_GET(PIPEGCMAX_RGB_MASK, I915_READ(PIPEGCMAX(pipe, 1))); - blob_data[i].blue = REG_FIELD_GET(PIPEGCMAX_RGB_MASK, + lut[i].blue = REG_FIELD_GET(PIPEGCMAX_RGB_MASK, I915_READ(PIPEGCMAX(pipe, 2)));
return blob; @@ -1755,7 +1755,7 @@ chv_read_cgm_lut(const struct intel_crtc_state *crtc_state) u32 lut_size = INTEL_INFO(dev_priv)->color.gamma_lut_size; enum pipe pipe = crtc->pipe; struct drm_property_blob *blob; - struct drm_color_lut *blob_data; + struct drm_color_lut *lut; u32 i, val;
blob = drm_property_create_blob(&dev_priv->drm, @@ -1764,17 +1764,17 @@ chv_read_cgm_lut(const struct intel_crtc_state *crtc_state) if (IS_ERR(blob)) return NULL;
- blob_data = blob->data; + lut = blob->data;
for (i = 0; i < lut_size; i++) { val = I915_READ(CGM_PIPE_GAMMA(pipe, i, 0)); - blob_data[i].green = intel_color_lut_pack(REG_FIELD_GET( + lut[i].green = intel_color_lut_pack(REG_FIELD_GET( CGM_PIPE_GAMMA_GREEN_MASK, val), 10); - blob_data[i].blue = intel_color_lut_pack(REG_FIELD_GET( + lut[i].blue = intel_color_lut_pack(REG_FIELD_GET( CGM_PIPE_GAMMA_BLUE_MASK, val), 10);
val = I915_READ(CGM_PIPE_GAMMA(pipe, i, 1)); - blob_data[i].red = intel_color_lut_pack(REG_FIELD_GET( + lut[i].red = intel_color_lut_pack(REG_FIELD_GET( CGM_PIPE_GAMMA_RED_MASK, val), 10); }
@@ -1796,7 +1796,7 @@ ilk_read_lut_8(const struct intel_crtc_state *crtc_state) struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); enum pipe pipe = crtc->pipe; struct drm_property_blob *blob; - struct drm_color_lut *blob_data; + struct drm_color_lut *lut; u32 i, val;
blob = drm_property_create_blob(&dev_priv->drm, @@ -1805,16 +1805,16 @@ ilk_read_lut_8(const struct intel_crtc_state *crtc_state) if (IS_ERR(blob)) return NULL;
- blob_data = blob->data; + lut = blob->data;
for (i = 0; i < LEGACY_LUT_LENGTH; i++) { val = I915_READ(LGC_PALETTE(pipe, i));
- blob_data[i].red = intel_color_lut_pack(REG_FIELD_GET( + lut[i].red = intel_color_lut_pack(REG_FIELD_GET( LGC_PALETTE_RED_MASK, val), 8); - blob_data[i].green = intel_color_lut_pack(REG_FIELD_GET( + lut[i].green = intel_color_lut_pack(REG_FIELD_GET( LGC_PALETTE_GREEN_MASK, val), 8); - blob_data[i].blue = intel_color_lut_pack(REG_FIELD_GET( + lut[i].blue = intel_color_lut_pack(REG_FIELD_GET( LGC_PALETTE_BLUE_MASK, val), 8); }
@@ -1829,7 +1829,7 @@ ilk_read_lut_10(const struct intel_crtc_state *crtc_state) u32 lut_size = INTEL_INFO(dev_priv)->color.gamma_lut_size; enum pipe pipe = crtc->pipe; struct drm_property_blob *blob; - struct drm_color_lut *blob_data; + struct drm_color_lut *lut; u32 i, val;
blob = drm_property_create_blob(&dev_priv->drm, @@ -1838,16 +1838,16 @@ ilk_read_lut_10(const struct intel_crtc_state *crtc_state) if (IS_ERR(blob)) return NULL;
- blob_data = blob->data; + lut = blob->data;
for (i = 0; i < lut_size; i++) { val = I915_READ(PREC_PALETTE(pipe, i));
- blob_data[i].red = intel_color_lut_pack(REG_FIELD_GET( + lut[i].red = intel_color_lut_pack(REG_FIELD_GET( PREC_PALETTE_RED_MASK, val), 10); - blob_data[i].green = intel_color_lut_pack(REG_FIELD_GET( + lut[i].green = intel_color_lut_pack(REG_FIELD_GET( PREC_PALETTE_GREEN_MASK, val), 10); - blob_data[i].blue = intel_color_lut_pack(REG_FIELD_GET( + lut[i].blue = intel_color_lut_pack(REG_FIELD_GET( PREC_PALETTE_BLUE_MASK, val), 10); }
@@ -1876,7 +1876,7 @@ glk_read_lut_10(const struct intel_crtc_state *crtc_state, u32 prec_index) int hw_lut_size = ivb_lut_10_size(prec_index); enum pipe pipe = crtc->pipe; struct drm_property_blob *blob; - struct drm_color_lut *blob_data; + struct drm_color_lut *lut; u32 i, val;
blob = drm_property_create_blob(&dev_priv->drm, @@ -1885,7 +1885,7 @@ glk_read_lut_10(const struct intel_crtc_state *crtc_state, u32 prec_index) if (IS_ERR(blob)) return NULL;
- blob_data = blob->data; + lut = blob->data;
I915_WRITE(PREC_PAL_INDEX(pipe), prec_index | PAL_PREC_AUTO_INCREMENT); @@ -1893,11 +1893,11 @@ glk_read_lut_10(const struct intel_crtc_state *crtc_state, u32 prec_index) for (i = 0; i < hw_lut_size; i++) { val = I915_READ(PREC_PAL_DATA(pipe));
- blob_data[i].red = intel_color_lut_pack(REG_FIELD_GET( + lut[i].red = intel_color_lut_pack(REG_FIELD_GET( PREC_PAL_DATA_RED_MASK, val), 10); - blob_data[i].green = intel_color_lut_pack(REG_FIELD_GET( + lut[i].green = intel_color_lut_pack(REG_FIELD_GET( PREC_PAL_DATA_GREEN_MASK, val), 10); - blob_data[i].blue = intel_color_lut_pack(REG_FIELD_GET( + lut[i].blue = intel_color_lut_pack(REG_FIELD_GET( PREC_PAL_DATA_BLUE_MASK, val), 10); }
From: Ville Syrjälä ville.syrjala@linux.intel.com
chv_read_cgm_lut() specifically reads the CGM _gamma_ LUT so let's rename it to reflect that fact. This also mirrors the other direction's chv_load_cgm_gamma().
Signed-off-by: Ville Syrjälä ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/display/intel_color.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/i915/display/intel_color.c b/drivers/gpu/drm/i915/display/intel_color.c index 43435ed343f2..30c0b939620c 100644 --- a/drivers/gpu/drm/i915/display/intel_color.c +++ b/drivers/gpu/drm/i915/display/intel_color.c @@ -1748,7 +1748,7 @@ static void i965_read_luts(struct intel_crtc_state *crtc_state) }
static struct drm_property_blob * -chv_read_cgm_lut(const struct intel_crtc_state *crtc_state) +chv_read_cgm_gamma(const struct intel_crtc_state *crtc_state) { struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); @@ -1784,7 +1784,7 @@ chv_read_cgm_lut(const struct intel_crtc_state *crtc_state) static void chv_read_luts(struct intel_crtc_state *crtc_state) { if (crtc_state->cgm_mode & CGM_PIPE_MODE_GAMMA) - crtc_state->hw.gamma_lut = chv_read_cgm_lut(crtc_state); + crtc_state->hw.gamma_lut = chv_read_cgm_gamma(crtc_state); else i965_read_luts(crtc_state); }
From: Ville Syrjälä ville.syrjala@linux.intel.com
A variable called 'i' having an unsigned type is just looking for trouble, and using a sized type generally makes no sense either. Change all of them to just plain old int. And do the same for some 'lut_size' variables which generally provide the loop end condition for 'i'.
Signed-off-by: Ville Syrjälä ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/display/intel_color.c | 42 ++++++++++------------ 1 file changed, 19 insertions(+), 23 deletions(-)
diff --git a/drivers/gpu/drm/i915/display/intel_color.c b/drivers/gpu/drm/i915/display/intel_color.c index 30c0b939620c..d6a20d7522a9 100644 --- a/drivers/gpu/drm/i915/display/intel_color.c +++ b/drivers/gpu/drm/i915/display/intel_color.c @@ -713,9 +713,8 @@ static void glk_load_degamma_lut(const struct intel_crtc_state *crtc_state) struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); enum pipe pipe = crtc->pipe; - const u32 lut_size = INTEL_INFO(dev_priv)->color.degamma_lut_size; + int i, lut_size = INTEL_INFO(dev_priv)->color.degamma_lut_size; const struct drm_color_lut *lut = crtc_state->hw.degamma_lut->data; - u32 i;
/* * When setting the auto-increment bit, the hardware seems to @@ -752,8 +751,7 @@ static void glk_load_degamma_lut_linear(const struct intel_crtc_state *crtc_stat struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); enum pipe pipe = crtc->pipe; - const u32 lut_size = INTEL_INFO(dev_priv)->color.degamma_lut_size; - u32 i; + int i, lut_size = INTEL_INFO(dev_priv)->color.degamma_lut_size;
/* * When setting the auto-increment bit, the hardware seems to @@ -837,7 +835,7 @@ icl_program_gamma_superfine_segment(const struct intel_crtc_state *crtc_state) const struct drm_color_lut *lut = blob->data; struct intel_dsb *dsb = intel_dsb_get(crtc); enum pipe pipe = crtc->pipe; - u32 i; + int i;
/* * Program Super Fine segment (let's call it seg1)... @@ -870,7 +868,7 @@ icl_program_gamma_multi_segment(const struct intel_crtc_state *crtc_state) const struct drm_color_lut *entry; struct intel_dsb *dsb = intel_dsb_get(crtc); enum pipe pipe = crtc->pipe; - u32 i; + int i;
/* * Program Fine segment (let's call it seg2)... @@ -1643,7 +1641,7 @@ bool intel_color_lut_equal(struct drm_property_blob *blob1, }
/* convert hw value with given bit_precision to lut property val */ -static u32 intel_color_lut_pack(u32 val, u32 bit_precision) +static u32 intel_color_lut_pack(u32 val, int bit_precision) { u32 max = 0xffff >> (16 - bit_precision);
@@ -1663,7 +1661,7 @@ i9xx_read_lut_8(const struct intel_crtc_state *crtc_state) enum pipe pipe = crtc->pipe; struct drm_property_blob *blob; struct drm_color_lut *lut; - u32 i, val; + int i;
blob = drm_property_create_blob(&dev_priv->drm, sizeof(struct drm_color_lut) * LEGACY_LUT_LENGTH, @@ -1674,7 +1672,7 @@ i9xx_read_lut_8(const struct intel_crtc_state *crtc_state) lut = blob->data;
for (i = 0; i < LEGACY_LUT_LENGTH; i++) { - val = I915_READ(PALETTE(pipe, i)); + u32 val = I915_READ(PALETTE(pipe, i));
lut[i].red = intel_color_lut_pack(REG_FIELD_GET( LGC_PALETTE_RED_MASK, val), 8); @@ -1700,11 +1698,10 @@ i965_read_lut_10p6(const struct intel_crtc_state *crtc_state) { struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - u32 lut_size = INTEL_INFO(dev_priv)->color.gamma_lut_size; + int i, lut_size = INTEL_INFO(dev_priv)->color.gamma_lut_size; enum pipe pipe = crtc->pipe; struct drm_property_blob *blob; struct drm_color_lut *lut; - u32 i, val1, val2;
blob = drm_property_create_blob(&dev_priv->drm, sizeof(struct drm_color_lut) * lut_size, @@ -1715,8 +1712,8 @@ i965_read_lut_10p6(const struct intel_crtc_state *crtc_state) lut = blob->data;
for (i = 0; i < lut_size - 1; i++) { - val1 = I915_READ(PALETTE(pipe, 2 * i + 0)); - val2 = I915_READ(PALETTE(pipe, 2 * i + 1)); + u32 val1 = I915_READ(PALETTE(pipe, 2 * i + 0)); + u32 val2 = I915_READ(PALETTE(pipe, 2 * i + 1));
lut[i].red = REG_FIELD_GET(PALETTE_RED_MASK, val2) << 8 | REG_FIELD_GET(PALETTE_RED_MASK, val1); @@ -1752,11 +1749,10 @@ chv_read_cgm_gamma(const struct intel_crtc_state *crtc_state) { struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - u32 lut_size = INTEL_INFO(dev_priv)->color.gamma_lut_size; + int i, lut_size = INTEL_INFO(dev_priv)->color.gamma_lut_size; enum pipe pipe = crtc->pipe; struct drm_property_blob *blob; struct drm_color_lut *lut; - u32 i, val;
blob = drm_property_create_blob(&dev_priv->drm, sizeof(struct drm_color_lut) * lut_size, @@ -1767,6 +1763,8 @@ chv_read_cgm_gamma(const struct intel_crtc_state *crtc_state) lut = blob->data;
for (i = 0; i < lut_size; i++) { + u32 val; + val = I915_READ(CGM_PIPE_GAMMA(pipe, i, 0)); lut[i].green = intel_color_lut_pack(REG_FIELD_GET( CGM_PIPE_GAMMA_GREEN_MASK, val), 10); @@ -1797,7 +1795,7 @@ ilk_read_lut_8(const struct intel_crtc_state *crtc_state) enum pipe pipe = crtc->pipe; struct drm_property_blob *blob; struct drm_color_lut *lut; - u32 i, val; + int i;
blob = drm_property_create_blob(&dev_priv->drm, sizeof(struct drm_color_lut) * LEGACY_LUT_LENGTH, @@ -1808,7 +1806,7 @@ ilk_read_lut_8(const struct intel_crtc_state *crtc_state) lut = blob->data;
for (i = 0; i < LEGACY_LUT_LENGTH; i++) { - val = I915_READ(LGC_PALETTE(pipe, i)); + u32 val = I915_READ(LGC_PALETTE(pipe, i));
lut[i].red = intel_color_lut_pack(REG_FIELD_GET( LGC_PALETTE_RED_MASK, val), 8); @@ -1826,11 +1824,10 @@ ilk_read_lut_10(const struct intel_crtc_state *crtc_state) { struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - u32 lut_size = INTEL_INFO(dev_priv)->color.gamma_lut_size; + int i, lut_size = INTEL_INFO(dev_priv)->color.gamma_lut_size; enum pipe pipe = crtc->pipe; struct drm_property_blob *blob; struct drm_color_lut *lut; - u32 i, val;
blob = drm_property_create_blob(&dev_priv->drm, sizeof(struct drm_color_lut) * lut_size, @@ -1841,7 +1838,7 @@ ilk_read_lut_10(const struct intel_crtc_state *crtc_state) lut = blob->data;
for (i = 0; i < lut_size; i++) { - val = I915_READ(PREC_PALETTE(pipe, i)); + u32 val = I915_READ(PREC_PALETTE(pipe, i));
lut[i].red = intel_color_lut_pack(REG_FIELD_GET( PREC_PALETTE_RED_MASK, val), 10); @@ -1873,11 +1870,10 @@ glk_read_lut_10(const struct intel_crtc_state *crtc_state, u32 prec_index) { struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - int hw_lut_size = ivb_lut_10_size(prec_index); + int i, hw_lut_size = ivb_lut_10_size(prec_index); enum pipe pipe = crtc->pipe; struct drm_property_blob *blob; struct drm_color_lut *lut; - u32 i, val;
blob = drm_property_create_blob(&dev_priv->drm, sizeof(struct drm_color_lut) * hw_lut_size, @@ -1891,7 +1887,7 @@ glk_read_lut_10(const struct intel_crtc_state *crtc_state, u32 prec_index) PAL_PREC_AUTO_INCREMENT);
for (i = 0; i < hw_lut_size; i++) { - val = I915_READ(PREC_PAL_DATA(pipe)); + u32 val = I915_READ(PREC_PAL_DATA(pipe));
lut[i].red = intel_color_lut_pack(REG_FIELD_GET( PREC_PAL_DATA_RED_MASK, val), 10);
From: Ville Syrjälä ville.syrjala@linux.intel.com
Extract all the 'hw value -> LUT entry' stuff into small helpers to make the main 'read out the entire LUT' loop less bogged down by such mundane details.
Signed-off-by: Ville Syrjälä ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/display/intel_color.c | 128 ++++++++++----------- 1 file changed, 64 insertions(+), 64 deletions(-)
diff --git a/drivers/gpu/drm/i915/display/intel_color.c b/drivers/gpu/drm/i915/display/intel_color.c index d6a20d7522a9..4b2bd5ac0e8d 100644 --- a/drivers/gpu/drm/i915/display/intel_color.c +++ b/drivers/gpu/drm/i915/display/intel_color.c @@ -367,6 +367,19 @@ static void chv_load_cgm_csc(struct intel_crtc *crtc, I915_WRITE(CGM_PIPE_CSC_COEFF8(pipe), coeffs[8]); }
+/* convert hw value with given bit_precision to lut property val */ +static u32 intel_color_lut_pack(u32 val, int bit_precision) +{ + u32 max = 0xffff >> (16 - bit_precision); + + val = clamp_val(val, 0, max); + + if (bit_precision < 16) + val <<= 16 - bit_precision; + + return val; +} + static u32 i9xx_lut_8(const struct drm_color_lut *color) { return drm_color_lut_extract(color->red, 8) << 16 | @@ -374,6 +387,13 @@ static u32 i9xx_lut_8(const struct drm_color_lut *color) drm_color_lut_extract(color->blue, 8); }
+static void i9xx_lut_8_pack(struct drm_color_lut *entry, u32 val) +{ + entry->red = intel_color_lut_pack(REG_FIELD_GET(LGC_PALETTE_RED_MASK, val), 8); + entry->green = intel_color_lut_pack(REG_FIELD_GET(LGC_PALETTE_GREEN_MASK, val), 8); + entry->blue = intel_color_lut_pack(REG_FIELD_GET(LGC_PALETTE_BLUE_MASK, val), 8); +} + /* i965+ "10.6" bit interpolated format "even DW" (low 8 bits) */ static u32 i965_lut_10p6_ldw(const struct drm_color_lut *color) { @@ -390,6 +410,21 @@ static u32 i965_lut_10p6_udw(const struct drm_color_lut *color) (color->blue >> 8); }
+static void i965_lut_10p6_pack(struct drm_color_lut *entry, u32 ldw, u32 udw) +{ + entry->red = REG_FIELD_GET(PALETTE_RED_MASK, udw) << 8 | + REG_FIELD_GET(PALETTE_RED_MASK, ldw); + entry->green = REG_FIELD_GET(PALETTE_GREEN_MASK, udw) << 8 | + REG_FIELD_GET(PALETTE_GREEN_MASK, ldw); + entry->blue = REG_FIELD_GET(PALETTE_BLUE_MASK, udw) << 8 | + REG_FIELD_GET(PALETTE_BLUE_MASK, ldw); +} + +static u16 i965_lut_11p6_max_pack(u32 val) +{ + return REG_FIELD_GET(PIPEGCMAX_RGB_MASK, val); +} + static u32 ilk_lut_10(const struct drm_color_lut *color) { return drm_color_lut_extract(color->red, 10) << 20 | @@ -397,6 +432,13 @@ static u32 ilk_lut_10(const struct drm_color_lut *color) drm_color_lut_extract(color->blue, 10); }
+static void ilk_lut_10_pack(struct drm_color_lut *entry, u32 val) +{ + entry->red = intel_color_lut_pack(REG_FIELD_GET(PREC_PALETTE_RED_MASK, val), 10); + entry->green = intel_color_lut_pack(REG_FIELD_GET(PREC_PALETTE_GREEN_MASK, val), 10); + entry->blue = intel_color_lut_pack(REG_FIELD_GET(PREC_PALETTE_BLUE_MASK, val), 10); +} + static void i9xx_color_commit(const struct intel_crtc_state *crtc_state) { struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); @@ -953,6 +995,13 @@ static u32 chv_cgm_degamma_udw(const struct drm_color_lut *color) return drm_color_lut_extract(color->red, 14); }
+static void chv_cgm_gamma_pack(struct drm_color_lut *entry, u32 ldw, u32 udw) +{ + entry->green = intel_color_lut_pack(REG_FIELD_GET(CGM_PIPE_GAMMA_GREEN_MASK, ldw), 10); + entry->blue = intel_color_lut_pack(REG_FIELD_GET(CGM_PIPE_GAMMA_BLUE_MASK, ldw), 10); + entry->red = intel_color_lut_pack(REG_FIELD_GET(CGM_PIPE_GAMMA_RED_MASK, udw), 10); +} + static void chv_load_cgm_degamma(struct intel_crtc *crtc, const struct drm_property_blob *blob) { @@ -1640,19 +1689,6 @@ bool intel_color_lut_equal(struct drm_property_blob *blob1, return true; }
-/* convert hw value with given bit_precision to lut property val */ -static u32 intel_color_lut_pack(u32 val, int bit_precision) -{ - u32 max = 0xffff >> (16 - bit_precision); - - val = clamp_val(val, 0, max); - - if (bit_precision < 16) - val <<= 16 - bit_precision; - - return val; -} - static struct drm_property_blob * i9xx_read_lut_8(const struct intel_crtc_state *crtc_state) { @@ -1674,12 +1710,7 @@ i9xx_read_lut_8(const struct intel_crtc_state *crtc_state) for (i = 0; i < LEGACY_LUT_LENGTH; i++) { u32 val = I915_READ(PALETTE(pipe, i));
- lut[i].red = intel_color_lut_pack(REG_FIELD_GET( - LGC_PALETTE_RED_MASK, val), 8); - lut[i].green = intel_color_lut_pack(REG_FIELD_GET( - LGC_PALETTE_GREEN_MASK, val), 8); - lut[i].blue = intel_color_lut_pack(REG_FIELD_GET( - LGC_PALETTE_BLUE_MASK, val), 8); + i9xx_lut_8_pack(&lut[i], val); }
return blob; @@ -1712,23 +1743,15 @@ i965_read_lut_10p6(const struct intel_crtc_state *crtc_state) lut = blob->data;
for (i = 0; i < lut_size - 1; i++) { - u32 val1 = I915_READ(PALETTE(pipe, 2 * i + 0)); - u32 val2 = I915_READ(PALETTE(pipe, 2 * i + 1)); - - lut[i].red = REG_FIELD_GET(PALETTE_RED_MASK, val2) << 8 | - REG_FIELD_GET(PALETTE_RED_MASK, val1); - lut[i].green = REG_FIELD_GET(PALETTE_GREEN_MASK, val2) << 8 | - REG_FIELD_GET(PALETTE_GREEN_MASK, val1); - lut[i].blue = REG_FIELD_GET(PALETTE_BLUE_MASK, val2) << 8 | - REG_FIELD_GET(PALETTE_BLUE_MASK, val1); + u32 ldw = I915_READ(PALETTE(pipe, 2 * i + 0)); + u32 udw = I915_READ(PALETTE(pipe, 2 * i + 1)); + + i965_lut_10p6_pack(&lut[i], ldw, udw); }
- lut[i].red = REG_FIELD_GET(PIPEGCMAX_RGB_MASK, - I915_READ(PIPEGCMAX(pipe, 0))); - lut[i].green = REG_FIELD_GET(PIPEGCMAX_RGB_MASK, - I915_READ(PIPEGCMAX(pipe, 1))); - lut[i].blue = REG_FIELD_GET(PIPEGCMAX_RGB_MASK, - I915_READ(PIPEGCMAX(pipe, 2))); + lut[i].red = i965_lut_11p6_max_pack(I915_READ(PIPEGCMAX(pipe, 0))); + lut[i].green = i965_lut_11p6_max_pack(I915_READ(PIPEGCMAX(pipe, 1))); + lut[i].blue = i965_lut_11p6_max_pack(I915_READ(PIPEGCMAX(pipe, 2)));
return blob; } @@ -1763,17 +1786,10 @@ chv_read_cgm_gamma(const struct intel_crtc_state *crtc_state) lut = blob->data;
for (i = 0; i < lut_size; i++) { - u32 val; - - val = I915_READ(CGM_PIPE_GAMMA(pipe, i, 0)); - lut[i].green = intel_color_lut_pack(REG_FIELD_GET( - CGM_PIPE_GAMMA_GREEN_MASK, val), 10); - lut[i].blue = intel_color_lut_pack(REG_FIELD_GET( - CGM_PIPE_GAMMA_BLUE_MASK, val), 10); + u32 ldw = I915_READ(CGM_PIPE_GAMMA(pipe, i, 0)); + u32 udw = I915_READ(CGM_PIPE_GAMMA(pipe, i, 1));
- val = I915_READ(CGM_PIPE_GAMMA(pipe, i, 1)); - lut[i].red = intel_color_lut_pack(REG_FIELD_GET( - CGM_PIPE_GAMMA_RED_MASK, val), 10); + chv_cgm_gamma_pack(&lut[i], ldw, udw); }
return blob; @@ -1808,12 +1824,7 @@ ilk_read_lut_8(const struct intel_crtc_state *crtc_state) for (i = 0; i < LEGACY_LUT_LENGTH; i++) { u32 val = I915_READ(LGC_PALETTE(pipe, i));
- lut[i].red = intel_color_lut_pack(REG_FIELD_GET( - LGC_PALETTE_RED_MASK, val), 8); - lut[i].green = intel_color_lut_pack(REG_FIELD_GET( - LGC_PALETTE_GREEN_MASK, val), 8); - lut[i].blue = intel_color_lut_pack(REG_FIELD_GET( - LGC_PALETTE_BLUE_MASK, val), 8); + i9xx_lut_8_pack(&lut[i], val); }
return blob; @@ -1840,12 +1851,7 @@ ilk_read_lut_10(const struct intel_crtc_state *crtc_state) for (i = 0; i < lut_size; i++) { u32 val = I915_READ(PREC_PALETTE(pipe, i));
- lut[i].red = intel_color_lut_pack(REG_FIELD_GET( - PREC_PALETTE_RED_MASK, val), 10); - lut[i].green = intel_color_lut_pack(REG_FIELD_GET( - PREC_PALETTE_GREEN_MASK, val), 10); - lut[i].blue = intel_color_lut_pack(REG_FIELD_GET( - PREC_PALETTE_BLUE_MASK, val), 10); + ilk_lut_10_pack(&lut[i], val); }
return blob; @@ -1883,18 +1889,12 @@ glk_read_lut_10(const struct intel_crtc_state *crtc_state, u32 prec_index)
lut = blob->data;
- I915_WRITE(PREC_PAL_INDEX(pipe), prec_index | - PAL_PREC_AUTO_INCREMENT); + I915_WRITE(PREC_PAL_INDEX(pipe), prec_index | PAL_PREC_AUTO_INCREMENT);
for (i = 0; i < hw_lut_size; i++) { u32 val = I915_READ(PREC_PAL_DATA(pipe));
- lut[i].red = intel_color_lut_pack(REG_FIELD_GET( - PREC_PAL_DATA_RED_MASK, val), 10); - lut[i].green = intel_color_lut_pack(REG_FIELD_GET( - PREC_PAL_DATA_GREEN_MASK, val), 10); - lut[i].blue = intel_color_lut_pack(REG_FIELD_GET( - PREC_PAL_DATA_BLUE_MASK, val), 10); + ilk_lut_10_pack(&lut[i], val); }
I915_WRITE(PREC_PAL_INDEX(pipe), 0);
From: Ville Syrjälä ville.syrjala@linux.intel.com
PIPEGCMAX is an 11.6 (or 1.16 if you will) value, i.e. it can represent a value of 1.0, whereas the maximum we can store in the software LUT is 0.ffff. Clamp the value so that it gets saturated to the max the uapi supports.
Signed-off-by: Ville Syrjälä ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/display/intel_color.c | 3 ++- drivers/gpu/drm/i915/i915_reg.h | 1 - 2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/i915/display/intel_color.c b/drivers/gpu/drm/i915/display/intel_color.c index 4b2bd5ac0e8d..3fd517fa1de5 100644 --- a/drivers/gpu/drm/i915/display/intel_color.c +++ b/drivers/gpu/drm/i915/display/intel_color.c @@ -422,7 +422,8 @@ static void i965_lut_10p6_pack(struct drm_color_lut *entry, u32 ldw, u32 udw)
static u16 i965_lut_11p6_max_pack(u32 val) { - return REG_FIELD_GET(PIPEGCMAX_RGB_MASK, val); + /* PIPEGCMAX is 11.6, clamp to 10.6 */ + return clamp_val(val, 0, 0xffff); }
static u32 ilk_lut_10(const struct drm_color_lut *color) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index a607ea520829..4a8021a33b64 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -5871,7 +5871,6 @@ enum {
#define _PIPEAGCMAX 0x70010 #define _PIPEBGCMAX 0x71010 -#define PIPEGCMAX_RGB_MASK REG_GENMASK(15, 0) #define PIPEGCMAX(pipe, i) _MMIO_PIPE2(pipe, _PIPEAGCMAX + (i) * 4)
#define _PIPE_MISC_A 0x70030
From: Ville Syrjälä ville.syrjala@linux.intel.com
The low level read_lut() functions don't need the entire crtc state as they know exactly what they're reading. We just need to pass in the crtc to get at the pipe. This now neatly mirrors the load_lut() direction.
Signed-off-by: Ville Syrjälä ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/display/intel_color.c | 51 +++++++++++----------- 1 file changed, 25 insertions(+), 26 deletions(-)
diff --git a/drivers/gpu/drm/i915/display/intel_color.c b/drivers/gpu/drm/i915/display/intel_color.c index 3fd517fa1de5..f0f372b9b3bd 100644 --- a/drivers/gpu/drm/i915/display/intel_color.c +++ b/drivers/gpu/drm/i915/display/intel_color.c @@ -1690,10 +1690,8 @@ bool intel_color_lut_equal(struct drm_property_blob *blob1, return true; }
-static struct drm_property_blob * -i9xx_read_lut_8(const struct intel_crtc_state *crtc_state) +static struct drm_property_blob *i9xx_read_lut_8(struct intel_crtc *crtc) { - struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); enum pipe pipe = crtc->pipe; struct drm_property_blob *blob; @@ -1719,16 +1717,16 @@ i9xx_read_lut_8(const struct intel_crtc_state *crtc_state)
static void i9xx_read_luts(struct intel_crtc_state *crtc_state) { + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); + if (!crtc_state->gamma_enable) return;
- crtc_state->hw.gamma_lut = i9xx_read_lut_8(crtc_state); + crtc_state->hw.gamma_lut = i9xx_read_lut_8(crtc); }
-static struct drm_property_blob * -i965_read_lut_10p6(const struct intel_crtc_state *crtc_state) +static struct drm_property_blob *i965_read_lut_10p6(struct intel_crtc *crtc) { - struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); int i, lut_size = INTEL_INFO(dev_priv)->color.gamma_lut_size; enum pipe pipe = crtc->pipe; @@ -1759,19 +1757,19 @@ i965_read_lut_10p6(const struct intel_crtc_state *crtc_state)
static void i965_read_luts(struct intel_crtc_state *crtc_state) { + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); + if (!crtc_state->gamma_enable) return;
if (crtc_state->gamma_mode == GAMMA_MODE_MODE_8BIT) - crtc_state->hw.gamma_lut = i9xx_read_lut_8(crtc_state); + crtc_state->hw.gamma_lut = i9xx_read_lut_8(crtc); else - crtc_state->hw.gamma_lut = i965_read_lut_10p6(crtc_state); + crtc_state->hw.gamma_lut = i965_read_lut_10p6(crtc); }
-static struct drm_property_blob * -chv_read_cgm_gamma(const struct intel_crtc_state *crtc_state) +static struct drm_property_blob *chv_read_cgm_gamma(struct intel_crtc *crtc) { - struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); int i, lut_size = INTEL_INFO(dev_priv)->color.gamma_lut_size; enum pipe pipe = crtc->pipe; @@ -1798,16 +1796,16 @@ chv_read_cgm_gamma(const struct intel_crtc_state *crtc_state)
static void chv_read_luts(struct intel_crtc_state *crtc_state) { + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); + if (crtc_state->cgm_mode & CGM_PIPE_MODE_GAMMA) - crtc_state->hw.gamma_lut = chv_read_cgm_gamma(crtc_state); + crtc_state->hw.gamma_lut = chv_read_cgm_gamma(crtc); else i965_read_luts(crtc_state); }
-static struct drm_property_blob * -ilk_read_lut_8(const struct intel_crtc_state *crtc_state) +static struct drm_property_blob *ilk_read_lut_8(struct intel_crtc *crtc) { - struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); enum pipe pipe = crtc->pipe; struct drm_property_blob *blob; @@ -1831,10 +1829,8 @@ ilk_read_lut_8(const struct intel_crtc_state *crtc_state) return blob; }
-static struct drm_property_blob * -ilk_read_lut_10(const struct intel_crtc_state *crtc_state) +static struct drm_property_blob *ilk_read_lut_10(struct intel_crtc *crtc) { - struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); int i, lut_size = INTEL_INFO(dev_priv)->color.gamma_lut_size; enum pipe pipe = crtc->pipe; @@ -1860,6 +1856,8 @@ ilk_read_lut_10(const struct intel_crtc_state *crtc_state)
static void ilk_read_luts(struct intel_crtc_state *crtc_state) { + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); + if (!crtc_state->gamma_enable) return;
@@ -1867,15 +1865,14 @@ static void ilk_read_luts(struct intel_crtc_state *crtc_state) return;
if (crtc_state->gamma_mode == GAMMA_MODE_MODE_8BIT) - crtc_state->hw.gamma_lut = ilk_read_lut_8(crtc_state); + crtc_state->hw.gamma_lut = ilk_read_lut_8(crtc); else - crtc_state->hw.gamma_lut = ilk_read_lut_10(crtc_state); + crtc_state->hw.gamma_lut = ilk_read_lut_10(crtc); }
-static struct drm_property_blob * -glk_read_lut_10(const struct intel_crtc_state *crtc_state, u32 prec_index) +static struct drm_property_blob *glk_read_lut_10(struct intel_crtc *crtc, + u32 prec_index) { - struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); int i, hw_lut_size = ivb_lut_10_size(prec_index); enum pipe pipe = crtc->pipe; @@ -1905,13 +1902,15 @@ glk_read_lut_10(const struct intel_crtc_state *crtc_state, u32 prec_index)
static void glk_read_luts(struct intel_crtc_state *crtc_state) { + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); + if (!crtc_state->gamma_enable) return;
if (crtc_state->gamma_mode == GAMMA_MODE_MODE_8BIT) - crtc_state->hw.gamma_lut = ilk_read_lut_8(crtc_state); + crtc_state->hw.gamma_lut = ilk_read_lut_8(crtc); else - crtc_state->hw.gamma_lut = glk_read_lut_10(crtc_state, PAL_PREC_INDEX_VALUE(0)); + crtc_state->hw.gamma_lut = glk_read_lut_10(crtc, PAL_PREC_INDEX_VALUE(0)); }
void intel_color_init(struct intel_crtc *crtc)
dri-devel@lists.freedesktop.org