2 * Copyright 2016 Advanced Micro Devices, Inc.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
27 #include <linux/slab.h>
31 #include "color_gamma.h"
33 /* When calculating LUT values the first region and at least one subsequent
34 * region are calculated with full precision. These defines are a demarcation
35 * of where the second region starts and ends.
36 * These are hardcoded values to avoid recalculating them in loops.
38 #define PRECISE_LUT_REGION_START 224
39 #define PRECISE_LUT_REGION_END 239
// X coordinates of the hardware curve points. setup_x_points_distribution()
// fills this table; the two slots at MAX_HW_POINTS and MAX_HW_POINTS + 1 are
// both set to 128 there.
41 static struct hw_x_point coordinates_x[MAX_HW_POINTS + 2];
43 // these are helpers for calculations to reduce stack usage
44 // do not depend on these being preserved across calls
46 /* Helper to optimize gamma calculation, only use in translate_from_linear, in
47 * particular the dc_fixpt_pow function which is very expensive
48 * The idea is that our regions for X points are exponential and currently they all use
49 * the same number of points (NUM_PTS_IN_REGION) and in each region every point
50 * is exactly 2x the one at the same index in the previous region. In other words
51 * X[i] = 2 * X[i-NUM_PTS_IN_REGION] for i>=16
52 * The other fact is that (2x)^gamma = 2^gamma * x^gamma
53 * So we compute and save x^gamma for the first 16 points (the first region), and for every next region
54 * just multiply with 2^gamma which can be computed once, and save the result so we
55 * recursively compute all the values.
59 * Regamma coefficients are used for both regamma and degamma. Degamma
60 * coefficients are calculated in our formula using the regamma coefficients.
// Integer numerators for the per-curve gamma coefficients. build_coefficients()
// converts one column of these tables to fixed point:
//   numerator01 / 10000000 -> a0
//   numerator02 / 1000     -> a1
//   numerator03 / 1000     -> a2
//   numerator04 / 1000     -> a3
//   numerator05 / 1000     -> user_gamma
// The column order is given by the header comment on the next line.
62 /*sRGB 709 2.2 2.4 P3*/
63 static const int32_t numerator01[] = { 31308, 180000, 0, 0, 0};
64 static const int32_t numerator02[] = { 12920, 4500, 0, 0, 0};
65 static const int32_t numerator03[] = { 55, 99, 0, 0, 0};
66 static const int32_t numerator04[] = { 55, 99, 0, 0, 0};
67 static const int32_t numerator05[] = { 2400, 2200, 2200, 2400, 2600};
// Fills coordinates_x[] working downwards from the highest region.
// The two trailing slots (MAX_HW_POINTS and MAX_HW_POINTS + 1) are set to 128
// first. Each pass of the outer loop halves region_size, stores it as the
// first X of the region starting at seg_offset, and then builds the remaining
// points of that region by adding increment to the previous point.
69 /* one-time setup of X points */
70 void setup_x_points_distribution(void)
72 struct fixed31_32 region_size = dc_fixpt_from_int(128);
76 struct fixed31_32 increment;
78 coordinates_x[MAX_HW_POINTS].x = region_size;
79 coordinates_x[MAX_HW_POINTS + 1].x = region_size;
81 for (segment = 6; segment > (6 - NUM_REGIONS); segment--) {
82 region_size = dc_fixpt_div_int(region_size, 2);
// NOTE(review): the divisor argument of this call is not visible here.
83 increment = dc_fixpt_div_int(region_size,
// Index of the first point of this region inside coordinates_x[].
85 seg_offset = (segment + (NUM_REGIONS - 7)) * NUM_PTS_IN_REGION;
86 coordinates_x[seg_offset].x = region_size;
88 for (index = seg_offset + 1;
89 index < seg_offset + NUM_PTS_IN_REGION;
91 coordinates_x[index].x = dc_fixpt_add
92 (coordinates_x[index-1].x, increment);
// Writes the raw fixed-point value of the first MAX_HW_POINTS entries of
// coordinates_x[] through LOG_GAMMA_WRITE, one value per line, after a
// heading line. The logging shown here runs only when logger is non-NULL.
97 void log_x_points_distribution(struct dal_logger *logger)
101 if (logger != NULL) {
102 LOG_GAMMA_WRITE("Log X Distribution\n");
104 for (i = 0; i < MAX_HW_POINTS; i++)
105 LOG_GAMMA_WRITE("%llu\n", coordinates_x[i].x.value);
// Evaluates the PQ gamma formula for in_x and stores the result in *out_y.
// A negative in_x is treated as zero. With p = in_x^m1, the result is
// base^m2, where the visible part of base divides by (1 + c3 * p) and has a
// c2 * p term in the numerator.
// NOTE(review): the start of the base expression is not visible in this
// listing; presumably the numerator is (c1 + c2 * p) - confirm.
109 static void compute_pq(struct fixed31_32 in_x, struct fixed31_32 *out_y)
111 /* consts for PQ gamma formula. */
// m1 = 0.159301758
112 const struct fixed31_32 m1 =
113 dc_fixpt_from_fraction(159301758, 1000000000);
// m2 = 78.84375
114 const struct fixed31_32 m2 =
115 dc_fixpt_from_fraction(7884375, 100000);
// c1 = 0.8359375
116 const struct fixed31_32 c1 =
117 dc_fixpt_from_fraction(8359375, 10000000);
// c2 = 18.8515625
118 const struct fixed31_32 c2 =
119 dc_fixpt_from_fraction(188515625, 10000000);
// c3 = 18.6875
120 const struct fixed31_32 c3 =
121 dc_fixpt_from_fraction(186875, 10000);
123 struct fixed31_32 l_pow_m1;
124 struct fixed31_32 base;
// Clamp negative input to zero before taking the power.
126 if (dc_fixpt_lt(in_x, dc_fixpt_zero))
127 in_x = dc_fixpt_zero;
129 l_pow_m1 = dc_fixpt_pow(in_x, m1);
132 (dc_fixpt_mul(c2, l_pow_m1))),
133 dc_fixpt_add(dc_fixpt_one,
134 (dc_fixpt_mul(c3, l_pow_m1))));
135 *out_y = dc_fixpt_pow(base, m2);
// Evaluates the dePQ gamma formula for in_x and stores the result in *out_y.
// A negative in_x is treated as zero. With q = in_x^(1/m2):
//   base  = q - c1
//   div   = c2 - c3 * q
//   base2 = |base / div|
//   *out_y = base2^(1/m1)
// Note that the local named l_pow_m1 holds in_x^(1/m2) in this function.
138 static void compute_de_pq(struct fixed31_32 in_x, struct fixed31_32 *out_y)
140 /* consts for dePQ gamma formula. */
141 const struct fixed31_32 m1 =
142 dc_fixpt_from_fraction(159301758, 1000000000);
143 const struct fixed31_32 m2 =
144 dc_fixpt_from_fraction(7884375, 100000);
145 const struct fixed31_32 c1 =
146 dc_fixpt_from_fraction(8359375, 10000000);
147 const struct fixed31_32 c2 =
148 dc_fixpt_from_fraction(188515625, 10000000);
149 const struct fixed31_32 c3 =
150 dc_fixpt_from_fraction(186875, 10000);
152 struct fixed31_32 l_pow_m1;
153 struct fixed31_32 base, div;
154 struct fixed31_32 base2;
// Clamp negative input to zero before taking the power.
157 if (dc_fixpt_lt(in_x, dc_fixpt_zero))
158 in_x = dc_fixpt_zero;
160 l_pow_m1 = dc_fixpt_pow(in_x,
161 dc_fixpt_div(dc_fixpt_one, m2));
162 base = dc_fixpt_sub(l_pow_m1, c1);
164 div = dc_fixpt_sub(c2, dc_fixpt_mul(c3, l_pow_m1));
166 base2 = dc_fixpt_div(base, div);
167 // avoid complex numbers
// A negative quotient is replaced by its absolute value before the power.
168 if (dc_fixpt_lt(base2, dc_fixpt_zero))
169 base2 = dc_fixpt_sub(dc_fixpt_zero, base2);
172 *out_y = dc_fixpt_pow(base2, dc_fixpt_div(dc_fixpt_one, m1));
// HLG curve in the non-linear to linear direction. The result written to
// *out_y is multiplied by max_luminance_nits / sdr_white_level.
// For in_x below 1/2 the value is in_x^2 / 3. Otherwise the visible steps are
// (in_x - c) / a, then + b, then / 12.
// NOTE(review): a step between the divide by a and the add of b is not
// visible in this listing - confirm against the full source.
177 /* de gamma, non-linear to linear */
178 static void compute_hlg_eotf(struct fixed31_32 in_x,
179 struct fixed31_32 *out_y,
180 uint32_t sdr_white_level, uint32_t max_luminance_nits)
185 struct fixed31_32 threshold;
188 struct fixed31_32 scaling_factor =
189 dc_fixpt_from_fraction(max_luminance_nits, sdr_white_level);
// a = 0.17883277, b = 0.28466892, c = 0.55991073
190 a = dc_fixpt_from_fraction(17883277, 100000000);
191 b = dc_fixpt_from_fraction(28466892, 100000000);
192 c = dc_fixpt_from_fraction(55991073, 100000000);
193 threshold = dc_fixpt_from_fraction(1, 2);
195 if (dc_fixpt_lt(in_x, threshold)) {
196 x = dc_fixpt_mul(in_x, in_x);
197 x = dc_fixpt_div_int(x, 3);
199 x = dc_fixpt_sub(in_x, c);
200 x = dc_fixpt_div(x, a);
202 x = dc_fixpt_add(x, b);
203 x = dc_fixpt_div_int(x, 12);
205 *out_y = dc_fixpt_mul(x, scaling_factor);
// HLG curve in the linear to non-linear direction. The input is first
// multiplied by sdr_white_level / max_luminance_nits.
// For a scaled x below 1/12 the result is (3 * x)^(1/2). Otherwise the
// visible steps are 12 * x - b, then a * (...) + c.
// NOTE(review): a step between the subtract of b and the multiply by a is
// not visible in this listing - confirm against the full source.
209 /* re gamma, linear to non-linear */
210 static void compute_hlg_oetf(struct fixed31_32 in_x, struct fixed31_32 *out_y,
211 uint32_t sdr_white_level, uint32_t max_luminance_nits)
216 struct fixed31_32 threshold;
219 struct fixed31_32 scaling_factor =
220 dc_fixpt_from_fraction(sdr_white_level, max_luminance_nits);
// a = 0.17883277, b = 0.28466892, c = 0.55991073
221 a = dc_fixpt_from_fraction(17883277, 100000000);
222 b = dc_fixpt_from_fraction(28466892, 100000000);
223 c = dc_fixpt_from_fraction(55991073, 100000000);
224 threshold = dc_fixpt_from_fraction(1, 12);
225 x = dc_fixpt_mul(in_x, scaling_factor);
228 if (dc_fixpt_lt(x, threshold)) {
229 x = dc_fixpt_mul(x, dc_fixpt_from_fraction(3, 1));
230 *out_y = dc_fixpt_pow(x, dc_fixpt_half);
232 x = dc_fixpt_mul(x, dc_fixpt_from_fraction(12, 1));
233 x = dc_fixpt_sub(x, b);
235 x = dc_fixpt_mul(a, x);
236 *out_y = dc_fixpt_add(x, c);
// Fills the table returned by mod_color_get_table(type_pq_table).
// Entries 0..31 are set to zero. Entries 32..MAX_HW_POINTS are computed with
// compute_pq() from an X coordinate multiplied by 80 / 10000, reading X
// through coord_x, which starts at coordinates_x + 32.
// NOTE(review): the statement that advances coord_x is not visible here.
241 /* one-time pre-compute PQ values - only for sdr_white_level 80 */
242 void precompute_pq(void)
246 const struct hw_x_point *coord_x = coordinates_x + 32;
247 struct fixed31_32 scaling_factor =
248 dc_fixpt_from_fraction(80, 10000);
250 struct fixed31_32 *pq_table = mod_color_get_table(type_pq_table);
252 /* pow function has problems with arguments too small */
253 for (i = 0; i < 32; i++)
254 pq_table[i] = dc_fixpt_zero;
256 for (i = 32; i <= MAX_HW_POINTS; i++) {
257 x = dc_fixpt_mul(coord_x->x, scaling_factor);
258 compute_pq(x, &pq_table[i]);
// Fills the table returned by mod_color_get_table(type_de_pq_table) in three
// consecutive index ranges:
//   0 .. begin_index               : zero
//   begin_index + 1 .. end_index   : compute_de_pq(X) multiplied by 125
//   end_index + 1 .. MAX_HW_POINTS : copy of the previous entry
// begin_index is 13 regions in, and end_index is 12 regions after that.
263 /* one-time pre-compute dePQ values - only for max pixel value 125 FP16 */
264 void precompute_de_pq(void)
268 uint32_t begin_index, end_index;
270 struct fixed31_32 scaling_factor = dc_fixpt_from_int(125);
271 struct fixed31_32 *de_pq_table = mod_color_get_table(type_de_pq_table);
272 /* X points is 2^-25 to 2^7
273 * De-gamma X is 2^-12 to 2^0 – we are skipping first -12-(-25) = 13 regions
275 begin_index = 13 * NUM_PTS_IN_REGION;
276 end_index = begin_index + 12 * NUM_PTS_IN_REGION;
278 for (i = 0; i <= begin_index; i++)
279 de_pq_table[i] = dc_fixpt_zero;
281 for (; i <= end_index; i++) {
282 compute_de_pq(coordinates_x[i].x, &y);
283 de_pq_table[i] = dc_fixpt_mul(y, scaling_factor);
286 for (; i <= MAX_HW_POINTS; i++)
287 de_pq_table[i] = de_pq_table[i-1];
// NOTE(review): the enclosing declaration is not visible in this listing;
// presumably these are the members of struct dividers, which the scale_*
// functions in this file take by value - confirm.
290 struct fixed31_32 divider1;
291 struct fixed31_32 divider2;
292 struct fixed31_32 divider3;
// Fills every element of the coefficient arrays in *coefficients from one
// column of the numeratorNN tables. The column (index) is chosen from type
// by the if/else chain, which recognises SRGB, BT709, GAMMA22, GAMMA24 and
// GAMMA26. The loop runs once per element of coefficients->a0.
// NOTE(review): the index assignments, the fallback for an unrecognised type
// and the return statements are not visible in this listing.
296 static bool build_coefficients(struct gamma_coefficients *coefficients,
297 enum dc_transfer_func_predefined type)
304 if (type == TRANSFER_FUNCTION_SRGB)
306 else if (type == TRANSFER_FUNCTION_BT709)
308 else if (type == TRANSFER_FUNCTION_GAMMA22)
310 else if (type == TRANSFER_FUNCTION_GAMMA24)
312 else if (type == TRANSFER_FUNCTION_GAMMA26)
// a0 uses a divisor of 10000000; all the others use 1000.
320 coefficients->a0[i] = dc_fixpt_from_fraction(
321 numerator01[index], 10000000);
322 coefficients->a1[i] = dc_fixpt_from_fraction(
323 numerator02[index], 1000);
324 coefficients->a2[i] = dc_fixpt_from_fraction(
325 numerator03[index], 1000);
326 coefficients->a3[i] = dc_fixpt_from_fraction(
327 numerator04[index], 1000);
328 coefficients->user_gamma[i] = dc_fixpt_from_fraction(
329 numerator05[index], 1000);
332 } while (i != ARRAY_SIZE(coefficients->a0));
// Maps args->arg through a gamma curve described by a0..a3 and gamma, using
// args->cal_buffer to avoid repeated dc_fixpt_pow calls. Visible branches:
//   1 <= arg           : handled first (its result is not visible here)
//   arg <= -a0         : a2 - (1 + a3) * (-arg)^(1/gamma)
//   a0 <= arg          : (1 + a3) * p - a2, where p is arg^(1/gamma)
//   otherwise          : arg * a1
// In the a0 <= arg branch, p is computed with dc_fixpt_pow only for buffer
// indices below 16 or inside PRECISE_LUT_REGION_START..END. For other indices
// it is gamma_of_2 times the value cached in buffer[buffer_index % 16].
// gamma_of_2 = 2^(1/gamma) is computed when buffer_index is 0. A buffer_index
// of -1 disables caching: nothing is stored and the index is not advanced.
337 static struct fixed31_32 translate_from_linear_space(
338 struct translate_from_linear_space_args *args)
340 const struct fixed31_32 one = dc_fixpt_from_int(1);
342 struct fixed31_32 scratch_1, scratch_2;
343 struct calculate_buffer *cal_buffer = args->cal_buffer;
345 if (dc_fixpt_le(one, args->arg))
348 if (dc_fixpt_le(args->arg, dc_fixpt_neg(args->a0))) {
349 scratch_1 = dc_fixpt_add(one, args->a3);
350 scratch_2 = dc_fixpt_pow(
351 dc_fixpt_neg(args->arg),
352 dc_fixpt_recip(args->gamma));
353 scratch_1 = dc_fixpt_mul(scratch_1, scratch_2);
354 scratch_1 = dc_fixpt_sub(args->a2, scratch_1);
357 } else if (dc_fixpt_le(args->a0, args->arg)) {
358 if (cal_buffer->buffer_index == 0) {
359 cal_buffer->gamma_of_2 = dc_fixpt_pow(dc_fixpt_from_int(2),
360 dc_fixpt_recip(args->gamma));
362 scratch_1 = dc_fixpt_add(one, args->a3);
363 /* In the first region (first 16 points) and in the
364 * region delimited by START/END we calculate with
365 * full precision to avoid error accumulation.
367 if ((cal_buffer->buffer_index >= PRECISE_LUT_REGION_START &&
368 cal_buffer->buffer_index <= PRECISE_LUT_REGION_END) ||
369 (cal_buffer->buffer_index < 16))
370 scratch_2 = dc_fixpt_pow(args->arg,
371 dc_fixpt_recip(args->gamma));
373 scratch_2 = dc_fixpt_mul(cal_buffer->gamma_of_2,
374 cal_buffer->buffer[cal_buffer->buffer_index%16]);
376 if (cal_buffer->buffer_index != -1) {
377 cal_buffer->buffer[cal_buffer->buffer_index%16] = scratch_2;
378 cal_buffer->buffer_index++;
381 scratch_1 = dc_fixpt_mul(scratch_1, scratch_2);
382 scratch_1 = dc_fixpt_sub(scratch_1, args->a2);
387 return dc_fixpt_mul(args->arg, args->a1);
// Variant of translate_from_linear_space with the same three range tests
// (arg <= -a0, a0 <= arg, otherwise arg * a1). No visible line of this
// variant touches args->cal_buffer. The upper-bound test here is strict
// (1 < arg), whereas translate_from_linear_space tests 1 <= arg.
// NOTE(review): most of the two power expressions is not visible here.
391 static struct fixed31_32 translate_from_linear_space_long(
392 struct translate_from_linear_space_args *args)
394 const struct fixed31_32 one = dc_fixpt_from_int(1);
396 if (dc_fixpt_lt(one, args->arg))
399 if (dc_fixpt_le(args->arg, dc_fixpt_neg(args->a0)))
407 dc_fixpt_neg(args->arg),
408 dc_fixpt_recip(args->gamma))));
409 else if (dc_fixpt_le(args->a0, args->arg))
417 dc_fixpt_recip(args->gamma))),
420 return dc_fixpt_mul(args->arg, args->a1);
// Applies a pure power curve with gamma = 22 / 10 to arg: a0..a3 are all
// zero, so only the power term of the translate functions contributes.
// The result comes from either translate_from_linear_space_long() or
// translate_from_linear_space().
// NOTE(review): the condition choosing between the two is not visible here;
// presumably use_eetf selects the _long variant - confirm.
423 static struct fixed31_32 calculate_gamma22(struct fixed31_32 arg, bool use_eetf, struct calculate_buffer *cal_buffer)
425 struct fixed31_32 gamma = dc_fixpt_from_fraction(22, 10);
426 struct translate_from_linear_space_args scratch_gamma_args;
428 scratch_gamma_args.arg = arg;
429 scratch_gamma_args.a0 = dc_fixpt_zero;
430 scratch_gamma_args.a1 = dc_fixpt_zero;
431 scratch_gamma_args.a2 = dc_fixpt_zero;
432 scratch_gamma_args.a3 = dc_fixpt_zero;
433 scratch_gamma_args.cal_buffer = cal_buffer;
434 scratch_gamma_args.gamma = gamma;
437 return translate_from_linear_space_long(&scratch_gamma_args);
439 return translate_from_linear_space(&scratch_gamma_args);
// Inverse direction of translate_from_linear_space. The threshold a0 is
// first rescaled to a0 * a1. Visible branches:
//   arg <= -a0         : negated power expression built from (a2 - arg)
//   -a0 <= arg <= a0   : arg / a1
//   otherwise          : power expression built from (a2 + arg), exponent gamma
// NOTE(review): parts of both power expressions and the return statement are
// not visible in this listing; the divisor appears to involve (1 + a3).
443 static struct fixed31_32 translate_to_linear_space(
444 struct fixed31_32 arg,
445 struct fixed31_32 a0,
446 struct fixed31_32 a1,
447 struct fixed31_32 a2,
448 struct fixed31_32 a3,
449 struct fixed31_32 gamma)
451 struct fixed31_32 linear;
453 a0 = dc_fixpt_mul(a0, a1);
454 if (dc_fixpt_le(arg, dc_fixpt_neg(a0)))
456 linear = dc_fixpt_neg(
459 dc_fixpt_sub(a2, arg),
461 dc_fixpt_one, a3)), gamma));
463 else if (dc_fixpt_le(dc_fixpt_neg(a0), arg) &&
464 dc_fixpt_le(arg, a0))
465 linear = dc_fixpt_div(arg, a1);
467 linear = dc_fixpt_pow(
469 dc_fixpt_add(a2, arg),
471 dc_fixpt_one, a3)), gamma);
// Convenience wrapper: packs arg, the coefficients of coeff at color_index
// and cal_buffer into a translate_from_linear_space_args structure and
// returns the result of translate_from_linear_space() on it.
476 static struct fixed31_32 translate_from_linear_space_ex(
477 struct fixed31_32 arg,
478 struct gamma_coefficients *coeff,
479 uint32_t color_index,
480 struct calculate_buffer *cal_buffer)
482 struct translate_from_linear_space_args scratch_gamma_args;
484 scratch_gamma_args.arg = arg;
485 scratch_gamma_args.a0 = coeff->a0[color_index];
486 scratch_gamma_args.a1 = coeff->a1[color_index];
487 scratch_gamma_args.a2 = coeff->a2[color_index];
488 scratch_gamma_args.a3 = coeff->a3[color_index];
489 scratch_gamma_args.gamma = coeff->user_gamma[color_index];
490 scratch_gamma_args.cal_buffer = cal_buffer;
492 return translate_from_linear_space(&scratch_gamma_args);
// Convenience wrapper: forwards the coefficients of coeff at color_index
// (a0, a1, a2, a3, user_gamma) to translate_to_linear_space() and returns
// its result.
// NOTE(review): the first call argument is not visible here; presumably arg.
496 static inline struct fixed31_32 translate_to_linear_space_ex(
497 struct fixed31_32 arg,
498 struct gamma_coefficients *coeff,
499 uint32_t color_index)
501 return translate_to_linear_space(
503 coeff->a0[color_index],
504 coeff->a1[color_index],
505 coeff->a2[color_index],
506 coeff->a3[color_index],
507 coeff->user_gamma[color_index]);
// Searches axis_x for the pair of neighbouring entries that bracket hw_point
// on the given channel. The scan starts at *index_to_start and covers indices
// below max_number = ramp->num_entries + 3. For each i, right is the entry at
// i + 1, or the last entry when i is already the last index.
// Classification written to *pos:
//   left <= hw_point <= right                      : HW_POINT_POSITION_MIDDLE
//   i == *index_to_start and hw_point <= left      : HW_POINT_POSITION_LEFT
//   i == max_number - 1 and right <= hw_point      : HW_POINT_POSITION_RIGHT
// For MIDDLE, *index_right is i + 1 clamped to max_number - 1.
// NOTE(review): the assignments of left, *index_left, *index_to_start and
// the return statements are not visible in this listing.
511 static bool find_software_points(
512 const struct dc_gamma *ramp,
513 const struct gamma_pixel *axis_x,
514 struct fixed31_32 hw_point,
515 enum channel_name channel,
516 uint32_t *index_to_start,
517 uint32_t *index_left,
518 uint32_t *index_right,
519 enum hw_point_position *pos)
521 const uint32_t max_number = ramp->num_entries + 3;
523 struct fixed31_32 left, right;
525 uint32_t i = *index_to_start;
527 while (i < max_number) {
528 if (channel == CHANNEL_NAME_RED) {
531 if (i < max_number - 1)
532 right = axis_x[i + 1].r;
534 right = axis_x[max_number - 1].r;
535 } else if (channel == CHANNEL_NAME_GREEN) {
538 if (i < max_number - 1)
539 right = axis_x[i + 1].g;
541 right = axis_x[max_number - 1].g;
545 if (i < max_number - 1)
546 right = axis_x[i + 1].b;
548 right = axis_x[max_number - 1].b;
551 if (dc_fixpt_le(left, hw_point) &&
552 dc_fixpt_le(hw_point, right)) {
556 if (i < max_number - 1)
557 *index_right = i + 1;
559 *index_right = max_number - 1;
561 *pos = HW_POINT_POSITION_MIDDLE;
564 } else if ((i == *index_to_start) &&
565 dc_fixpt_le(hw_point, left)) {
570 *pos = HW_POINT_POSITION_LEFT;
573 } else if ((i == max_number - 1) &&
574 dc_fixpt_le(right, hw_point)) {
579 *pos = HW_POINT_POSITION_RIGHT;
// For every hardware point i in 0..number_of_points, looks up where the
// regamma Y of the selected channel falls on axis_x and records the
// interpolation data in a gamma_point (coeff, left_index, right_index).
// Steps per point: pick coord_x for the channel, call find_software_points()
// starting from index 0, check both indices against ramp->num_entries + 3,
// then set the coefficient from the reported position:
//   MIDDLE : result of a dc_fixpt_div (operands not visible here)
//   LEFT   : 0
//   RIGHT  : 2
// NOTE(review): the failure handling, the selection of point, the division
// operands and the loop increment are not visible in this listing.
590 static bool build_custom_gamma_mapping_coefficients_worker(
591 const struct dc_gamma *ramp,
592 struct pixel_gamma_point *coeff,
593 const struct hw_x_point *coordinates_x,
594 const struct gamma_pixel *axis_x,
595 enum channel_name channel,
596 uint32_t number_of_points)
600 while (i <= number_of_points) {
601 struct fixed31_32 coord_x;
603 uint32_t index_to_start = 0;
604 uint32_t index_left = 0;
605 uint32_t index_right = 0;
607 enum hw_point_position hw_pos;
609 struct gamma_point *point;
611 struct fixed31_32 left_pos;
612 struct fixed31_32 right_pos;
614 if (channel == CHANNEL_NAME_RED)
615 coord_x = coordinates_x[i].regamma_y_red;
616 else if (channel == CHANNEL_NAME_GREEN)
617 coord_x = coordinates_x[i].regamma_y_green;
619 coord_x = coordinates_x[i].regamma_y_blue;
621 if (!find_software_points(
622 ramp, axis_x, coord_x, channel,
623 &index_to_start, &index_left, &index_right, &hw_pos)) {
628 if (index_left >= ramp->num_entries + 3) {
633 if (index_right >= ramp->num_entries + 3) {
638 if (channel == CHANNEL_NAME_RED) {
641 left_pos = axis_x[index_left].r;
642 right_pos = axis_x[index_right].r;
643 } else if (channel == CHANNEL_NAME_GREEN) {
646 left_pos = axis_x[index_left].g;
647 right_pos = axis_x[index_right].g;
651 left_pos = axis_x[index_left].b;
652 right_pos = axis_x[index_right].b;
655 if (hw_pos == HW_POINT_POSITION_MIDDLE)
656 point->coeff = dc_fixpt_div(
663 else if (hw_pos == HW_POINT_POSITION_LEFT)
664 point->coeff = dc_fixpt_zero;
665 else if (hw_pos == HW_POINT_POSITION_RIGHT)
666 point->coeff = dc_fixpt_from_int(2);
672 point->left_index = index_left;
673 point->right_index = index_right;
// Returns the value of one channel for a hardware point, using the
// gamma_point chosen for that channel from coeff.
// Returns zero when left_index or right_index is outside 0..max_index.
// By position:
//   MIDDLE : a sum whose last term is rgb[left_index] for the channel and
//            whose other term involves (rgb[right_index] - rgb[left_index])
//   LEFT   : 0
//   other  : 1
// NOTE(review): the remaining parameters, the selection of point and the
// factor applied to the difference are not visible in this listing.
682 static struct fixed31_32 calculate_mapped_value(
683 struct pwl_float_data *rgb,
684 const struct pixel_gamma_point *coeff,
685 enum channel_name channel,
688 const struct gamma_point *point;
690 struct fixed31_32 result;
692 if (channel == CHANNEL_NAME_RED)
694 else if (channel == CHANNEL_NAME_GREEN)
699 if ((point->left_index < 0) || (point->left_index > max_index)) {
701 return dc_fixpt_zero;
704 if ((point->right_index < 0) || (point->right_index > max_index)) {
706 return dc_fixpt_zero;
709 if (point->pos == HW_POINT_POSITION_MIDDLE)
710 if (channel == CHANNEL_NAME_RED)
711 result = dc_fixpt_add(
715 rgb[point->right_index].r,
716 rgb[point->left_index].r)),
717 rgb[point->left_index].r);
718 else if (channel == CHANNEL_NAME_GREEN)
719 result = dc_fixpt_add(
723 rgb[point->right_index].g,
724 rgb[point->left_index].g)),
725 rgb[point->left_index].g);
727 result = dc_fixpt_add(
731 rgb[point->right_index].b,
732 rgb[point->left_index].b)),
733 rgb[point->left_index].b);
734 else if (point->pos == HW_POINT_POSITION_LEFT) {
736 result = dc_fixpt_zero;
738 result = dc_fixpt_one;
// Builds the PQ regamma curve for points start_index..hw_points_num.
// When sdr_white_level is 80 the value is read from the cached PQ table;
// the table initialisation guarded by mod_color_is_table_init() applies to
// that case only. For any other white level the value is computed with
// compute_pq() on X * (sdr_white_level / 10000).
// The result is clamped to the range 0..1.
// NOTE(review): the table fill call, the value of start_index and the
// stores into rgb are not visible in this listing.
744 static void build_pq(struct pwl_float_data_ex *rgb_regamma,
745 uint32_t hw_points_num,
746 const struct hw_x_point *coordinate_x,
747 uint32_t sdr_white_level)
749 uint32_t i, start_index;
751 struct pwl_float_data_ex *rgb = rgb_regamma;
752 const struct hw_x_point *coord_x = coordinate_x;
754 struct fixed31_32 output;
755 struct fixed31_32 scaling_factor =
756 dc_fixpt_from_fraction(sdr_white_level, 10000);
757 struct fixed31_32 *pq_table = mod_color_get_table(type_pq_table);
759 if (!mod_color_is_table_init(type_pq_table) && sdr_white_level == 80) {
761 mod_color_set_table_init_state(type_pq_table, true);
764 /* TODO: start index is from segment 2^-24, skipping first segment
765 * due to x values too small for power calculations
769 coord_x += start_index;
771 for (i = start_index; i <= hw_points_num; i++) {
772 /* Multiply 0.008 as regamma is 0-1 and FP16 input is 0-125.
775 if (sdr_white_level == 80) {
776 output = pq_table[i];
778 x = dc_fixpt_mul(coord_x->x, scaling_factor);
779 compute_pq(x, &output);
782 /* should really not happen? */
783 if (dc_fixpt_lt(output, dc_fixpt_zero))
784 output = dc_fixpt_zero;
785 else if (dc_fixpt_lt(dc_fixpt_one, output))
786 output = dc_fixpt_one;
// Builds the dePQ curve for points 0..hw_points_num from the cached dePQ
// table, marking the table as initialised on first use. Each value is
// clamped to the range 0..125.
// NOTE(review): the table fill call and the stores into de_pq are not
// visible in this listing; coordinate_x is not read in any visible line.
797 static void build_de_pq(struct pwl_float_data_ex *de_pq,
798 uint32_t hw_points_num,
799 const struct hw_x_point *coordinate_x)
802 struct fixed31_32 output;
803 struct fixed31_32 *de_pq_table = mod_color_get_table(type_de_pq_table);
804 struct fixed31_32 scaling_factor = dc_fixpt_from_int(125);
806 if (!mod_color_is_table_init(type_de_pq_table)) {
808 mod_color_set_table_init_state(type_de_pq_table, true);
812 for (i = 0; i <= hw_points_num; i++) {
813 output = de_pq_table[i];
814 /* should really not happen? */
815 if (dc_fixpt_lt(output, dc_fixpt_zero))
816 output = dc_fixpt_zero;
817 else if (dc_fixpt_lt(scaling_factor, output))
818 output = scaling_factor;
// Builds a regamma curve of the predefined type for points 0..hw_points_num.
// Coefficients are held in a zeroed heap allocation and filled by
// build_coefficients(). cal_buffer is reset (buffer zeroed, buffer_index = 0)
// before the loop so translate_from_linear_space() can cache power results,
// and buffer_index is set to -1 afterwards to switch the caching off.
// NOTE(review): the allocation-failure check, the error paths, the release
// of coeff and the green/blue stores are not visible in this listing.
825 static bool build_regamma(struct pwl_float_data_ex *rgb_regamma,
826 uint32_t hw_points_num,
827 const struct hw_x_point *coordinate_x,
828 enum dc_transfer_func_predefined type,
829 struct calculate_buffer *cal_buffer)
834 struct gamma_coefficients *coeff;
835 struct pwl_float_data_ex *rgb = rgb_regamma;
836 const struct hw_x_point *coord_x = coordinate_x;
838 coeff = kvzalloc(sizeof(*coeff), GFP_KERNEL);
842 if (!build_coefficients(coeff, type))
845 memset(cal_buffer->buffer, 0, NUM_PTS_IN_REGION * sizeof(struct fixed31_32));
846 cal_buffer->buffer_index = 0; // see variable definition for more info
849 while (i <= hw_points_num) {
850 /* TODO use y vs r,g,b */
851 rgb->r = translate_from_linear_space_ex(
852 coord_x->x, coeff, 0, cal_buffer);
859 cal_buffer->buffer_index = -1;
// Tone-maps input_x with a Hermite spline evaluated in PQ space and writes
// the result to *out_x. Writes zero when max_content is zero.
// Visible steps:
//   E1         = PQ(input_x)
//   min_lum_pq = PQ(min_display / max_content)
//   max_lum_pq = PQ(max_display / max_content)
//   a          = (1 + b) / PQ(1), with b = 0.5
//   ks         = a * max_lum_pq - b           (knee start)
//   for ks <= E1 <= 1: t = (E1 - ks) / (1 - ks) when 1 - ks exceeds epsilon,
//   and E2 is the sum of three spline terms (see the inline comments)
//   E3         = E2 + min_lum_pq * (1 - E2)^4
//   *out_x     = dePQ(E3) / (max_display / max_content)
// The initial value given to a (15 / 10) is replaced before any visible use.
// NOTE(review): the assignments for E1 < ks, for 1 - ks <= epsilon and for
// E1 > 1 are not visible in this listing.
866 static void hermite_spline_eetf(struct fixed31_32 input_x,
867 struct fixed31_32 max_display,
868 struct fixed31_32 min_display,
869 struct fixed31_32 max_content,
870 struct fixed31_32 *out_x)
872 struct fixed31_32 min_lum_pq;
873 struct fixed31_32 max_lum_pq;
874 struct fixed31_32 max_content_pq;
875 struct fixed31_32 ks;
876 struct fixed31_32 E1;
877 struct fixed31_32 E2;
878 struct fixed31_32 E3;
880 struct fixed31_32 t2;
881 struct fixed31_32 t3;
882 struct fixed31_32 two;
883 struct fixed31_32 three;
884 struct fixed31_32 temp1;
885 struct fixed31_32 temp2;
886 struct fixed31_32 a = dc_fixpt_from_fraction(15, 10);
887 struct fixed31_32 b = dc_fixpt_from_fraction(5, 10);
888 struct fixed31_32 epsilon = dc_fixpt_from_fraction(1, 1000000); // dc_fixpt_epsilon is a bit too small
890 if (dc_fixpt_eq(max_content, dc_fixpt_zero)) {
891 *out_x = dc_fixpt_zero;
895 compute_pq(input_x, &E1);
896 compute_pq(dc_fixpt_div(min_display, max_content), &min_lum_pq);
897 compute_pq(dc_fixpt_div(max_display, max_content), &max_lum_pq);
898 compute_pq(dc_fixpt_one, &max_content_pq); // always 1? DAL2 code is weird
899 a = dc_fixpt_div(dc_fixpt_add(dc_fixpt_one, b), max_content_pq); // (1+b)/maxContent
900 ks = dc_fixpt_sub(dc_fixpt_mul(a, max_lum_pq), b); // a * max_lum_pq - b
902 if (dc_fixpt_lt(E1, ks))
904 else if (dc_fixpt_le(ks, E1) && dc_fixpt_le(E1, dc_fixpt_one)) {
// Guard against dividing by a value of (1 - ks) that is close to zero.
905 if (dc_fixpt_lt(epsilon, dc_fixpt_sub(dc_fixpt_one, ks)))
906 // t = (E1 - ks) / (1 - ks)
907 t = dc_fixpt_div(dc_fixpt_sub(E1, ks),
908 dc_fixpt_sub(dc_fixpt_one, ks));
912 two = dc_fixpt_from_int(2);
913 three = dc_fixpt_from_int(3);
915 t2 = dc_fixpt_mul(t, t);
916 t3 = dc_fixpt_mul(t2, t);
917 temp1 = dc_fixpt_mul(two, t3);
918 temp2 = dc_fixpt_mul(three, t2);
920 // (2t^3 - 3t^2 + 1) * ks
921 E2 = dc_fixpt_mul(ks, dc_fixpt_add(dc_fixpt_one,
922 dc_fixpt_sub(temp1, temp2)));
924 // (-2t^3 + 3t^2) * max_lum_pq
925 E2 = dc_fixpt_add(E2, dc_fixpt_mul(max_lum_pq,
926 dc_fixpt_sub(temp2, temp1)));
928 temp1 = dc_fixpt_mul(two, t2);
929 temp2 = dc_fixpt_sub(dc_fixpt_one, ks);
931 // (t^3 - 2t^2 + t) * (1-ks)
932 E2 = dc_fixpt_add(E2, dc_fixpt_mul(temp2,
933 dc_fixpt_add(t, dc_fixpt_sub(t3, temp1))));
// temp2 becomes (1 - E2)^4 through two successive squarings.
937 temp1 = dc_fixpt_sub(dc_fixpt_one, E2);
938 temp2 = dc_fixpt_mul(temp1, temp1);
939 temp2 = dc_fixpt_mul(temp2, temp2);
942 E3 = dc_fixpt_add(E2, dc_fixpt_mul(min_lum_pq, temp2));
943 compute_de_pq(E3, out_x);
945 *out_x = dc_fixpt_div(*out_x, dc_fixpt_div(max_display, max_content));
// Builds the FreeSync HDR regamma curve for points 32..hw_points_num from the
// tone-mapping parameters in fs_params.
// Parameter handling visible here:
//   - max_content == 0 or max_display == 0 is tested up front
//   - min_display is taken as fs_params->min_display / 10000, replaced by
//     0.1 when the raw value is above 1000
//   - max_display is replaced by 100 when the raw value is below 100
//   - max_content is compared with max_display; an assignment
//     max_content = max_display follows
// Per point, X is rescaled either through hermite_spline_eetf() (dividing by
// max_content / sdr_white_level first) or by max_display / sdr_white_level.
// Values below clip go through calculate_gamma22() and are clamped to 0..1.
// The first point at or beyond clip is interpolated as described in the
// inline comment and clamped to 0..1.07.
// cal_buffer->buffer_index is 0 during the loop and -1 afterwards.
// NOTE(review): the early return, the use_eetf/is_clipped updates, the
// stores into rgb and the pointer increments are not visible in this listing.
948 static bool build_freesync_hdr(struct pwl_float_data_ex *rgb_regamma,
949 uint32_t hw_points_num,
950 const struct hw_x_point *coordinate_x,
951 const struct hdr_tm_params *fs_params,
952 struct calculate_buffer *cal_buffer)
955 struct pwl_float_data_ex *rgb = rgb_regamma;
956 const struct hw_x_point *coord_x = coordinate_x;
957 const struct hw_x_point *prv_coord_x = coord_x;
958 struct fixed31_32 scaledX = dc_fixpt_zero;
959 struct fixed31_32 scaledX1 = dc_fixpt_zero;
960 struct fixed31_32 max_display;
961 struct fixed31_32 min_display;
962 struct fixed31_32 max_content;
963 struct fixed31_32 clip = dc_fixpt_one;
964 struct fixed31_32 output;
965 bool use_eetf = false;
966 bool is_clipped = false;
967 struct fixed31_32 sdr_white_level;
968 struct fixed31_32 coordX_diff;
969 struct fixed31_32 out_dist_max;
970 struct fixed31_32 bright_norm;
972 if (fs_params->max_content == 0 ||
973 fs_params->max_display == 0)
976 max_display = dc_fixpt_from_int(fs_params->max_display);
977 min_display = dc_fixpt_from_fraction(fs_params->min_display, 10000);
978 max_content = dc_fixpt_from_int(fs_params->max_content);
979 sdr_white_level = dc_fixpt_from_int(fs_params->sdr_white_level);
981 if (fs_params->min_display > 1000) // cap at 0.1 at the bottom
982 min_display = dc_fixpt_from_fraction(1, 10);
983 if (fs_params->max_display < 100) // cap at 100 at the top
984 max_display = dc_fixpt_from_int(100);
986 // only max used, we don't adjust min luminance
987 if (fs_params->max_content > fs_params->max_display)
990 max_content = max_display;
993 cal_buffer->buffer_index = 0; // see var definition for more info
994 rgb += 32; // first 32 points have problems with fixed point, too small
997 for (i = 32; i <= hw_points_num; i++) {
1000 /* max content is equal 1 */
1001 scaledX1 = dc_fixpt_div(coord_x->x,
1002 dc_fixpt_div(max_content, sdr_white_level));
1003 hermite_spline_eetf(scaledX1, max_display, min_display,
1004 max_content, &scaledX);
1006 scaledX = dc_fixpt_div(coord_x->x,
1007 dc_fixpt_div(max_display, sdr_white_level));
1009 if (dc_fixpt_lt(scaledX, clip)) {
1010 if (dc_fixpt_lt(scaledX, dc_fixpt_zero))
1011 output = dc_fixpt_zero;
1013 output = calculate_gamma22(scaledX, use_eetf, cal_buffer);
1015 // Ensure output respects reasonable boundaries
1016 output = dc_fixpt_clamp(output, dc_fixpt_zero, dc_fixpt_one);
1022 /* Here clipping happens for the first time */
1025 /* The next few lines implement the equation
1026 * output = prev_out +
1027 * (coord_x->x - prev_coord_x->x) *
1028 * (1.0 - prev_out) /
1029 * (maxDisp/sdr_white_level - prevCoordX)
1031 * This equation interpolates the first point
1032 * after max_display/80 so that the slope from
1033 * hw_x_before_max and hw_x_after_max is such
1034 * that we hit Y=1.0 at max_display/80.
1037 coordX_diff = dc_fixpt_sub(coord_x->x, prv_coord_x->x);
1038 out_dist_max = dc_fixpt_sub(dc_fixpt_one, output);
1039 bright_norm = dc_fixpt_div(max_display, sdr_white_level);
1041 output = dc_fixpt_add(
1042 output, dc_fixpt_mul(
1043 coordX_diff, dc_fixpt_div(
1045 dc_fixpt_sub(bright_norm, prv_coord_x->x)
1050 /* Relaxing the maximum boundary to 1.07 (instead of 1.0)
1051 * because the last point in the curve must be such that
1052 * the maximum display pixel brightness interpolates to
1053 * exactly 1.0. The worst case scenario was calculated
1054 * around 1.057, so the limit of 1.07 leaves some safety
1057 output = dc_fixpt_clamp(output, dc_fixpt_zero,
1058 dc_fixpt_from_fraction(107, 100));
1065 /* Every other clipping after the first
1066 * one is dealt with here
1073 prv_coord_x = coord_x;
1077 cal_buffer->buffer_index = -1;
// Builds a degamma curve of the predefined type in three consecutive ranges:
//   0 .. begin_index - 1         : r, g and b set to zero
//   begin_index .. end_index - 1 : r from translate_to_linear_space_ex() on
//                                  coordinate_x[i].x, copied to g and b
//   end_index .. hw_points_num   : r, g and b set to one
// begin_index is 13 regions in, and end_index is 12 regions after that.
// NOTE(review): the loop increments, the initial value of i and the return
// statements are not visible in this listing.
1082 static bool build_degamma(struct pwl_float_data_ex *curve,
1083 uint32_t hw_points_num,
1084 const struct hw_x_point *coordinate_x, enum dc_transfer_func_predefined type)
1087 struct gamma_coefficients coeff;
1088 uint32_t begin_index, end_index;
1091 if (!build_coefficients(&coeff, type))
1096 /* X points is 2^-25 to 2^7
1097 * De-gamma X is 2^-12 to 2^0 – we are skipping first -12-(-25) = 13 regions
1099 begin_index = 13 * NUM_PTS_IN_REGION;
1100 end_index = begin_index + 12 * NUM_PTS_IN_REGION;
1102 while (i != begin_index) {
1103 curve[i].r = dc_fixpt_zero;
1104 curve[i].g = dc_fixpt_zero;
1105 curve[i].b = dc_fixpt_zero;
1109 while (i != end_index) {
1110 curve[i].r = translate_to_linear_space_ex(
1111 coordinate_x[i].x, &coeff, 0);
1112 curve[i].g = curve[i].r;
1113 curve[i].b = curve[i].r;
1116 while (i != hw_points_num + 1) {
1117 curve[i].r = dc_fixpt_one;
1118 curve[i].g = dc_fixpt_one;
1119 curve[i].b = dc_fixpt_one;
// Builds the HLG degamma curve: for each point up to and including
// hw_points_num, compute_hlg_eotf() is evaluated on the X coordinate and the
// result is written to the red component of the output entry.
// NOTE(review): the green/blue stores and the pointer/index increments are
// not visible in this listing.
1131 static void build_hlg_degamma(struct pwl_float_data_ex *degamma,
1132 uint32_t hw_points_num,
1133 const struct hw_x_point *coordinate_x,
1134 uint32_t sdr_white_level, uint32_t max_luminance_nits)
1138 struct pwl_float_data_ex *rgb = degamma;
1139 const struct hw_x_point *coord_x = coordinate_x;
1142 // check when i == 434
1143 while (i != hw_points_num + 1) {
1144 compute_hlg_eotf(coord_x->x, &rgb->r, sdr_white_level, max_luminance_nits);
// Builds the HLG regamma curve: for each point up to and including
// hw_points_num, compute_hlg_oetf() is evaluated on the X coordinate and the
// result is written to the red component of the output entry.
// NOTE(review): the green/blue stores and the pointer/index increments are
// not visible in this listing.
1154 static void build_hlg_regamma(struct pwl_float_data_ex *regamma,
1155 uint32_t hw_points_num,
1156 const struct hw_x_point *coordinate_x,
1157 uint32_t sdr_white_level, uint32_t max_luminance_nits)
1161 struct pwl_float_data_ex *rgb = regamma;
1162 const struct hw_x_point *coord_x = coordinate_x;
1167 while (i != hw_points_num + 1) {
1168 compute_hlg_oetf(coord_x->x, &rgb->r, sdr_white_level, max_luminance_nits);
// Normalises the entries of ramp into pwl_rgb and appends three extra
// points derived from the last real entry.
// The divisor is 0xFF00 by default and becomes 0xFFFF as soon as any red,
// green or blue entry exceeds 0xFF00. Every entry is then divided by it.
// The three trailing groups each multiply the last real entry (rgb_last) by
// a factor.
// NOTE(review): the multiplication factors are not visible in this listing;
// presumably they are the fields of the dividers argument - confirm.
1177 static void scale_gamma(struct pwl_float_data *pwl_rgb,
1178 const struct dc_gamma *ramp,
1179 struct dividers dividers)
1181 const struct fixed31_32 max_driver = dc_fixpt_from_int(0xFFFF);
1182 const struct fixed31_32 max_os = dc_fixpt_from_int(0xFF00);
1183 struct fixed31_32 scaler = max_os;
1185 struct pwl_float_data *rgb = pwl_rgb;
1186 struct pwl_float_data *rgb_last = rgb + ramp->num_entries - 1;
// First pass: decide which full-scale value the ramp uses.
1191 if (dc_fixpt_lt(max_os, ramp->entries.red[i]) ||
1192 dc_fixpt_lt(max_os, ramp->entries.green[i]) ||
1193 dc_fixpt_lt(max_os, ramp->entries.blue[i])) {
1194 scaler = max_driver;
1198 } while (i != ramp->num_entries);
// Second pass: divide every entry by the chosen full-scale value.
1203 rgb->r = dc_fixpt_div(
1204 ramp->entries.red[i], scaler);
1205 rgb->g = dc_fixpt_div(
1206 ramp->entries.green[i], scaler);
1207 rgb->b = dc_fixpt_div(
1208 ramp->entries.blue[i], scaler);
1212 } while (i != ramp->num_entries);
1214 rgb->r = dc_fixpt_mul(rgb_last->r,
1216 rgb->g = dc_fixpt_mul(rgb_last->g,
1218 rgb->b = dc_fixpt_mul(rgb_last->b,
1223 rgb->r = dc_fixpt_mul(rgb_last->r,
1225 rgb->g = dc_fixpt_mul(rgb_last->g,
1227 rgb->b = dc_fixpt_mul(rgb_last->b,
1232 rgb->r = dc_fixpt_mul(rgb_last->r,
1234 rgb->g = dc_fixpt_mul(rgb_last->g,
1236 rgb->b = dc_fixpt_mul(rgb_last->b,
// Normalises the entries of ramp into pwl_rgb using the value range found in
// the ramp itself, then appends two extrapolated points.
// min starts at 0 and max at 1; both are widened by any entry outside that
// range. When min is negative, delta = -min. offset = min + max.
// Each entry is combined with delta and divided by offset.
// The two extra points continue the last segment linearly:
// p[i] = 2 * p[i-1] - p[i-2].
// NOTE(review): the operation combining an entry with delta is not visible
// here (presumably an addition), nor are the index updates between the two
// extrapolated points. The dividers argument is not read in any visible line.
1240 static void scale_gamma_dx(struct pwl_float_data *pwl_rgb,
1241 const struct dc_gamma *ramp,
1242 struct dividers dividers)
1245 struct fixed31_32 min = dc_fixpt_zero;
1246 struct fixed31_32 max = dc_fixpt_one;
1248 struct fixed31_32 delta = dc_fixpt_zero;
1249 struct fixed31_32 offset = dc_fixpt_zero;
1251 for (i = 0 ; i < ramp->num_entries; i++) {
1252 if (dc_fixpt_lt(ramp->entries.red[i], min))
1253 min = ramp->entries.red[i];
1255 if (dc_fixpt_lt(ramp->entries.green[i], min))
1256 min = ramp->entries.green[i];
1258 if (dc_fixpt_lt(ramp->entries.blue[i], min))
1259 min = ramp->entries.blue[i];
1261 if (dc_fixpt_lt(max, ramp->entries.red[i]))
1262 max = ramp->entries.red[i];
1264 if (dc_fixpt_lt(max, ramp->entries.green[i]))
1265 max = ramp->entries.green[i];
1267 if (dc_fixpt_lt(max, ramp->entries.blue[i]))
1268 max = ramp->entries.blue[i];
1271 if (dc_fixpt_lt(min, dc_fixpt_zero))
1272 delta = dc_fixpt_neg(min);
1274 offset = dc_fixpt_add(min, max);
1276 for (i = 0 ; i < ramp->num_entries; i++) {
1277 pwl_rgb[i].r = dc_fixpt_div(
1279 ramp->entries.red[i], delta), offset);
1280 pwl_rgb[i].g = dc_fixpt_div(
1282 ramp->entries.green[i], delta), offset);
1283 pwl_rgb[i].b = dc_fixpt_div(
1285 ramp->entries.blue[i], delta), offset);
1289 pwl_rgb[i].r = dc_fixpt_sub(dc_fixpt_mul_int(
1290 pwl_rgb[i-1].r, 2), pwl_rgb[i-2].r);
1291 pwl_rgb[i].g = dc_fixpt_sub(dc_fixpt_mul_int(
1292 pwl_rgb[i-1].g, 2), pwl_rgb[i-2].g);
1293 pwl_rgb[i].b = dc_fixpt_sub(dc_fixpt_mul_int(
1294 pwl_rgb[i-1].b, 2), pwl_rgb[i-2].b);
1296 pwl_rgb[i].r = dc_fixpt_sub(dc_fixpt_mul_int(
1297 pwl_rgb[i-1].r, 2), pwl_rgb[i-2].r);
1298 pwl_rgb[i].g = dc_fixpt_sub(dc_fixpt_mul_int(
1299 pwl_rgb[i-1].g, 2), pwl_rgb[i-2].g);
1300 pwl_rgb[i].b = dc_fixpt_sub(dc_fixpt_mul_int(
1301 pwl_rgb[i-1].b, 2), pwl_rgb[i-2].b);
// Normalises a user regamma ramp into pwl_rgb and appends three extra points
// derived from the last real entry.
// ramp->gamma is read as three consecutive runs: red at [i], green at
// [i + 256] and blue at [i + 512], for GAMMA_RGB_256_ENTRIES values of i.
// The divisor is 0xFF00 by default and becomes 0xFFFF as soon as any value
// exceeds 0xFF00. The three trailing groups each multiply the last real
// entry (rgb_last) by a factor.
// NOTE(review): the multiplication factors are not visible in this listing;
// presumably they are the fields of the dividers argument - confirm.
1304 /* todo: all these scale_gamma functions are inherently the same but
1305 * take different structures as params or different format for ramp
1306 * values. We could probably implement it in a more generic fashion
1308 static void scale_user_regamma_ramp(struct pwl_float_data *pwl_rgb,
1309 const struct regamma_ramp *ramp,
1310 struct dividers dividers)
1312 unsigned short max_driver = 0xFFFF;
1313 unsigned short max_os = 0xFF00;
1314 unsigned short scaler = max_os;
1316 struct pwl_float_data *rgb = pwl_rgb;
1317 struct pwl_float_data *rgb_last = rgb + GAMMA_RGB_256_ENTRIES - 1;
// First pass: decide which full-scale value the ramp uses.
1321 if (ramp->gamma[i] > max_os ||
1322 ramp->gamma[i + 256] > max_os ||
1323 ramp->gamma[i + 512] > max_os) {
1324 scaler = max_driver;
1328 } while (i != GAMMA_RGB_256_ENTRIES);
// Second pass: convert every value to the fraction value / scaler.
1332 rgb->r = dc_fixpt_from_fraction(
1333 ramp->gamma[i], scaler);
1334 rgb->g = dc_fixpt_from_fraction(
1335 ramp->gamma[i + 256], scaler);
1336 rgb->b = dc_fixpt_from_fraction(
1337 ramp->gamma[i + 512], scaler);
1341 } while (i != GAMMA_RGB_256_ENTRIES);
1343 rgb->r = dc_fixpt_mul(rgb_last->r,
1345 rgb->g = dc_fixpt_mul(rgb_last->g,
1347 rgb->b = dc_fixpt_mul(rgb_last->b,
1352 rgb->r = dc_fixpt_mul(rgb_last->r,
1354 rgb->g = dc_fixpt_mul(rgb_last->g,
1356 rgb->b = dc_fixpt_mul(rgb_last->b,
1361 rgb->r = dc_fixpt_mul(rgb_last->r,
1363 rgb->g = dc_fixpt_mul(rgb_last->g,
1365 rgb->b = dc_fixpt_mul(rgb_last->b,
1370 * RS3+ color transform DDI - 1D LUT adjustment is composed with regamma here
1371 * Input is evenly distributed in the output color space as specified in
1374 * Interpolation details:
1375 * 1D LUT has 4096 values which give curve correction in 0-1 float range
1376 * for evenly spaced points in 0-1 range. lut1D[index] gives correction
1378 * First we find index for which:
1379 * index/4095 < regamma_y < (index+1)/4095 =>
1380 * index < 4095*regamma_y < index + 1
1381 * norm_y = 4095*regamma_y, and index is norm_y rounded down (floor) to an integer
1382 * lut1 = lut1D[index], lut2 = lut1D[index+1]
1384 * adjustedY is then linearly interpolating regamma Y between lut1 and lut2
1386 * Custom degamma on Linux uses the same interpolation math, so is handled here
/*
 * apply_lut_1d - pass every point of a transfer function through a 1D LUT.
 *
 * @ramp:          LUT source; entries.red/green/blue are read at index and
 *                 index_next, both at most max_lut_index (4095). Only ramps
 *                 of type GAMMA_CS_TFM_1D or GAMMA_CUSTOM are processed; any
 *                 other type returns immediately.
 * @num_hw_points: number of points processed (i runs from 0 to
 *                 num_hw_points - 1).
 * @tf_pts:        red/green/blue arrays, rewritten in place through regamma_y.
 *
 * For each point and color channel, norm_y is computed by multiplying
 * max_lut_index_f with an operand that is not visible here (presumably the
 * current channel value - confirm), index is floor(norm_y), and the channel
 * value becomes lut1 + (norm_y - index_f) * (lut2 - lut1).
 *
 * NOTE(review): several short lines (braces, some conditions and call
 * arguments) are not visible in this view.
 */
1388 static void apply_lut_1d(
1389 const struct dc_gamma *ramp,
1390 uint32_t num_hw_points,
1391 struct dc_transfer_func_distributed_points *tf_pts)
1395 struct fixed31_32 *regamma_y;
1396 struct fixed31_32 norm_y;
1397 struct fixed31_32 lut1;
1398 struct fixed31_32 lut2;
// Highest LUT index, and fixed-point copies of it and of the index before it.
1399 const int max_lut_index = 4095;
1400 const struct fixed31_32 penult_lut_index_f =
1401 dc_fixpt_from_int(max_lut_index-1);
1402 const struct fixed31_32 max_lut_index_f =
1403 dc_fixpt_from_int(max_lut_index);
1404 int32_t index = 0, index_next = 0;
1405 struct fixed31_32 index_f;
1406 struct fixed31_32 delta_lut;
1407 struct fixed31_32 delta_index;
1409 if (ramp->type != GAMMA_CS_TFM_1D && ramp->type != GAMMA_CUSTOM)
1410 return; // this is not expected
// Visit every hardware point and each of the three color channels.
1412 for (i = 0; i < num_hw_points; i++) {
1413 for (color = 0; color < 3; color++) {
// regamma_y selects the channel value that is read and then overwritten.
1415 regamma_y = &tf_pts->red[i];
1416 else if (color == 1)
1417 regamma_y = &tf_pts->green[i];
1419 regamma_y = &tf_pts->blue[i];
1421 norm_y = dc_fixpt_mul(max_lut_index_f,
1423 index = dc_fixpt_floor(norm_y);
1424 index_f = dc_fixpt_from_int(index);
// In range: use the next entry as upper neighbor, or the same entry when
// index is already max_lut_index.
1429 if (index <= max_lut_index)
1430 index_next = (index == max_lut_index) ? index : index+1;
1432 /* Here we are dealing with the last point in the curve,
1433 * which in some cases might exceed the range given by
1434 * max_lut_index. So we interpolate the value using
1435 * max_lut_index and max_lut_index - 1.
1437 index = max_lut_index - 1;
1438 index_next = max_lut_index;
1439 index_f = penult_lut_index_f;
1443 lut1 = ramp->entries.red[index];
1444 lut2 = ramp->entries.red[index_next];
1445 } else if (color == 1) {
1446 lut1 = ramp->entries.green[index];
1447 lut2 = ramp->entries.green[index_next];
1449 lut1 = ramp->entries.blue[index];
1450 lut2 = ramp->entries.blue[index_next];
1453 // we have everything now, so interpolate
1454 delta_lut = dc_fixpt_sub(lut2, lut1);
1455 delta_index = dc_fixpt_sub(norm_y, index_f);
1457 *regamma_y = dc_fixpt_add(lut1,
1458 dc_fixpt_mul(delta_index, delta_lut));
/*
 * build_evenly_distributed_points - fill an axis with evenly spaced values.
 *
 * @points:          output array, written through the cursor p.
 * @numberof_points: number of evenly spaced entries; must be greater than 0
 *                   (asserted). p_last points at entry numberof_points - 1.
 * @dividers:        divider1, divider2 and divider3 are used for the extra
 *                   entries written after the evenly spaced ones.
 *
 * Inside the loop, value is dc_fixpt_from_fraction(i, numberof_points - 1).
 * The stores of value into the point are not visible in this view
 * (presumably p->r, p->g and p->b - confirm). After the loop three more
 * entries are written, each equal to the last evenly spaced entry (p_last)
 * divided by divider1, divider2 and divider3 respectively.
 *
 * NOTE(review): braces, the loop head and the cursor/index updates are not
 * visible in this view.
 */
1463 static void build_evenly_distributed_points(
1464 struct gamma_pixel *points,
1465 uint32_t numberof_points,
1466 struct dividers dividers)
1468 struct gamma_pixel *p = points;
1469 struct gamma_pixel *p_last;
1473 // This function should not get called with 0 as a parameter
1474 ASSERT(numberof_points > 0);
1475 p_last = p + numberof_points - 1;
// value is the fraction i / (numberof_points - 1) for the current entry.
1478 struct fixed31_32 value = dc_fixpt_from_fraction(i,
1479 numberof_points - 1);
1487 } while (i < numberof_points);
// First extra entry: last evenly spaced entry divided by divider1.
1489 p->r = dc_fixpt_div(p_last->r, dividers.divider1);
1490 p->g = dc_fixpt_div(p_last->g, dividers.divider1);
1491 p->b = dc_fixpt_div(p_last->b, dividers.divider1);
// Second extra entry: divided by divider2.
1495 p->r = dc_fixpt_div(p_last->r, dividers.divider2);
1496 p->g = dc_fixpt_div(p_last->g, dividers.divider2);
1497 p->b = dc_fixpt_div(p_last->b, dividers.divider2);
// Third extra entry: divided by divider3.
1501 p->r = dc_fixpt_div(p_last->r, dividers.divider3);
1502 p->g = dc_fixpt_div(p_last->g, dividers.divider3);
1503 p->b = dc_fixpt_div(p_last->b, dividers.divider3);
/*
 * copy_rgb_regamma_to_coordinates_x - store regamma values next to X points.
 *
 * @coordinates_x: destination; regamma_y_red/green/blue of each visited
 *                 entry are overwritten.
 * @hw_points_num: the loop runs while i <= hw_points_num + 1.
 * @rgb_ex:        source of the r, g and b values.
 *
 * NOTE(review): the initialization of i and the advancing of i, coords and
 * rgb_regamma are not visible in this view; presumably i starts at 0 and all
 * three advance by one entry per iteration - confirm.
 */
1506 static inline void copy_rgb_regamma_to_coordinates_x(
1507 struct hw_x_point *coordinates_x,
1508 uint32_t hw_points_num,
1509 const struct pwl_float_data_ex *rgb_ex)
1511 struct hw_x_point *coords = coordinates_x;
1513 const struct pwl_float_data_ex *rgb_regamma = rgb_ex;
// Copy one r/g/b triple per iteration into the matching regamma_y fields.
1515 while (i <= hw_points_num + 1) {
1516 coords->regamma_y_red = rgb_regamma->r;
1517 coords->regamma_y_green = rgb_regamma->g;
1518 coords->regamma_y_blue = rgb_regamma->b;
/*
 * calculate_interpolated_hardware_curve - map the user ramp onto HW points.
 *
 * @ramp:             user ramp; num_entries is added to max_entries.
 * @coeff128:         coefficient array, filled by
 *                    build_custom_gamma_mapping_coefficients_worker() and
 *                    then read through coeff.
 * @rgb_user:         scaled user ramp passed to calculate_mapped_value().
 * @coordinates_x:    X points passed to the coefficient worker.
 * @axis_x:           axis passed to the coefficient worker.
 * @number_of_points: the output loop runs while i <= number_of_points.
 * @tf_pts:           output; red/green/blue[i] receive the mapped values.
 *
 * The coefficient worker is called for i = 0, 1, 2; the action taken when it
 * fails is not visible here (presumably the function returns false -
 * confirm). max_entries starts at 3 - 1 and is increased by
 * ramp->num_entries before the per-channel mapping loop.
 *
 * NOTE(review): braces, return statements and the index/pointer updates of
 * the output loop are not visible in this view.
 */
1526 static bool calculate_interpolated_hardware_curve(
1527 const struct dc_gamma *ramp,
1528 struct pixel_gamma_point *coeff128,
1529 struct pwl_float_data *rgb_user,
1530 const struct hw_x_point *coordinates_x,
1531 const struct gamma_pixel *axis_x,
1532 uint32_t number_of_points,
1533 struct dc_transfer_func_distributed_points *tf_pts)
1536 const struct pixel_gamma_point *coeff = coeff128;
1537 uint32_t max_entries = 3 - 1;
// Build the mapping coefficients; the worker is invoked once per value of i.
1541 for (i = 0; i < 3; i++) {
1542 if (!build_custom_gamma_mapping_coefficients_worker(
1543 ramp, coeff128, coordinates_x, axis_x, i,
1549 max_entries += ramp->num_entries;
1551 /* TODO: float point case */
// Evaluate the mapped value of each channel for every output point.
1553 while (i <= number_of_points) {
1554 tf_pts->red[i] = calculate_mapped_value(
1555 rgb_user, coeff, CHANNEL_NAME_RED, max_entries);
1556 tf_pts->green[i] = calculate_mapped_value(
1557 rgb_user, coeff, CHANNEL_NAME_GREEN, max_entries);
1558 tf_pts->blue[i] = calculate_mapped_value(
1559 rgb_user, coeff, CHANNEL_NAME_BLUE, max_entries);
1568 /* The "old" interpolation uses a complicated scheme to build an array of
1569 * coefficients while also using an array of 0-255 normalized to 0-1
1570 * Then there's another loop using both of the above + new scaled user ramp
1571 * and we concatenate them. It also searches for points of interpolation and
1572 * uses enums for positions.
1574 * This function uses a different approach:
1575 * user ramp is always applied on X with 0/255, 1/255, 2/255, ..., 255/255
1576 * To find index for hwX , we notice the following:
1577 * i/255 <= hwX < (i+1)/255 <=> i <= 255*hwX < i+1
1578 * See apply_lut_1d which is the same principle, but on 4K entry 1D LUT
1580 * Once the index is known, combined Y is simply:
1581 * user_ramp(index) + (255*hwX - index)*(user_ramp(index+1) - user_ramp(index))
1583 * We should switch to this method in all cases, it's simpler and faster
1584 * ToDo one day - for now this only applies to ADL regamma to avoid regression
1585 * for regular use cases (sRGB and PQ)
/*
 * interpolate_user_regamma - sample the scaled user ramp at the HW X points.
 *
 * @hw_points_num: the main loop runs while i <= hw_points_num + 1.
 * @rgb_user:      scaled user ramp, read at index and index_next (0..255).
 * @tf_pts:        output; red/green/blue[i] are written through tf_point.
 *
 * The body also reads apply_degamma, whose declaration is not visible in
 * this view (presumably a parameter between rgb_user and tf_pts - confirm).
 * When it is set, hw_x is taken per channel from coordinates_x[i].regamma_y_*;
 * otherwise hw_x is coordinates_x[i].x.
 *
 * For each point and channel: norm_x = 255 * hw_x, index = floor(norm_x),
 * and the output is lut1 + (norm_x - index_f) * (lut2 - lut1), where lut1 and
 * lut2 are the user ramp entries at index and index_next.
 *
 * NOTE(review): braces, some conditions and the statements guarded by the
 * hw_x and index range checks are not visible in this view.
 */
1587 static void interpolate_user_regamma(uint32_t hw_points_num,
1588 struct pwl_float_data *rgb_user,
1590 struct dc_transfer_func_distributed_points *tf_pts)
1596 struct fixed31_32 *tf_point;
1597 struct fixed31_32 hw_x;
// The user ramp is addressed with indices 0..255, hence the factor 255.
1598 struct fixed31_32 norm_factor =
1599 dc_fixpt_from_int(255);
1600 struct fixed31_32 norm_x;
1601 struct fixed31_32 index_f;
1602 struct fixed31_32 lut1;
1603 struct fixed31_32 lut2;
1604 struct fixed31_32 delta_lut;
1605 struct fixed31_32 delta_index;
1606 const struct fixed31_32 one = dc_fixpt_from_int(1);
1609 /* fixed_pt library has problems handling too small values */
// Leading output points are forced to zero; the loop bounds are not visible.
1611 tf_pts->red[i] = dc_fixpt_zero;
1612 tf_pts->green[i] = dc_fixpt_zero;
1613 tf_pts->blue[i] = dc_fixpt_zero;
1616 while (i <= hw_points_num + 1) {
1617 for (color = 0; color < 3; color++) {
// tf_point selects the output slot of the current channel.
1619 tf_point = &tf_pts->red[i];
1620 else if (color == 1)
1621 tf_point = &tf_pts->green[i];
1623 tf_point = &tf_pts->blue[i];
// Choose the X value: per-channel regamma Y when degamma is applied,
// otherwise the plain hardware X coordinate.
1625 if (apply_degamma) {
1627 hw_x = coordinates_x[i].regamma_y_red;
1628 else if (color == 1)
1629 hw_x = coordinates_x[i].regamma_y_green;
1631 hw_x = coordinates_x[i].regamma_y_blue;
1633 hw_x = coordinates_x[i].x;
// Special handling when hw_x >= 1; the guarded statement is not visible.
1635 if (dc_fixpt_le(one, hw_x))
1638 norm_x = dc_fixpt_mul(norm_factor, hw_x);
1639 index = dc_fixpt_floor(norm_x);
// Out-of-range indices are handled by a statement that is not visible here.
1640 if (index < 0 || index > 255)
1643 index_f = dc_fixpt_from_int(index);
// The last entry has no upper neighbor, so it is paired with itself.
1644 index_next = (index == 255) ? index : index + 1;
1647 lut1 = rgb_user[index].r;
1648 lut2 = rgb_user[index_next].r;
1649 } else if (color == 1) {
1650 lut1 = rgb_user[index].g;
1651 lut2 = rgb_user[index_next].g;
1653 lut1 = rgb_user[index].b;
1654 lut2 = rgb_user[index_next].b;
1657 // we have everything now, so interpolate
1658 delta_lut = dc_fixpt_sub(lut2, lut1);
1659 delta_index = dc_fixpt_sub(norm_x, index_f);
1661 *tf_point = dc_fixpt_add(lut1,
1662 dc_fixpt_mul(delta_index, delta_lut));
/*
 * build_new_custom_resulted_curve - clamp the transfer function points.
 *
 * @hw_points_num: the loop runs while i != hw_points_num + 1.
 * @tf_pts:        red/green/blue[i] are clamped in place with
 *                 dc_fixpt_clamp(); the lower bound is dc_fixpt_zero.
 *
 * NOTE(review): the upper clamp bound, the initialization of i and its
 * increment are not visible in this view. Comments at the call sites in this
 * file describe the function as clamping to 0-1, so the upper bound is
 * presumably dc_fixpt_one - confirm.
 */
1668 static void build_new_custom_resulted_curve(
1669 uint32_t hw_points_num,
1670 struct dc_transfer_func_distributed_points *tf_pts)
1674 while (i != hw_points_num + 1) {
1675 tf_pts->red[i] = dc_fixpt_clamp(
1676 tf_pts->red[i], dc_fixpt_zero,
1678 tf_pts->green[i] = dc_fixpt_clamp(
1679 tf_pts->green[i], dc_fixpt_zero,
1681 tf_pts->blue[i] = dc_fixpt_clamp(
1682 tf_pts->blue[i], dc_fixpt_zero,
/*
 * apply_degamma_for_user_regamma - evaluate a curve at the HW X points.
 *
 * @rgb_regamma:   output array, written through the cursor rgb.
 * @hw_points_num: the loop runs while i != hw_points_num + 1.
 * @cal_buffer:    scratch buffer forwarded to
 *                 translate_from_linear_space_ex().
 *
 * Coefficients are produced by build_coefficients(&coeff, true) and each
 * point is computed from coord_x->x, where coord_x starts at the file-level
 * coordinates_x table.
 *
 * NOTE(review): only the assignment of rgb->r is visible in this view; the
 * g and b assignments and the cursor/index updates are presumably on lines
 * not shown - confirm.
 */
1689 static void apply_degamma_for_user_regamma(struct pwl_float_data_ex *rgb_regamma,
1690 uint32_t hw_points_num, struct calculate_buffer *cal_buffer)
1694 struct gamma_coefficients coeff;
1695 struct pwl_float_data_ex *rgb = rgb_regamma;
1696 const struct hw_x_point *coord_x = coordinates_x;
1698 build_coefficients(&coeff, true);
1701 while (i != hw_points_num + 1) {
1702 rgb->r = translate_from_linear_space_ex(
1703 coord_x->x, &coeff, 0, cal_buffer);
/*
 * map_regamma_hw_to_x_user - produce tf_pts from the regamma curve, with or
 * without the user ramp.
 *
 * @ramp:          user ramp; the mapping path is taken only when it is
 *                 non-NULL and mapUserRamp is set.
 * @coeff128:      coefficient buffer for the mapping path.
 * @rgb_user:      scaled user ramp for the mapping path.
 * @coords_x:      X points; on the mapping path they first receive the
 *                 regamma values via copy_rgb_regamma_to_coordinates_x().
 * @axis_x:        axis for the mapping path.
 * @rgb_regamma:   regamma curve; copied directly into tf_pts on the other
 *                 path, for i <= hw_points_num.
 * @hw_points_num: number of hardware points.
 * @tf_pts:        output points.
 *
 * The body reads mapUserRamp, whose declaration is not visible in this view
 * (presumably a trailing parameter - confirm). The result is finally passed
 * to build_new_custom_resulted_curve(); whether that call is conditional is
 * not visible here.
 *
 * NOTE(review): braces, the else keyword, return statements and the loop
 * updates are not visible in this view.
 */
1712 static bool map_regamma_hw_to_x_user(
1713 const struct dc_gamma *ramp,
1714 struct pixel_gamma_point *coeff128,
1715 struct pwl_float_data *rgb_user,
1716 struct hw_x_point *coords_x,
1717 const struct gamma_pixel *axis_x,
1718 const struct pwl_float_data_ex *rgb_regamma,
1719 uint32_t hw_points_num,
1720 struct dc_transfer_func_distributed_points *tf_pts,
1724 /* setup to spare calculated ideal regamma values */
1727 struct hw_x_point *coords = coords_x;
1728 const struct pwl_float_data_ex *regamma = rgb_regamma;
// Mapping path: combine the regamma curve with the user ramp.
1730 if (ramp && mapUserRamp) {
1731 copy_rgb_regamma_to_coordinates_x(coords,
1735 calculate_interpolated_hardware_curve(
1736 ramp, coeff128, rgb_user, coords, axis_x,
1737 hw_points_num, tf_pts);
1739 /* just copy current rgb_regamma into tf_pts */
1740 while (i <= hw_points_num) {
1741 tf_pts->red[i] = regamma->r;
1742 tf_pts->green[i] = regamma->g;
1743 tf_pts->blue[i] = regamma->b;
1751 /* this should be named differently, all it does is clamp to 0-1 */
1752 build_new_custom_resulted_curve(hw_points_num, tf_pts);
/*
 * Number of additional entries added to the element count of the point
 * buffers allocated in this file (the kcalloc/kvcalloc calls below use
 * "<count> + _EXTRA_POINTS").
 */
1758 #define _EXTRA_POINTS 3
/*
 * calculate_user_regamma_coeff - build a regamma curve from user coefficients.
 *
 * @output_tf:  receives the points in tf_pts; its type is set to
 *              TF_TYPE_DISTRIBUTED_POINTS.
 * @regamma:    integer coefficients; A0 is divided by 10000000 and A1, A2,
 *              A3 and gamma by 1000 when converted to fixed point.
 * @cal_buffer: scratch buffer forwarded to translate_from_linear_space_ex().
 * @ramp:       optional; when non-NULL and of type GAMMA_CS_TFM_1D it is
 *              applied on top of the curve via apply_lut_1d().
 *
 * Each point is evaluated from coord_x->x (coord_x starts at the file-level
 * coordinates_x table) with channel index 0, 1 and 2 for red, green and blue.
 * The result is finally clamped by build_new_custom_resulted_curve().
 *
 * NOTE(review): braces, loop heads, index updates and the return statement
 * are not visible in this view.
 */
1760 bool calculate_user_regamma_coeff(struct dc_transfer_func *output_tf,
1761 const struct regamma_lut *regamma,
1762 struct calculate_buffer *cal_buffer,
1763 const struct dc_gamma *ramp)
1765 struct gamma_coefficients coeff;
1766 const struct hw_x_point *coord_x = coordinates_x;
// Convert the integer coefficients of entry i to fixed point.
1770 coeff.a0[i] = dc_fixpt_from_fraction(
1771 regamma->coeff.A0[i], 10000000);
1772 coeff.a1[i] = dc_fixpt_from_fraction(
1773 regamma->coeff.A1[i], 1000);
1774 coeff.a2[i] = dc_fixpt_from_fraction(
1775 regamma->coeff.A2[i], 1000);
1776 coeff.a3[i] = dc_fixpt_from_fraction(
1777 regamma->coeff.A3[i], 1000);
1778 coeff.user_gamma[i] = dc_fixpt_from_fraction(
1779 regamma->coeff.gamma[i], 1000);
1785 /* fixed_pt library has problems handling too small values */
// Leading output points are forced to zero; the loop bounds are not visible.
1787 output_tf->tf_pts.red[i] = dc_fixpt_zero;
1788 output_tf->tf_pts.green[i] = dc_fixpt_zero;
1789 output_tf->tf_pts.blue[i] = dc_fixpt_zero;
// Evaluate the curve per channel for the remaining points.
1793 while (i != MAX_HW_POINTS + 1) {
1794 output_tf->tf_pts.red[i] = translate_from_linear_space_ex(
1795 coord_x->x, &coeff, 0, cal_buffer);
1796 output_tf->tf_pts.green[i] = translate_from_linear_space_ex(
1797 coord_x->x, &coeff, 1, cal_buffer);
1798 output_tf->tf_pts.blue[i] = translate_from_linear_space_ex(
1799 coord_x->x, &coeff, 2, cal_buffer);
1804 if (ramp && ramp->type == GAMMA_CS_TFM_1D)
1805 apply_lut_1d(ramp, MAX_HW_POINTS, &output_tf->tf_pts);
1807 // this function just clamps output to 0-1
1808 build_new_custom_resulted_curve(MAX_HW_POINTS, &output_tf->tf_pts);
1809 output_tf->type = TF_TYPE_DISTRIBUTED_POINTS;
/*
 * calculate_user_regamma_ramp - build a regamma curve from a user ramp.
 *
 * @output_tf:  receives the points in tf_pts; its type is set to
 *              TF_TYPE_DISTRIBUTED_POINTS.
 * @regamma:    user regamma description; checked against NULL first. Its
 *              ramp is scaled into rgb_user and flags.bits.applyDegamma
 *              selects whether a degamma step is applied beforehand.
 * @cal_buffer: scratch buffer forwarded to apply_degamma_for_user_regamma().
 * @ramp:       optional; when non-NULL and of type GAMMA_CS_TFM_1D it is
 *              applied on top of the curve via apply_lut_1d().
 *
 * Two temporary buffers are allocated with kcalloc(): rgb_user with
 * GAMMA_RGB_256_ENTRIES + _EXTRA_POINTS entries and rgb_regamma with
 * MAX_HW_POINTS + _EXTRA_POINTS entries. Allocation failures jump to the
 * labels at the end of the function. The interpolated result is written to
 * tf_pts, the end point fields are set for a non-HDR curve, and the points
 * are finally clamped by build_new_custom_resulted_curve().
 *
 * NOTE(review): braces, allocation checks, the frees and the return
 * statements are not visible in this view.
 */
1814 bool calculate_user_regamma_ramp(struct dc_transfer_func *output_tf,
1815 const struct regamma_lut *regamma,
1816 struct calculate_buffer *cal_buffer,
1817 const struct dc_gamma *ramp)
1819 struct dc_transfer_func_distributed_points *tf_pts = &output_tf->tf_pts;
1820 struct dividers dividers;
1822 struct pwl_float_data *rgb_user = NULL;
1823 struct pwl_float_data_ex *rgb_regamma = NULL;
1826 if (regamma == NULL)
1829 output_tf->type = TF_TYPE_DISTRIBUTED_POINTS;
1831 rgb_user = kcalloc(GAMMA_RGB_256_ENTRIES + _EXTRA_POINTS,
1835 goto rgb_user_alloc_fail;
1837 rgb_regamma = kcalloc(MAX_HW_POINTS + _EXTRA_POINTS,
1838 sizeof(*rgb_regamma),
1841 goto rgb_regamma_alloc_fail;
// Dividers 3/2, 2 and 5/2 handed to the ramp scaling helper.
1843 dividers.divider1 = dc_fixpt_from_fraction(3, 2);
1844 dividers.divider2 = dc_fixpt_from_int(2);
1845 dividers.divider3 = dc_fixpt_from_fraction(5, 2);
// Pass the address of the ramp embedded in regamma (the argument had been
// corrupted into a non-ASCII character sequence).
1847 scale_user_regamma_ramp(rgb_user, &regamma->ramp, dividers);
// Optional degamma step: its result is stored in the regamma_y fields of
// coordinates_x, which interpolate_user_regamma() reads.
1849 if (regamma->flags.bits.applyDegamma == 1) {
1850 apply_degamma_for_user_regamma(rgb_regamma, MAX_HW_POINTS, cal_buffer);
1851 copy_rgb_regamma_to_coordinates_x(coordinates_x,
1852 MAX_HW_POINTS, rgb_regamma);
1855 interpolate_user_regamma(MAX_HW_POINTS, rgb_user,
1856 regamma->flags.bits.applyDegamma, tf_pts);
1858 // no custom HDR curves!
1859 tf_pts->end_exponent = 0;
1860 tf_pts->x_point_at_y1_red = 1;
1861 tf_pts->x_point_at_y1_green = 1;
1862 tf_pts->x_point_at_y1_blue = 1;
1864 if (ramp && ramp->type == GAMMA_CS_TFM_1D)
1865 apply_lut_1d(ramp, MAX_HW_POINTS, &output_tf->tf_pts);
1867 // this function just clamps output to 0-1
1868 build_new_custom_resulted_curve(MAX_HW_POINTS, tf_pts);
1873 rgb_regamma_alloc_fail:
1875 rgb_user_alloc_fail:
/*
 * mod_color_calculate_degamma_params - compute the degamma points of input_tf.
 *
 * @dc_caps:     optional capabilities; when non-NULL and dpp.dcn_arch == 1,
 *               dpp.dgam_rom_caps (pq, gamma2_2, hlg) is checked for
 *               predefined PQ, GAMMA22 and HLG transfer functions.
 * @input_tf:    transfer function; tf_pts receives the points and type is
 *               set to TF_TYPE_DISTRIBUTED_POINTS once the early checks are
 *               passed.
 * @ramp:        optional user ramp. A GAMMA_RGB_256 ramp is scaled and
 *               mapped when mapUserRamp is set; a GAMMA_CUSTOM ramp is
 *               applied at the end via apply_lut_1d().
 * @mapUserRamp: enables mapping of a GAMMA_RGB_256 user ramp.
 *
 * Visible flow: early checks for TF_TYPE_BYPASS and TF_TYPE_PREDEFINED
 * (their outcomes are not visible here, presumably early returns - confirm);
 * optional allocation and preparation of rgb_user and axis_x; allocation of
 * curve and coeff; selection of the degamma builder from the local tf; then
 * either a direct copy of curve into tf_pts (PQ) or
 * map_regamma_hw_to_x_user(). Allocation failures and an unsupported tf jump
 * to cleanup labels.
 *
 * NOTE(review): braces, allocation checks, frees, the assignment of the
 * local tf, several call arguments and the return statements are not
 * visible in this view.
 */
1879 bool mod_color_calculate_degamma_params(struct dc_color_caps *dc_caps,
1880 struct dc_transfer_func *input_tf,
1881 const struct dc_gamma *ramp, bool mapUserRamp)
1883 struct dc_transfer_func_distributed_points *tf_pts = &input_tf->tf_pts;
1884 struct dividers dividers;
1885 struct pwl_float_data *rgb_user = NULL;
1886 struct pwl_float_data_ex *curve = NULL;
1887 struct gamma_pixel *axis_x = NULL;
1888 struct pixel_gamma_point *coeff = NULL;
1889 enum dc_transfer_func_predefined tf = TRANSFER_FUNCTION_SRGB;
1893 if (input_tf->type == TF_TYPE_BYPASS)
1896 /* we can use hardcoded curve for plain SRGB TF
1897 * If linear, it's bypass if on user ramp
1899 if (input_tf->type == TF_TYPE_PREDEFINED) {
1900 if ((input_tf->tf == TRANSFER_FUNCTION_SRGB ||
1901 input_tf->tf == TRANSFER_FUNCTION_LINEAR) &&
1905 if (dc_caps != NULL &&
1906 dc_caps->dpp.dcn_arch == 1) {
1908 if (input_tf->tf == TRANSFER_FUNCTION_PQ &&
1909 dc_caps->dpp.dgam_rom_caps.pq == 1)
1912 if (input_tf->tf == TRANSFER_FUNCTION_GAMMA22 &&
1913 dc_caps->dpp.dgam_rom_caps.gamma2_2 == 1)
1916 // HLG OOTF not accounted for
1917 if (input_tf->tf == TRANSFER_FUNCTION_HLG &&
1918 dc_caps->dpp.dgam_rom_caps.hlg == 1)
1923 input_tf->type = TF_TYPE_DISTRIBUTED_POINTS;
// User ramp preparation: only for a GAMMA_RGB_256 ramp with mapping enabled.
1925 if (mapUserRamp && ramp && ramp->type == GAMMA_RGB_256) {
1926 rgb_user = kvcalloc(ramp->num_entries + _EXTRA_POINTS,
1930 goto rgb_user_alloc_fail;
1932 axis_x = kvcalloc(ramp->num_entries + _EXTRA_POINTS, sizeof(*axis_x),
1935 goto axis_x_alloc_fail;
1937 dividers.divider1 = dc_fixpt_from_fraction(3, 2);
1938 dividers.divider2 = dc_fixpt_from_int(2);
1939 dividers.divider3 = dc_fixpt_from_fraction(5, 2);
1941 build_evenly_distributed_points(
1946 scale_gamma(rgb_user, ramp, dividers);
1949 curve = kvcalloc(MAX_HW_POINTS + _EXTRA_POINTS, sizeof(*curve),
1952 goto curve_alloc_fail;
1954 coeff = kvcalloc(MAX_HW_POINTS + _EXTRA_POINTS, sizeof(*coeff),
1957 goto coeff_alloc_fail;
// Select the degamma builder that fills curve, based on the local tf.
1961 if (tf == TRANSFER_FUNCTION_PQ)
1965 else if (tf == TRANSFER_FUNCTION_SRGB ||
1966 tf == TRANSFER_FUNCTION_BT709 ||
1967 tf == TRANSFER_FUNCTION_GAMMA22 ||
1968 tf == TRANSFER_FUNCTION_GAMMA24 ||
1969 tf == TRANSFER_FUNCTION_GAMMA26)
1970 build_degamma(curve,
1974 else if (tf == TRANSFER_FUNCTION_HLG)
1975 build_hlg_degamma(curve,
1979 else if (tf == TRANSFER_FUNCTION_LINEAR) {
1980 // just copy coordinates_x into curve
1982 while (i != MAX_HW_POINTS + 1) {
1983 curve[i].r = coordinates_x[i].x;
1984 curve[i].g = curve[i].r;
1985 curve[i].b = curve[i].r;
1989 goto invalid_tf_fail;
1991 tf_pts->end_exponent = 0;
1992 tf_pts->x_point_at_y1_red = 1;
1993 tf_pts->x_point_at_y1_green = 1;
1994 tf_pts->x_point_at_y1_blue = 1;
1996 if (input_tf->tf == TRANSFER_FUNCTION_PQ) {
1997 /* just copy current rgb_regamma into tf_pts */
1998 struct pwl_float_data_ex *curvePt = curve;
2001 while (i <= MAX_HW_POINTS) {
2002 tf_pts->red[i] = curvePt->r;
2003 tf_pts->green[i] = curvePt->g;
2004 tf_pts->blue[i] = curvePt->b;
2010 map_regamma_hw_to_x_user(ramp, coeff, rgb_user,
2011 coordinates_x, axis_x, curve,
2012 MAX_HW_POINTS, tf_pts,
2013 mapUserRamp && ramp && ramp->type == GAMMA_RGB_256,
2019 if (ramp && ramp->type == GAMMA_CUSTOM)
2020 apply_lut_1d(ramp, MAX_HW_POINTS, tf_pts);
2032 rgb_user_alloc_fail:
/*
 * calculate_curve - fill rgb_regamma for a predefined transfer function.
 *
 * @trans:               transfer function selecting the branch below.
 * @points:              end_exponent and x_point_at_y1_red/green/blue are
 *                       set in the UNITY/LINEAR, PQ, HLG and default
 *                       branches.
 * @rgb_regamma:         output curve.
 * @fs_params:           optional; GAMMA22 with a non-NULL fs_params whose
 *                       skip_tm is 0 takes the build_freesync_hdr() branch.
 * @sdr_ref_white_level: passed to build_pq().
 * @cal_buffer:          not referenced on any line visible here; presumably
 *                       forwarded to the builders - confirm.
 *
 * End point values set per branch:
 *   UNITY / LINEAR:   end_exponent 0, x_point_at_y1_* 1, curve copied from
 *                     coordinates_x[i].x for i <= MAX_HW_POINTS
 *   PQ:               end_exponent 7, x_point_at_y1_* 125
 *   HLG:              end_exponent 4, x_point_at_y1_* 12
 *   remaining cases:  end_exponent 0, x_point_at_y1_* 1, build_regamma()
 *
 * NOTE(review): braces, most builder arguments, the return value handling
 * and the final else are not visible in this view.
 */
2037 static bool calculate_curve(enum dc_transfer_func_predefined trans,
2038 struct dc_transfer_func_distributed_points *points,
2039 struct pwl_float_data_ex *rgb_regamma,
2040 const struct hdr_tm_params *fs_params,
2041 uint32_t sdr_ref_white_level,
2042 struct calculate_buffer *cal_buffer)
// Identity curve: output equals the hardware X coordinate.
2047 if (trans == TRANSFER_FUNCTION_UNITY ||
2048 trans == TRANSFER_FUNCTION_LINEAR) {
2049 points->end_exponent = 0;
2050 points->x_point_at_y1_red = 1;
2051 points->x_point_at_y1_green = 1;
2052 points->x_point_at_y1_blue = 1;
2054 for (i = 0; i <= MAX_HW_POINTS ; i++) {
2055 rgb_regamma[i].r = coordinates_x[i].x;
2056 rgb_regamma[i].g = coordinates_x[i].x;
2057 rgb_regamma[i].b = coordinates_x[i].x;
2061 } else if (trans == TRANSFER_FUNCTION_PQ) {
2062 points->end_exponent = 7;
2063 points->x_point_at_y1_red = 125;
2064 points->x_point_at_y1_green = 125;
2065 points->x_point_at_y1_blue = 125;
2067 build_pq(rgb_regamma,
2070 sdr_ref_white_level);
// GAMMA22 takes this branch only when tone mapping parameters are supplied
// and skip_tm is 0; otherwise it falls through to the default branch.
2073 } else if (trans == TRANSFER_FUNCTION_GAMMA22 &&
2074 fs_params != NULL && fs_params->skip_tm == 0) {
2075 build_freesync_hdr(rgb_regamma,
2082 } else if (trans == TRANSFER_FUNCTION_HLG) {
2083 points->end_exponent = 4;
2084 points->x_point_at_y1_red = 12;
2085 points->x_point_at_y1_green = 12;
2086 points->x_point_at_y1_blue = 12;
2088 build_hlg_regamma(rgb_regamma,
2095 // trans == TRANSFER_FUNCTION_SRGB
2096 // trans == TRANSFER_FUNCTION_BT709
2097 // trans == TRANSFER_FUNCTION_GAMMA22
2098 // trans == TRANSFER_FUNCTION_GAMMA24
2099 // trans == TRANSFER_FUNCTION_GAMMA26
2100 points->end_exponent = 0;
2101 points->x_point_at_y1_red = 1;
2102 points->x_point_at_y1_green = 1;
2103 points->x_point_at_y1_blue = 1;
2105 build_regamma(rgb_regamma,
/*
 * mod_color_calculate_regamma_params - compute the regamma points of output_tf.
 *
 * @output_tf:    transfer function; tf_pts receives the points and type is
 *                set to TF_TYPE_DISTRIBUTED_POINTS once the early checks are
 *                passed. sdr_ref_white_level is forwarded to
 *                calculate_curve().
 * @ramp:         optional user ramp. GAMMA_CS_TFM_1D ramps are applied at the
 *                end via apply_lut_1d(); other ramps are scaled into rgb_user
 *                (scale_gamma() for GAMMA_RGB_256 with mapUserRamp,
 *                scale_gamma_dx() for GAMMA_RGB_FLOAT_1024).
 * @mapUserRamp:  enables mapping of a GAMMA_RGB_256 user ramp.
 * @canRomBeUsed: together with a predefined SRGB transfer function, enables
 *                the early check at the top of the function.
 * @fs_params:    optional; with GAMMA22 and skip_tm == 0 clamping is
 *                disabled (doClamping is false).
 * @cal_buffer:   not referenced on any line visible here; presumably
 *                forwarded to calculate_curve() - confirm.
 *
 * Temporary buffers (rgb_user, axis_x, rgb_regamma, coeff) are allocated
 * with kvcalloc(); allocation failures jump to the cleanup labels at the
 * end. Every buffer that holds extra trailing entries is sized with
 * _EXTRA_POINTS.
 *
 * NOTE(review): braces, allocation checks, most frees, several call
 * arguments and the return statements are not visible in this view.
 */
2117 bool mod_color_calculate_regamma_params(struct dc_transfer_func *output_tf,
2118 const struct dc_gamma *ramp, bool mapUserRamp, bool canRomBeUsed,
2119 const struct hdr_tm_params *fs_params,
2120 struct calculate_buffer *cal_buffer)
2122 struct dc_transfer_func_distributed_points *tf_pts = &output_tf->tf_pts;
2123 struct dividers dividers;
2125 struct pwl_float_data *rgb_user = NULL;
2126 struct pwl_float_data_ex *rgb_regamma = NULL;
2127 struct gamma_pixel *axis_x = NULL;
2128 struct pixel_gamma_point *coeff = NULL;
2129 enum dc_transfer_func_predefined tf = TRANSFER_FUNCTION_SRGB;
2130 bool doClamping = true;
2133 if (output_tf->type == TF_TYPE_BYPASS)
2136 /* we can use hardcoded curve for plain SRGB TF */
2137 if (output_tf->type == TF_TYPE_PREDEFINED && canRomBeUsed == true &&
2138 output_tf->tf == TRANSFER_FUNCTION_SRGB) {
2141 if ((ramp->is_identity && ramp->type != GAMMA_CS_TFM_1D) ||
2142 (!mapUserRamp && ramp->type == GAMMA_RGB_256))
2146 output_tf->type = TF_TYPE_DISTRIBUTED_POINTS;
// User ramp preparation: skipped for GAMMA_CS_TFM_1D ramps and for
// GAMMA_RGB_256 ramps when mapping is disabled.
2148 if (ramp && ramp->type != GAMMA_CS_TFM_1D &&
2149 (mapUserRamp || ramp->type != GAMMA_RGB_256)) {
2150 rgb_user = kvcalloc(ramp->num_entries + _EXTRA_POINTS,
2154 goto rgb_user_alloc_fail;
// Same number of extra entries as rgb_user; uses the named constant instead
// of a literal 3.
2156 axis_x = kvcalloc(ramp->num_entries + _EXTRA_POINTS, sizeof(*axis_x),
2159 goto axis_x_alloc_fail;
2161 dividers.divider1 = dc_fixpt_from_fraction(3, 2);
2162 dividers.divider2 = dc_fixpt_from_int(2);
2163 dividers.divider3 = dc_fixpt_from_fraction(5, 2);
2165 build_evenly_distributed_points(
2170 if (ramp->type == GAMMA_RGB_256 && mapUserRamp)
2171 scale_gamma(rgb_user, ramp, dividers);
2172 else if (ramp->type == GAMMA_RGB_FLOAT_1024)
2173 scale_gamma_dx(rgb_user, ramp, dividers);
2176 rgb_regamma = kvcalloc(MAX_HW_POINTS + _EXTRA_POINTS,
2177 sizeof(*rgb_regamma),
2180 goto rgb_regamma_alloc_fail;
2182 coeff = kvcalloc(MAX_HW_POINTS + _EXTRA_POINTS, sizeof(*coeff),
2185 goto coeff_alloc_fail;
2189 ret = calculate_curve(tf,
2193 output_tf->sdr_ref_white_level,
// Clamping is disabled only for GAMMA22 with tone mapping parameters whose
// skip_tm is 0.
2197 doClamping = !(output_tf->tf == TRANSFER_FUNCTION_GAMMA22 &&
2198 fs_params != NULL && fs_params->skip_tm == 0);
2200 map_regamma_hw_to_x_user(ramp, coeff, rgb_user,
2201 coordinates_x, axis_x, rgb_regamma,
2202 MAX_HW_POINTS, tf_pts,
2203 (mapUserRamp || (ramp && ramp->type != GAMMA_RGB_256)) &&
2204 (ramp && ramp->type != GAMMA_CS_TFM_1D),
2207 if (ramp && ramp->type == GAMMA_CS_TFM_1D)
2208 apply_lut_1d(ramp, MAX_HW_POINTS, tf_pts);
2213 kvfree(rgb_regamma);
2214 rgb_regamma_alloc_fail:
2218 rgb_user_alloc_fail:
/*
 * mod_color_calculate_degamma_curve - fill points with a predefined degamma
 * curve.
 *
 * @trans:  transfer function selecting the branch below.
 * @points: red/green/blue[i] are written for i <= MAX_HW_POINTS in every
 *          visible branch; end_exponent is set to 0 and
 *          x_point_at_y1_red/green/blue to 1 near the end.
 *
 * Branches:
 *   UNITY / LINEAR:                 points copied from coordinates_x[i].x
 *   PQ:                             build_de_pq() into a temporary buffer
 *   SRGB / BT709 / GAMMA22/24/26:   build_degamma() into a temporary buffer
 *   HLG:                            build_hlg_degamma() into a temporary
 *                                   buffer
 * The temporary buffer rgb_degamma holds MAX_HW_POINTS + _EXTRA_POINTS
 * entries, is allocated with kvcalloc() and released with kvfree() after its
 * contents are copied to points. Allocation failures jump to
 * rgb_degamma_alloc_fail.
 *
 * NOTE(review): braces, allocation checks, builder arguments and everything
 * after the final label are not visible in this view.
 */
2222 bool mod_color_calculate_degamma_curve(enum dc_transfer_func_predefined trans,
2223 struct dc_transfer_func_distributed_points *points)
2227 struct pwl_float_data_ex *rgb_degamma = NULL;
2229 if (trans == TRANSFER_FUNCTION_UNITY ||
2230 trans == TRANSFER_FUNCTION_LINEAR) {
2232 for (i = 0; i <= MAX_HW_POINTS ; i++) {
2233 points->red[i] = coordinates_x[i].x;
2234 points->green[i] = coordinates_x[i].x;
2235 points->blue[i] = coordinates_x[i].x;
2238 } else if (trans == TRANSFER_FUNCTION_PQ) {
2239 rgb_degamma = kvcalloc(MAX_HW_POINTS + _EXTRA_POINTS,
2240 sizeof(*rgb_degamma),
2243 goto rgb_degamma_alloc_fail;
2246 build_de_pq(rgb_degamma,
// Copy the computed curve out of the temporary buffer.
2249 for (i = 0; i <= MAX_HW_POINTS ; i++) {
2250 points->red[i] = rgb_degamma[i].r;
2251 points->green[i] = rgb_degamma[i].g;
2252 points->blue[i] = rgb_degamma[i].b;
2256 kvfree(rgb_degamma);
2257 } else if (trans == TRANSFER_FUNCTION_SRGB ||
2258 trans == TRANSFER_FUNCTION_BT709 ||
2259 trans == TRANSFER_FUNCTION_GAMMA22 ||
2260 trans == TRANSFER_FUNCTION_GAMMA24 ||
2261 trans == TRANSFER_FUNCTION_GAMMA26) {
2262 rgb_degamma = kvcalloc(MAX_HW_POINTS + _EXTRA_POINTS,
2263 sizeof(*rgb_degamma),
2266 goto rgb_degamma_alloc_fail;
2268 build_degamma(rgb_degamma,
2272 for (i = 0; i <= MAX_HW_POINTS ; i++) {
2273 points->red[i] = rgb_degamma[i].r;
2274 points->green[i] = rgb_degamma[i].g;
2275 points->blue[i] = rgb_degamma[i].b;
2279 kvfree(rgb_degamma);
2280 } else if (trans == TRANSFER_FUNCTION_HLG) {
2281 rgb_degamma = kvcalloc(MAX_HW_POINTS + _EXTRA_POINTS,
2282 sizeof(*rgb_degamma),
2285 goto rgb_degamma_alloc_fail;
2287 build_hlg_degamma(rgb_degamma,
2291 for (i = 0; i <= MAX_HW_POINTS ; i++) {
2292 points->red[i] = rgb_degamma[i].r;
2293 points->green[i] = rgb_degamma[i].g;
2294 points->blue[i] = rgb_degamma[i].b;
2297 kvfree(rgb_degamma);
2299 points->end_exponent = 0;
2300 points->x_point_at_y1_red = 1;
2301 points->x_point_at_y1_green = 1;
2302 points->x_point_at_y1_blue = 1;
2304 rgb_degamma_alloc_fail: