// SPDX-License-Identifier: LGPL-2.1+
/*
 * Copyright 2016 Tom aan de Wiel
 * Copyright 2018 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
 *
 * 8x8 Fast Walsh Hadamard Transform in sequency order based on the paper:
 *
 * A Recursive Algorithm for Sequency-Ordered Fast Walsh Transforms
 */
12 #include <linux/string.h>
13 #include "codec-fwht.h"
/*
 * Note: bit 0 of the header must always be 0. Otherwise it cannot
 * be guaranteed that the magic 8 byte sequence (see below) can
 * never occur in the rlc output.
 */
20 #define PFRAME_BIT BIT(15)
21 #define DUPS_MASK 0x1ffe
28 static const uint8_t zigzag
[64] = {
34 5, 12, 19, 26, 33, 40,
35 6, 13, 20, 27, 34, 41, 48,
36 7, 14, 21, 28, 35, 42, 49, 56,
37 15, 22, 29, 36, 43, 50, 57,
38 23, 30, 37, 44, 51, 58,
47 static int rlc(const s16
*in
, __be16
*output
, int blocktype
)
55 /* read in block from framebuffer */
59 for (y
= 0; y
< 8; y
++) {
60 for (x
= 0; x
< 8; x
++) {
66 /* keep track of amount of trailing zeros */
67 for (i
= 63; i
>= 0 && !block
[zigzag
[i
]]; i
--)
70 *output
++ = (blocktype
== PBLOCK
? htons(PFRAME_BIT
) : 0);
73 to_encode
= 8 * 8 - (lastzero_run
> 14 ? lastzero_run
: 0);
76 while (i
< to_encode
) {
80 /* count leading zeros */
81 while ((tmp
= block
[zigzag
[i
]]) == 0 && cnt
< 14) {
89 /* 4 bits for run, 12 for coefficient (quantization by 4) */
90 *output
++ = htons((cnt
| tmp
<< 4));
94 if (lastzero_run
> 14) {
95 *output
= htons(ALL_ZEROS
| 0);
103 * This function will worst-case increase rlc_in by 65*2 bytes:
104 * one s16 value for the header and 8 * 8 coefficients of type s16.
106 static s16
derlc(const __be16
**rlc_in
, s16
*dwht_out
)
109 const __be16
*input
= *rlc_in
;
110 s16 ret
= ntohs(*input
++);
112 s16 block
[8 * 8 + 16];
117 * Now de-compress, it expands one byte to up to 15 bytes
118 * (or fills the remainder of the 64 bytes with zeroes if it
119 * is the last byte to expand).
121 * So block has to be 8 * 8 + 16 bytes, the '+ 16' is to
122 * allow for overflow if the incoming data was malformed.
124 while (dec_count
< 8 * 8) {
125 s16 in
= ntohs(*input
++);
126 int length
= in
& 0xf;
129 /* fill remainder with zeros */
131 for (i
= 0; i
< 64 - dec_count
; i
++)
136 for (i
= 0; i
< length
; i
++)
139 dec_count
+= length
+ 1;
144 for (i
= 0; i
< 64; i
++) {
149 dwht_out
[x
+ y
* 8] = *wp
++;
155 static const int quant_table
[] = {
156 2, 2, 2, 2, 2, 2, 2, 2,
157 2, 2, 2, 2, 2, 2, 2, 2,
158 2, 2, 2, 2, 2, 2, 2, 3,
159 2, 2, 2, 2, 2, 2, 3, 6,
160 2, 2, 2, 2, 2, 3, 6, 6,
161 2, 2, 2, 2, 3, 6, 6, 6,
162 2, 2, 2, 3, 6, 6, 6, 6,
163 2, 2, 3, 6, 6, 6, 6, 8,
166 static const int quant_table_p
[] = {
167 3, 3, 3, 3, 3, 3, 3, 3,
168 3, 3, 3, 3, 3, 3, 3, 3,
169 3, 3, 3, 3, 3, 3, 3, 3,
170 3, 3, 3, 3, 3, 3, 3, 6,
171 3, 3, 3, 3, 3, 3, 6, 6,
172 3, 3, 3, 3, 3, 6, 6, 9,
173 3, 3, 3, 3, 6, 6, 9, 9,
174 3, 3, 3, 6, 6, 9, 9, 10,
177 static void quantize_intra(s16
*coeff
, s16
*de_coeff
, u16 qp
)
179 const int *quant
= quant_table
;
182 for (j
= 0; j
< 8; j
++) {
183 for (i
= 0; i
< 8; i
++, quant
++, coeff
++, de_coeff
++) {
185 if (*coeff
>= -qp
&& *coeff
<= qp
)
186 *coeff
= *de_coeff
= 0;
188 *de_coeff
= *coeff
<< *quant
;
193 static void dequantize_intra(s16
*coeff
)
195 const int *quant
= quant_table
;
198 for (j
= 0; j
< 8; j
++)
199 for (i
= 0; i
< 8; i
++, quant
++, coeff
++)
203 static void quantize_inter(s16
*coeff
, s16
*de_coeff
, u16 qp
)
205 const int *quant
= quant_table_p
;
208 for (j
= 0; j
< 8; j
++) {
209 for (i
= 0; i
< 8; i
++, quant
++, coeff
++, de_coeff
++) {
211 if (*coeff
>= -qp
&& *coeff
<= qp
)
212 *coeff
= *de_coeff
= 0;
214 *de_coeff
= *coeff
<< *quant
;
219 static void dequantize_inter(s16
*coeff
)
221 const int *quant
= quant_table_p
;
224 for (j
= 0; j
< 8; j
++)
225 for (i
= 0; i
< 8; i
++, quant
++, coeff
++)
229 static void fwht(const u8
*block
, s16
*output_block
, unsigned int stride
,
230 unsigned int input_step
, bool intra
)
232 /* we'll need more than 8 bits for the transformed coefficients */
233 s32 workspace1
[8], workspace2
[8];
234 const u8
*tmp
= block
;
235 s16
*out
= output_block
;
236 int add
= intra
? 256 : 0;
240 stride
*= input_step
;
242 for (i
= 0; i
< 8; i
++, tmp
+= stride
, out
+= 8) {
243 switch (input_step
) {
245 workspace1
[0] = tmp
[0] + tmp
[1] - add
;
246 workspace1
[1] = tmp
[0] - tmp
[1];
248 workspace1
[2] = tmp
[2] + tmp
[3] - add
;
249 workspace1
[3] = tmp
[2] - tmp
[3];
251 workspace1
[4] = tmp
[4] + tmp
[5] - add
;
252 workspace1
[5] = tmp
[4] - tmp
[5];
254 workspace1
[6] = tmp
[6] + tmp
[7] - add
;
255 workspace1
[7] = tmp
[6] - tmp
[7];
258 workspace1
[0] = tmp
[0] + tmp
[2] - add
;
259 workspace1
[1] = tmp
[0] - tmp
[2];
261 workspace1
[2] = tmp
[4] + tmp
[6] - add
;
262 workspace1
[3] = tmp
[4] - tmp
[6];
264 workspace1
[4] = tmp
[8] + tmp
[10] - add
;
265 workspace1
[5] = tmp
[8] - tmp
[10];
267 workspace1
[6] = tmp
[12] + tmp
[14] - add
;
268 workspace1
[7] = tmp
[12] - tmp
[14];
271 workspace1
[0] = tmp
[0] + tmp
[3] - add
;
272 workspace1
[1] = tmp
[0] - tmp
[3];
274 workspace1
[2] = tmp
[6] + tmp
[9] - add
;
275 workspace1
[3] = tmp
[6] - tmp
[9];
277 workspace1
[4] = tmp
[12] + tmp
[15] - add
;
278 workspace1
[5] = tmp
[12] - tmp
[15];
280 workspace1
[6] = tmp
[18] + tmp
[21] - add
;
281 workspace1
[7] = tmp
[18] - tmp
[21];
284 workspace1
[0] = tmp
[0] + tmp
[4] - add
;
285 workspace1
[1] = tmp
[0] - tmp
[4];
287 workspace1
[2] = tmp
[8] + tmp
[12] - add
;
288 workspace1
[3] = tmp
[8] - tmp
[12];
290 workspace1
[4] = tmp
[16] + tmp
[20] - add
;
291 workspace1
[5] = tmp
[16] - tmp
[20];
293 workspace1
[6] = tmp
[24] + tmp
[28] - add
;
294 workspace1
[7] = tmp
[24] - tmp
[28];
299 workspace2
[0] = workspace1
[0] + workspace1
[2];
300 workspace2
[1] = workspace1
[0] - workspace1
[2];
301 workspace2
[2] = workspace1
[1] - workspace1
[3];
302 workspace2
[3] = workspace1
[1] + workspace1
[3];
304 workspace2
[4] = workspace1
[4] + workspace1
[6];
305 workspace2
[5] = workspace1
[4] - workspace1
[6];
306 workspace2
[6] = workspace1
[5] - workspace1
[7];
307 workspace2
[7] = workspace1
[5] + workspace1
[7];
310 out
[0] = workspace2
[0] + workspace2
[4];
311 out
[1] = workspace2
[0] - workspace2
[4];
312 out
[2] = workspace2
[1] - workspace2
[5];
313 out
[3] = workspace2
[1] + workspace2
[5];
314 out
[4] = workspace2
[2] + workspace2
[6];
315 out
[5] = workspace2
[2] - workspace2
[6];
316 out
[6] = workspace2
[3] - workspace2
[7];
317 out
[7] = workspace2
[3] + workspace2
[7];
322 for (i
= 0; i
< 8; i
++, out
++) {
324 workspace1
[0] = out
[0] + out
[1 * 8];
325 workspace1
[1] = out
[0] - out
[1 * 8];
327 workspace1
[2] = out
[2 * 8] + out
[3 * 8];
328 workspace1
[3] = out
[2 * 8] - out
[3 * 8];
330 workspace1
[4] = out
[4 * 8] + out
[5 * 8];
331 workspace1
[5] = out
[4 * 8] - out
[5 * 8];
333 workspace1
[6] = out
[6 * 8] + out
[7 * 8];
334 workspace1
[7] = out
[6 * 8] - out
[7 * 8];
337 workspace2
[0] = workspace1
[0] + workspace1
[2];
338 workspace2
[1] = workspace1
[0] - workspace1
[2];
339 workspace2
[2] = workspace1
[1] - workspace1
[3];
340 workspace2
[3] = workspace1
[1] + workspace1
[3];
342 workspace2
[4] = workspace1
[4] + workspace1
[6];
343 workspace2
[5] = workspace1
[4] - workspace1
[6];
344 workspace2
[6] = workspace1
[5] - workspace1
[7];
345 workspace2
[7] = workspace1
[5] + workspace1
[7];
347 out
[0 * 8] = workspace2
[0] + workspace2
[4];
348 out
[1 * 8] = workspace2
[0] - workspace2
[4];
349 out
[2 * 8] = workspace2
[1] - workspace2
[5];
350 out
[3 * 8] = workspace2
[1] + workspace2
[5];
351 out
[4 * 8] = workspace2
[2] + workspace2
[6];
352 out
[5 * 8] = workspace2
[2] - workspace2
[6];
353 out
[6 * 8] = workspace2
[3] - workspace2
[7];
354 out
[7 * 8] = workspace2
[3] + workspace2
[7];
359 * Not the nicest way of doing it, but P-blocks get twice the range of
360 * that of the I-blocks. Therefore we need a type bigger than 8 bits.
361 * Furthermore values can be negative... This is just a version that
362 * works with 16 signed data
364 static void fwht16(const s16
*block
, s16
*output_block
, int stride
, int intra
)
366 /* we'll need more than 8 bits for the transformed coefficients */
367 s32 workspace1
[8], workspace2
[8];
368 const s16
*tmp
= block
;
369 s16
*out
= output_block
;
372 for (i
= 0; i
< 8; i
++, tmp
+= stride
, out
+= 8) {
374 workspace1
[0] = tmp
[0] + tmp
[1];
375 workspace1
[1] = tmp
[0] - tmp
[1];
377 workspace1
[2] = tmp
[2] + tmp
[3];
378 workspace1
[3] = tmp
[2] - tmp
[3];
380 workspace1
[4] = tmp
[4] + tmp
[5];
381 workspace1
[5] = tmp
[4] - tmp
[5];
383 workspace1
[6] = tmp
[6] + tmp
[7];
384 workspace1
[7] = tmp
[6] - tmp
[7];
387 workspace2
[0] = workspace1
[0] + workspace1
[2];
388 workspace2
[1] = workspace1
[0] - workspace1
[2];
389 workspace2
[2] = workspace1
[1] - workspace1
[3];
390 workspace2
[3] = workspace1
[1] + workspace1
[3];
392 workspace2
[4] = workspace1
[4] + workspace1
[6];
393 workspace2
[5] = workspace1
[4] - workspace1
[6];
394 workspace2
[6] = workspace1
[5] - workspace1
[7];
395 workspace2
[7] = workspace1
[5] + workspace1
[7];
398 out
[0] = workspace2
[0] + workspace2
[4];
399 out
[1] = workspace2
[0] - workspace2
[4];
400 out
[2] = workspace2
[1] - workspace2
[5];
401 out
[3] = workspace2
[1] + workspace2
[5];
402 out
[4] = workspace2
[2] + workspace2
[6];
403 out
[5] = workspace2
[2] - workspace2
[6];
404 out
[6] = workspace2
[3] - workspace2
[7];
405 out
[7] = workspace2
[3] + workspace2
[7];
410 for (i
= 0; i
< 8; i
++, out
++) {
412 workspace1
[0] = out
[0] + out
[1*8];
413 workspace1
[1] = out
[0] - out
[1*8];
415 workspace1
[2] = out
[2*8] + out
[3*8];
416 workspace1
[3] = out
[2*8] - out
[3*8];
418 workspace1
[4] = out
[4*8] + out
[5*8];
419 workspace1
[5] = out
[4*8] - out
[5*8];
421 workspace1
[6] = out
[6*8] + out
[7*8];
422 workspace1
[7] = out
[6*8] - out
[7*8];
425 workspace2
[0] = workspace1
[0] + workspace1
[2];
426 workspace2
[1] = workspace1
[0] - workspace1
[2];
427 workspace2
[2] = workspace1
[1] - workspace1
[3];
428 workspace2
[3] = workspace1
[1] + workspace1
[3];
430 workspace2
[4] = workspace1
[4] + workspace1
[6];
431 workspace2
[5] = workspace1
[4] - workspace1
[6];
432 workspace2
[6] = workspace1
[5] - workspace1
[7];
433 workspace2
[7] = workspace1
[5] + workspace1
[7];
436 out
[0*8] = workspace2
[0] + workspace2
[4];
437 out
[1*8] = workspace2
[0] - workspace2
[4];
438 out
[2*8] = workspace2
[1] - workspace2
[5];
439 out
[3*8] = workspace2
[1] + workspace2
[5];
440 out
[4*8] = workspace2
[2] + workspace2
[6];
441 out
[5*8] = workspace2
[2] - workspace2
[6];
442 out
[6*8] = workspace2
[3] - workspace2
[7];
443 out
[7*8] = workspace2
[3] + workspace2
[7];
447 static void ifwht(const s16
*block
, s16
*output_block
, int intra
)
450 * we'll need more than 8 bits for the transformed coefficients
451 * use native unit of cpu
453 int workspace1
[8], workspace2
[8];
454 int inter
= intra
? 0 : 1;
455 const s16
*tmp
= block
;
456 s16
*out
= output_block
;
459 for (i
= 0; i
< 8; i
++, tmp
+= 8, out
+= 8) {
461 workspace1
[0] = tmp
[0] + tmp
[1];
462 workspace1
[1] = tmp
[0] - tmp
[1];
464 workspace1
[2] = tmp
[2] + tmp
[3];
465 workspace1
[3] = tmp
[2] - tmp
[3];
467 workspace1
[4] = tmp
[4] + tmp
[5];
468 workspace1
[5] = tmp
[4] - tmp
[5];
470 workspace1
[6] = tmp
[6] + tmp
[7];
471 workspace1
[7] = tmp
[6] - tmp
[7];
474 workspace2
[0] = workspace1
[0] + workspace1
[2];
475 workspace2
[1] = workspace1
[0] - workspace1
[2];
476 workspace2
[2] = workspace1
[1] - workspace1
[3];
477 workspace2
[3] = workspace1
[1] + workspace1
[3];
479 workspace2
[4] = workspace1
[4] + workspace1
[6];
480 workspace2
[5] = workspace1
[4] - workspace1
[6];
481 workspace2
[6] = workspace1
[5] - workspace1
[7];
482 workspace2
[7] = workspace1
[5] + workspace1
[7];
485 out
[0] = workspace2
[0] + workspace2
[4];
486 out
[1] = workspace2
[0] - workspace2
[4];
487 out
[2] = workspace2
[1] - workspace2
[5];
488 out
[3] = workspace2
[1] + workspace2
[5];
489 out
[4] = workspace2
[2] + workspace2
[6];
490 out
[5] = workspace2
[2] - workspace2
[6];
491 out
[6] = workspace2
[3] - workspace2
[7];
492 out
[7] = workspace2
[3] + workspace2
[7];
497 for (i
= 0; i
< 8; i
++, out
++) {
499 workspace1
[0] = out
[0] + out
[1 * 8];
500 workspace1
[1] = out
[0] - out
[1 * 8];
502 workspace1
[2] = out
[2 * 8] + out
[3 * 8];
503 workspace1
[3] = out
[2 * 8] - out
[3 * 8];
505 workspace1
[4] = out
[4 * 8] + out
[5 * 8];
506 workspace1
[5] = out
[4 * 8] - out
[5 * 8];
508 workspace1
[6] = out
[6 * 8] + out
[7 * 8];
509 workspace1
[7] = out
[6 * 8] - out
[7 * 8];
512 workspace2
[0] = workspace1
[0] + workspace1
[2];
513 workspace2
[1] = workspace1
[0] - workspace1
[2];
514 workspace2
[2] = workspace1
[1] - workspace1
[3];
515 workspace2
[3] = workspace1
[1] + workspace1
[3];
517 workspace2
[4] = workspace1
[4] + workspace1
[6];
518 workspace2
[5] = workspace1
[4] - workspace1
[6];
519 workspace2
[6] = workspace1
[5] - workspace1
[7];
520 workspace2
[7] = workspace1
[5] + workspace1
[7];
526 out
[0 * 8] = workspace2
[0] + workspace2
[4];
527 out
[1 * 8] = workspace2
[0] - workspace2
[4];
528 out
[2 * 8] = workspace2
[1] - workspace2
[5];
529 out
[3 * 8] = workspace2
[1] + workspace2
[5];
530 out
[4 * 8] = workspace2
[2] + workspace2
[6];
531 out
[5 * 8] = workspace2
[2] - workspace2
[6];
532 out
[6 * 8] = workspace2
[3] - workspace2
[7];
533 out
[7 * 8] = workspace2
[3] + workspace2
[7];
535 for (d
= 0; d
< 8; d
++)
540 out
[0 * 8] = workspace2
[0] + workspace2
[4];
541 out
[1 * 8] = workspace2
[0] - workspace2
[4];
542 out
[2 * 8] = workspace2
[1] - workspace2
[5];
543 out
[3 * 8] = workspace2
[1] + workspace2
[5];
544 out
[4 * 8] = workspace2
[2] + workspace2
[6];
545 out
[5 * 8] = workspace2
[2] - workspace2
[6];
546 out
[6 * 8] = workspace2
[3] - workspace2
[7];
547 out
[7 * 8] = workspace2
[3] + workspace2
[7];
549 for (d
= 0; d
< 8; d
++) {
557 static void fill_encoder_block(const u8
*input
, s16
*dst
,
558 unsigned int stride
, unsigned int input_step
)
562 for (i
= 0; i
< 8; i
++) {
563 for (j
= 0; j
< 8; j
++, input
+= input_step
)
565 input
+= (stride
- 8) * input_step
;
569 static int var_intra(const s16
*input
)
573 const s16
*tmp
= input
;
576 for (i
= 0; i
< 8 * 8; i
++, tmp
++)
580 for (i
= 0; i
< 8 * 8; i
++, tmp
++)
581 ret
+= (*tmp
- mean
) < 0 ? -(*tmp
- mean
) : (*tmp
- mean
);
585 static int var_inter(const s16
*old
, const s16
*new)
590 for (i
= 0; i
< 8 * 8; i
++, old
++, new++)
591 ret
+= (*old
- *new) < 0 ? -(*old
- *new) : (*old
- *new);
595 static int decide_blocktype(const u8
*cur
, const u8
*reference
,
596 s16
*deltablock
, unsigned int stride
,
597 unsigned int input_step
)
606 fill_encoder_block(cur
, tmp
, stride
, input_step
);
607 fill_encoder_block(reference
, old
, 8, 1);
608 vari
= var_intra(tmp
);
610 for (k
= 0; k
< 8; k
++) {
611 for (l
= 0; l
< 8; l
++) {
612 *deltablock
= *work
- *reference
;
619 vard
= var_inter(old
, tmp
);
620 return vari
<= vard
? IBLOCK
: PBLOCK
;
623 static void fill_decoder_block(u8
*dst
, const s16
*input
, int stride
)
627 for (i
= 0; i
< 8; i
++) {
628 for (j
= 0; j
< 8; j
++, input
++, dst
++) {
631 else if (*input
> 255)
640 static void add_deltas(s16
*deltas
, const u8
*ref
, int stride
)
644 for (k
= 0; k
< 8; k
++) {
645 for (l
= 0; l
< 8; l
++) {
648 * Due to quantizing, it might possible that the
649 * decoded coefficients are slightly out of range
653 else if (*deltas
> 255)
661 static u32
encode_plane(u8
*input
, u8
*refp
, __be16
**rlco
, __be16
*rlco_max
,
662 struct fwht_cframe
*cf
, u32 height
, u32 width
,
663 unsigned int input_step
,
664 bool is_intra
, bool next_is_intra
)
666 u8
*input_start
= input
;
667 __be16
*rlco_start
= *rlco
;
669 __be16 pframe_bit
= htons(PFRAME_BIT
);
671 unsigned int last_size
= 0;
674 for (j
= 0; j
< height
/ 8; j
++) {
675 for (i
= 0; i
< width
/ 8; i
++) {
676 /* intra code, first frame is always intra coded. */
677 int blocktype
= IBLOCK
;
681 blocktype
= decide_blocktype(input
, refp
,
682 deltablock
, width
, input_step
);
683 if (blocktype
== IBLOCK
) {
684 fwht(input
, cf
->coeffs
, width
, input_step
, 1);
685 quantize_intra(cf
->coeffs
, cf
->de_coeffs
,
689 encoding
|= FWHT_FRAME_PCODED
;
690 fwht16(deltablock
, cf
->coeffs
, 8, 0);
691 quantize_inter(cf
->coeffs
, cf
->de_coeffs
,
694 if (!next_is_intra
) {
695 ifwht(cf
->de_coeffs
, cf
->de_fwht
, blocktype
);
697 if (blocktype
== PBLOCK
)
698 add_deltas(cf
->de_fwht
, refp
, 8);
699 fill_decoder_block(refp
, cf
->de_fwht
, 8);
702 input
+= 8 * input_step
;
705 size
= rlc(cf
->coeffs
, *rlco
, blocktype
);
706 if (last_size
== size
&&
707 !memcmp(*rlco
+ 1, *rlco
- size
+ 1, 2 * size
- 2)) {
708 __be16
*last_rlco
= *rlco
- size
;
709 s16 hdr
= ntohs(*last_rlco
);
711 if (!((*last_rlco
^ **rlco
) & pframe_bit
) &&
712 (hdr
& DUPS_MASK
) < DUPS_MASK
)
713 *last_rlco
= htons(hdr
+ 2);
719 if (*rlco
>= rlco_max
) {
720 encoding
|= FWHT_FRAME_UNENCODED
;
725 input
+= width
* 7 * input_step
;
729 if (encoding
& FWHT_FRAME_UNENCODED
) {
730 u8
*out
= (u8
*)rlco_start
;
734 * The compressed stream should never contain the magic
735 * header, so when we copy the YUV data we replace 0xff
736 * by 0xfe. Since YUV is limited range such values
737 * shouldn't appear anyway.
739 for (i
= 0; i
< height
* width
; i
++, input
+= input_step
)
740 *out
++ = (*input
== 0xff) ? 0xfe : *input
;
741 *rlco
= (__be16
*)out
;
742 encoding
&= ~FWHT_FRAME_PCODED
;
747 u32
fwht_encode_frame(struct fwht_raw_frame
*frm
,
748 struct fwht_raw_frame
*ref_frm
,
749 struct fwht_cframe
*cf
,
750 bool is_intra
, bool next_is_intra
)
752 unsigned int size
= frm
->height
* frm
->width
;
753 __be16
*rlco
= cf
->rlc_data
;
757 rlco_max
= rlco
+ size
/ 2 - 256;
758 encoding
= encode_plane(frm
->luma
, ref_frm
->luma
, &rlco
, rlco_max
, cf
,
759 frm
->height
, frm
->width
,
760 frm
->luma_alpha_step
, is_intra
, next_is_intra
);
761 if (encoding
& FWHT_FRAME_UNENCODED
)
762 encoding
|= FWHT_LUMA_UNENCODED
;
763 encoding
&= ~FWHT_FRAME_UNENCODED
;
765 if (frm
->components_num
>= 3) {
766 u32 chroma_h
= frm
->height
/ frm
->height_div
;
767 u32 chroma_w
= frm
->width
/ frm
->width_div
;
768 unsigned int chroma_size
= chroma_h
* chroma_w
;
770 rlco_max
= rlco
+ chroma_size
/ 2 - 256;
771 encoding
|= encode_plane(frm
->cb
, ref_frm
->cb
, &rlco
, rlco_max
,
772 cf
, chroma_h
, chroma_w
,
774 is_intra
, next_is_intra
);
775 if (encoding
& FWHT_FRAME_UNENCODED
)
776 encoding
|= FWHT_CB_UNENCODED
;
777 encoding
&= ~FWHT_FRAME_UNENCODED
;
778 rlco_max
= rlco
+ chroma_size
/ 2 - 256;
779 encoding
|= encode_plane(frm
->cr
, ref_frm
->cr
, &rlco
, rlco_max
,
780 cf
, chroma_h
, chroma_w
,
782 is_intra
, next_is_intra
);
783 if (encoding
& FWHT_FRAME_UNENCODED
)
784 encoding
|= FWHT_CR_UNENCODED
;
785 encoding
&= ~FWHT_FRAME_UNENCODED
;
788 if (frm
->components_num
== 4) {
789 rlco_max
= rlco
+ size
/ 2 - 256;
790 encoding
= encode_plane(frm
->alpha
, ref_frm
->alpha
, &rlco
,
791 rlco_max
, cf
, frm
->height
, frm
->width
,
792 frm
->luma_alpha_step
,
793 is_intra
, next_is_intra
);
794 if (encoding
& FWHT_FRAME_UNENCODED
)
795 encoding
|= FWHT_ALPHA_UNENCODED
;
796 encoding
&= ~FWHT_FRAME_UNENCODED
;
799 cf
->size
= (rlco
- cf
->rlc_data
) * sizeof(*rlco
);
803 static void decode_plane(struct fwht_cframe
*cf
, const __be16
**rlco
, u8
*ref
,
804 u32 height
, u32 width
, bool uncompressed
)
806 unsigned int copies
= 0;
812 memcpy(ref
, *rlco
, width
* height
);
813 *rlco
+= width
* height
/ 2;
818 * When decoding each macroblock the rlco pointer will be increased
819 * by 65 * 2 bytes worst-case.
820 * To avoid overflow the buffer has to be 65/64th of the actual raw
821 * image size, just in case someone feeds it malicious data.
823 for (j
= 0; j
< height
/ 8; j
++) {
824 for (i
= 0; i
< width
/ 8; i
++) {
825 u8
*refp
= ref
+ j
* 8 * width
+ i
* 8;
828 memcpy(cf
->de_fwht
, copy
, sizeof(copy
));
829 if (stat
& PFRAME_BIT
)
830 add_deltas(cf
->de_fwht
, refp
, width
);
831 fill_decoder_block(refp
, cf
->de_fwht
, width
);
836 stat
= derlc(rlco
, cf
->coeffs
);
838 if (stat
& PFRAME_BIT
)
839 dequantize_inter(cf
->coeffs
);
841 dequantize_intra(cf
->coeffs
);
843 ifwht(cf
->coeffs
, cf
->de_fwht
,
844 (stat
& PFRAME_BIT
) ? 0 : 1);
846 copies
= (stat
& DUPS_MASK
) >> 1;
848 memcpy(copy
, cf
->de_fwht
, sizeof(copy
));
849 if (stat
& PFRAME_BIT
)
850 add_deltas(cf
->de_fwht
, refp
, width
);
851 fill_decoder_block(refp
, cf
->de_fwht
, width
);
856 void fwht_decode_frame(struct fwht_cframe
*cf
, struct fwht_raw_frame
*ref
,
857 u32 hdr_flags
, unsigned int components_num
)
859 const __be16
*rlco
= cf
->rlc_data
;
861 decode_plane(cf
, &rlco
, ref
->luma
, cf
->height
, cf
->width
,
862 hdr_flags
& FWHT_FL_LUMA_IS_UNCOMPRESSED
);
864 if (components_num
>= 3) {
868 if (!(hdr_flags
& FWHT_FL_CHROMA_FULL_HEIGHT
))
870 if (!(hdr_flags
& FWHT_FL_CHROMA_FULL_WIDTH
))
872 decode_plane(cf
, &rlco
, ref
->cb
, h
, w
,
873 hdr_flags
& FWHT_FL_CB_IS_UNCOMPRESSED
);
874 decode_plane(cf
, &rlco
, ref
->cr
, h
, w
,
875 hdr_flags
& FWHT_FL_CR_IS_UNCOMPRESSED
);
878 if (components_num
== 4)
879 decode_plane(cf
, &rlco
, ref
->alpha
, cf
->height
, cf
->width
,
880 hdr_flags
& FWHT_FL_ALPHA_IS_UNCOMPRESSED
);