]> git.proxmox.com Git - mirror_ubuntu-kernels.git/blob - drivers/media/platform/vicodec/codec-fwht.c
HID: logitech-dj: fix spelling in printk
[mirror_ubuntu-kernels.git] / drivers / media / platform / vicodec / codec-fwht.c
1 // SPDX-License-Identifier: LGPL-2.1+
2 /*
3 * Copyright 2016 Tom aan de Wiel
4 * Copyright 2018 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
5 *
6 * 8x8 Fast Walsh Hadamard Transform in sequency order based on the paper:
7 *
8 * A Recursive Algorithm for Sequency-Ordered Fast Walsh Transforms,
9 * R.D. Brown, 1977
10 */
11
12 #include <linux/string.h>
13 #include "codec-fwht.h"
14
15 /*
16 * Note: bit 0 of the header must always be 0. Otherwise it cannot
17 * be guaranteed that the magic 8 byte sequence (see below) can
18 * never occur in the rlc output.
19 */
20 #define PFRAME_BIT BIT(15)
21 #define DUPS_MASK 0x1ffe
22
23 #define PBLOCK 0
24 #define IBLOCK 1
25
26 #define ALL_ZEROS 15
27
28 static const uint8_t zigzag[64] = {
29 0,
30 1, 8,
31 2, 9, 16,
32 3, 10, 17, 24,
33 4, 11, 18, 25, 32,
34 5, 12, 19, 26, 33, 40,
35 6, 13, 20, 27, 34, 41, 48,
36 7, 14, 21, 28, 35, 42, 49, 56,
37 15, 22, 29, 36, 43, 50, 57,
38 23, 30, 37, 44, 51, 58,
39 31, 38, 45, 52, 59,
40 39, 46, 53, 60,
41 47, 54, 61,
42 55, 62,
43 63,
44 };
45
46
/*
 * Run-length encode one quantized 8x8 coefficient block into the
 * compressed stream.
 *
 * @in: 64 coefficients in raster order.
 * @output: destination for __be16 codewords; the first word is the block
 *	    header (PFRAME_BIT set for P-coded blocks).
 * @blocktype: IBLOCK or PBLOCK.
 *
 * Coefficients are scanned in zigzag order. Each codeword packs a
 * zero-run length (low 4 bits) and the following coefficient (high 12
 * bits). Returns the number of 16-bit words written.
 */
static int rlc(const s16 *in, __be16 *output, int blocktype)
{
	s16 block[8 * 8];
	s16 *wp = block;
	int i = 0;
	int x, y;
	int ret = 0;

	/* read in block from framebuffer */
	int lastzero_run = 0;
	int to_encode;

	for (y = 0; y < 8; y++) {
		for (x = 0; x < 8; x++) {
			*wp = in[x + y * 8];
			wp++;
		}
	}

	/* keep track of amount of trailing zeros */
	for (i = 63; i >= 0 && !block[zigzag[i]]; i--)
		lastzero_run++;

	/* block header: bit 15 distinguishes P-coded from intra blocks */
	*output++ = (blocktype == PBLOCK ? htons(PFRAME_BIT) : 0);
	ret++;

	/*
	 * 15 or more trailing zeros are not coded individually; they are
	 * replaced by a single ALL_ZEROS codeword emitted below.
	 */
	to_encode = 8 * 8 - (lastzero_run > 14 ? lastzero_run : 0);

	i = 0;
	while (i < to_encode) {
		int cnt = 0;
		int tmp;

		/* count leading zeros */
		while ((tmp = block[zigzag[i]]) == 0 && cnt < 14) {
			cnt++;
			i++;
			if (i == to_encode) {
				cnt--;
				break;
			}
		}
		/* 4 bits for run, 12 for coefficient (quantization by 4) */
		*output++ = htons((cnt | tmp << 4));
		i++;
		ret++;
	}
	if (lastzero_run > 14) {
		/* run value 15 means "remainder of block is all zero" */
		*output = htons(ALL_ZEROS | 0);
		ret++;
	}

	return ret;
}
101
102 /*
103 * This function will worst-case increase rlc_in by 65*2 bytes:
104 * one s16 value for the header and 8 * 8 coefficients of type s16.
105 */
/*
 * Decode one run-length-encoded 8x8 block from *rlc_in into dwht_out
 * (raster order), advancing *rlc_in past the consumed codewords.
 *
 * Returns the block header word (PFRAME_BIT and the duplicate-count
 * bits, see DUPS_MASK).
 *
 * NOTE(review): the input pointer is not bounds-checked against the end
 * of the compressed buffer — the comment below only guards the *output*
 * side. Callers must guarantee the 65/64 sizing headroom described at
 * the function comment above; confirm against decode_plane().
 */
static s16 derlc(const __be16 **rlc_in, s16 *dwht_out)
{
	/* header */
	const __be16 *input = *rlc_in;
	s16 ret = ntohs(*input++);
	int dec_count = 0;
	s16 block[8 * 8 + 16];
	s16 *wp = block;
	int i;

	/*
	 * Now de-compress, it expands one byte to up to 15 bytes
	 * (or fills the remainder of the 64 bytes with zeroes if it
	 * is the last byte to expand).
	 *
	 * So block has to be 8 * 8 + 16 bytes, the '+ 16' is to
	 * allow for overflow if the incoming data was malformed.
	 */
	while (dec_count < 8 * 8) {
		s16 in = ntohs(*input++);
		int length = in & 0xf;
		int coeff = in >> 4;

		/* fill remainder with zeros */
		if (length == 15) {
			for (i = 0; i < 64 - dec_count; i++)
				*wp++ = 0;
			break;
		}

		/* a run of 'length' zeros followed by one coefficient */
		for (i = 0; i < length; i++)
			*wp++ = 0;
		*wp++ = coeff;
		dec_count += length + 1;
	}

	wp = block;

	/* un-zigzag: block[] is in scan order, dwht_out in raster order */
	for (i = 0; i < 64; i++) {
		int pos = zigzag[i];
		int y = pos / 8;
		int x = pos % 8;

		dwht_out[x + y * 8] = *wp++;
	}
	*rlc_in = input;
	return ret;
}
154
/*
 * Per-coefficient right-shift amounts for intra blocks, raster order:
 * higher-frequency (bottom-right) coefficients are quantized harder.
 */
static const int quant_table[] = {
	2, 2, 2, 2, 2, 2, 2, 2,
	2, 2, 2, 2, 2, 2, 2, 2,
	2, 2, 2, 2, 2, 2, 2, 3,
	2, 2, 2, 2, 2, 2, 3, 6,
	2, 2, 2, 2, 2, 3, 6, 6,
	2, 2, 2, 2, 3, 6, 6, 6,
	2, 2, 2, 3, 6, 6, 6, 6,
	2, 2, 3, 6, 6, 6, 6, 8,
};
165
/*
 * Per-coefficient right-shift amounts for inter (P) blocks; coarser
 * than quant_table since P-block deltas span twice the value range.
 */
static const int quant_table_p[] = {
	3, 3, 3, 3, 3, 3, 3, 3,
	3, 3, 3, 3, 3, 3, 3, 3,
	3, 3, 3, 3, 3, 3, 3, 3,
	3, 3, 3, 3, 3, 3, 3, 6,
	3, 3, 3, 3, 3, 3, 6, 6,
	3, 3, 3, 3, 3, 6, 6, 9,
	3, 3, 3, 3, 6, 6, 9, 9,
	3, 3, 3, 6, 6, 9, 9, 10,
};
176
177 static void quantize_intra(s16 *coeff, s16 *de_coeff, u16 qp)
178 {
179 const int *quant = quant_table;
180 int i, j;
181
182 for (j = 0; j < 8; j++) {
183 for (i = 0; i < 8; i++, quant++, coeff++, de_coeff++) {
184 *coeff >>= *quant;
185 if (*coeff >= -qp && *coeff <= qp)
186 *coeff = *de_coeff = 0;
187 else
188 *de_coeff = *coeff << *quant;
189 }
190 }
191 }
192
193 static void dequantize_intra(s16 *coeff)
194 {
195 const int *quant = quant_table;
196 int i, j;
197
198 for (j = 0; j < 8; j++)
199 for (i = 0; i < 8; i++, quant++, coeff++)
200 *coeff <<= *quant;
201 }
202
203 static void quantize_inter(s16 *coeff, s16 *de_coeff, u16 qp)
204 {
205 const int *quant = quant_table_p;
206 int i, j;
207
208 for (j = 0; j < 8; j++) {
209 for (i = 0; i < 8; i++, quant++, coeff++, de_coeff++) {
210 *coeff >>= *quant;
211 if (*coeff >= -qp && *coeff <= qp)
212 *coeff = *de_coeff = 0;
213 else
214 *de_coeff = *coeff << *quant;
215 }
216 }
217 }
218
219 static void dequantize_inter(s16 *coeff)
220 {
221 const int *quant = quant_table_p;
222 int i, j;
223
224 for (j = 0; j < 8; j++)
225 for (i = 0; i < 8; i++, quant++, coeff++)
226 *coeff <<= *quant;
227 }
228
/*
 * Forward 8x8 Walsh-Hadamard transform of a block of u8 samples.
 *
 * @block: top-left sample of the 8x8 block.
 * @output_block: 64 transformed s16 coefficients, raster order.
 * @stride: distance in samples between vertically adjacent samples.
 * @input_step: distance in bytes between horizontally adjacent samples
 *		of this plane (1-4, for interleaved pixel formats).
 * @intra: when true, 256 is subtracted from each stage-1 pair sum,
 *	   i.e. (a - 128) + (b - 128): centers samples around zero.
 *
 * Rows are transformed first (three butterfly stages), then columns
 * in place on the intermediate output.
 */
static void fwht(const u8 *block, s16 *output_block, unsigned int stride,
		 unsigned int input_step, bool intra)
{
	/* we'll need more than 8 bits for the transformed coefficients */
	s32 workspace1[8], workspace2[8];
	const u8 *tmp = block;
	s16 *out = output_block;
	int add = intra ? 256 : 0;
	unsigned int i;

	/* stage 1 */
	/* convert the line stride from samples to bytes */
	stride *= input_step;

	/* row pass: one iteration per row of the 8x8 block */
	for (i = 0; i < 8; i++, tmp += stride, out += 8) {
		/* the switch unrolls the horizontal sample step */
		switch (input_step) {
		case 1:
			workspace1[0] = tmp[0] + tmp[1] - add;
			workspace1[1] = tmp[0] - tmp[1];

			workspace1[2] = tmp[2] + tmp[3] - add;
			workspace1[3] = tmp[2] - tmp[3];

			workspace1[4] = tmp[4] + tmp[5] - add;
			workspace1[5] = tmp[4] - tmp[5];

			workspace1[6] = tmp[6] + tmp[7] - add;
			workspace1[7] = tmp[6] - tmp[7];
			break;
		case 2:
			workspace1[0] = tmp[0] + tmp[2] - add;
			workspace1[1] = tmp[0] - tmp[2];

			workspace1[2] = tmp[4] + tmp[6] - add;
			workspace1[3] = tmp[4] - tmp[6];

			workspace1[4] = tmp[8] + tmp[10] - add;
			workspace1[5] = tmp[8] - tmp[10];

			workspace1[6] = tmp[12] + tmp[14] - add;
			workspace1[7] = tmp[12] - tmp[14];
			break;
		case 3:
			workspace1[0] = tmp[0] + tmp[3] - add;
			workspace1[1] = tmp[0] - tmp[3];

			workspace1[2] = tmp[6] + tmp[9] - add;
			workspace1[3] = tmp[6] - tmp[9];

			workspace1[4] = tmp[12] + tmp[15] - add;
			workspace1[5] = tmp[12] - tmp[15];

			workspace1[6] = tmp[18] + tmp[21] - add;
			workspace1[7] = tmp[18] - tmp[21];
			break;
		default:
			/* input_step == 4 */
			workspace1[0] = tmp[0] + tmp[4] - add;
			workspace1[1] = tmp[0] - tmp[4];

			workspace1[2] = tmp[8] + tmp[12] - add;
			workspace1[3] = tmp[8] - tmp[12];

			workspace1[4] = tmp[16] + tmp[20] - add;
			workspace1[5] = tmp[16] - tmp[20];

			workspace1[6] = tmp[24] + tmp[28] - add;
			workspace1[7] = tmp[24] - tmp[28];
			break;
		}

		/* stage 2 */
		workspace2[0] = workspace1[0] + workspace1[2];
		workspace2[1] = workspace1[0] - workspace1[2];
		workspace2[2] = workspace1[1] - workspace1[3];
		workspace2[3] = workspace1[1] + workspace1[3];

		workspace2[4] = workspace1[4] + workspace1[6];
		workspace2[5] = workspace1[4] - workspace1[6];
		workspace2[6] = workspace1[5] - workspace1[7];
		workspace2[7] = workspace1[5] + workspace1[7];

		/* stage 3 */
		out[0] = workspace2[0] + workspace2[4];
		out[1] = workspace2[0] - workspace2[4];
		out[2] = workspace2[1] - workspace2[5];
		out[3] = workspace2[1] + workspace2[5];
		out[4] = workspace2[2] + workspace2[6];
		out[5] = workspace2[2] - workspace2[6];
		out[6] = workspace2[3] - workspace2[7];
		out[7] = workspace2[3] + workspace2[7];
	}

	out = output_block;

	/* column pass: same butterflies applied down each column */
	for (i = 0; i < 8; i++, out++) {
		/* stage 1 */
		workspace1[0] = out[0] + out[1 * 8];
		workspace1[1] = out[0] - out[1 * 8];

		workspace1[2] = out[2 * 8] + out[3 * 8];
		workspace1[3] = out[2 * 8] - out[3 * 8];

		workspace1[4] = out[4 * 8] + out[5 * 8];
		workspace1[5] = out[4 * 8] - out[5 * 8];

		workspace1[6] = out[6 * 8] + out[7 * 8];
		workspace1[7] = out[6 * 8] - out[7 * 8];

		/* stage 2 */
		workspace2[0] = workspace1[0] + workspace1[2];
		workspace2[1] = workspace1[0] - workspace1[2];
		workspace2[2] = workspace1[1] - workspace1[3];
		workspace2[3] = workspace1[1] + workspace1[3];

		workspace2[4] = workspace1[4] + workspace1[6];
		workspace2[5] = workspace1[4] - workspace1[6];
		workspace2[6] = workspace1[5] - workspace1[7];
		workspace2[7] = workspace1[5] + workspace1[7];
		/* stage 3 */
		out[0 * 8] = workspace2[0] + workspace2[4];
		out[1 * 8] = workspace2[0] - workspace2[4];
		out[2 * 8] = workspace2[1] - workspace2[5];
		out[3 * 8] = workspace2[1] + workspace2[5];
		out[4 * 8] = workspace2[2] + workspace2[6];
		out[5 * 8] = workspace2[2] - workspace2[6];
		out[6 * 8] = workspace2[3] - workspace2[7];
		out[7 * 8] = workspace2[3] + workspace2[7];
	}
}
357
/*
 * Not the nicest way of doing it, but P-blocks get twice the range of
 * that of the I-blocks. Therefore we need a type bigger than 8 bits.
 * Furthermore values can be negative... This is just a version of fwht()
 * that works on 16-bit signed input data (the P-block delta values).
 *
 * NOTE(review): the 'intra' parameter is unused here — unlike fwht()
 * there is no DC-offset subtraction; confirm whether callers rely on
 * this (encode_plane() always passes 0).
 */
static void fwht16(const s16 *block, s16 *output_block, int stride, int intra)
{
	/* we'll need more than 8 bits for the transformed coefficients */
	s32 workspace1[8], workspace2[8];
	const s16 *tmp = block;
	s16 *out = output_block;
	int i;

	/* row pass */
	for (i = 0; i < 8; i++, tmp += stride, out += 8) {
		/* stage 1 */
		workspace1[0] = tmp[0] + tmp[1];
		workspace1[1] = tmp[0] - tmp[1];

		workspace1[2] = tmp[2] + tmp[3];
		workspace1[3] = tmp[2] - tmp[3];

		workspace1[4] = tmp[4] + tmp[5];
		workspace1[5] = tmp[4] - tmp[5];

		workspace1[6] = tmp[6] + tmp[7];
		workspace1[7] = tmp[6] - tmp[7];

		/* stage 2 */
		workspace2[0] = workspace1[0] + workspace1[2];
		workspace2[1] = workspace1[0] - workspace1[2];
		workspace2[2] = workspace1[1] - workspace1[3];
		workspace2[3] = workspace1[1] + workspace1[3];

		workspace2[4] = workspace1[4] + workspace1[6];
		workspace2[5] = workspace1[4] - workspace1[6];
		workspace2[6] = workspace1[5] - workspace1[7];
		workspace2[7] = workspace1[5] + workspace1[7];

		/* stage 3 */
		out[0] = workspace2[0] + workspace2[4];
		out[1] = workspace2[0] - workspace2[4];
		out[2] = workspace2[1] - workspace2[5];
		out[3] = workspace2[1] + workspace2[5];
		out[4] = workspace2[2] + workspace2[6];
		out[5] = workspace2[2] - workspace2[6];
		out[6] = workspace2[3] - workspace2[7];
		out[7] = workspace2[3] + workspace2[7];
	}

	out = output_block;

	/* column pass, in place on the intermediate output */
	for (i = 0; i < 8; i++, out++) {
		/* stage 1 */
		workspace1[0] = out[0] + out[1*8];
		workspace1[1] = out[0] - out[1*8];

		workspace1[2] = out[2*8] + out[3*8];
		workspace1[3] = out[2*8] - out[3*8];

		workspace1[4] = out[4*8] + out[5*8];
		workspace1[5] = out[4*8] - out[5*8];

		workspace1[6] = out[6*8] + out[7*8];
		workspace1[7] = out[6*8] - out[7*8];

		/* stage 2 */
		workspace2[0] = workspace1[0] + workspace1[2];
		workspace2[1] = workspace1[0] - workspace1[2];
		workspace2[2] = workspace1[1] - workspace1[3];
		workspace2[3] = workspace1[1] + workspace1[3];

		workspace2[4] = workspace1[4] + workspace1[6];
		workspace2[5] = workspace1[4] - workspace1[6];
		workspace2[6] = workspace1[5] - workspace1[7];
		workspace2[7] = workspace1[5] + workspace1[7];

		/* stage 3 */
		out[0*8] = workspace2[0] + workspace2[4];
		out[1*8] = workspace2[0] - workspace2[4];
		out[2*8] = workspace2[1] - workspace2[5];
		out[3*8] = workspace2[1] + workspace2[5];
		out[4*8] = workspace2[2] + workspace2[6];
		out[5*8] = workspace2[2] - workspace2[6];
		out[6*8] = workspace2[3] - workspace2[7];
		out[7*8] = workspace2[3] + workspace2[7];
	}
}
446
447 static void ifwht(const s16 *block, s16 *output_block, int intra)
448 {
449 /*
450 * we'll need more than 8 bits for the transformed coefficients
451 * use native unit of cpu
452 */
453 int workspace1[8], workspace2[8];
454 int inter = intra ? 0 : 1;
455 const s16 *tmp = block;
456 s16 *out = output_block;
457 int i;
458
459 for (i = 0; i < 8; i++, tmp += 8, out += 8) {
460 /* stage 1 */
461 workspace1[0] = tmp[0] + tmp[1];
462 workspace1[1] = tmp[0] - tmp[1];
463
464 workspace1[2] = tmp[2] + tmp[3];
465 workspace1[3] = tmp[2] - tmp[3];
466
467 workspace1[4] = tmp[4] + tmp[5];
468 workspace1[5] = tmp[4] - tmp[5];
469
470 workspace1[6] = tmp[6] + tmp[7];
471 workspace1[7] = tmp[6] - tmp[7];
472
473 /* stage 2 */
474 workspace2[0] = workspace1[0] + workspace1[2];
475 workspace2[1] = workspace1[0] - workspace1[2];
476 workspace2[2] = workspace1[1] - workspace1[3];
477 workspace2[3] = workspace1[1] + workspace1[3];
478
479 workspace2[4] = workspace1[4] + workspace1[6];
480 workspace2[5] = workspace1[4] - workspace1[6];
481 workspace2[6] = workspace1[5] - workspace1[7];
482 workspace2[7] = workspace1[5] + workspace1[7];
483
484 /* stage 3 */
485 out[0] = workspace2[0] + workspace2[4];
486 out[1] = workspace2[0] - workspace2[4];
487 out[2] = workspace2[1] - workspace2[5];
488 out[3] = workspace2[1] + workspace2[5];
489 out[4] = workspace2[2] + workspace2[6];
490 out[5] = workspace2[2] - workspace2[6];
491 out[6] = workspace2[3] - workspace2[7];
492 out[7] = workspace2[3] + workspace2[7];
493 }
494
495 out = output_block;
496
497 for (i = 0; i < 8; i++, out++) {
498 /* stage 1 */
499 workspace1[0] = out[0] + out[1 * 8];
500 workspace1[1] = out[0] - out[1 * 8];
501
502 workspace1[2] = out[2 * 8] + out[3 * 8];
503 workspace1[3] = out[2 * 8] - out[3 * 8];
504
505 workspace1[4] = out[4 * 8] + out[5 * 8];
506 workspace1[5] = out[4 * 8] - out[5 * 8];
507
508 workspace1[6] = out[6 * 8] + out[7 * 8];
509 workspace1[7] = out[6 * 8] - out[7 * 8];
510
511 /* stage 2 */
512 workspace2[0] = workspace1[0] + workspace1[2];
513 workspace2[1] = workspace1[0] - workspace1[2];
514 workspace2[2] = workspace1[1] - workspace1[3];
515 workspace2[3] = workspace1[1] + workspace1[3];
516
517 workspace2[4] = workspace1[4] + workspace1[6];
518 workspace2[5] = workspace1[4] - workspace1[6];
519 workspace2[6] = workspace1[5] - workspace1[7];
520 workspace2[7] = workspace1[5] + workspace1[7];
521
522 /* stage 3 */
523 if (inter) {
524 int d;
525
526 out[0 * 8] = workspace2[0] + workspace2[4];
527 out[1 * 8] = workspace2[0] - workspace2[4];
528 out[2 * 8] = workspace2[1] - workspace2[5];
529 out[3 * 8] = workspace2[1] + workspace2[5];
530 out[4 * 8] = workspace2[2] + workspace2[6];
531 out[5 * 8] = workspace2[2] - workspace2[6];
532 out[6 * 8] = workspace2[3] - workspace2[7];
533 out[7 * 8] = workspace2[3] + workspace2[7];
534
535 for (d = 0; d < 8; d++)
536 out[8 * d] >>= 6;
537 } else {
538 int d;
539
540 out[0 * 8] = workspace2[0] + workspace2[4];
541 out[1 * 8] = workspace2[0] - workspace2[4];
542 out[2 * 8] = workspace2[1] - workspace2[5];
543 out[3 * 8] = workspace2[1] + workspace2[5];
544 out[4 * 8] = workspace2[2] + workspace2[6];
545 out[5 * 8] = workspace2[2] - workspace2[6];
546 out[6 * 8] = workspace2[3] - workspace2[7];
547 out[7 * 8] = workspace2[3] + workspace2[7];
548
549 for (d = 0; d < 8; d++) {
550 out[8 * d] >>= 6;
551 out[8 * d] += 128;
552 }
553 }
554 }
555 }
556
557 static void fill_encoder_block(const u8 *input, s16 *dst,
558 unsigned int stride, unsigned int input_step)
559 {
560 int i, j;
561
562 for (i = 0; i < 8; i++) {
563 for (j = 0; j < 8; j++, input += input_step)
564 *dst++ = *input;
565 input += (stride - 8) * input_step;
566 }
567 }
568
569 static int var_intra(const s16 *input)
570 {
571 int32_t mean = 0;
572 int32_t ret = 0;
573 const s16 *tmp = input;
574 int i;
575
576 for (i = 0; i < 8 * 8; i++, tmp++)
577 mean += *tmp;
578 mean /= 64;
579 tmp = input;
580 for (i = 0; i < 8 * 8; i++, tmp++)
581 ret += (*tmp - mean) < 0 ? -(*tmp - mean) : (*tmp - mean);
582 return ret;
583 }
584
585 static int var_inter(const s16 *old, const s16 *new)
586 {
587 int32_t ret = 0;
588 int i;
589
590 for (i = 0; i < 8 * 8; i++, old++, new++)
591 ret += (*old - *new) < 0 ? -(*old - *new) : (*old - *new);
592 return ret;
593 }
594
595 static int decide_blocktype(const u8 *cur, const u8 *reference,
596 s16 *deltablock, unsigned int stride,
597 unsigned int input_step)
598 {
599 s16 tmp[64];
600 s16 old[64];
601 s16 *work = tmp;
602 unsigned int k, l;
603 int vari;
604 int vard;
605
606 fill_encoder_block(cur, tmp, stride, input_step);
607 fill_encoder_block(reference, old, 8, 1);
608 vari = var_intra(tmp);
609
610 for (k = 0; k < 8; k++) {
611 for (l = 0; l < 8; l++) {
612 *deltablock = *work - *reference;
613 deltablock++;
614 work++;
615 reference++;
616 }
617 }
618 deltablock -= 64;
619 vard = var_inter(old, tmp);
620 return vari <= vard ? IBLOCK : PBLOCK;
621 }
622
623 static void fill_decoder_block(u8 *dst, const s16 *input, int stride)
624 {
625 int i, j;
626
627 for (i = 0; i < 8; i++) {
628 for (j = 0; j < 8; j++, input++, dst++) {
629 if (*input < 0)
630 *dst = 0;
631 else if (*input > 255)
632 *dst = 255;
633 else
634 *dst = *input;
635 }
636 dst += stride - 8;
637 }
638 }
639
640 static void add_deltas(s16 *deltas, const u8 *ref, int stride)
641 {
642 int k, l;
643
644 for (k = 0; k < 8; k++) {
645 for (l = 0; l < 8; l++) {
646 *deltas += *ref++;
647 /*
648 * Due to quantizing, it might possible that the
649 * decoded coefficients are slightly out of range
650 */
651 if (*deltas < 0)
652 *deltas = 0;
653 else if (*deltas > 255)
654 *deltas = 255;
655 deltas++;
656 }
657 ref += stride - 8;
658 }
659 }
660
/*
 * Encode one plane block-by-block into the compressed stream at *rlco,
 * updating *rlco and the reference plane as it goes.
 *
 * @rlco_max: when *rlco reaches this the plane did not compress; the
 *	      raw samples are stored instead and FWHT_FRAME_UNENCODED
 *	      is set in the returned flags.
 *
 * Identical consecutive encoded blocks are folded into the previous
 * block's header as a duplicate count (the DUPS_MASK bits, stepped by
 * 2 so header bit 0 stays 0 as the magic-sequence rule requires).
 */
static u32 encode_plane(u8 *input, u8 *refp, __be16 **rlco, __be16 *rlco_max,
			struct fwht_cframe *cf, u32 height, u32 width,
			unsigned int input_step,
			bool is_intra, bool next_is_intra)
{
	u8 *input_start = input;
	__be16 *rlco_start = *rlco;
	s16 deltablock[64];
	__be16 pframe_bit = htons(PFRAME_BIT);
	u32 encoding = 0;
	unsigned int last_size = 0;
	unsigned int i, j;

	for (j = 0; j < height / 8; j++) {
		for (i = 0; i < width / 8; i++) {
			/* intra code, first frame is always intra coded. */
			int blocktype = IBLOCK;
			unsigned int size;

			if (!is_intra)
				blocktype = decide_blocktype(input, refp,
					deltablock, width, input_step);
			if (blocktype == IBLOCK) {
				fwht(input, cf->coeffs, width, input_step, 1);
				quantize_intra(cf->coeffs, cf->de_coeffs,
					       cf->i_frame_qp);
			} else {
				/* inter code */
				encoding |= FWHT_FRAME_PCODED;
				fwht16(deltablock, cf->coeffs, 8, 0);
				quantize_inter(cf->coeffs, cf->de_coeffs,
					       cf->p_frame_qp);
			}
			/*
			 * Rebuild the reference block exactly as the
			 * decoder will see it, unless the next frame is
			 * intra and the reference won't be used.
			 */
			if (!next_is_intra) {
				ifwht(cf->de_coeffs, cf->de_fwht, blocktype);

				if (blocktype == PBLOCK)
					add_deltas(cf->de_fwht, refp, 8);
				fill_decoder_block(refp, cf->de_fwht, 8);
			}

			input += 8 * input_step;
			refp += 8 * 8;

			size = rlc(cf->coeffs, *rlco, blocktype);
			/*
			 * If this encoded block matches the previous one
			 * (same size, same payload, same block type) and
			 * the duplicate counter hasn't saturated, bump
			 * the previous header's count instead of storing
			 * the block again.
			 */
			if (last_size == size &&
			    !memcmp(*rlco + 1, *rlco - size + 1, 2 * size - 2)) {
				__be16 *last_rlco = *rlco - size;
				s16 hdr = ntohs(*last_rlco);

				if (!((*last_rlco ^ **rlco) & pframe_bit) &&
				    (hdr & DUPS_MASK) < DUPS_MASK)
					*last_rlco = htons(hdr + 2);
				else
					*rlco += size;
			} else {
				*rlco += size;
			}
			if (*rlco >= rlco_max) {
				encoding |= FWHT_FRAME_UNENCODED;
				goto exit_loop;
			}
			last_size = size;
		}
		/* already advanced one row of blocks; skip the other 7 */
		input += width * 7 * input_step;
	}

exit_loop:
	if (encoding & FWHT_FRAME_UNENCODED) {
		u8 *out = (u8 *)rlco_start;

		input = input_start;
		/*
		 * The compressed stream should never contain the magic
		 * header, so when we copy the YUV data we replace 0xff
		 * by 0xfe. Since YUV is limited range such values
		 * shouldn't appear anyway.
		 */
		for (i = 0; i < height * width; i++, input += input_step)
			*out++ = (*input == 0xff) ? 0xfe : *input;
		*rlco = (__be16 *)out;
		encoding &= ~FWHT_FRAME_PCODED;
	}
	return encoding;
}
746
747 u32 fwht_encode_frame(struct fwht_raw_frame *frm,
748 struct fwht_raw_frame *ref_frm,
749 struct fwht_cframe *cf,
750 bool is_intra, bool next_is_intra)
751 {
752 unsigned int size = frm->height * frm->width;
753 __be16 *rlco = cf->rlc_data;
754 __be16 *rlco_max;
755 u32 encoding;
756
757 rlco_max = rlco + size / 2 - 256;
758 encoding = encode_plane(frm->luma, ref_frm->luma, &rlco, rlco_max, cf,
759 frm->height, frm->width,
760 frm->luma_alpha_step, is_intra, next_is_intra);
761 if (encoding & FWHT_FRAME_UNENCODED)
762 encoding |= FWHT_LUMA_UNENCODED;
763 encoding &= ~FWHT_FRAME_UNENCODED;
764
765 if (frm->components_num >= 3) {
766 u32 chroma_h = frm->height / frm->height_div;
767 u32 chroma_w = frm->width / frm->width_div;
768 unsigned int chroma_size = chroma_h * chroma_w;
769
770 rlco_max = rlco + chroma_size / 2 - 256;
771 encoding |= encode_plane(frm->cb, ref_frm->cb, &rlco, rlco_max,
772 cf, chroma_h, chroma_w,
773 frm->chroma_step,
774 is_intra, next_is_intra);
775 if (encoding & FWHT_FRAME_UNENCODED)
776 encoding |= FWHT_CB_UNENCODED;
777 encoding &= ~FWHT_FRAME_UNENCODED;
778 rlco_max = rlco + chroma_size / 2 - 256;
779 encoding |= encode_plane(frm->cr, ref_frm->cr, &rlco, rlco_max,
780 cf, chroma_h, chroma_w,
781 frm->chroma_step,
782 is_intra, next_is_intra);
783 if (encoding & FWHT_FRAME_UNENCODED)
784 encoding |= FWHT_CR_UNENCODED;
785 encoding &= ~FWHT_FRAME_UNENCODED;
786 }
787
788 if (frm->components_num == 4) {
789 rlco_max = rlco + size / 2 - 256;
790 encoding = encode_plane(frm->alpha, ref_frm->alpha, &rlco,
791 rlco_max, cf, frm->height, frm->width,
792 frm->luma_alpha_step,
793 is_intra, next_is_intra);
794 if (encoding & FWHT_FRAME_UNENCODED)
795 encoding |= FWHT_ALPHA_UNENCODED;
796 encoding &= ~FWHT_FRAME_UNENCODED;
797 }
798
799 cf->size = (rlco - cf->rlc_data) * sizeof(*rlco);
800 return encoding;
801 }
802
/*
 * Decode one plane from the compressed stream at *rlco into ref,
 * advancing *rlco past the consumed data.
 *
 * Duplicate-block runs are handled via the DUPS_MASK bits of the block
 * header: the last decoded block is cached in 'copy' and replayed.
 *
 * NOTE(review): *rlco is never checked against the end of the
 * compressed buffer, so malformed input can read past it — confirm
 * callers provide the 65/64 sizing headroom described below.
 */
static void decode_plane(struct fwht_cframe *cf, const __be16 **rlco, u8 *ref,
			 u32 height, u32 width, bool uncompressed)
{
	unsigned int copies = 0;
	s16 copy[8 * 8];
	s16 stat;	/* last block header; valid whenever copies > 0 */
	unsigned int i, j;

	if (uncompressed) {
		/* plane was stored raw; just copy it out */
		memcpy(ref, *rlco, width * height);
		*rlco += width * height / 2;
		return;
	}

	/*
	 * When decoding each macroblock the rlco pointer will be increased
	 * by 65 * 2 bytes worst-case.
	 * To avoid overflow the buffer has to be 65/64th of the actual raw
	 * image size, just in case someone feeds it malicious data.
	 */
	for (j = 0; j < height / 8; j++) {
		for (i = 0; i < width / 8; i++) {
			u8 *refp = ref + j * 8 * width + i * 8;

			/* replay a cached duplicate block */
			if (copies) {
				memcpy(cf->de_fwht, copy, sizeof(copy));
				if (stat & PFRAME_BIT)
					add_deltas(cf->de_fwht, refp, width);
				fill_decoder_block(refp, cf->de_fwht, width);
				copies--;
				continue;
			}

			stat = derlc(rlco, cf->coeffs);

			if (stat & PFRAME_BIT)
				dequantize_inter(cf->coeffs);
			else
				dequantize_intra(cf->coeffs);

			ifwht(cf->coeffs, cf->de_fwht,
			      (stat & PFRAME_BIT) ? 0 : 1);

			/* header bits 1-12 hold the duplicate count */
			copies = (stat & DUPS_MASK) >> 1;
			if (copies)
				memcpy(copy, cf->de_fwht, sizeof(copy));
			if (stat & PFRAME_BIT)
				add_deltas(cf->de_fwht, refp, width);
			fill_decoder_block(refp, cf->de_fwht, width);
		}
	}
}
855
/*
 * Decode a compressed frame into the raw reference frame, plane by
 * plane in luma, cb, cr, alpha order. Per-plane FWHT_FL_*_UNCOMPRESSED
 * bits in hdr_flags mark planes stored raw; the chroma-subsampling
 * flags select half- or full-size chroma planes.
 */
void fwht_decode_frame(struct fwht_cframe *cf, struct fwht_raw_frame *ref,
		       u32 hdr_flags, unsigned int components_num)
{
	const __be16 *rlco = cf->rlc_data;

	decode_plane(cf, &rlco, ref->luma, cf->height, cf->width,
		     hdr_flags & FWHT_FL_LUMA_IS_UNCOMPRESSED);

	if (components_num >= 3) {
		u32 h = cf->height;
		u32 w = cf->width;

		/* chroma planes may be subsampled in either dimension */
		if (!(hdr_flags & FWHT_FL_CHROMA_FULL_HEIGHT))
			h /= 2;
		if (!(hdr_flags & FWHT_FL_CHROMA_FULL_WIDTH))
			w /= 2;
		decode_plane(cf, &rlco, ref->cb, h, w,
			     hdr_flags & FWHT_FL_CB_IS_UNCOMPRESSED);
		decode_plane(cf, &rlco, ref->cr, h, w,
			     hdr_flags & FWHT_FL_CR_IS_UNCOMPRESSED);
	}

	if (components_num == 4)
		decode_plane(cf, &rlco, ref->alpha, cf->height, cf->width,
			     hdr_flags & FWHT_FL_ALPHA_IS_UNCOMPRESSED);
}