]> git.proxmox.com Git - mirror_ubuntu-artful-kernel.git/blame - drivers/media/platform/sti/hva/hva-h264.c
Merge branches 'for-4.11/upstream-fixes', 'for-4.12/accutouch', 'for-4.12/cp2112...
[mirror_ubuntu-artful-kernel.git] / drivers / media / platform / sti / hva / hva-h264.c
CommitLineData
ba4616b7
JCT
1/*
2 * Copyright (C) STMicroelectronics SA 2015
3 * Authors: Yannick Fertre <yannick.fertre@st.com>
4 * Hugues Fruchet <hugues.fruchet@st.com>
5 * License terms: GNU General Public License (GPL), version 2
6 */
7
8#include "hva.h"
9#include "hva-hw.h"
10
/* maximum size in bytes accepted for the SPS/PPS headers of a key frame */
#define MAX_SPS_PPS_SIZE 128

/* mask applied to the bitstream offset (expressed in bits) */
#define BITSTREAM_OFFSET_MASK 0x7F

/* video max size */
#define H264_MAX_SIZE_W 1920
#define H264_MAX_SIZE_H 1920

/*
 * number of macroblocks (width & height), rounded up to a whole macroblock.
 * The argument is parenthesized so that any expression can be passed.
 */
#define MB_W(w) (((w) + 0xF) / 0x10)
#define MB_H(h) (((h) + 0xF) / 0x10)

/* formula to get temporal or spatial data size */
#define DATA_SIZE(w, h) (MB_W(w) * MB_H(h) * 16)

#define SEARCH_WINDOW_BUFFER_MAX_SIZE(w) ((4 * MB_W(w) + 42) * 256 * 3 / 2)
#define CABAC_CONTEXT_BUFFER_MAX_SIZE(w) (MB_W(w) * 16)
#define CTX_MB_BUFFER_MAX_SIZE(w) (MB_W(w) * 16 * 8)
#define SLICE_HEADER_SIZE (4 * 16)
#define BRC_DATA_SIZE (5 * 16)

/* source buffer copy in YUV 420 MB-tiled format with size=16*256*3/2 */
#define CURRENT_WINDOW_BUFFER_MAX_SIZE (16 * 256 * 3 / 2)

/*
 * 4 lines of pixels (in Luma, Chroma blue and Chroma red) of top MB
 * for deblocking with size=4*16*MBx*2
 */
#define LOCAL_RECONSTRUCTED_BUFFER_MAX_SIZE(w) (4 * 16 * MB_W(w) * 2)

/* factor for bitrate and cpb buffer size max values if profile >= high */
#define H264_FACTOR_HIGH 1200

/* factor for bitrate and cpb buffer size max values if profile < high */
#define H264_FACTOR_BASELINE 1000

/* number of bytes for NALU_TYPE_FILLER_DATA header and footer */
#define H264_FILLER_DATA_SIZE 6
49
50struct h264_profile {
51 enum v4l2_mpeg_video_h264_level level;
52 u32 max_mb_per_seconds;
53 u32 max_frame_size;
54 u32 max_bitrate;
55 u32 max_cpb_size;
56 u32 min_comp_ratio;
57};
58
59static const struct h264_profile h264_infos_list[] = {
60 {V4L2_MPEG_VIDEO_H264_LEVEL_1_0, 1485, 99, 64, 175, 2},
61 {V4L2_MPEG_VIDEO_H264_LEVEL_1B, 1485, 99, 128, 350, 2},
62 {V4L2_MPEG_VIDEO_H264_LEVEL_1_1, 3000, 396, 192, 500, 2},
63 {V4L2_MPEG_VIDEO_H264_LEVEL_1_2, 6000, 396, 384, 1000, 2},
64 {V4L2_MPEG_VIDEO_H264_LEVEL_1_3, 11880, 396, 768, 2000, 2},
65 {V4L2_MPEG_VIDEO_H264_LEVEL_2_0, 11880, 396, 2000, 2000, 2},
66 {V4L2_MPEG_VIDEO_H264_LEVEL_2_1, 19800, 792, 4000, 4000, 2},
67 {V4L2_MPEG_VIDEO_H264_LEVEL_2_2, 20250, 1620, 4000, 4000, 2},
68 {V4L2_MPEG_VIDEO_H264_LEVEL_3_0, 40500, 1620, 10000, 10000, 2},
69 {V4L2_MPEG_VIDEO_H264_LEVEL_3_1, 108000, 3600, 14000, 14000, 4},
70 {V4L2_MPEG_VIDEO_H264_LEVEL_3_2, 216000, 5120, 20000, 20000, 4},
71 {V4L2_MPEG_VIDEO_H264_LEVEL_4_0, 245760, 8192, 20000, 25000, 4},
72 {V4L2_MPEG_VIDEO_H264_LEVEL_4_1, 245760, 8192, 50000, 62500, 2},
73 {V4L2_MPEG_VIDEO_H264_LEVEL_4_2, 522240, 8704, 50000, 62500, 2},
74 {V4L2_MPEG_VIDEO_H264_LEVEL_5_0, 589824, 22080, 135000, 135000, 2},
75 {V4L2_MPEG_VIDEO_H264_LEVEL_5_1, 983040, 36864, 240000, 240000, 2}
76};
77
/* bit-rate control algorithm, as written in the brc_type task field */
enum hva_brc_type {
	BRC_TYPE_NONE		= 0,	/* no bit-rate control */
	BRC_TYPE_CBR		= 1,
	BRC_TYPE_VBR		= 2,
	BRC_TYPE_VBR_LOW_DELAY	= 3,
};
84
/* entropy coding mode, as written in the entropy_coding_mode task field */
enum hva_entropy_coding_mode {
	CAVLC	= 0,
	CABAC	= 1,
};
89
/* picture coding type (I, P or B) of the frame to encode */
enum hva_picture_coding_type {
	PICTURE_CODING_TYPE_I	= 0,
	PICTURE_CODING_TYPE_P	= 1,
	PICTURE_CODING_TYPE_B	= 2,
};
95
/* input picture format, as written in the sampling_mode task field */
enum hva_h264_sampling_mode {
	SAMPLING_MODE_NV12	= 0,
	SAMPLING_MODE_UYVY	= 1,
	SAMPLING_MODE_RGB3	= 3,
	SAMPLING_MODE_XRGB4	= 4,
	SAMPLING_MODE_NV21	= 8,
	SAMPLING_MODE_VYUY	= 9,
	SAMPLING_MODE_BGR3	= 11,
	SAMPLING_MODE_XBGR4	= 12,
	SAMPLING_MODE_RGBX4	= 20,
	SAMPLING_MODE_BGRX4	= 28,
};
108
/* NAL unit types; the value is the byte written after the start code */
enum hva_h264_nalu_type {
	NALU_TYPE_UNKNOWN	= 0,
	NALU_TYPE_SLICE		= 1,
	NALU_TYPE_SLICE_DPA	= 2,
	NALU_TYPE_SLICE_DPB	= 3,
	NALU_TYPE_SLICE_DPC	= 4,
	NALU_TYPE_SLICE_IDR	= 5,
	NALU_TYPE_SEI		= 6,
	NALU_TYPE_SPS		= 7,
	NALU_TYPE_PPS		= 8,
	NALU_TYPE_AU_DELIMITER	= 9,
	NALU_TYPE_SEQ_END	= 10,
	NALU_TYPE_STREAM_END	= 11,
	NALU_TYPE_FILLER_DATA	= 12,
	NALU_TYPE_SPS_EXT	= 13,
	NALU_TYPE_PREFIX_UNIT	= 14,
	NALU_TYPE_SUBSET_SPS	= 15,
	NALU_TYPE_SLICE_AUX	= 19,
	NALU_TYPE_SLICE_EXT	= 20,
};
129
/* SEI payload types; the value is written as payload type byte of the NAL */
enum hva_h264_sei_payload_type {
	SEI_BUFFERING_PERIOD		= 0,
	SEI_PICTURE_TIMING		= 1,
	SEI_STEREO_VIDEO_INFO		= 21,
	SEI_FRAME_PACKING_ARRANGEMENT	= 45,
};
136
137/**
138 * stereo Video Info struct
139 */
140struct hva_h264_stereo_video_sei {
141 u8 field_views_flag;
142 u8 top_field_is_left_view_flag;
143 u8 current_frame_is_left_view_flag;
144 u8 next_frame_is_second_view_flag;
145 u8 left_view_self_contained_flag;
146 u8 right_view_self_contained_flag;
147};
148
149/**
150 * @frame_width: width in pixels of the buffer containing the input frame
151 * @frame_height: height in pixels of the buffer containing the input frame
152 * @frame_num: the parameter to be written in the slice header
153 * @picture_coding_type: type I, P or B
154 * @pic_order_cnt_type: POC mode, as defined in H264 std : can be 0,1,2
155 * @first_picture_in_sequence: flag telling to encoder that this is the
156 * first picture in a video sequence.
157 * Used for VBR
158 * @slice_size_type: 0 = no constraint to close the slice
159 * 1= a slice is closed as soon as the slice_mb_size limit
160 * is reached
161 * 2= a slice is closed as soon as the slice_byte_size limit
162 * is reached
163 * 3= a slice is closed as soon as either the slice_byte_size
164 * limit or the slice_mb_size limit is reached
165 * @slice_mb_size: defines the slice size in number of macroblocks
166 * (used when slice_size_type=1 or slice_size_type=3)
167 * @ir_param_option: defines the number of macroblocks per frame to be
168 * refreshed by AIR algorithm OR the refresh period
169 * by CIR algorithm
170 * @intra_refresh_type: enables the adaptive intra refresh algorithm.
171 * Disable=0 / Adaptative=1 and Cycle=2 as intra refresh
172 * @use_constrained_intra_flag: constrained_intra_pred_flag from PPS
173 * @transform_mode: controls the use of 4x4/8x8 transform mode
174 * @disable_deblocking_filter_idc:
175 * 0: specifies that all luma and chroma block edges of
176 * the slice are filtered.
177 * 1: specifies that deblocking is disabled for all block
178 * edges of the slice.
179 * 2: specifies that all luma and chroma block edges of
180 * the slice are filtered with exception of the block edges
181 * that coincide with slice boundaries
182 * @slice_alpha_c0_offset_div2: to be written in slice header,
183 * controls deblocking
184 * @slice_beta_offset_div2: to be written in slice header,
185 * controls deblocking
186 * @encoder_complexity: encoder complexity control (IME).
187 * 0 = I_16x16, P_16x16, Full ME Complexity
188 * 1 = I_16x16, I_NxN, P_16x16, Full ME Complexity
189 * 2 = I_16x16, I_NXN, P_16x16, P_WxH, Full ME Complexity
190 * 4 = I_16x16, P_16x16, Reduced ME Complexity
191 * 5 = I_16x16, I_NxN, P_16x16, Reduced ME Complexity
192 * 6 = I_16x16, I_NXN, P_16x16, P_WxH, Reduced ME Complexity
193 * @chroma_qp_index_offset: coming from picture parameter set
194 * (PPS see [H.264 STD] 7.4.2.2)
195 * @entropy_coding_mode: entropy coding mode.
196 * 0 = CAVLC
197 * 1 = CABAC
198 * @brc_type: selects the bit-rate control algorithm
199 * 0 = constant Qp, (no BRC)
200 * 1 = CBR
201 * 2 = VBR
202 * @quant: Quantization param used in case of fix QP encoding (no BRC)
203 * @non_VCL_NALU_Size: size of non-VCL NALUs (SPS, PPS, filler),
204 * used by BRC
205 * @cpb_buffer_size: size of Coded Picture Buffer, used by BRC
206 * @bit_rate: target bitrate, for BRC
207 * @qp_min: min QP threshold
208 * @qp_max: max QP threshold
209 * @framerate_num: target framerate numerator , used by BRC
210 * @framerate_den: target framerate denomurator , used by BRC
211 * @delay: End-to-End Initial Delay
212 * @strict_HRD_compliancy: flag for HDR compliancy (1)
213 * May impact quality encoding
214 * @addr_source_buffer: address of input frame buffer for current frame
215 * @addr_fwd_Ref_Buffer: address of reference frame buffer
216 * @addr_rec_buffer: address of reconstructed frame buffer
217 * @addr_output_bitstream_start: output bitstream start address
218 * @addr_output_bitstream_end: output bitstream end address
219 * @addr_external_sw : address of external search window
220 * @addr_lctx : address of context picture buffer
221 * @addr_local_rec_buffer: address of local reconstructed buffer
222 * @addr_spatial_context: address of spatial context buffer
223 * @bitstream_offset: offset in bits between aligned bitstream start
224 * address and first bit to be written by HVA.
225 * Range value is [0..63]
226 * @sampling_mode: Input picture format .
227 * 0: YUV420 semi_planar Interleaved
228 * 1: YUV422 raster Interleaved
229 * @addr_param_out: address of output parameters structure
230 * @addr_scaling_matrix: address to the coefficient of
231 * the inverse scaling matrix
232 * @addr_scaling_matrix_dir: address to the coefficient of
233 * the direct scaling matrix
234 * @addr_cabac_context_buffer: address of cabac context buffer
235 * @GmvX: Input information about the horizontal global displacement of
236 * the encoded frame versus the previous one
237 * @GmvY: Input information about the vertical global displacement of
238 * the encoded frame versus the previous one
239 * @window_width: width in pixels of the window to be encoded inside
240 * the input frame
241 * @window_height: width in pixels of the window to be encoded inside
242 * the input frame
243 * @window_horizontal_offset: horizontal offset in pels for input window
244 * within input frame
245 * @window_vertical_offset: vertical offset in pels for input window
246 * within input frame
247 * @addr_roi: Map of QP offset for the Region of Interest algorithm and
248 * also used for Error map.
249 * Bit 0-6 used for qp offset (value -64 to 63).
250 * Bit 7 used to force intra
251 * @addr_slice_header: address to slice header
252 * @slice_header_size_in_bits: size in bits of the Slice header
253 * @slice_header_offset0: Slice header offset where to insert
254 * first_Mb_in_slice
255 * @slice_header_offset1: Slice header offset where to insert
256 * slice_qp_delta
257 * @slice_header_offset2: Slice header offset where to insert
258 * num_MBs_in_slice
259 * @slice_synchro_enable: enable "slice ready" interrupt after each slice
260 * @max_slice_number: Maximum number of slice in a frame
261 * (0 is strictly forbidden)
262 * @rgb2_yuv_y_coeff: Four coefficients (C0C1C2C3) to convert from RGB to
263 * YUV for the Y component.
264 * Y = C0*R + C1*G + C2*B + C3 (C0 is on byte 0)
265 * @rgb2_yuv_u_coeff: four coefficients (C0C1C2C3) to convert from RGB to
266 * YUV for the Y component.
267 * Y = C0*R + C1*G + C2*B + C3 (C0 is on byte 0)
268 * @rgb2_yuv_v_coeff: Four coefficients (C0C1C2C3) to convert from RGB to
269 * YUV for the U (Cb) component.
270 * U = C0*R + C1*G + C2*B + C3 (C0 is on byte 0)
271 * @slice_byte_size: maximum slice size in bytes
272 * (used when slice_size_type=2 or slice_size_type=3)
273 * @max_air_intra_mb_nb: Maximum number of intra macroblock in a frame
274 * for the AIR algorithm
275 * @brc_no_skip: Disable skipping in the Bitrate Controller
276 * @addr_brc_in_out_parameter: address of static buffer for BRC parameters
277 */
278struct hva_h264_td {
279 u16 frame_width;
280 u16 frame_height;
281 u32 frame_num;
282 u16 picture_coding_type;
283 u16 reserved1;
284 u16 pic_order_cnt_type;
285 u16 first_picture_in_sequence;
286 u16 slice_size_type;
287 u16 reserved2;
288 u32 slice_mb_size;
289 u16 ir_param_option;
290 u16 intra_refresh_type;
291 u16 use_constrained_intra_flag;
292 u16 transform_mode;
293 u16 disable_deblocking_filter_idc;
294 s16 slice_alpha_c0_offset_div2;
295 s16 slice_beta_offset_div2;
296 u16 encoder_complexity;
297 s16 chroma_qp_index_offset;
298 u16 entropy_coding_mode;
299 u16 brc_type;
300 u16 quant;
301 u32 non_vcl_nalu_size;
302 u32 cpb_buffer_size;
303 u32 bit_rate;
304 u16 qp_min;
305 u16 qp_max;
306 u16 framerate_num;
307 u16 framerate_den;
308 u16 delay;
309 u16 strict_hrd_compliancy;
310 u32 addr_source_buffer;
311 u32 addr_fwd_ref_buffer;
312 u32 addr_rec_buffer;
313 u32 addr_output_bitstream_start;
314 u32 addr_output_bitstream_end;
315 u32 addr_external_sw;
316 u32 addr_lctx;
317 u32 addr_local_rec_buffer;
318 u32 addr_spatial_context;
319 u16 bitstream_offset;
320 u16 sampling_mode;
321 u32 addr_param_out;
322 u32 addr_scaling_matrix;
323 u32 addr_scaling_matrix_dir;
324 u32 addr_cabac_context_buffer;
325 u32 reserved3;
326 u32 reserved4;
327 s16 gmv_x;
328 s16 gmv_y;
329 u16 window_width;
330 u16 window_height;
331 u16 window_horizontal_offset;
332 u16 window_vertical_offset;
333 u32 addr_roi;
334 u32 addr_slice_header;
335 u16 slice_header_size_in_bits;
336 u16 slice_header_offset0;
337 u16 slice_header_offset1;
338 u16 slice_header_offset2;
339 u32 reserved5;
340 u32 reserved6;
341 u16 reserved7;
342 u16 reserved8;
343 u16 slice_synchro_enable;
344 u16 max_slice_number;
345 u32 rgb2_yuv_y_coeff;
346 u32 rgb2_yuv_u_coeff;
347 u32 rgb2_yuv_v_coeff;
348 u32 slice_byte_size;
349 u16 max_air_intra_mb_nb;
350 u16 brc_no_skip;
351 u32 addr_temporal_context;
352 u32 addr_brc_in_out_parameter;
353};
354
355/**
356 * @ slice_size: slice size
357 * @ slice_start_time: start time
358 * @ slice_stop_time: stop time
359 * @ slice_num: slice number
360 */
361struct hva_h264_slice_po {
362 u32 slice_size;
363 u32 slice_start_time;
364 u32 slice_end_time;
365 u32 slice_num;
366};
367
368/**
369 * @ bitstream_size: bitstream size
370 * @ dct_bitstream_size: dtc bitstream size
371 * @ stuffing_bits: number of stuffing bits inserted by the encoder
372 * @ removal_time: removal time of current frame (nb of ticks 1/framerate)
373 * @ hvc_start_time: hvc start time
374 * @ hvc_stop_time: hvc stop time
375 * @ slice_count: slice count
376 */
377struct hva_h264_po {
378 u32 bitstream_size;
379 u32 dct_bitstream_size;
380 u32 stuffing_bits;
381 u32 removal_time;
382 u32 hvc_start_time;
383 u32 hvc_stop_time;
384 u32 slice_count;
385 u32 reserved0;
386 struct hva_h264_slice_po slice_params[16];
387};
388
389struct hva_h264_task {
390 struct hva_h264_td td;
391 struct hva_h264_po po;
392};
393
/**
 * struct hva_h264_ctx - H264 encoder instance context
 *
 * All buffers are allocated by hva_h264_open() and released by
 * hva_h264_close().
 *
 * @seq_info:  sequence information buffer
 * @ref_frame: reference frame buffer
 * @rec_frame: reconstructed frame buffer
 * @task:      task descriptor
 */
struct hva_h264_ctx {
	struct hva_buffer	*seq_info;
	struct hva_buffer	*ref_frame;
	struct hva_buffer	*rec_frame;
	struct hva_buffer	*task;
};
406
407static int hva_h264_fill_slice_header(struct hva_ctx *pctx,
408 u8 *slice_header_addr,
409 struct hva_controls *ctrls,
410 int frame_num,
411 u16 *header_size,
412 u16 *header_offset0,
413 u16 *header_offset1,
414 u16 *header_offset2)
415{
416 /*
417 * with this HVA hardware version, part of the slice header is computed
418 * on host and part by hardware.
419 * The part of host is precomputed and available through this array.
420 */
421 struct device *dev = ctx_to_dev(pctx);
422 int cabac = V4L2_MPEG_VIDEO_H264_ENTROPY_MODE_CABAC;
423 const unsigned char slice_header[] = { 0x00, 0x00, 0x00, 0x01,
424 0x41, 0x34, 0x07, 0x00};
425 int idr_pic_id = frame_num % 2;
426 enum hva_picture_coding_type type;
427 u32 frame_order = frame_num % ctrls->gop_size;
428
429 if (!(frame_num % ctrls->gop_size))
430 type = PICTURE_CODING_TYPE_I;
431 else
432 type = PICTURE_CODING_TYPE_P;
433
434 memcpy(slice_header_addr, slice_header, sizeof(slice_header));
435
436 *header_size = 56;
437 *header_offset0 = 40;
438 *header_offset1 = 13;
439 *header_offset2 = 0;
440
441 if (type == PICTURE_CODING_TYPE_I) {
442 slice_header_addr[4] = 0x65;
443 slice_header_addr[5] = 0x11;
444
445 /* toggle the I frame */
446 if ((frame_num / ctrls->gop_size) % 2) {
447 *header_size += 4;
448 *header_offset1 += 4;
449 slice_header_addr[6] = 0x04;
450 slice_header_addr[7] = 0x70;
451
452 } else {
453 *header_size += 2;
454 *header_offset1 += 2;
455 slice_header_addr[6] = 0x09;
456 slice_header_addr[7] = 0xC0;
457 }
458 } else {
459 if (ctrls->entropy_mode == cabac) {
460 *header_size += 1;
461 *header_offset1 += 1;
462 slice_header_addr[7] = 0x80;
463 }
464 /*
465 * update slice header with P frame order
466 * frame order is limited to 16 (coded on 4bits only)
467 */
468 slice_header_addr[5] += ((frame_order & 0x0C) >> 2);
469 slice_header_addr[6] += ((frame_order & 0x03) << 6);
470 }
471
472 dev_dbg(dev,
473 "%s %s slice header order %d idrPicId %d header size %d\n",
474 pctx->name, __func__, frame_order, idr_pic_id, *header_size);
475 return 0;
476}
477
478static int hva_h264_fill_data_nal(struct hva_ctx *pctx,
479 unsigned int stuffing_bytes, u8 *addr,
480 unsigned int stream_size, unsigned int *size)
481{
482 struct device *dev = ctx_to_dev(pctx);
483 const u8 start[] = { 0x00, 0x00, 0x00, 0x01 };
484
485 dev_dbg(dev, "%s %s stuffing bytes %d\n", pctx->name, __func__,
486 stuffing_bytes);
487
488 if ((*size + stuffing_bytes + H264_FILLER_DATA_SIZE) > stream_size) {
489 dev_dbg(dev, "%s %s too many stuffing bytes %d\n",
490 pctx->name, __func__, stuffing_bytes);
491 return 0;
492 }
493
494 /* start code */
495 memcpy(addr + *size, start, sizeof(start));
496 *size += sizeof(start);
497
498 /* nal_unit_type */
499 addr[*size] = NALU_TYPE_FILLER_DATA;
500 *size += 1;
501
502 memset(addr + *size, 0xff, stuffing_bytes);
503 *size += stuffing_bytes;
504
505 addr[*size] = 0x80;
506 *size += 1;
507
508 return 0;
509}
510
511static int hva_h264_fill_sei_nal(struct hva_ctx *pctx,
512 enum hva_h264_sei_payload_type type,
513 u8 *addr, u32 *size)
514{
515 struct device *dev = ctx_to_dev(pctx);
516 const u8 start[] = { 0x00, 0x00, 0x00, 0x01 };
517 struct hva_h264_stereo_video_sei info;
518 u8 offset = 7;
519 u8 msg = 0;
520
521 /* start code */
522 memcpy(addr + *size, start, sizeof(start));
523 *size += sizeof(start);
524
525 /* nal_unit_type */
526 addr[*size] = NALU_TYPE_SEI;
527 *size += 1;
528
529 /* payload type */
530 addr[*size] = type;
531 *size += 1;
532
533 switch (type) {
534 case SEI_STEREO_VIDEO_INFO:
535 memset(&info, 0, sizeof(info));
536
537 /* set to top/bottom frame packing arrangement */
538 info.field_views_flag = 1;
539 info.top_field_is_left_view_flag = 1;
540
541 /* payload size */
542 addr[*size] = 1;
543 *size += 1;
544
545 /* payload */
546 msg = info.field_views_flag << offset--;
547
548 if (info.field_views_flag) {
549 msg |= info.top_field_is_left_view_flag <<
550 offset--;
551 } else {
552 msg |= info.current_frame_is_left_view_flag <<
553 offset--;
554 msg |= info.next_frame_is_second_view_flag <<
555 offset--;
556 }
557 msg |= info.left_view_self_contained_flag << offset--;
558 msg |= info.right_view_self_contained_flag << offset--;
559
560 addr[*size] = msg;
561 *size += 1;
562
563 addr[*size] = 0x80;
564 *size += 1;
565
566 return 0;
567 case SEI_BUFFERING_PERIOD:
568 case SEI_PICTURE_TIMING:
569 case SEI_FRAME_PACKING_ARRANGEMENT:
570 default:
571 dev_err(dev, "%s sei nal type not supported %d\n",
572 pctx->name, type);
573 return -EINVAL;
574 }
575}
576
577static int hva_h264_prepare_task(struct hva_ctx *pctx,
578 struct hva_h264_task *task,
579 struct hva_frame *frame,
580 struct hva_stream *stream)
581{
582 struct hva_dev *hva = ctx_to_hdev(pctx);
583 struct device *dev = ctx_to_dev(pctx);
584 struct hva_h264_ctx *ctx = (struct hva_h264_ctx *)pctx->priv;
585 struct hva_buffer *seq_info = ctx->seq_info;
586 struct hva_buffer *fwd_ref_frame = ctx->ref_frame;
587 struct hva_buffer *loc_rec_frame = ctx->rec_frame;
588 struct hva_h264_td *td = &task->td;
589 struct hva_controls *ctrls = &pctx->ctrls;
590 struct v4l2_fract *time_per_frame = &pctx->ctrls.time_per_frame;
591 int cavlc = V4L2_MPEG_VIDEO_H264_ENTROPY_MODE_CAVLC;
592 u32 frame_num = pctx->stream_num;
593 u32 addr_esram = hva->esram_addr;
594 enum v4l2_mpeg_video_h264_level level;
595 dma_addr_t paddr = 0;
596 u8 *slice_header_vaddr;
597 u32 frame_width = frame->info.aligned_width;
598 u32 frame_height = frame->info.aligned_height;
599 u32 max_cpb_buffer_size;
600 unsigned int payload = stream->bytesused;
601 u32 max_bitrate;
602
603 /* check width and height parameters */
604 if ((frame_width > max(H264_MAX_SIZE_W, H264_MAX_SIZE_H)) ||
605 (frame_height > max(H264_MAX_SIZE_W, H264_MAX_SIZE_H))) {
606 dev_err(dev,
607 "%s width(%d) or height(%d) exceeds limits (%dx%d)\n",
608 pctx->name, frame_width, frame_height,
609 H264_MAX_SIZE_W, H264_MAX_SIZE_H);
f7e1a6db 610 pctx->frame_errors++;
ba4616b7
JCT
611 return -EINVAL;
612 }
613
614 level = ctrls->level;
615
616 memset(td, 0, sizeof(struct hva_h264_td));
617
618 td->frame_width = frame_width;
619 td->frame_height = frame_height;
620
621 /* set frame alignement */
622 td->window_width = frame_width;
623 td->window_height = frame_height;
624 td->window_horizontal_offset = 0;
625 td->window_vertical_offset = 0;
626
627 td->first_picture_in_sequence = (!frame_num) ? 1 : 0;
628
629 /* pic_order_cnt_type hard coded to '2' as only I & P frames */
630 td->pic_order_cnt_type = 2;
631
632 /* useConstrainedIntraFlag set to false for better coding efficiency */
633 td->use_constrained_intra_flag = false;
634 td->brc_type = (ctrls->bitrate_mode == V4L2_MPEG_VIDEO_BITRATE_MODE_CBR)
635 ? BRC_TYPE_CBR : BRC_TYPE_VBR;
636
637 td->entropy_coding_mode = (ctrls->entropy_mode == cavlc) ? CAVLC :
638 CABAC;
639
640 td->bit_rate = ctrls->bitrate;
641
642 /* set framerate, framerate = 1 n/ time per frame */
643 if (time_per_frame->numerator >= 536) {
644 /*
645 * due to a hardware bug, framerate denominator can't exceed
646 * 536 (BRC overflow). Compute nearest framerate
647 */
648 td->framerate_den = 1;
649 td->framerate_num = (time_per_frame->denominator +
650 (time_per_frame->numerator >> 1) - 1) /
651 time_per_frame->numerator;
652
653 /*
654 * update bitrate to introduce a correction due to
655 * the new framerate
656 * new bitrate = (old bitrate * new framerate) / old framerate
657 */
658 td->bit_rate /= time_per_frame->numerator;
659 td->bit_rate *= time_per_frame->denominator;
660 td->bit_rate /= td->framerate_num;
661 } else {
662 td->framerate_den = time_per_frame->numerator;
663 td->framerate_num = time_per_frame->denominator;
664 }
665
666 /* compute maximum bitrate depending on profile */
667 if (ctrls->profile >= V4L2_MPEG_VIDEO_H264_PROFILE_HIGH)
668 max_bitrate = h264_infos_list[level].max_bitrate *
669 H264_FACTOR_HIGH;
670 else
671 max_bitrate = h264_infos_list[level].max_bitrate *
672 H264_FACTOR_BASELINE;
673
674 /* check if bitrate doesn't exceed max size */
675 if (td->bit_rate > max_bitrate) {
676 dev_dbg(dev,
677 "%s bitrate (%d) larger than level and profile allow, clip to %d\n",
678 pctx->name, td->bit_rate, max_bitrate);
679 td->bit_rate = max_bitrate;
680 }
681
682 /* convert cpb_buffer_size in bits */
683 td->cpb_buffer_size = ctrls->cpb_size * 8000;
684
685 /* compute maximum cpb buffer size depending on profile */
686 if (ctrls->profile >= V4L2_MPEG_VIDEO_H264_PROFILE_HIGH)
687 max_cpb_buffer_size =
688 h264_infos_list[level].max_cpb_size * H264_FACTOR_HIGH;
689 else
690 max_cpb_buffer_size =
691 h264_infos_list[level].max_cpb_size * H264_FACTOR_BASELINE;
692
693 /* check if cpb buffer size doesn't exceed max size */
694 if (td->cpb_buffer_size > max_cpb_buffer_size) {
695 dev_dbg(dev,
696 "%s cpb size larger than level %d allows, clip to %d\n",
697 pctx->name, td->cpb_buffer_size, max_cpb_buffer_size);
698 td->cpb_buffer_size = max_cpb_buffer_size;
699 }
700
701 /* enable skipping in the Bitrate Controller */
702 td->brc_no_skip = 0;
703
704 /* initial delay */
705 if ((ctrls->bitrate_mode == V4L2_MPEG_VIDEO_BITRATE_MODE_CBR) &&
706 td->bit_rate)
707 td->delay = 1000 * (td->cpb_buffer_size / td->bit_rate);
708 else
709 td->delay = 0;
710
711 switch (frame->info.pixelformat) {
712 case V4L2_PIX_FMT_NV12:
713 td->sampling_mode = SAMPLING_MODE_NV12;
714 break;
715 case V4L2_PIX_FMT_NV21:
716 td->sampling_mode = SAMPLING_MODE_NV21;
717 break;
718 default:
719 dev_err(dev, "%s invalid source pixel format\n",
720 pctx->name);
f7e1a6db 721 pctx->frame_errors++;
ba4616b7
JCT
722 return -EINVAL;
723 }
724
725 /*
726 * fill matrix color converter (RGB to YUV)
727 * Y = 0,299 R + 0,587 G + 0,114 B
728 * Cb = -0,1687 R -0,3313 G + 0,5 B + 128
729 * Cr = 0,5 R - 0,4187 G - 0,0813 B + 128
730 */
731 td->rgb2_yuv_y_coeff = 0x12031008;
732 td->rgb2_yuv_u_coeff = 0x800EF7FB;
733 td->rgb2_yuv_v_coeff = 0x80FEF40E;
734
735 /* enable/disable transform mode */
736 td->transform_mode = ctrls->dct8x8;
737
738 /* encoder complexity fix to 2, ENCODE_I_16x16_I_NxN_P_16x16_P_WxH */
739 td->encoder_complexity = 2;
740
741 /* quant fix to 28, default VBR value */
742 td->quant = 28;
743
744 if (td->framerate_den == 0) {
745 dev_err(dev, "%s invalid framerate\n", pctx->name);
f7e1a6db 746 pctx->frame_errors++;
ba4616b7
JCT
747 return -EINVAL;
748 }
749
750 /* if automatic framerate, deactivate bitrate controller */
751 if (td->framerate_num == 0)
752 td->brc_type = 0;
753
754 /* compliancy fix to true */
755 td->strict_hrd_compliancy = 1;
756
757 /* set minimum & maximum quantizers */
758 td->qp_min = clamp_val(ctrls->qpmin, 0, 51);
759 td->qp_max = clamp_val(ctrls->qpmax, 0, 51);
760
761 td->addr_source_buffer = frame->paddr;
762 td->addr_fwd_ref_buffer = fwd_ref_frame->paddr;
763 td->addr_rec_buffer = loc_rec_frame->paddr;
764
765 td->addr_output_bitstream_end = (u32)stream->paddr + stream->size;
766
767 td->addr_output_bitstream_start = (u32)stream->paddr;
768 td->bitstream_offset = (((u32)stream->paddr & 0xF) << 3) &
769 BITSTREAM_OFFSET_MASK;
770
771 td->addr_param_out = (u32)ctx->task->paddr +
772 offsetof(struct hva_h264_task, po);
773
774 /* swap spatial and temporal context */
775 if (frame_num % 2) {
776 paddr = seq_info->paddr;
777 td->addr_spatial_context = ALIGN(paddr, 0x100);
778 paddr = seq_info->paddr + DATA_SIZE(frame_width,
779 frame_height);
780 td->addr_temporal_context = ALIGN(paddr, 0x100);
781 } else {
782 paddr = seq_info->paddr;
783 td->addr_temporal_context = ALIGN(paddr, 0x100);
784 paddr = seq_info->paddr + DATA_SIZE(frame_width,
785 frame_height);
786 td->addr_spatial_context = ALIGN(paddr, 0x100);
787 }
788
789 paddr = seq_info->paddr + 2 * DATA_SIZE(frame_width, frame_height);
790
791 td->addr_brc_in_out_parameter = ALIGN(paddr, 0x100);
792
793 paddr = td->addr_brc_in_out_parameter + BRC_DATA_SIZE;
794 td->addr_slice_header = ALIGN(paddr, 0x100);
795 td->addr_external_sw = ALIGN(addr_esram, 0x100);
796
797 addr_esram += SEARCH_WINDOW_BUFFER_MAX_SIZE(frame_width);
798 td->addr_local_rec_buffer = ALIGN(addr_esram, 0x100);
799
800 addr_esram += LOCAL_RECONSTRUCTED_BUFFER_MAX_SIZE(frame_width);
801 td->addr_lctx = ALIGN(addr_esram, 0x100);
802
803 addr_esram += CTX_MB_BUFFER_MAX_SIZE(max(frame_width, frame_height));
804 td->addr_cabac_context_buffer = ALIGN(addr_esram, 0x100);
805
806 if (!(frame_num % ctrls->gop_size)) {
807 td->picture_coding_type = PICTURE_CODING_TYPE_I;
808 stream->vbuf.flags |= V4L2_BUF_FLAG_KEYFRAME;
809 } else {
810 td->picture_coding_type = PICTURE_CODING_TYPE_P;
811 stream->vbuf.flags &= ~V4L2_BUF_FLAG_KEYFRAME;
812 }
813
814 /* fill the slice header part */
815 slice_header_vaddr = seq_info->vaddr + (td->addr_slice_header -
816 seq_info->paddr);
817
818 hva_h264_fill_slice_header(pctx, slice_header_vaddr, ctrls, frame_num,
819 &td->slice_header_size_in_bits,
820 &td->slice_header_offset0,
821 &td->slice_header_offset1,
822 &td->slice_header_offset2);
823
824 td->chroma_qp_index_offset = 2;
825 td->slice_synchro_enable = 0;
826 td->max_slice_number = 1;
827
828 /*
829 * check the sps/pps header size for key frame only
830 * sps/pps header was previously fill by libv4l
831 * during qbuf of stream buffer
832 */
833 if ((stream->vbuf.flags == V4L2_BUF_FLAG_KEYFRAME) &&
834 (payload > MAX_SPS_PPS_SIZE)) {
835 dev_err(dev, "%s invalid sps/pps size %d\n", pctx->name,
836 payload);
f7e1a6db 837 pctx->frame_errors++;
ba4616b7
JCT
838 return -EINVAL;
839 }
840
841 if (stream->vbuf.flags != V4L2_BUF_FLAG_KEYFRAME)
842 payload = 0;
843
844 /* add SEI nal (video stereo info) */
845 if (ctrls->sei_fp && hva_h264_fill_sei_nal(pctx, SEI_STEREO_VIDEO_INFO,
846 (u8 *)stream->vaddr,
847 &payload)) {
848 dev_err(dev, "%s fail to get SEI nal\n", pctx->name);
f7e1a6db 849 pctx->frame_errors++;
ba4616b7
JCT
850 return -EINVAL;
851 }
852
853 /* fill size of non-VCL NAL units (SPS, PPS, filler and SEI) */
854 td->non_vcl_nalu_size = payload * 8;
855
856 /* compute bitstream offset & new start address of bitstream */
857 td->addr_output_bitstream_start += ((payload >> 4) << 4);
858 td->bitstream_offset += (payload - ((payload >> 4) << 4)) * 8;
859
860 stream->bytesused = payload;
861
862 return 0;
863}
864
865static unsigned int hva_h264_get_stream_size(struct hva_h264_task *task)
866{
867 struct hva_h264_po *po = &task->po;
868
869 return po->bitstream_size;
870}
871
872static u32 hva_h264_get_stuffing_bytes(struct hva_h264_task *task)
873{
874 struct hva_h264_po *po = &task->po;
875
876 return po->stuffing_bits >> 3;
877}
878
879static int hva_h264_open(struct hva_ctx *pctx)
880{
881 struct device *dev = ctx_to_dev(pctx);
882 struct hva_h264_ctx *ctx;
883 struct hva_dev *hva = ctx_to_hdev(pctx);
884 u32 frame_width = pctx->frameinfo.aligned_width;
885 u32 frame_height = pctx->frameinfo.aligned_height;
886 u32 size;
887 int ret;
888
889 /* check esram size necessary to encode a frame */
890 size = SEARCH_WINDOW_BUFFER_MAX_SIZE(frame_width) +
891 LOCAL_RECONSTRUCTED_BUFFER_MAX_SIZE(frame_width) +
892 CTX_MB_BUFFER_MAX_SIZE(max(frame_width, frame_height)) +
893 CABAC_CONTEXT_BUFFER_MAX_SIZE(frame_width);
894
895 if (hva->esram_size < size) {
896 dev_err(dev, "%s not enough esram (max:%d request:%d)\n",
897 pctx->name, hva->esram_size, size);
898 ret = -EINVAL;
899 goto err;
900 }
901
902 /* allocate context for codec */
903 ctx = devm_kzalloc(dev, sizeof(*ctx), GFP_KERNEL);
904 if (!ctx) {
905 ret = -ENOMEM;
906 goto err;
907 }
908
909 /* allocate sequence info buffer */
910 ret = hva_mem_alloc(pctx,
911 2 * DATA_SIZE(frame_width, frame_height) +
912 SLICE_HEADER_SIZE +
913 BRC_DATA_SIZE,
914 "hva sequence info",
915 &ctx->seq_info);
916 if (ret) {
917 dev_err(dev,
918 "%s failed to allocate sequence info buffer\n",
919 pctx->name);
920 goto err_ctx;
921 }
922
923 /* allocate reference frame buffer */
924 ret = hva_mem_alloc(pctx,
925 frame_width * frame_height * 3 / 2,
926 "hva reference frame",
927 &ctx->ref_frame);
928 if (ret) {
929 dev_err(dev, "%s failed to allocate reference frame buffer\n",
930 pctx->name);
931 goto err_seq_info;
932 }
933
934 /* allocate reconstructed frame buffer */
935 ret = hva_mem_alloc(pctx,
936 frame_width * frame_height * 3 / 2,
937 "hva reconstructed frame",
938 &ctx->rec_frame);
939 if (ret) {
940 dev_err(dev,
941 "%s failed to allocate reconstructed frame buffer\n",
942 pctx->name);
943 goto err_ref_frame;
944 }
945
946 /* allocate task descriptor */
947 ret = hva_mem_alloc(pctx,
948 sizeof(struct hva_h264_task),
949 "hva task descriptor",
950 &ctx->task);
951 if (ret) {
952 dev_err(dev,
953 "%s failed to allocate task descriptor\n",
954 pctx->name);
955 goto err_rec_frame;
956 }
957
958 pctx->priv = (void *)ctx;
959
960 return 0;
961
962err_rec_frame:
963 hva_mem_free(pctx, ctx->rec_frame);
964err_ref_frame:
965 hva_mem_free(pctx, ctx->ref_frame);
966err_seq_info:
967 hva_mem_free(pctx, ctx->seq_info);
968err_ctx:
969 devm_kfree(dev, ctx);
970err:
f7e1a6db 971 pctx->sys_errors++;
ba4616b7
JCT
972 return ret;
973}
974
975static int hva_h264_close(struct hva_ctx *pctx)
976{
977 struct hva_h264_ctx *ctx = (struct hva_h264_ctx *)pctx->priv;
978 struct device *dev = ctx_to_dev(pctx);
979
980 if (ctx->seq_info)
981 hva_mem_free(pctx, ctx->seq_info);
982
983 if (ctx->ref_frame)
984 hva_mem_free(pctx, ctx->ref_frame);
985
986 if (ctx->rec_frame)
987 hva_mem_free(pctx, ctx->rec_frame);
988
989 if (ctx->task)
990 hva_mem_free(pctx, ctx->task);
991
992 devm_kfree(dev, ctx);
993
994 return 0;
995}
996
997static int hva_h264_encode(struct hva_ctx *pctx, struct hva_frame *frame,
998 struct hva_stream *stream)
999{
1000 struct hva_h264_ctx *ctx = (struct hva_h264_ctx *)pctx->priv;
1001 struct hva_h264_task *task = (struct hva_h264_task *)ctx->task->vaddr;
1002 struct hva_buffer *tmp_frame;
1003 u32 stuffing_bytes = 0;
1004 int ret = 0;
1005
1006 ret = hva_h264_prepare_task(pctx, task, frame, stream);
1007 if (ret)
1008 goto err;
1009
1010 ret = hva_hw_execute_task(pctx, H264_ENC, ctx->task);
1011 if (ret)
1012 goto err;
1013
1014 pctx->stream_num++;
1015 stream->bytesused += hva_h264_get_stream_size(task);
1016
1017 stuffing_bytes = hva_h264_get_stuffing_bytes(task);
1018
1019 if (stuffing_bytes)
1020 hva_h264_fill_data_nal(pctx, stuffing_bytes,
1021 (u8 *)stream->vaddr,
1022 stream->size,
1023 &stream->bytesused);
1024
1025 /* switch reference & reconstructed frame */
1026 tmp_frame = ctx->ref_frame;
1027 ctx->ref_frame = ctx->rec_frame;
1028 ctx->rec_frame = tmp_frame;
1029
1030 return 0;
1031err:
1032 stream->bytesused = 0;
1033 return ret;
1034}
1035
1036const struct hva_enc nv12h264enc = {
1037 .name = "H264(NV12)",
1038 .pixelformat = V4L2_PIX_FMT_NV12,
1039 .streamformat = V4L2_PIX_FMT_H264,
1040 .max_width = H264_MAX_SIZE_W,
1041 .max_height = H264_MAX_SIZE_H,
1042 .open = hva_h264_open,
1043 .close = hva_h264_close,
1044 .encode = hva_h264_encode,
1045};
1046
1047const struct hva_enc nv21h264enc = {
1048 .name = "H264(NV21)",
1049 .pixelformat = V4L2_PIX_FMT_NV21,
1050 .streamformat = V4L2_PIX_FMT_H264,
1051 .max_width = H264_MAX_SIZE_W,
1052 .max_height = H264_MAX_SIZE_H,
1053 .open = hva_h264_open,
1054 .close = hva_h264_close,
1055 .encode = hva_h264_encode,
1056};