1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright(c) 2017 Intel Corporation
7 #include <rte_common.h>
8 #include <rte_bus_vdev.h>
9 #include <rte_malloc.h>
11 #include <rte_kvargs.h>
12 #include <rte_cycles.h>
14 #include <rte_bbdev.h>
15 #include <rte_bbdev_pmd.h>
17 #include <phy_turbo.h>
19 #include <phy_rate_match.h>
#define DRIVER_NAME baseband_turbo_sw

/* Turbo SW PMD logging ID */
static int bbdev_turbo_sw_logtype;

/* Helper macro for logging; rte_log() itself gets the trailing newline */
#define rte_bbdev_log(level, fmt, ...) \
	rte_log(RTE_LOG_ ## level, bbdev_turbo_sw_logtype, fmt "\n", \
		##__VA_ARGS__)

/* Debug log prefixed with source line and function name */
#define rte_bbdev_log_debug(fmt, ...) \
	rte_bbdev_log(DEBUG, RTE_STR(__LINE__) ":%s() " fmt, __func__, \
		##__VA_ARGS__)

/* Sizes of the scratch buffers used on the decode path */
#define DEINT_INPUT_BUF_SIZE (((RTE_BBDEV_MAX_CB_SIZE >> 3) + 1) * 48)
#define DEINT_OUTPUT_BUF_SIZE (DEINT_INPUT_BUF_SIZE * 6)
#define ADAPTER_OUTPUT_BUF_SIZE ((RTE_BBDEV_MAX_CB_SIZE + 4) * 48)
/* private data structure */
struct bbdev_private {
	unsigned int max_nb_queues;  /**< Max number of queues */
};
/* Initialisation params structure that can be used by Turbo SW driver */
struct turbo_sw_params {
	int socket_id;  /**< Turbo SW device socket */
	uint16_t queues_num;  /**< Turbo SW device queues number */
};
51 /* Accecptable params for Turbo SW devices */
52 #define TURBO_SW_MAX_NB_QUEUES_ARG "max_nb_queues"
53 #define TURBO_SW_SOCKET_ID_ARG "socket_id"
55 static const char * const turbo_sw_valid_params
[] = {
56 TURBO_SW_MAX_NB_QUEUES_ARG
,
57 TURBO_SW_SOCKET_ID_ARG
61 struct turbo_sw_queue
{
62 /* Ring for processed (encoded/decoded) operations which are ready to
65 struct rte_ring
*processed_pkts
;
66 /* Stores input for turbo encoder (used when CRC attachment is
70 /* Stores output from turbo encoder */
72 /* Alpha gamma buf for bblib_turbo_decoder() function */
74 /* Temp buf for bblib_turbo_decoder() function */
76 /* Input buf for bblib_rate_dematching_lte() function */
78 /* Output buf for bblib_rate_dematching_lte() function */
79 uint8_t *deint_output
;
80 /* Output buf for bblib_turbodec_adapter_lte() function */
81 uint8_t *adapter_output
;
82 /* Operation type of this queue */
83 enum rte_bbdev_op_type type
;
84 } __rte_cache_aligned
;
87 mbuf_append(struct rte_mbuf
*m_head
, struct rte_mbuf
*m
, uint16_t len
)
89 if (unlikely(len
> rte_pktmbuf_tailroom(m
)))
92 char *tail
= (char *)m
->buf_addr
+ m
->data_off
+ m
->data_len
;
93 m
->data_len
= (uint16_t)(m
->data_len
+ len
);
94 m_head
->pkt_len
= (m_head
->pkt_len
+ len
);
98 /* Calculate index based on Table 5.1.3-3 from TS34.212 */
100 compute_idx(uint16_t k
)
104 if (k
< RTE_BBDEV_MIN_CB_SIZE
|| k
> RTE_BBDEV_MAX_CB_SIZE
)
108 if ((k
- 2048) % 64 != 0)
111 result
= 124 + (k
- 2048) / 64;
112 } else if (k
<= 512) {
113 if ((k
- 40) % 8 != 0)
116 result
= (k
- 40) / 8 + 1;
117 } else if (k
<= 1024) {
118 if ((k
- 512) % 16 != 0)
121 result
= 60 + (k
- 512) / 16;
122 } else { /* 1024 < k <= 2048 */
123 if ((k
- 1024) % 32 != 0)
126 result
= 92 + (k
- 1024) / 32;
/* Read flag value 0/1 from bitmap */
static inline bool
check_bit(uint32_t bitmap, uint32_t bitmask)
{
	return bitmap & bitmask;
}
139 /* Get device info */
141 info_get(struct rte_bbdev
*dev
, struct rte_bbdev_driver_info
*dev_info
)
143 struct bbdev_private
*internals
= dev
->data
->dev_private
;
145 static const struct rte_bbdev_op_cap bbdev_capabilities
[] = {
147 .type
= RTE_BBDEV_OP_TURBO_DEC
,
150 RTE_BBDEV_TURBO_SUBBLOCK_DEINTERLEAVE
|
151 RTE_BBDEV_TURBO_POS_LLR_1_BIT_IN
|
152 RTE_BBDEV_TURBO_NEG_LLR_1_BIT_IN
|
153 RTE_BBDEV_TURBO_CRC_TYPE_24B
|
154 RTE_BBDEV_TURBO_DEC_TB_CRC_24B_KEEP
|
155 RTE_BBDEV_TURBO_EARLY_TERMINATION
,
156 .max_llr_modulus
= 16,
157 .num_buffers_src
= RTE_BBDEV_MAX_CODE_BLOCKS
,
158 .num_buffers_hard_out
=
159 RTE_BBDEV_MAX_CODE_BLOCKS
,
160 .num_buffers_soft_out
= 0,
164 .type
= RTE_BBDEV_OP_TURBO_ENC
,
167 RTE_BBDEV_TURBO_CRC_24B_ATTACH
|
168 RTE_BBDEV_TURBO_CRC_24A_ATTACH
|
169 RTE_BBDEV_TURBO_RATE_MATCH
|
170 RTE_BBDEV_TURBO_RV_INDEX_BYPASS
,
171 .num_buffers_src
= RTE_BBDEV_MAX_CODE_BLOCKS
,
172 .num_buffers_dst
= RTE_BBDEV_MAX_CODE_BLOCKS
,
175 RTE_BBDEV_END_OF_CAPABILITIES_LIST()
178 static struct rte_bbdev_queue_conf default_queue_conf
= {
179 .queue_size
= RTE_BBDEV_QUEUE_SIZE_LIMIT
,
182 static const enum rte_cpu_flag_t cpu_flag
= RTE_CPUFLAG_SSE4_2
;
184 default_queue_conf
.socket
= dev
->data
->socket_id
;
186 dev_info
->driver_name
= RTE_STR(DRIVER_NAME
);
187 dev_info
->max_num_queues
= internals
->max_nb_queues
;
188 dev_info
->queue_size_lim
= RTE_BBDEV_QUEUE_SIZE_LIMIT
;
189 dev_info
->hardware_accelerated
= false;
190 dev_info
->max_dl_queue_priority
= 0;
191 dev_info
->max_ul_queue_priority
= 0;
192 dev_info
->default_queue_conf
= default_queue_conf
;
193 dev_info
->capabilities
= bbdev_capabilities
;
194 dev_info
->cpu_flag_reqs
= &cpu_flag
;
195 dev_info
->min_alignment
= 64;
197 rte_bbdev_log_debug("got device info from %u\n", dev
->data
->dev_id
);
202 q_release(struct rte_bbdev
*dev
, uint16_t q_id
)
204 struct turbo_sw_queue
*q
= dev
->data
->queues
[q_id
].queue_private
;
207 rte_ring_free(q
->processed_pkts
);
208 rte_free(q
->enc_out
);
211 rte_free(q
->code_block
);
212 rte_free(q
->deint_input
);
213 rte_free(q
->deint_output
);
214 rte_free(q
->adapter_output
);
216 dev
->data
->queues
[q_id
].queue_private
= NULL
;
219 rte_bbdev_log_debug("released device queue %u:%u",
220 dev
->data
->dev_id
, q_id
);
226 q_setup(struct rte_bbdev
*dev
, uint16_t q_id
,
227 const struct rte_bbdev_queue_conf
*queue_conf
)
230 struct turbo_sw_queue
*q
;
231 char name
[RTE_RING_NAMESIZE
];
233 /* Allocate the queue data structure. */
234 q
= rte_zmalloc_socket(RTE_STR(DRIVER_NAME
), sizeof(*q
),
235 RTE_CACHE_LINE_SIZE
, queue_conf
->socket
);
237 rte_bbdev_log(ERR
, "Failed to allocate queue memory");
241 /* Allocate memory for encoder output. */
242 ret
= snprintf(name
, RTE_RING_NAMESIZE
, RTE_STR(DRIVER_NAME
)"_enc_o%u:%u",
243 dev
->data
->dev_id
, q_id
);
244 if ((ret
< 0) || (ret
>= (int)RTE_RING_NAMESIZE
)) {
246 "Creating queue name for device %u queue %u failed",
247 dev
->data
->dev_id
, q_id
);
248 return -ENAMETOOLONG
;
250 q
->enc_out
= rte_zmalloc_socket(name
,
251 ((RTE_BBDEV_MAX_TB_SIZE
>> 3) + 3) *
252 sizeof(*q
->enc_out
) * 3,
253 RTE_CACHE_LINE_SIZE
, queue_conf
->socket
);
254 if (q
->enc_out
== NULL
) {
256 "Failed to allocate queue memory for %s", name
);
260 /* Allocate memory for rate matching output. */
261 ret
= snprintf(name
, RTE_RING_NAMESIZE
,
262 RTE_STR(DRIVER_NAME
)"_enc_i%u:%u", dev
->data
->dev_id
,
264 if ((ret
< 0) || (ret
>= (int)RTE_RING_NAMESIZE
)) {
266 "Creating queue name for device %u queue %u failed",
267 dev
->data
->dev_id
, q_id
);
268 return -ENAMETOOLONG
;
270 q
->enc_in
= rte_zmalloc_socket(name
,
271 (RTE_BBDEV_MAX_CB_SIZE
>> 3) * sizeof(*q
->enc_in
),
272 RTE_CACHE_LINE_SIZE
, queue_conf
->socket
);
273 if (q
->enc_in
== NULL
) {
275 "Failed to allocate queue memory for %s", name
);
279 /* Allocate memory for Aplha Gamma temp buffer. */
280 ret
= snprintf(name
, RTE_RING_NAMESIZE
, RTE_STR(DRIVER_NAME
)"_ag%u:%u",
281 dev
->data
->dev_id
, q_id
);
282 if ((ret
< 0) || (ret
>= (int)RTE_RING_NAMESIZE
)) {
284 "Creating queue name for device %u queue %u failed",
285 dev
->data
->dev_id
, q_id
);
286 return -ENAMETOOLONG
;
288 q
->ag
= rte_zmalloc_socket(name
,
289 RTE_BBDEV_MAX_CB_SIZE
* 10 * sizeof(*q
->ag
),
290 RTE_CACHE_LINE_SIZE
, queue_conf
->socket
);
293 "Failed to allocate queue memory for %s", name
);
297 /* Allocate memory for code block temp buffer. */
298 ret
= snprintf(name
, RTE_RING_NAMESIZE
, RTE_STR(DRIVER_NAME
)"_cb%u:%u",
299 dev
->data
->dev_id
, q_id
);
300 if ((ret
< 0) || (ret
>= (int)RTE_RING_NAMESIZE
)) {
302 "Creating queue name for device %u queue %u failed",
303 dev
->data
->dev_id
, q_id
);
304 return -ENAMETOOLONG
;
306 q
->code_block
= rte_zmalloc_socket(name
,
307 RTE_BBDEV_MAX_CB_SIZE
* sizeof(*q
->code_block
),
308 RTE_CACHE_LINE_SIZE
, queue_conf
->socket
);
309 if (q
->code_block
== NULL
) {
311 "Failed to allocate queue memory for %s", name
);
315 /* Allocate memory for Deinterleaver input. */
316 ret
= snprintf(name
, RTE_RING_NAMESIZE
,
317 RTE_STR(DRIVER_NAME
)"_de_i%u:%u",
318 dev
->data
->dev_id
, q_id
);
319 if ((ret
< 0) || (ret
>= (int)RTE_RING_NAMESIZE
)) {
321 "Creating queue name for device %u queue %u failed",
322 dev
->data
->dev_id
, q_id
);
323 return -ENAMETOOLONG
;
325 q
->deint_input
= rte_zmalloc_socket(name
,
326 DEINT_INPUT_BUF_SIZE
* sizeof(*q
->deint_input
),
327 RTE_CACHE_LINE_SIZE
, queue_conf
->socket
);
328 if (q
->deint_input
== NULL
) {
330 "Failed to allocate queue memory for %s", name
);
334 /* Allocate memory for Deinterleaver output. */
335 ret
= snprintf(name
, RTE_RING_NAMESIZE
,
336 RTE_STR(DRIVER_NAME
)"_de_o%u:%u",
337 dev
->data
->dev_id
, q_id
);
338 if ((ret
< 0) || (ret
>= (int)RTE_RING_NAMESIZE
)) {
340 "Creating queue name for device %u queue %u failed",
341 dev
->data
->dev_id
, q_id
);
342 return -ENAMETOOLONG
;
344 q
->deint_output
= rte_zmalloc_socket(NULL
,
345 DEINT_OUTPUT_BUF_SIZE
* sizeof(*q
->deint_output
),
346 RTE_CACHE_LINE_SIZE
, queue_conf
->socket
);
347 if (q
->deint_output
== NULL
) {
349 "Failed to allocate queue memory for %s", name
);
353 /* Allocate memory for Adapter output. */
354 ret
= snprintf(name
, RTE_RING_NAMESIZE
,
355 RTE_STR(DRIVER_NAME
)"_ada_o%u:%u",
356 dev
->data
->dev_id
, q_id
);
357 if ((ret
< 0) || (ret
>= (int)RTE_RING_NAMESIZE
)) {
359 "Creating queue name for device %u queue %u failed",
360 dev
->data
->dev_id
, q_id
);
361 return -ENAMETOOLONG
;
363 q
->adapter_output
= rte_zmalloc_socket(NULL
,
364 ADAPTER_OUTPUT_BUF_SIZE
* sizeof(*q
->adapter_output
),
365 RTE_CACHE_LINE_SIZE
, queue_conf
->socket
);
366 if (q
->adapter_output
== NULL
) {
368 "Failed to allocate queue memory for %s", name
);
372 /* Create ring for packets awaiting to be dequeued. */
373 ret
= snprintf(name
, RTE_RING_NAMESIZE
, RTE_STR(DRIVER_NAME
)"%u:%u",
374 dev
->data
->dev_id
, q_id
);
375 if ((ret
< 0) || (ret
>= (int)RTE_RING_NAMESIZE
)) {
377 "Creating queue name for device %u queue %u failed",
378 dev
->data
->dev_id
, q_id
);
379 return -ENAMETOOLONG
;
381 q
->processed_pkts
= rte_ring_create(name
, queue_conf
->queue_size
,
382 queue_conf
->socket
, RING_F_SP_ENQ
| RING_F_SC_DEQ
);
383 if (q
->processed_pkts
== NULL
) {
384 rte_bbdev_log(ERR
, "Failed to create ring for %s", name
);
388 q
->type
= queue_conf
->op_type
;
390 dev
->data
->queues
[q_id
].queue_private
= q
;
391 rte_bbdev_log_debug("setup device queue %s", name
);
395 rte_ring_free(q
->processed_pkts
);
396 rte_free(q
->enc_out
);
399 rte_free(q
->code_block
);
400 rte_free(q
->deint_input
);
401 rte_free(q
->deint_output
);
402 rte_free(q
->adapter_output
);
407 static const struct rte_bbdev_ops pmd_ops
= {
408 .info_get
= info_get
,
409 .queue_setup
= q_setup
,
410 .queue_release
= q_release
413 /* Checks if the encoder input buffer is correct.
414 * Returns 0 if it's valid, -1 otherwise.
417 is_enc_input_valid(const uint16_t k
, const int32_t k_idx
,
418 const uint16_t in_length
)
421 rte_bbdev_log(ERR
, "K Index is invalid");
425 if (in_length
- (k
>> 3) < 0) {
427 "Mismatch between input length (%u bytes) and K (%u bits)",
432 if (k
> RTE_BBDEV_MAX_CB_SIZE
) {
433 rte_bbdev_log(ERR
, "CB size (%u) is too big, max: %d",
434 k
, RTE_BBDEV_MAX_CB_SIZE
);
441 /* Checks if the decoder input buffer is correct.
442 * Returns 0 if it's valid, -1 otherwise.
445 is_dec_input_valid(int32_t k_idx
, int16_t kw
, int16_t in_length
)
448 rte_bbdev_log(ERR
, "K index is invalid");
452 if (in_length
< kw
) {
454 "Mismatch between input length (%u) and kw (%u)",
459 if (kw
> RTE_BBDEV_MAX_KW
) {
460 rte_bbdev_log(ERR
, "Input length (%u) is too big, max: %d",
461 kw
, RTE_BBDEV_MAX_KW
);
469 process_enc_cb(struct turbo_sw_queue
*q
, struct rte_bbdev_enc_op
*op
,
470 uint8_t r
, uint8_t c
, uint16_t k
, uint16_t ncb
,
471 uint32_t e
, struct rte_mbuf
*m_in
, struct rte_mbuf
*m_out_head
,
472 struct rte_mbuf
*m_out
, uint16_t in_offset
, uint16_t out_offset
,
473 uint16_t in_length
, struct rte_bbdev_stats
*q_stats
)
478 uint8_t *in
, *out0
, *out1
, *out2
, *tmp_out
, *rm_out
;
479 uint64_t first_3_bytes
= 0;
480 struct rte_bbdev_op_turbo_enc
*enc
= &op
->turbo_enc
;
481 struct bblib_crc_request crc_req
;
482 struct bblib_crc_response crc_resp
;
483 struct bblib_turbo_encoder_request turbo_req
;
484 struct bblib_turbo_encoder_response turbo_resp
;
485 struct bblib_rate_match_dl_request rm_req
;
486 struct bblib_rate_match_dl_response rm_resp
;
487 #ifdef RTE_BBDEV_OFFLOAD_COST
490 RTE_SET_USED(q_stats
);
493 k_idx
= compute_idx(k
);
494 in
= rte_pktmbuf_mtod_offset(m_in
, uint8_t *, in_offset
);
496 /* CRC24A (for TB) */
497 if ((enc
->op_flags
& RTE_BBDEV_TURBO_CRC_24A_ATTACH
) &&
498 (enc
->code_block_mode
== 1)) {
499 ret
= is_enc_input_valid(k
- 24, k_idx
, in_length
);
501 op
->status
|= 1 << RTE_BBDEV_DATA_ERROR
;
505 crc_req
.len
= k
- 24;
506 /* Check if there is a room for CRC bits if not use
507 * the temporary buffer.
509 if (mbuf_append(m_in
, m_in
, 3) == NULL
) {
510 rte_memcpy(q
->enc_in
, in
, (k
- 24) >> 3);
513 /* Store 3 first bytes of next CB as they will be
514 * overwritten by CRC bytes. If it is the last CB then
515 * there is no point to store 3 next bytes and this
516 * if..else branch will be omitted.
518 first_3_bytes
= *((uint64_t *)&in
[(k
- 32) >> 3]);
522 #ifdef RTE_BBDEV_OFFLOAD_COST
523 start_time
= rte_rdtsc_precise();
525 /* CRC24A generation */
526 bblib_lte_crc24a_gen(&crc_req
, &crc_resp
);
527 #ifdef RTE_BBDEV_OFFLOAD_COST
528 q_stats
->acc_offload_cycles
+= rte_rdtsc_precise() - start_time
;
530 } else if (enc
->op_flags
& RTE_BBDEV_TURBO_CRC_24B_ATTACH
) {
532 ret
= is_enc_input_valid(k
- 24, k_idx
, in_length
);
534 op
->status
|= 1 << RTE_BBDEV_DATA_ERROR
;
538 crc_req
.len
= k
- 24;
539 /* Check if there is a room for CRC bits if this is the last
540 * CB in TB. If not use temporary buffer.
542 if ((c
- r
== 1) && (mbuf_append(m_in
, m_in
, 3) == NULL
)) {
543 rte_memcpy(q
->enc_in
, in
, (k
- 24) >> 3);
545 } else if (c
- r
> 1) {
546 /* Store 3 first bytes of next CB as they will be
547 * overwritten by CRC bytes. If it is the last CB then
548 * there is no point to store 3 next bytes and this
549 * if..else branch will be omitted.
551 first_3_bytes
= *((uint64_t *)&in
[(k
- 32) >> 3]);
555 #ifdef RTE_BBDEV_OFFLOAD_COST
556 start_time
= rte_rdtsc_precise();
558 /* CRC24B generation */
559 bblib_lte_crc24b_gen(&crc_req
, &crc_resp
);
560 #ifdef RTE_BBDEV_OFFLOAD_COST
561 q_stats
->acc_offload_cycles
+= rte_rdtsc_precise() - start_time
;
564 ret
= is_enc_input_valid(k
, k_idx
, in_length
);
566 op
->status
|= 1 << RTE_BBDEV_DATA_ERROR
;
573 /* Each bit layer output from turbo encoder is (k+4) bits long, i.e.
574 * input length + 4 tail bits. That's (k/8) + 1 bytes after rounding up.
575 * So dst_data's length should be 3*(k/8) + 3 bytes.
576 * In Rate-matching bypass case outputs pointers passed to encoder
577 * (out0, out1 and out2) can directly point to addresses of output from
580 if (enc
->op_flags
& RTE_BBDEV_TURBO_RATE_MATCH
) {
582 out1
= RTE_PTR_ADD(out0
, (k
>> 3) + 1);
583 out2
= RTE_PTR_ADD(out1
, (k
>> 3) + 1);
585 out0
= (uint8_t *)mbuf_append(m_out_head
, m_out
,
588 op
->status
|= 1 << RTE_BBDEV_DATA_ERROR
;
590 "Too little space in output mbuf");
593 enc
->output
.length
+= (k
>> 3) * 3 + 2;
594 /* rte_bbdev_op_data.offset can be different than the
595 * offset of the appended bytes
597 out0
= rte_pktmbuf_mtod_offset(m_out
, uint8_t *, out_offset
);
598 out1
= rte_pktmbuf_mtod_offset(m_out
, uint8_t *,
599 out_offset
+ (k
>> 3) + 1);
600 out2
= rte_pktmbuf_mtod_offset(m_out
, uint8_t *,
601 out_offset
+ 2 * ((k
>> 3) + 1));
604 turbo_req
.case_id
= k_idx
;
605 turbo_req
.input_win
= in
;
606 turbo_req
.length
= k
>> 3;
607 turbo_resp
.output_win_0
= out0
;
608 turbo_resp
.output_win_1
= out1
;
609 turbo_resp
.output_win_2
= out2
;
611 #ifdef RTE_BBDEV_OFFLOAD_COST
612 start_time
= rte_rdtsc_precise();
615 if (bblib_turbo_encoder(&turbo_req
, &turbo_resp
) != 0) {
616 op
->status
|= 1 << RTE_BBDEV_DRV_ERROR
;
617 rte_bbdev_log(ERR
, "Turbo Encoder failed");
620 #ifdef RTE_BBDEV_OFFLOAD_COST
621 q_stats
->acc_offload_cycles
+= rte_rdtsc_precise() - start_time
;
624 /* Restore 3 first bytes of next CB if they were overwritten by CRC*/
625 if (first_3_bytes
!= 0)
626 *((uint64_t *)&in
[(k
- 32) >> 3]) = first_3_bytes
;
629 if (enc
->op_flags
& RTE_BBDEV_TURBO_RATE_MATCH
) {
631 /* Integer round up division by 8 */
632 uint16_t out_len
= (e
+ 7) >> 3;
633 /* The mask array is indexed using E%8. E is an even number so
634 * there are only 4 possible values.
636 const uint8_t mask_out
[] = {0xFF, 0xC0, 0xF0, 0xFC};
638 /* get output data starting address */
639 rm_out
= (uint8_t *)mbuf_append(m_out_head
, m_out
, out_len
);
640 if (rm_out
== NULL
) {
641 op
->status
|= 1 << RTE_BBDEV_DATA_ERROR
;
643 "Too little space in output mbuf");
646 /* rte_bbdev_op_data.offset can be different than the offset
647 * of the appended bytes
649 rm_out
= rte_pktmbuf_mtod_offset(m_out
, uint8_t *, out_offset
);
651 /* index of current code block */
653 /* total number of code block */
655 /* For DL - 1, UL - 0 */
656 rm_req
.direction
= 1;
657 /* According to 3ggp 36.212 Spec 5.1.4.1.2 section Nsoft, KMIMO
658 * and MDL_HARQ are used for Ncb calculation. As Ncb is already
659 * known we can adjust those parameters
661 rm_req
.Nsoft
= ncb
* rm_req
.C
;
664 /* According to 3ggp 36.212 Spec 5.1.4.1.2 section Nl, Qm and G
665 * are used for E calculation. As E is already known we can
666 * adjust those parameters
670 rm_req
.G
= rm_req
.NL
* rm_req
.Qm
* rm_req
.C
;
672 rm_req
.rvidx
= enc
->rv_index
;
673 rm_req
.Kidx
= k_idx
- 1;
678 rm_resp
.output
= rm_out
;
679 rm_resp
.OutputLen
= out_len
;
680 if (enc
->op_flags
& RTE_BBDEV_TURBO_RV_INDEX_BYPASS
)
681 rm_req
.bypass_rvidx
= 1;
683 rm_req
.bypass_rvidx
= 0;
685 #ifdef RTE_BBDEV_OFFLOAD_COST
686 start_time
= rte_rdtsc_precise();
689 if (bblib_rate_match_dl(&rm_req
, &rm_resp
) != 0) {
690 op
->status
|= 1 << RTE_BBDEV_DRV_ERROR
;
691 rte_bbdev_log(ERR
, "Rate matching failed");
694 #ifdef RTE_BBDEV_OFFLOAD_COST
695 q_stats
->acc_offload_cycles
+= rte_rdtsc_precise() - start_time
;
698 /* SW fills an entire last byte even if E%8 != 0. Clear the
699 * superfluous data bits for consistency with HW device.
701 mask_id
= (e
& 7) >> 1;
702 rm_out
[out_len
- 1] &= mask_out
[mask_id
];
703 enc
->output
.length
+= rm_resp
.OutputLen
;
705 /* Rate matching is bypassed */
707 /* Completing last byte of out0 (where 4 tail bits are stored)
708 * by moving first 4 bits from out1
710 tmp_out
= (uint8_t *) --out1
;
711 *tmp_out
= *tmp_out
| ((*(tmp_out
+ 1) & 0xF0) >> 4);
713 /* Shifting out1 data by 4 bits to the left */
714 for (m
= 0; m
< k
>> 3; ++m
) {
715 uint8_t *first
= tmp_out
;
716 uint8_t second
= *(tmp_out
+ 1);
717 *first
= (*first
<< 4) | ((second
& 0xF0) >> 4);
720 /* Shifting out2 data by 8 bits to the left */
721 for (m
= 0; m
< (k
>> 3) + 1; ++m
) {
722 *tmp_out
= *(tmp_out
+ 1);
730 enqueue_enc_one_op(struct turbo_sw_queue
*q
, struct rte_bbdev_enc_op
*op
,
731 struct rte_bbdev_stats
*queue_stats
)
733 uint8_t c
, r
, crc24_bits
= 0;
736 struct rte_bbdev_op_turbo_enc
*enc
= &op
->turbo_enc
;
737 uint16_t in_offset
= enc
->input
.offset
;
738 uint16_t out_offset
= enc
->output
.offset
;
739 struct rte_mbuf
*m_in
= enc
->input
.data
;
740 struct rte_mbuf
*m_out
= enc
->output
.data
;
741 struct rte_mbuf
*m_out_head
= enc
->output
.data
;
742 uint32_t in_length
, mbuf_total_left
= enc
->input
.length
;
743 uint16_t seg_total_left
;
745 /* Clear op status */
748 if (mbuf_total_left
> RTE_BBDEV_MAX_TB_SIZE
>> 3) {
749 rte_bbdev_log(ERR
, "TB size (%u) is too big, max: %d",
750 mbuf_total_left
, RTE_BBDEV_MAX_TB_SIZE
);
751 op
->status
= 1 << RTE_BBDEV_DATA_ERROR
;
755 if (m_in
== NULL
|| m_out
== NULL
) {
756 rte_bbdev_log(ERR
, "Invalid mbuf pointer");
757 op
->status
= 1 << RTE_BBDEV_DATA_ERROR
;
761 if ((enc
->op_flags
& RTE_BBDEV_TURBO_CRC_24B_ATTACH
) ||
762 (enc
->op_flags
& RTE_BBDEV_TURBO_CRC_24A_ATTACH
))
765 if (enc
->code_block_mode
== 0) { /* For Transport Block mode */
766 c
= enc
->tb_params
.c
;
767 r
= enc
->tb_params
.r
;
768 } else {/* For Code Block mode */
773 while (mbuf_total_left
> 0 && r
< c
) {
775 seg_total_left
= rte_pktmbuf_data_len(m_in
) - in_offset
;
777 if (enc
->code_block_mode
== 0) {
778 k
= (r
< enc
->tb_params
.c_neg
) ?
779 enc
->tb_params
.k_neg
: enc
->tb_params
.k_pos
;
780 ncb
= (r
< enc
->tb_params
.c_neg
) ?
781 enc
->tb_params
.ncb_neg
: enc
->tb_params
.ncb_pos
;
782 e
= (r
< enc
->tb_params
.cab
) ?
783 enc
->tb_params
.ea
: enc
->tb_params
.eb
;
785 k
= enc
->cb_params
.k
;
786 ncb
= enc
->cb_params
.ncb
;
787 e
= enc
->cb_params
.e
;
790 process_enc_cb(q
, op
, r
, c
, k
, ncb
, e
, m_in
, m_out_head
,
791 m_out
, in_offset
, out_offset
, seg_total_left
,
793 /* Update total_left */
794 in_length
= ((k
- crc24_bits
) >> 3);
795 mbuf_total_left
-= in_length
;
796 /* Update offsets for next CBs (if exist) */
797 in_offset
+= (k
- crc24_bits
) >> 3;
798 if (enc
->op_flags
& RTE_BBDEV_TURBO_RATE_MATCH
)
799 out_offset
+= e
>> 3;
801 out_offset
+= (k
>> 3) * 3 + 2;
804 if (seg_total_left
== in_length
) {
805 /* Go to the next mbuf */
814 /* check if all input data was processed */
815 if (mbuf_total_left
!= 0) {
816 op
->status
|= 1 << RTE_BBDEV_DATA_ERROR
;
818 "Mismatch between mbuf length and included CBs sizes");
822 static inline uint16_t
823 enqueue_enc_all_ops(struct turbo_sw_queue
*q
, struct rte_bbdev_enc_op
**ops
,
824 uint16_t nb_ops
, struct rte_bbdev_stats
*queue_stats
)
827 #ifdef RTE_BBDEV_OFFLOAD_COST
828 queue_stats
->acc_offload_cycles
= 0;
831 for (i
= 0; i
< nb_ops
; ++i
)
832 enqueue_enc_one_op(q
, ops
[i
], queue_stats
);
834 return rte_ring_enqueue_burst(q
->processed_pkts
, (void **)ops
, nb_ops
,
/* Copy the three turbo-encoder output streams from @in into the padded
 * layout expected by the adapter when sub-block deinterleaving is bypassed.
 * d = k + 4 bytes per stream; kpi = ncb / 3 is the per-stream stride.
 */
static inline void
move_padding_bytes(const uint8_t *in, uint8_t *out, uint16_t k,
		uint16_t ncb)
{
	uint16_t d = k + 4;
	uint16_t kpi = ncb / 3;
	uint16_t nd = kpi - d;

	rte_memcpy(&out[nd], in, d);
	rte_memcpy(&out[nd + kpi + 64], &in[kpi], d);
	rte_memcpy(&out[(nd - 1) + 2 * (kpi + 64)], &in[2 * kpi], d);
}
852 process_dec_cb(struct turbo_sw_queue
*q
, struct rte_bbdev_dec_op
*op
,
853 uint8_t c
, uint16_t k
, uint16_t kw
, struct rte_mbuf
*m_in
,
854 struct rte_mbuf
*m_out_head
, struct rte_mbuf
*m_out
,
855 uint16_t in_offset
, uint16_t out_offset
, bool check_crc_24b
,
856 uint16_t crc24_overlap
, uint16_t in_length
,
857 struct rte_bbdev_stats
*q_stats
)
862 uint8_t *in
, *out
, *adapter_input
;
863 int32_t ncb
, ncb_without_null
;
864 struct bblib_turbo_adapter_ul_response adapter_resp
;
865 struct bblib_turbo_adapter_ul_request adapter_req
;
866 struct bblib_turbo_decoder_request turbo_req
;
867 struct bblib_turbo_decoder_response turbo_resp
;
868 struct rte_bbdev_op_turbo_dec
*dec
= &op
->turbo_dec
;
869 #ifdef RTE_BBDEV_OFFLOAD_COST
872 RTE_SET_USED(q_stats
);
875 k_idx
= compute_idx(k
);
877 ret
= is_dec_input_valid(k_idx
, kw
, in_length
);
879 op
->status
|= 1 << RTE_BBDEV_DATA_ERROR
;
883 in
= rte_pktmbuf_mtod_offset(m_in
, uint8_t *, in_offset
);
885 ncb_without_null
= (k
+ 4) * 3;
887 if (check_bit(dec
->op_flags
, RTE_BBDEV_TURBO_SUBBLOCK_DEINTERLEAVE
)) {
888 struct bblib_deinterleave_ul_request deint_req
;
889 struct bblib_deinterleave_ul_response deint_resp
;
891 deint_req
.circ_buffer
= BBLIB_FULL_CIRCULAR_BUFFER
;
892 deint_req
.pharqbuffer
= in
;
894 deint_resp
.pinteleavebuffer
= q
->deint_output
;
896 #ifdef RTE_BBDEV_OFFLOAD_COST
897 start_time
= rte_rdtsc_precise();
899 bblib_deinterleave_ul(&deint_req
, &deint_resp
);
900 #ifdef RTE_BBDEV_OFFLOAD_COST
901 q_stats
->acc_offload_cycles
+= rte_rdtsc_precise() - start_time
;
904 move_padding_bytes(in
, q
->deint_output
, k
, ncb
);
906 adapter_input
= q
->deint_output
;
908 if (dec
->op_flags
& RTE_BBDEV_TURBO_POS_LLR_1_BIT_IN
)
909 adapter_req
.isinverted
= 1;
910 else if (dec
->op_flags
& RTE_BBDEV_TURBO_NEG_LLR_1_BIT_IN
)
911 adapter_req
.isinverted
= 0;
913 op
->status
|= 1 << RTE_BBDEV_DRV_ERROR
;
914 rte_bbdev_log(ERR
, "LLR format wasn't specified");
918 adapter_req
.ncb
= ncb_without_null
;
919 adapter_req
.pinteleavebuffer
= adapter_input
;
920 adapter_resp
.pharqout
= q
->adapter_output
;
922 #ifdef RTE_BBDEV_OFFLOAD_COST
923 start_time
= rte_rdtsc_precise();
925 /* Turbo decode adaptation */
926 bblib_turbo_adapter_ul(&adapter_req
, &adapter_resp
);
927 #ifdef RTE_BBDEV_OFFLOAD_COST
928 q_stats
->acc_offload_cycles
+= rte_rdtsc_precise() - start_time
;
931 out
= (uint8_t *)mbuf_append(m_out_head
, m_out
,
932 ((k
- crc24_overlap
) >> 3));
934 op
->status
|= 1 << RTE_BBDEV_DATA_ERROR
;
935 rte_bbdev_log(ERR
, "Too little space in output mbuf");
938 /* rte_bbdev_op_data.offset can be different than the offset of the
941 out
= rte_pktmbuf_mtod_offset(m_out
, uint8_t *, out_offset
);
946 turbo_req
.input
= (int8_t *)q
->adapter_output
;
948 turbo_req
.k_idx
= k_idx
;
949 turbo_req
.max_iter_num
= dec
->iter_max
;
950 turbo_req
.early_term_disable
= !check_bit(dec
->op_flags
,
951 RTE_BBDEV_TURBO_EARLY_TERMINATION
);
952 turbo_resp
.ag_buf
= q
->ag
;
953 turbo_resp
.cb_buf
= q
->code_block
;
954 turbo_resp
.output
= out
;
956 #ifdef RTE_BBDEV_OFFLOAD_COST
957 start_time
= rte_rdtsc_precise();
960 iter_cnt
= bblib_turbo_decoder(&turbo_req
, &turbo_resp
);
961 #ifdef RTE_BBDEV_OFFLOAD_COST
962 q_stats
->acc_offload_cycles
+= rte_rdtsc_precise() - start_time
;
964 dec
->hard_output
.length
+= (k
>> 3);
967 /* Temporary solution for returned iter_count from SDK */
968 iter_cnt
= (iter_cnt
- 1) >> 1;
969 dec
->iter_count
= RTE_MAX(iter_cnt
, dec
->iter_count
);
971 op
->status
|= 1 << RTE_BBDEV_DATA_ERROR
;
972 rte_bbdev_log(ERR
, "Turbo Decoder failed");
978 enqueue_dec_one_op(struct turbo_sw_queue
*q
, struct rte_bbdev_dec_op
*op
,
979 struct rte_bbdev_stats
*queue_stats
)
983 uint16_t crc24_overlap
= 0;
984 struct rte_bbdev_op_turbo_dec
*dec
= &op
->turbo_dec
;
985 struct rte_mbuf
*m_in
= dec
->input
.data
;
986 struct rte_mbuf
*m_out
= dec
->hard_output
.data
;
987 struct rte_mbuf
*m_out_head
= dec
->hard_output
.data
;
988 uint16_t in_offset
= dec
->input
.offset
;
989 uint16_t out_offset
= dec
->hard_output
.offset
;
990 uint32_t mbuf_total_left
= dec
->input
.length
;
991 uint16_t seg_total_left
;
993 /* Clear op status */
996 if (m_in
== NULL
|| m_out
== NULL
) {
997 rte_bbdev_log(ERR
, "Invalid mbuf pointer");
998 op
->status
= 1 << RTE_BBDEV_DATA_ERROR
;
1002 if (dec
->code_block_mode
== 0) { /* For Transport Block mode */
1003 c
= dec
->tb_params
.c
;
1004 } else { /* For Code Block mode */
1005 k
= dec
->cb_params
.k
;
1009 if ((c
> 1) && !check_bit(dec
->op_flags
,
1010 RTE_BBDEV_TURBO_DEC_TB_CRC_24B_KEEP
))
1013 while (mbuf_total_left
> 0) {
1014 if (dec
->code_block_mode
== 0)
1015 k
= (r
< dec
->tb_params
.c_neg
) ?
1016 dec
->tb_params
.k_neg
: dec
->tb_params
.k_pos
;
1018 seg_total_left
= rte_pktmbuf_data_len(m_in
) - in_offset
;
1020 /* Calculates circular buffer size (Kw).
1021 * According to 3gpp 36.212 section 5.1.4.2
1025 * where nCol is 32 and nRow can be calculated from:
1027 * where D is the size of each output from turbo encoder block
1030 kw
= RTE_ALIGN_CEIL(k
+ 4, RTE_BBDEV_C_SUBBLOCK
) * 3;
1032 process_dec_cb(q
, op
, c
, k
, kw
, m_in
, m_out_head
, m_out
,
1033 in_offset
, out_offset
, check_bit(dec
->op_flags
,
1034 RTE_BBDEV_TURBO_CRC_TYPE_24B
), crc24_overlap
,
1035 seg_total_left
, queue_stats
);
1036 /* To keep CRC24 attached to end of Code block, use
1037 * RTE_BBDEV_TURBO_DEC_TB_CRC_24B_KEEP flag as it
1038 * removed by default once verified.
1041 mbuf_total_left
-= kw
;
1043 /* Update offsets */
1044 if (seg_total_left
== kw
) {
1045 /* Go to the next mbuf */
1047 m_out
= m_out
->next
;
1051 /* Update offsets for next CBs (if exist) */
1053 out_offset
+= ((k
- crc24_overlap
) >> 3);
1057 if (mbuf_total_left
!= 0) {
1058 op
->status
|= 1 << RTE_BBDEV_DATA_ERROR
;
1060 "Mismatch between mbuf length and included Circular buffer sizes");
1064 static inline uint16_t
1065 enqueue_dec_all_ops(struct turbo_sw_queue
*q
, struct rte_bbdev_dec_op
**ops
,
1066 uint16_t nb_ops
, struct rte_bbdev_stats
*queue_stats
)
1069 #ifdef RTE_BBDEV_OFFLOAD_COST
1070 queue_stats
->acc_offload_cycles
= 0;
1073 for (i
= 0; i
< nb_ops
; ++i
)
1074 enqueue_dec_one_op(q
, ops
[i
], queue_stats
);
1076 return rte_ring_enqueue_burst(q
->processed_pkts
, (void **)ops
, nb_ops
,
1082 enqueue_enc_ops(struct rte_bbdev_queue_data
*q_data
,
1083 struct rte_bbdev_enc_op
**ops
, uint16_t nb_ops
)
1085 void *queue
= q_data
->queue_private
;
1086 struct turbo_sw_queue
*q
= queue
;
1087 uint16_t nb_enqueued
= 0;
1089 nb_enqueued
= enqueue_enc_all_ops(q
, ops
, nb_ops
, &q_data
->queue_stats
);
1091 q_data
->queue_stats
.enqueue_err_count
+= nb_ops
- nb_enqueued
;
1092 q_data
->queue_stats
.enqueued_count
+= nb_enqueued
;
1099 enqueue_dec_ops(struct rte_bbdev_queue_data
*q_data
,
1100 struct rte_bbdev_dec_op
**ops
, uint16_t nb_ops
)
1102 void *queue
= q_data
->queue_private
;
1103 struct turbo_sw_queue
*q
= queue
;
1104 uint16_t nb_enqueued
= 0;
1106 nb_enqueued
= enqueue_dec_all_ops(q
, ops
, nb_ops
, &q_data
->queue_stats
);
1108 q_data
->queue_stats
.enqueue_err_count
+= nb_ops
- nb_enqueued
;
1109 q_data
->queue_stats
.enqueued_count
+= nb_enqueued
;
1114 /* Dequeue decode burst */
1116 dequeue_dec_ops(struct rte_bbdev_queue_data
*q_data
,
1117 struct rte_bbdev_dec_op
**ops
, uint16_t nb_ops
)
1119 struct turbo_sw_queue
*q
= q_data
->queue_private
;
1120 uint16_t nb_dequeued
= rte_ring_dequeue_burst(q
->processed_pkts
,
1121 (void **)ops
, nb_ops
, NULL
);
1122 q_data
->queue_stats
.dequeued_count
+= nb_dequeued
;
1127 /* Dequeue encode burst */
1129 dequeue_enc_ops(struct rte_bbdev_queue_data
*q_data
,
1130 struct rte_bbdev_enc_op
**ops
, uint16_t nb_ops
)
1132 struct turbo_sw_queue
*q
= q_data
->queue_private
;
1133 uint16_t nb_dequeued
= rte_ring_dequeue_burst(q
->processed_pkts
,
1134 (void **)ops
, nb_ops
, NULL
);
1135 q_data
->queue_stats
.dequeued_count
+= nb_dequeued
;
1140 /* Parse 16bit integer from string argument */
1142 parse_u16_arg(const char *key
, const char *value
, void *extra_args
)
1144 uint16_t *u16
= extra_args
;
1145 unsigned int long result
;
1147 if ((value
== NULL
) || (extra_args
== NULL
))
1150 result
= strtoul(value
, NULL
, 0);
1151 if ((result
>= (1 << 16)) || (errno
!= 0)) {
1152 rte_bbdev_log(ERR
, "Invalid value %lu for %s", result
, key
);
1155 *u16
= (uint16_t)result
;
1159 /* Parse parameters used to create device */
1161 parse_turbo_sw_params(struct turbo_sw_params
*params
, const char *input_args
)
1163 struct rte_kvargs
*kvlist
= NULL
;
1169 kvlist
= rte_kvargs_parse(input_args
, turbo_sw_valid_params
);
1173 ret
= rte_kvargs_process(kvlist
, turbo_sw_valid_params
[0],
1174 &parse_u16_arg
, ¶ms
->queues_num
);
1178 ret
= rte_kvargs_process(kvlist
, turbo_sw_valid_params
[1],
1179 &parse_u16_arg
, ¶ms
->socket_id
);
1183 if (params
->socket_id
>= RTE_MAX_NUMA_NODES
) {
1184 rte_bbdev_log(ERR
, "Invalid socket, must be < %u",
1185 RTE_MAX_NUMA_NODES
);
1192 rte_kvargs_free(kvlist
);
1198 turbo_sw_bbdev_create(struct rte_vdev_device
*vdev
,
1199 struct turbo_sw_params
*init_params
)
1201 struct rte_bbdev
*bbdev
;
1202 const char *name
= rte_vdev_device_name(vdev
);
1204 bbdev
= rte_bbdev_allocate(name
);
1208 bbdev
->data
->dev_private
= rte_zmalloc_socket(name
,
1209 sizeof(struct bbdev_private
), RTE_CACHE_LINE_SIZE
,
1210 init_params
->socket_id
);
1211 if (bbdev
->data
->dev_private
== NULL
) {
1212 rte_bbdev_release(bbdev
);
1216 bbdev
->dev_ops
= &pmd_ops
;
1217 bbdev
->device
= &vdev
->device
;
1218 bbdev
->data
->socket_id
= init_params
->socket_id
;
1219 bbdev
->intr_handle
= NULL
;
1221 /* register rx/tx burst functions for data path */
1222 bbdev
->dequeue_enc_ops
= dequeue_enc_ops
;
1223 bbdev
->dequeue_dec_ops
= dequeue_dec_ops
;
1224 bbdev
->enqueue_enc_ops
= enqueue_enc_ops
;
1225 bbdev
->enqueue_dec_ops
= enqueue_dec_ops
;
1226 ((struct bbdev_private
*) bbdev
->data
->dev_private
)->max_nb_queues
=
1227 init_params
->queues_num
;
1232 /* Initialise device */
1234 turbo_sw_bbdev_probe(struct rte_vdev_device
*vdev
)
1236 struct turbo_sw_params init_params
= {
1238 RTE_BBDEV_DEFAULT_MAX_NB_QUEUES
1241 const char *input_args
;
1246 name
= rte_vdev_device_name(vdev
);
1249 input_args
= rte_vdev_device_args(vdev
);
1250 parse_turbo_sw_params(&init_params
, input_args
);
1252 rte_bbdev_log_debug(
1253 "Initialising %s on NUMA node %d with max queues: %d\n",
1254 name
, init_params
.socket_id
, init_params
.queues_num
);
1256 return turbo_sw_bbdev_create(vdev
, &init_params
);
1259 /* Uninitialise device */
1261 turbo_sw_bbdev_remove(struct rte_vdev_device
*vdev
)
1263 struct rte_bbdev
*bbdev
;
1269 name
= rte_vdev_device_name(vdev
);
1273 bbdev
= rte_bbdev_get_named_dev(name
);
1277 rte_free(bbdev
->data
->dev_private
);
1279 return rte_bbdev_release(bbdev
);
1282 static struct rte_vdev_driver bbdev_turbo_sw_pmd_drv
= {
1283 .probe
= turbo_sw_bbdev_probe
,
1284 .remove
= turbo_sw_bbdev_remove
1287 RTE_PMD_REGISTER_VDEV(DRIVER_NAME
, bbdev_turbo_sw_pmd_drv
);
1288 RTE_PMD_REGISTER_PARAM_STRING(DRIVER_NAME
,
1289 TURBO_SW_MAX_NB_QUEUES_ARG
"=<int> "
1290 TURBO_SW_SOCKET_ID_ARG
"=<int>");
1291 RTE_PMD_REGISTER_ALIAS(DRIVER_NAME
, turbo_sw
);
1293 RTE_INIT(turbo_sw_bbdev_init_log
)
1295 bbdev_turbo_sw_logtype
= rte_log_register("pmd.bb.turbo_sw");
1296 if (bbdev_turbo_sw_logtype
>= 0)
1297 rte_log_set_level(bbdev_turbo_sw_logtype
, RTE_LOG_NOTICE
);