/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2017 Intel Corporation
 */

#include <stdbool.h>

#include <rte_crypto.h>
#include <rte_cryptodev.h>
#include <rte_cycles.h>
#include <rte_malloc.h>

#include "cperf_ops.h"
#include "cperf_test_pmd_cyclecount.h"
#include "cperf_test_common.h"

#define PRETTY_HDR_FMT "%12s%12s%12s%12s%12s%12s%12s%12s%12s%12s\n\n"
#define PRETTY_LINE_FMT "%12u%12u%12u%12u%12u%12u%12u%12.0f%12.0f%12.0f\n"
#define CSV_HDR_FMT "%s,%s,%s,%s,%s,%s,%s,%s,%s,%s\n"
#define CSV_LINE_FMT "%10u,%10u,%u,%u,%u,%u,%u,%.3f,%.3f,%.3f\n"
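
/*
 * Both output formats report, per burst size: lcore id, buffer size, burst
 * size, ops enqueued/dequeued, enqueue/dequeue retries, and the average
 * cycles per op build, per enqueue and per dequeue.
 */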
struct cperf_pmd_cyclecount_ctx {
	uint8_t dev_id;
	uint16_t qp_id;
	uint8_t lcore_id;

	struct rte_mempool *pool;
	struct rte_crypto_op **ops;
	struct rte_crypto_op **ops_processed;

	struct rte_cryptodev_sym_session *sess;

	cperf_populate_ops_t populate_ops;

	uint32_t src_buf_offset;
	uint32_t dst_buf_offset;

	const struct cperf_options *options;
	const struct cperf_test_vector *test_vector;
};
struct pmd_cyclecount_state {
	struct cperf_pmd_cyclecount_ctx *ctx;
	const struct cperf_options *opts;
	uint32_t lcore;
	uint64_t delay;
	int linearize;
	uint32_t ops_enqd;
	uint32_t ops_deqd;
	uint32_t ops_enq_retries;
	uint32_t ops_deq_retries;
	double cycles_per_build;
	double cycles_per_enq;
	double cycles_per_deq;
};
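
/*
 * The per-op IV is stored right after the symmetric op header within each
 * crypto operation, so its offset is the combined size of the two structs.
 */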
static const uint16_t iv_offset =
		sizeof(struct rte_crypto_op) + sizeof(struct rte_crypto_sym_op);
static void
cperf_pmd_cyclecount_test_free(struct cperf_pmd_cyclecount_ctx *ctx)
{
	if (ctx == NULL)
		return;

	if (ctx->sess) {
		rte_cryptodev_sym_session_clear(ctx->dev_id, ctx->sess);
		rte_cryptodev_sym_session_free(ctx->sess);
	}

	rte_mempool_free(ctx->pool);

	if (ctx->ops)
		rte_free(ctx->ops);

	if (ctx->ops_processed)
		rte_free(ctx->ops_processed);

	rte_free(ctx);
}
void *
cperf_pmd_cyclecount_test_constructor(struct rte_mempool *sess_mp,
		uint8_t dev_id, uint16_t qp_id,
		const struct cperf_options *options,
		const struct cperf_test_vector *test_vector,
		const struct cperf_op_fns *op_fns)
{
	struct cperf_pmd_cyclecount_ctx *ctx = NULL;

	/* preallocate buffers for crypto ops as they can get quite big */
	size_t alloc_sz = sizeof(struct rte_crypto_op *) *
			options->nb_descriptors;

	ctx = rte_malloc(NULL, sizeof(struct cperf_pmd_cyclecount_ctx), 0);
	if (ctx == NULL)
		goto err;

	ctx->dev_id = dev_id;
	ctx->qp_id = qp_id;

	ctx->populate_ops = op_fns->populate_ops;
	ctx->options = options;
	ctx->test_vector = test_vector;

	/* IV goes at the end of the crypto operation */
	uint16_t iv_offset = sizeof(struct rte_crypto_op) +
			sizeof(struct rte_crypto_sym_op);

	ctx->sess = op_fns->sess_create(
			sess_mp, dev_id, options, test_vector, iv_offset);
	if (ctx->sess == NULL)
		goto err;

	if (cperf_alloc_common_memory(options, test_vector, dev_id, qp_id, 0,
			&ctx->src_buf_offset, &ctx->dst_buf_offset,
			&ctx->pool) < 0)
		goto err;

	ctx->ops = rte_malloc("ops", alloc_sz, 0);
	if (!ctx->ops)
		goto err;

	ctx->ops_processed = rte_malloc("ops_processed", alloc_sz, 0);
	if (!ctx->ops_processed)
		goto err;

	return ctx;

err:
	cperf_pmd_cyclecount_test_free(ctx);

	return NULL;
}
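
/*
 * Any failure above funnels into the err path;
 * cperf_pmd_cyclecount_test_free() NULL-checks each member, so it is safe to
 * call with a partially initialized context.
 */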
/* benchmark alloc-build-free of ops */
static int
pmd_cyclecount_bench_ops(struct pmd_cyclecount_state *state, uint32_t cur_op,
		uint16_t test_burst_size)
{
	uint32_t iter_ops_left = state->opts->total_ops - cur_op;
	uint32_t iter_ops_needed =
			RTE_MIN(state->opts->nb_descriptors, iter_ops_left);
	uint32_t cur_iter_op;
	uint32_t imix_idx = 0;

	for (cur_iter_op = 0; cur_iter_op < iter_ops_needed;
			cur_iter_op += test_burst_size) {
		uint32_t burst_size = RTE_MIN(iter_ops_needed - cur_iter_op,
				test_burst_size);
		struct rte_crypto_op **ops = &state->ctx->ops[cur_iter_op];

		/* Allocate objects containing crypto operations and mbufs */
		if (rte_mempool_get_bulk(state->ctx->pool, (void **)ops,
					burst_size) != 0) {
			RTE_LOG(ERR, USER1,
					"Failed to allocate more crypto operations "
					"from the crypto operation pool.\n"
					"Consider increasing the pool size "
					"with --pool-sz\n");
			return -1;
		}

		/* Setup crypto op, attach mbuf etc */
		(state->ctx->populate_ops)(ops,
				state->ctx->src_buf_offset,
				state->ctx->dst_buf_offset,
				burst_size,
				state->ctx->sess, state->opts,
				state->ctx->test_vector, iv_offset,
				&imix_idx);

#ifdef CPERF_LINEARIZATION_ENABLE
		/* Check if source mbufs require coalescing */
		if (state->linearize) {
			uint8_t i;

			for (i = 0; i < burst_size; i++) {
				struct rte_mbuf *src = ops[i]->sym->m_src;
				rte_pktmbuf_linearize(src);
			}
		}
#endif /* CPERF_LINEARIZATION_ENABLE */
		rte_mempool_put_bulk(state->ctx->pool, (void **)ops,
				burst_size);
	}

	return 0;
}
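
/*
 * The ops built above are returned to the pool straight away: this pass only
 * measures the cost of allocating and populating them, nothing is submitted
 * to the device.
 */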
/* allocate and build ops (no free) */
static int
pmd_cyclecount_build_ops(struct pmd_cyclecount_state *state,
		uint32_t iter_ops_needed, uint16_t test_burst_size)
{
	uint32_t cur_iter_op;
	uint32_t imix_idx = 0;

	for (cur_iter_op = 0; cur_iter_op < iter_ops_needed;
			cur_iter_op += test_burst_size) {
		uint32_t burst_size = RTE_MIN(
				iter_ops_needed - cur_iter_op, test_burst_size);
		struct rte_crypto_op **ops = &state->ctx->ops[cur_iter_op];

		/* Allocate objects containing crypto operations and mbufs */
		if (rte_mempool_get_bulk(state->ctx->pool, (void **)ops,
					burst_size) != 0) {
			RTE_LOG(ERR, USER1,
					"Failed to allocate more crypto operations "
					"from the crypto operation pool.\n"
					"Consider increasing the pool size "
					"with --pool-sz\n");
			return -1;
		}

		/* Setup crypto op, attach mbuf etc */
		(state->ctx->populate_ops)(ops,
				state->ctx->src_buf_offset,
				state->ctx->dst_buf_offset,
				burst_size,
				state->ctx->sess, state->opts,
				state->ctx->test_vector, iv_offset,
				&imix_idx);
	}

	return 0;
}
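
/*
 * Unlike pmd_cyclecount_bench_ops(), the ops built here are left allocated so
 * that the enqueue/dequeue stages below can submit them to the device.
 */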
/* benchmark enqueue, returns number of ops enqueued */
static uint32_t
pmd_cyclecount_bench_enq(struct pmd_cyclecount_state *state,
		uint32_t iter_ops_needed, uint16_t test_burst_size)
{
	/* Enqueue full descriptor ring of ops on crypto device */
	uint32_t cur_iter_op = 0;

	while (cur_iter_op < iter_ops_needed) {
		uint32_t burst_size = RTE_MIN(iter_ops_needed - cur_iter_op,
				test_burst_size);
		struct rte_crypto_op **ops = &state->ctx->ops[cur_iter_op];
		uint32_t burst_enqd;

		burst_enqd = rte_cryptodev_enqueue_burst(state->ctx->dev_id,
				state->ctx->qp_id, ops, burst_size);

		/* if we couldn't enqueue anything, the queue is full */
		if (!burst_enqd) {
			/* don't try to dequeue anything we didn't enqueue */
			return cur_iter_op;
		}

		if (burst_enqd < burst_size)
			state->ops_enq_retries++;
		state->ops_enqd += burst_enqd;
		cur_iter_op += burst_enqd;
	}

	return iter_ops_needed;
}
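
/*
 * The value returned above is the number of ops actually placed on the ring;
 * the caller dequeues (and times) only that many.
 */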
/* benchmark dequeue */
static void
pmd_cyclecount_bench_deq(struct pmd_cyclecount_state *state,
		uint32_t iter_ops_needed, uint16_t test_burst_size)
{
	/* Dequeue full descriptor ring of ops on crypto device */
	uint32_t cur_iter_op = 0;

	while (cur_iter_op < iter_ops_needed) {
		uint32_t burst_size = RTE_MIN(iter_ops_needed - cur_iter_op,
				test_burst_size);
		struct rte_crypto_op **ops_processed =
				&state->ctx->ops[cur_iter_op];
		uint32_t burst_deqd;

		burst_deqd = rte_cryptodev_dequeue_burst(state->ctx->dev_id,
				state->ctx->qp_id, ops_processed, burst_size);

		if (burst_deqd < burst_size)
			state->ops_deq_retries++;
		state->ops_deqd += burst_deqd;
		cur_iter_op += burst_deqd;
	}
}
/* run benchmark per burst size */
static int
pmd_cyclecount_bench_burst_sz(
		struct pmd_cyclecount_state *state, uint16_t test_burst_size)
{
	uint64_t tsc_start;
	uint64_t tsc_end;
	uint64_t tsc_op;
	uint64_t tsc_enq;
	uint64_t tsc_deq;
	uint32_t cur_op;

	/* reset all counters */
	tsc_enq = 0;
	tsc_deq = 0;
	state->ops_enqd = 0;
	state->ops_enq_retries = 0;
	state->ops_deqd = 0;
	state->ops_deq_retries = 0;

	/*
	 * Benchmark crypto op alloc-build-free separately.
	 */
	tsc_start = rte_rdtsc_precise();

	for (cur_op = 0; cur_op < state->opts->total_ops;
			cur_op += state->opts->nb_descriptors) {
		if (unlikely(pmd_cyclecount_bench_ops(
				state, cur_op, test_burst_size)))
			return -1;
	}

	tsc_end = rte_rdtsc_precise();
	tsc_op = tsc_end - tsc_start;

	/*
	 * Hardware acceleration cyclecount benchmarking loop.
	 *
	 * We're benchmarking raw enq/deq performance by filling up the device
	 * queue, so we never get any failed enqs unless the driver won't accept
	 * the exact number of descriptors we requested, or the driver won't
	 * wrap around the end of the TX ring. However, since we're only
	 * dequeueing once we've filled up the queue, we have to benchmark it
	 * piecemeal and then average out the results.
	 */
	cur_op = 0;
	while (cur_op < state->opts->total_ops) {
		uint32_t iter_ops_left = state->opts->total_ops - cur_op;
		uint32_t iter_ops_needed = RTE_MIN(
				state->opts->nb_descriptors, iter_ops_left);
		uint32_t iter_ops_allocd = iter_ops_needed;

		/* allocate and build ops */
		if (unlikely(pmd_cyclecount_build_ops(state, iter_ops_needed,
				test_burst_size)))
			return -1;

		tsc_start = rte_rdtsc_precise();

		/* fill up TX ring */
		iter_ops_needed = pmd_cyclecount_bench_enq(state,
				iter_ops_needed, test_burst_size);

		tsc_end = rte_rdtsc_precise();

		tsc_enq += tsc_end - tsc_start;

		/* allow for HW to catch up */
		if (state->delay)
			rte_delay_us_block(state->delay);

		tsc_start = rte_rdtsc_precise();

		/* drain RX ring */
		pmd_cyclecount_bench_deq(state, iter_ops_needed,
				test_burst_size);

		tsc_end = rte_rdtsc_precise();

		tsc_deq += tsc_end - tsc_start;

		cur_op += iter_ops_needed;

		/*
		 * we may not have processed all ops that we allocated, so
		 * free everything we've allocated.
		 */
		rte_mempool_put_bulk(state->ctx->pool,
				(void **)state->ctx->ops, iter_ops_allocd);
	}

	state->cycles_per_build = (double)tsc_op / state->opts->total_ops;
	state->cycles_per_enq = (double)tsc_enq / state->ops_enqd;
	state->cycles_per_deq = (double)tsc_deq / state->ops_deqd;

	return 0;
}
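
/*
 * cycles_per_build is averaged over all requested ops, while cycles_per_enq
 * and cycles_per_deq are averaged over the ops that were actually enqueued
 * and dequeued.
 */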
int
cperf_pmd_cyclecount_test_runner(void *test_ctx)
{
	struct pmd_cyclecount_state state = {0};
	const struct cperf_options *opts;
	uint16_t test_burst_size;
	uint8_t burst_size_idx = 0;

	state.ctx = test_ctx;
	opts = state.ctx->options;
	state.opts = opts;
	state.lcore = rte_lcore_id();

	static int only_once;
	static bool warmup = true;

	/*
	 * We need a small delay to allow for hardware to process all the crypto
	 * operations. We can't automatically figure out what the delay should
	 * be, so we leave it up to the user (by default it's 0).
	 */
	state.delay = 1000 * opts->pmdcc_delay;
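
	/*
	 * pmdcc_delay is given on the command line in milliseconds;
	 * rte_delay_us_block() takes microseconds, hence the factor of 1000.
	 */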
#ifdef CPERF_LINEARIZATION_ENABLE
	struct rte_cryptodev_info dev_info;

	/* Check if source mbufs require coalescing */
	if (opts->segments_sz < state.ctx->options->max_buffer_size) {
		rte_cryptodev_info_get(state.ctx->dev_id, &dev_info);
		if ((dev_info.feature_flags &
				RTE_CRYPTODEV_FF_MBUF_SCATTER_GATHER) == 0)
			state.linearize = 1;
	}
#endif /* CPERF_LINEARIZATION_ENABLE */
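
	/*
	 * linearize is only set when the buffers are segmented and the device
	 * does not advertise scatter-gather support;
	 * pmd_cyclecount_bench_ops() then coalesces each source mbuf with
	 * rte_pktmbuf_linearize().
	 */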
	state.ctx->lcore_id = state.lcore;

	/* Get first size from range or list */
	if (opts->inc_burst_size != 0)
		test_burst_size = opts->min_burst_size;
	else
		test_burst_size = opts->burst_size_list[0];

	while (test_burst_size <= opts->max_burst_size) {
		/* do a benchmark run */
		if (pmd_cyclecount_bench_burst_sz(&state, test_burst_size))
			return -1;
		/*
		 * First run is always a warm up run.
		 */
		if (warmup) {
			warmup = false;
			continue;
		}

		if (!opts->csv) {
			if (!only_once)
				printf(PRETTY_HDR_FMT, "lcore id", "Buf Size",
						"Burst Size", "Enqueued",
						"Dequeued", "Enq Retries",
						"Deq Retries", "Cycles/Op",
						"Cycles/Enq", "Cycles/Deq");
			only_once = 1;

			printf(PRETTY_LINE_FMT, state.ctx->lcore_id,
					opts->test_buffer_size, test_burst_size,
					state.ops_enqd, state.ops_deqd,
					state.ops_enq_retries,
					state.ops_deq_retries,
					state.cycles_per_build,
					state.cycles_per_enq,
					state.cycles_per_deq);
		} else {
			if (!only_once)
				printf(CSV_HDR_FMT, "# lcore id", "Buf Size",
						"Burst Size", "Enqueued",
						"Dequeued", "Enq Retries",
						"Deq Retries", "Cycles/Op",
						"Cycles/Enq", "Cycles/Deq");
			only_once = 1;

			printf(CSV_LINE_FMT, state.ctx->lcore_id,
					opts->test_buffer_size, test_burst_size,
					state.ops_enqd, state.ops_deqd,
					state.ops_enq_retries,
					state.ops_deq_retries,
					state.cycles_per_build,
					state.cycles_per_enq,
					state.cycles_per_deq);
		}
		/* Get next size from range or list */
		if (opts->inc_burst_size != 0)
			test_burst_size += opts->inc_burst_size;
		else {
			if (++burst_size_idx == opts->burst_size_count)
				break;
			test_burst_size = opts->burst_size_list[burst_size_idx];
		}
	}

	return 0;
}
void
cperf_pmd_cyclecount_test_destructor(void *arg)
{
	struct cperf_pmd_cyclecount_ctx *ctx = arg;

	if (ctx == NULL)
		return;

	cperf_pmd_cyclecount_test_free(ctx);
}