1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright(c) 2018 Intel Corporation
 */
5 #include <rte_malloc.h>
8 #include <rte_cycles.h>
9 #include <rte_compressdev.h>
11 #include "comp_perf_test_benchmark.h"
14 main_loop(struct comp_test_data
*test_data
, uint8_t level
,
15 enum rte_comp_xform_type type
)
17 uint8_t dev_id
= test_data
->cdev_id
;
18 uint32_t i
, iter
, num_iter
;
19 struct rte_comp_op
**ops
, **deq_ops
;
20 void *priv_xform
= NULL
;
21 struct rte_comp_xform xform
;
22 struct rte_mbuf
**input_bufs
, **output_bufs
;
27 if (test_data
== NULL
|| !test_data
->burst_sz
) {
29 "Unknown burst size\n");
33 ops
= rte_zmalloc_socket(NULL
,
34 2 * test_data
->total_bufs
* sizeof(struct rte_comp_op
*),
39 "Can't allocate memory for ops strucures\n");
43 deq_ops
= &ops
[test_data
->total_bufs
];
45 if (type
== RTE_COMP_COMPRESS
) {
46 xform
= (struct rte_comp_xform
) {
47 .type
= RTE_COMP_COMPRESS
,
49 .algo
= RTE_COMP_ALGO_DEFLATE
,
50 .deflate
.huffman
= test_data
->huffman_enc
,
52 .window_size
= test_data
->window_sz
,
53 .chksum
= RTE_COMP_CHECKSUM_NONE
,
54 .hash_algo
= RTE_COMP_HASH_ALGO_NONE
57 input_bufs
= test_data
->decomp_bufs
;
58 output_bufs
= test_data
->comp_bufs
;
59 out_seg_sz
= test_data
->out_seg_sz
;
61 xform
= (struct rte_comp_xform
) {
62 .type
= RTE_COMP_DECOMPRESS
,
64 .algo
= RTE_COMP_ALGO_DEFLATE
,
65 .chksum
= RTE_COMP_CHECKSUM_NONE
,
66 .window_size
= test_data
->window_sz
,
67 .hash_algo
= RTE_COMP_HASH_ALGO_NONE
70 input_bufs
= test_data
->comp_bufs
;
71 output_bufs
= test_data
->decomp_bufs
;
72 out_seg_sz
= test_data
->seg_sz
;
75 /* Create private xform */
76 if (rte_compressdev_private_xform_create(dev_id
, &xform
,
78 RTE_LOG(ERR
, USER1
, "Private xform could not be created\n");
83 uint64_t tsc_start
, tsc_end
, tsc_duration
;
85 tsc_start
= tsc_end
= tsc_duration
= 0;
86 tsc_start
= rte_rdtsc();
87 num_iter
= test_data
->num_iter
;
89 for (iter
= 0; iter
< num_iter
; iter
++) {
90 uint32_t total_ops
= test_data
->total_bufs
;
91 uint32_t remaining_ops
= test_data
->total_bufs
;
92 uint32_t total_deq_ops
= 0;
93 uint32_t total_enq_ops
= 0;
94 uint16_t ops_unused
= 0;
98 while (remaining_ops
> 0) {
99 uint16_t num_ops
= RTE_MIN(remaining_ops
,
100 test_data
->burst_sz
);
101 uint16_t ops_needed
= num_ops
- ops_unused
;
104 * Move the unused operations from the previous
105 * enqueue_burst call to the front, to maintain order
107 if ((ops_unused
> 0) && (num_enq
> 0)) {
109 ops_unused
* sizeof(struct rte_comp_op
*);
111 memmove(ops
, &ops
[num_enq
], nb_b_to_mov
);
114 /* Allocate compression operations */
115 if (ops_needed
&& !rte_comp_op_bulk_alloc(
120 "Could not allocate enough operations\n");
124 allocated
+= ops_needed
;
126 for (i
= 0; i
< ops_needed
; i
++) {
128 * Calculate next buffer to attach to operation
130 uint32_t buf_id
= total_enq_ops
+ i
+
132 uint16_t op_id
= ops_unused
+ i
;
133 /* Reset all data in output buffers */
134 struct rte_mbuf
*m
= output_bufs
[buf_id
];
136 m
->pkt_len
= out_seg_sz
* m
->nb_segs
;
138 m
->data_len
= m
->buf_len
- m
->data_off
;
141 ops
[op_id
]->m_src
= input_bufs
[buf_id
];
142 ops
[op_id
]->m_dst
= output_bufs
[buf_id
];
143 ops
[op_id
]->src
.offset
= 0;
144 ops
[op_id
]->src
.length
=
145 rte_pktmbuf_pkt_len(input_bufs
[buf_id
]);
146 ops
[op_id
]->dst
.offset
= 0;
147 ops
[op_id
]->flush_flag
= RTE_COMP_FLUSH_FINAL
;
148 ops
[op_id
]->input_chksum
= buf_id
;
149 ops
[op_id
]->private_xform
= priv_xform
;
152 num_enq
= rte_compressdev_enqueue_burst(dev_id
, 0, ops
,
155 struct rte_compressdev_stats stats
;
157 rte_compressdev_stats_get(dev_id
, &stats
);
158 if (stats
.enqueue_err_count
) {
164 ops_unused
= num_ops
- num_enq
;
165 remaining_ops
-= num_enq
;
166 total_enq_ops
+= num_enq
;
168 num_deq
= rte_compressdev_dequeue_burst(dev_id
, 0,
170 test_data
->burst_sz
);
171 total_deq_ops
+= num_deq
;
173 if (iter
== num_iter
- 1) {
174 for (i
= 0; i
< num_deq
; i
++) {
175 struct rte_comp_op
*op
= deq_ops
[i
];
178 RTE_COMP_OP_STATUS_SUCCESS
) {
180 "Some operations were not successful\n");
184 struct rte_mbuf
*m
= op
->m_dst
;
186 m
->pkt_len
= op
->produced
;
187 uint32_t remaining_data
= op
->produced
;
188 uint16_t data_to_append
;
190 while (remaining_data
> 0) {
192 RTE_MIN(remaining_data
,
194 m
->data_len
= data_to_append
;
201 rte_mempool_put_bulk(test_data
->op_pool
,
202 (void **)deq_ops
, num_deq
);
203 allocated
-= num_deq
;
206 /* Dequeue the last operations */
207 while (total_deq_ops
< total_ops
) {
208 num_deq
= rte_compressdev_dequeue_burst(dev_id
, 0,
209 deq_ops
, test_data
->burst_sz
);
211 struct rte_compressdev_stats stats
;
213 rte_compressdev_stats_get(dev_id
, &stats
);
214 if (stats
.dequeue_err_count
) {
220 total_deq_ops
+= num_deq
;
222 if (iter
== num_iter
- 1) {
223 for (i
= 0; i
< num_deq
; i
++) {
224 struct rte_comp_op
*op
= deq_ops
[i
];
227 RTE_COMP_OP_STATUS_SUCCESS
) {
229 "Some operations were not successful\n");
233 struct rte_mbuf
*m
= op
->m_dst
;
235 m
->pkt_len
= op
->produced
;
236 uint32_t remaining_data
= op
->produced
;
237 uint16_t data_to_append
;
239 while (remaining_data
> 0) {
241 RTE_MIN(remaining_data
,
243 m
->data_len
= data_to_append
;
250 rte_mempool_put_bulk(test_data
->op_pool
,
251 (void **)deq_ops
, num_deq
);
252 allocated
-= num_deq
;
256 tsc_end
= rte_rdtsc();
257 tsc_duration
= tsc_end
- tsc_start
;
259 if (type
== RTE_COMP_COMPRESS
)
260 test_data
->comp_tsc_duration
[level
] =
261 tsc_duration
/ num_iter
;
263 test_data
->decomp_tsc_duration
[level
] =
264 tsc_duration
/ num_iter
;
267 rte_mempool_put_bulk(test_data
->op_pool
, (void **)ops
, allocated
);
268 rte_compressdev_private_xform_free(dev_id
, priv_xform
);
274 cperf_benchmark(struct comp_test_data
*test_data
, uint8_t level
)
276 int i
, ret
= EXIT_SUCCESS
;
279 * Run the tests twice, discarding the first performance
280 * results, before the cache is warmed up
282 for (i
= 0; i
< 2; i
++) {
283 if (main_loop(test_data
, level
, RTE_COMP_COMPRESS
) < 0) {
289 for (i
= 0; i
< 2; i
++) {
290 if (main_loop(test_data
, level
, RTE_COMP_DECOMPRESS
) < 0) {
296 test_data
->comp_tsc_byte
=
297 (double)(test_data
->comp_tsc_duration
[level
]) /
298 test_data
->input_data_sz
;
300 test_data
->decomp_tsc_byte
=
301 (double)(test_data
->decomp_tsc_duration
[level
]) /
302 test_data
->input_data_sz
;
304 test_data
->comp_gbps
= rte_get_tsc_hz() / test_data
->comp_tsc_byte
* 8 /
307 test_data
->decomp_gbps
= rte_get_tsc_hz() / test_data
->decomp_tsc_byte