]> git.proxmox.com Git - ceph.git/blob - ceph/src/spdk/dpdk/test/bpf/mbuf.h
update sources to ceph Nautilus 14.2.1
[ceph.git] / ceph / src / spdk / dpdk / test / bpf / mbuf.h
1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright(c) 2010-2014 Intel Corporation.
3 * Copyright 2014 6WIND S.A.
4 */
5
/*
 * Snippet from dpdk.org rte_mbuf.h,
 * used to provide BPF programs with information about the rte_mbuf layout.
 */
10
11 #ifndef _MBUF_H_
12 #define _MBUF_H_
13
14 #include <stdint.h>
15 #include <rte_common.h>
16 #include <rte_memory.h>
17
18 #ifdef __cplusplus
19 extern "C" {
20 #endif
21
/*
 * Packet Offload Features Flags. They also carry packet type information.
 * These bits are a critical resource shared by both RX and TX, so be
 * cautious with any change.
 *
 * - RX flags start at bit position zero, and get added to the left of previous
 *   flags.
 * - The most-significant 3 bits are reserved for generic mbuf flags.
 * - TX flags therefore start at bit position 60 (i.e. 63-3), and new flags get
 *   added to the right of the previously defined flags, i.e. they should count
 *   downwards, not upwards.
 *
 * Keep these flags synchronized with rte_get_rx_ol_flag_name() and
 * rte_get_tx_ol_flag_name().
 */
36
/**
 * RX packet is a 802.1q VLAN packet. This flag was set by PMDs when
 * the packet is recognized as a VLAN, but the behavior between PMDs
 * was not the same. This flag is kept for some time to avoid breaking
 * applications and should be replaced by PKT_RX_VLAN_STRIPPED.
 */
#define PKT_RX_VLAN_PKT         (1ULL << 0)

/** RX packet with RSS hash result. */
#define PKT_RX_RSS_HASH         (1ULL << 1)

/** RX packet with FDIR match indication. */
#define PKT_RX_FDIR             (1ULL << 2)

/**
 * Deprecated.
 * Checking this flag alone is deprecated: check the 2 bits of
 * PKT_RX_L4_CKSUM_MASK.
 * This flag was set when the L4 checksum of a packet was detected as
 * wrong by the hardware.
 */
#define PKT_RX_L4_CKSUM_BAD     (1ULL << 3)

/**
 * Deprecated.
 * Checking this flag alone is deprecated: check the 2 bits of
 * PKT_RX_IP_CKSUM_MASK.
 * This flag was set when the IP checksum of a packet was detected as
 * wrong by the hardware.
 */
#define PKT_RX_IP_CKSUM_BAD     (1ULL << 4)

/** External IP header checksum error. */
#define PKT_RX_EIP_CKSUM_BAD    (1ULL << 5)

/**
 * A vlan has been stripped by the hardware and its tci is saved in
 * mbuf->vlan_tci. This can only happen if vlan stripping is enabled
 * in the RX configuration of the PMD.
 */
#define PKT_RX_VLAN_STRIPPED    (1ULL << 6)

/**
 * Mask of bits used to determine the status of RX IP checksum.
 * - PKT_RX_IP_CKSUM_UNKNOWN: no information about the RX IP checksum
 * - PKT_RX_IP_CKSUM_BAD: the IP checksum in the packet is wrong
 * - PKT_RX_IP_CKSUM_GOOD: the IP checksum in the packet is valid
 * - PKT_RX_IP_CKSUM_NONE: the IP checksum is not correct in the packet
 *   data, but the integrity of the IP header is verified.
 *
 * The status is a 2-bit value spread over bits 4 and 7, so it must be
 * extracted with the mask and compared against one of the values below.
 */
#define PKT_RX_IP_CKSUM_MASK    ((1ULL << 4) | (1ULL << 7))

#define PKT_RX_IP_CKSUM_UNKNOWN 0
/* Identical re-definition of the deprecated single-bit flag above. */
#define PKT_RX_IP_CKSUM_BAD     (1ULL << 4)
#define PKT_RX_IP_CKSUM_GOOD    (1ULL << 7)
#define PKT_RX_IP_CKSUM_NONE    ((1ULL << 4) | (1ULL << 7))

/**
 * Mask of bits used to determine the status of RX L4 checksum.
 * - PKT_RX_L4_CKSUM_UNKNOWN: no information about the RX L4 checksum
 * - PKT_RX_L4_CKSUM_BAD: the L4 checksum in the packet is wrong
 * - PKT_RX_L4_CKSUM_GOOD: the L4 checksum in the packet is valid
 * - PKT_RX_L4_CKSUM_NONE: the L4 checksum is not correct in the packet
 *   data, but the integrity of the L4 data is verified.
 *
 * The status is a 2-bit value spread over bits 3 and 8, so it must be
 * extracted with the mask and compared against one of the values below.
 */
#define PKT_RX_L4_CKSUM_MASK    ((1ULL << 3) | (1ULL << 8))

#define PKT_RX_L4_CKSUM_UNKNOWN 0
/* Identical re-definition of the deprecated single-bit flag above. */
#define PKT_RX_L4_CKSUM_BAD     (1ULL << 3)
#define PKT_RX_L4_CKSUM_GOOD    (1ULL << 8)
#define PKT_RX_L4_CKSUM_NONE    ((1ULL << 3) | (1ULL << 8))

/** RX IEEE1588 L2 Ethernet PT Packet. */
#define PKT_RX_IEEE1588_PTP     (1ULL << 9)

/** RX IEEE1588 L2/L4 timestamped packet. */
#define PKT_RX_IEEE1588_TMST    (1ULL << 10)

/* Bits 11 and 12 are not defined in this snippet. */

/** FD id reported if FDIR match. */
#define PKT_RX_FDIR_ID          (1ULL << 13)

/** Flexible bytes reported if FDIR match. */
#define PKT_RX_FDIR_FLX         (1ULL << 14)

/**
 * The 2 vlans have been stripped by the hardware and their tci are
 * saved in mbuf->vlan_tci (inner) and mbuf->vlan_tci_outer (outer).
 * This can only happen if vlan stripping is enabled in the RX
 * configuration of the PMD. If this flag is set, PKT_RX_VLAN_STRIPPED
 * must also be set.
 */
#define PKT_RX_QINQ_STRIPPED    (1ULL << 15)

/**
 * Deprecated.
 * RX packet with double VLAN stripped.
 * This flag is replaced by PKT_RX_QINQ_STRIPPED.
 */
#define PKT_RX_QINQ_PKT         PKT_RX_QINQ_STRIPPED

/**
 * When packets are coalesced by a hardware or virtual driver, this flag
 * can be set in the RX mbuf, meaning that the m->tso_segsz field is
 * valid and is set to the segment size of original packets.
 */
#define PKT_RX_LRO              (1ULL << 16)

/**
 * Indicate that the timestamp field in the mbuf is valid.
 */
#define PKT_RX_TIMESTAMP        (1ULL << 17)

/* add new RX flags here */
146
/* add new TX flags here */

/**
 * Offload the MACsec. This flag must be set by the application to enable
 * this offload feature for a packet to be transmitted.
 */
#define PKT_TX_MACSEC           (1ULL << 44)

/**
 * Bits 45:48 used for the tunnel type.
 * When doing Tx offload like TSO or checksum, the HW needs to configure the
 * tunnel type into the HW descriptors.
 *
 * The tunnel type is an enumerated 4-bit value, not a set of independent
 * bits: extract it with PKT_TX_TUNNEL_MASK and compare for equality.
 */
#define PKT_TX_TUNNEL_VXLAN     (0x1ULL << 45)
#define PKT_TX_TUNNEL_GRE       (0x2ULL << 45)
#define PKT_TX_TUNNEL_IPIP      (0x3ULL << 45)
#define PKT_TX_TUNNEL_GENEVE    (0x4ULL << 45)
/** TX packet with MPLS-in-UDP RFC 7510 header. */
#define PKT_TX_TUNNEL_MPLSINUDP (0x5ULL << 45)
/* add new TX TUNNEL type here */
#define PKT_TX_TUNNEL_MASK      (0xFULL << 45)

/**
 * Second VLAN insertion (QinQ) flag:
 * TX packet with double VLAN inserted.
 */
#define PKT_TX_QINQ_PKT         (1ULL << 49)

/**
 * TCP segmentation offload. To enable this offload feature for a
 * packet to be transmitted on hardware supporting TSO:
 *  - set the PKT_TX_TCP_SEG flag in mbuf->ol_flags (this flag implies
 *    PKT_TX_TCP_CKSUM)
 *  - set the flag PKT_TX_IPV4 or PKT_TX_IPV6
 *  - if it's IPv4, set the PKT_TX_IP_CKSUM flag and write the IP checksum
 *    to 0 in the packet
 *  - fill the mbuf offload information: l2_len, l3_len, l4_len, tso_segsz
 *  - calculate the pseudo header checksum without taking ip_len in account,
 *    and set it in the TCP header. Refer to rte_ipv4_phdr_cksum() and
 *    rte_ipv6_phdr_cksum() that can be used as helpers.
 */
#define PKT_TX_TCP_SEG          (1ULL << 50)

/** TX IEEE1588 packet to timestamp. */
#define PKT_TX_IEEE1588_TMST    (1ULL << 51)

/**
 * Bits 52+53 used for L4 packet type with checksum enabled: 00: Reserved,
 * 01: TCP checksum, 10: SCTP checksum, 11: UDP checksum. To use hardware
 * L4 checksum offload, the user needs to:
 *  - fill l2_len and l3_len in mbuf
 *  - set the flags PKT_TX_TCP_CKSUM, PKT_TX_SCTP_CKSUM or PKT_TX_UDP_CKSUM
 *  - set the flag PKT_TX_IPV4 or PKT_TX_IPV6
 *  - calculate the pseudo header checksum and set it in the L4 header (only
 *    for TCP or UDP). See rte_ipv4_phdr_cksum() and rte_ipv6_phdr_cksum().
 *    For SCTP, set the crc field to 0.
 */
/** Disable L4 cksum of TX pkt. */
#define PKT_TX_L4_NO_CKSUM      (0ULL << 52)
/** TCP cksum of TX pkt. computed by NIC. */
#define PKT_TX_TCP_CKSUM        (1ULL << 52)
/** SCTP cksum of TX pkt. computed by NIC. */
#define PKT_TX_SCTP_CKSUM       (2ULL << 52)
/** UDP cksum of TX pkt. computed by NIC. */
#define PKT_TX_UDP_CKSUM        (3ULL << 52)
/** Mask for L4 cksum offload request. */
#define PKT_TX_L4_MASK          (3ULL << 52)

/**
 * Offload the IP checksum in the hardware. The flag PKT_TX_IPV4 should
 * also be set by the application, although a PMD will only check
 * PKT_TX_IP_CKSUM.
 *  - set the IP checksum field in the packet to 0
 *  - fill the mbuf offload information: l2_len, l3_len
 */
#define PKT_TX_IP_CKSUM         (1ULL << 54)

/**
 * Packet is IPv4. This flag must be set when using any offload feature
 * (TSO, L3 or L4 checksum) to tell the NIC that the packet is an IPv4
 * packet. If the packet is a tunneled packet, this flag is related to
 * the inner headers.
 */
#define PKT_TX_IPV4             (1ULL << 55)

/**
 * Packet is IPv6. This flag must be set when using an offload feature
 * (TSO or L4 checksum) to tell the NIC that the packet is an IPv6
 * packet. If the packet is a tunneled packet, this flag is related to
 * the inner headers.
 */
#define PKT_TX_IPV6             (1ULL << 56)

/** TX packet is a 802.1q VLAN packet. */
#define PKT_TX_VLAN_PKT         (1ULL << 57)

/**
 * Offload the IP checksum of an external header in the hardware. The
 * flag PKT_TX_OUTER_IPV4 should also be set by the application, although
 * a PMD will only check PKT_TX_OUTER_IP_CKSUM.
 *  - set the outer IP checksum field in the packet to 0
 *  - fill the mbuf offload information: outer_l2_len, outer_l3_len
 */
#define PKT_TX_OUTER_IP_CKSUM   (1ULL << 58)

/**
 * Packet outer header is IPv4. This flag must be set when using any
 * outer offload feature (L3 or L4 checksum) to tell the NIC that the
 * outer header of the tunneled packet is an IPv4 packet.
 */
#define PKT_TX_OUTER_IPV4       (1ULL << 59)

/**
 * Packet outer header is IPv6. This flag must be set when using any
 * outer offload feature (L4 checksum) to tell the NIC that the outer
 * header of the tunneled packet is an IPv6 packet.
 */
#define PKT_TX_OUTER_IPV6       (1ULL << 60)

/**
 * Bitmask of all supported packet Tx offload features flags,
 * which can be set for packet.
 *
 * The protocol-description flags (PKT_TX_IPV4, PKT_TX_IPV6,
 * PKT_TX_OUTER_IPV4, PKT_TX_OUTER_IPV6) are not part of this mask.
 */
#define PKT_TX_OFFLOAD_MASK (    \
        PKT_TX_IP_CKSUM |        \
        PKT_TX_L4_MASK |         \
        PKT_TX_OUTER_IP_CKSUM |  \
        PKT_TX_TCP_SEG |         \
        PKT_TX_IEEE1588_TMST |   \
        PKT_TX_QINQ_PKT |        \
        PKT_TX_VLAN_PKT |        \
        PKT_TX_TUNNEL_MASK |     \
        PKT_TX_MACSEC)
281
/*
 * Generic mbuf flags: the three most-significant bits of ol_flags
 * (61, 62 and 63).
 */

/** Reserved for future mbuf use. */
#define __RESERVED              (1ULL << 61)

/** Indirect attached mbuf. */
#define IND_ATTACHED_MBUF       (1ULL << 62)

/* Use final bit of flags to indicate a control mbuf */
/** Mbuf contains control data. */
#define CTRL_MBUF_FLAG          (1ULL << 63)

/** Alignment constraint of mbuf private area. */
#define RTE_MBUF_PRIV_ALIGN     8
291
/**
 * Get the name of a RX offload flag
 *
 * @param mask
 *   The mask describing the flag.
 * @return
 *   The name of this flag, or NULL if it's not a valid RX flag.
 */
const char *rte_get_rx_ol_flag_name(uint64_t mask);

/**
 * Dump the list of RX offload flags in a buffer
 *
 * @param mask
 *   The mask describing the RX flags.
 * @param buf
 *   The output buffer.
 * @param buflen
 *   The length of the buffer.
 * @return
 *   0 on success, (-1) on error.
 */
int rte_get_rx_ol_flag_list(uint64_t mask, char *buf, size_t buflen);

/**
 * Get the name of a TX offload flag
 *
 * @param mask
 *   The mask describing the flag. Usually only one bit must be set.
 *   Several bits can be given if they belong to the same mask.
 *   Ex: PKT_TX_L4_MASK.
 * @return
 *   The name of this flag, or NULL if it's not a valid TX flag.
 */
const char *rte_get_tx_ol_flag_name(uint64_t mask);

/**
 * Dump the list of TX offload flags in a buffer
 *
 * @param mask
 *   The mask describing the TX flags.
 * @param buf
 *   The output buffer.
 * @param buflen
 *   The length of the buffer.
 * @return
 *   0 on success, (-1) on error.
 */
int rte_get_tx_ol_flag_list(uint64_t mask, char *buf, size_t buflen);
341
/**
 * Some NICs need at least a 2KB buffer to RX a standard Ethernet frame
 * without splitting it into multiple segments.
 * So, for mbufs that are planned to be involved in RX/TX, the recommended
 * minimal buffer length is 2KB + RTE_PKTMBUF_HEADROOM.
 *
 * RTE_PKTMBUF_HEADROOM is not defined in this file; it must be provided
 * before RTE_MBUF_DEFAULT_BUF_SIZE is expanded.
 */
#define RTE_MBUF_DEFAULT_DATAROOM   2048
#define RTE_MBUF_DEFAULT_BUF_SIZE   \
        (RTE_MBUF_DEFAULT_DATAROOM + RTE_PKTMBUF_HEADROOM)
351
/*
 * Define a set of marker types that can be used to refer to set points in
 * the mbuf.
 *
 * They are zero-length arrays (a compiler extension, hence __extension__):
 * a marker member only contributes the alignment of its element type and
 * gives a name to the location of the member that follows it.
 */

/** Generic marker for a point in a structure. */
__extension__
typedef void *MARKER[0];

/** Generic marker with 1B alignment. */
__extension__
typedef uint8_t MARKER8[0];

/** Marker that allows us to overwrite 8 bytes with a single assignment. */
__extension__
typedef uint64_t MARKER64[0];
362
/**
 * 16-bit counter wrapper, used in struct rte_mbuf as the atomically
 * accessed view of the reference counter.
 */
typedef struct {
    volatile int16_t cnt; /**< An internal counter value. */
} rte_atomic16_t;
366
367 /**
368 * The generic rte_mbuf, containing a packet mbuf.
369 */
370 struct rte_mbuf {
371 MARKER cacheline0;
372
373 void *buf_addr; /**< Virtual address of segment buffer. */
374 /**
375 * Physical address of segment buffer.
376 * Force alignment to 8-bytes, so as to ensure we have the exact
377 * same mbuf cacheline0 layout for 32-bit and 64-bit. This makes
378 * working on vector drivers easier.
379 */
380 phys_addr_t buf_physaddr __rte_aligned(sizeof(phys_addr_t));
381
382 /* next 8 bytes are initialised on RX descriptor rearm */
383 MARKER64 rearm_data;
384 uint16_t data_off;
385
386 /**
387 * Reference counter. Its size should at least equal to the size
388 * of port field (16 bits), to support zero-copy broadcast.
389 * It should only be accessed using the following functions:
390 * rte_mbuf_refcnt_update(), rte_mbuf_refcnt_read(), and
391 * rte_mbuf_refcnt_set(). The functionality of these functions (atomic,
392 * or non-atomic) is controlled by the CONFIG_RTE_MBUF_REFCNT_ATOMIC
393 * config option.
394 */
395 RTE_STD_C11
396 union {
397 rte_atomic16_t refcnt_atomic; /**< Atomically accessed refcnt */
398 uint16_t refcnt;
399 /**< Non-atomically accessed refcnt */
400 };
401 uint16_t nb_segs; /**< Number of segments. */
402
403 /** Input port (16 bits to support more than 256 virtual ports). */
404 uint16_t port;
405
406 uint64_t ol_flags; /**< Offload features. */
407
408 /* remaining bytes are set on RX when pulling packet from descriptor */
409 MARKER rx_descriptor_fields1;
410
411 /*
412 * The packet type, which is the combination of outer/inner L2, L3, L4
413 * and tunnel types. The packet_type is about data really present in the
414 * mbuf. Example: if vlan stripping is enabled, a received vlan packet
415 * would have RTE_PTYPE_L2_ETHER and not RTE_PTYPE_L2_VLAN because the
416 * vlan is stripped from the data.
417 */
418 RTE_STD_C11
419 union {
420 uint32_t packet_type; /**< L2/L3/L4 and tunnel information. */
421 struct {
422 uint32_t l2_type:4; /**< (Outer) L2 type. */
423 uint32_t l3_type:4; /**< (Outer) L3 type. */
424 uint32_t l4_type:4; /**< (Outer) L4 type. */
425 uint32_t tun_type:4; /**< Tunnel type. */
426 uint32_t inner_l2_type:4; /**< Inner L2 type. */
427 uint32_t inner_l3_type:4; /**< Inner L3 type. */
428 uint32_t inner_l4_type:4; /**< Inner L4 type. */
429 };
430 };
431
432 uint32_t pkt_len; /**< Total pkt len: sum of all segments. */
433 uint16_t data_len; /**< Amount of data in segment buffer. */
434 /** VLAN TCI (CPU order), valid if PKT_RX_VLAN_STRIPPED is set. */
435 uint16_t vlan_tci;
436
437 union {
438 uint32_t rss; /**< RSS hash result if RSS enabled */
439 struct {
440 RTE_STD_C11
441 union {
442 struct {
443 uint16_t hash;
444 uint16_t id;
445 };
446 uint32_t lo;
447 /**< Second 4 flexible bytes */
448 };
449 uint32_t hi;
450 /**< First 4 flexible bytes or FD ID, dependent on
451 * PKT_RX_FDIR_* flag in ol_flags.
452 */
453 } fdir; /**< Filter identifier if FDIR enabled */
454 struct {
455 uint32_t lo;
456 uint32_t hi;
457 } sched; /**< Hierarchical scheduler */
458 uint32_t usr;
459 /**< User defined tags. See rte_distributor_process() */
460 } hash; /**< hash information */
461
462 /** Outer VLAN TCI (CPU order), valid if PKT_RX_QINQ_STRIPPED is set. */
463 uint16_t vlan_tci_outer;
464
465 uint16_t buf_len; /**< Length of segment buffer. */
466
467 /** Valid if PKT_RX_TIMESTAMP is set. The unit and time reference
468 * are not normalized but are always the same for a given port.
469 */
470 uint64_t timestamp;
471
472 /* second cache line - fields only used in slow path or on TX */
473 MARKER cacheline1 __rte_cache_min_aligned;
474
475 RTE_STD_C11
476 union {
477 void *userdata; /**< Can be used for external metadata */
478 uint64_t udata64; /**< Allow 8-byte userdata on 32-bit */
479 };
480
481 struct rte_mempool *pool; /**< Pool from which mbuf was allocated. */
482 struct rte_mbuf *next; /**< Next segment of scattered packet. */
483
484 /* fields to support TX offloads */
485 RTE_STD_C11
486 union {
487 uint64_t tx_offload; /**< combined for easy fetch */
488 __extension__
489 struct {
490 uint64_t l2_len:7;
491 /**< L2 (MAC) Header Length for non-tunneling pkt.
492 * Outer_L4_len + ... + Inner_L2_len for tunneling pkt.
493 */
494 uint64_t l3_len:9; /**< L3 (IP) Header Length. */
495 uint64_t l4_len:8; /**< L4 (TCP/UDP) Header Length. */
496 uint64_t tso_segsz:16; /**< TCP TSO segment size */
497
498 /* fields for TX offloading of tunnels */
499 uint64_t outer_l3_len:9;
500 /**< Outer L3 (IP) Hdr Length. */
501 uint64_t outer_l2_len:7;
502 /**< Outer L2 (MAC) Hdr Length. */
503
504 /* uint64_t unused:8; */
505 };
506 };
507
508 /** Size of the application private data. In case of an indirect
509 * mbuf, it stores the direct mbuf private data size.
510 */
511 uint16_t priv_size;
512
513 /** Timesync flags for use with IEEE1588. */
514 uint16_t timesync;
515
516 /** Sequence number. See also rte_reorder_insert(). */
517 uint32_t seqn;
518
519 } __rte_cache_aligned;
520
521
/**
 * Returns TRUE (1) if given mbuf is indirect, or FALSE (0) otherwise.
 *
 * The masked value is compared against zero so that the result is a plain
 * int truth value. The raw "ol_flags & IND_ATTACHED_MBUF" expression is a
 * 64-bit quantity with only a high bit set, which silently becomes 0 when
 * stored in a narrower integer.
 *
 * @param mb
 *   Pointer to the mbuf; it is evaluated exactly once.
 */
#define RTE_MBUF_INDIRECT(mb)   (((mb)->ol_flags & IND_ATTACHED_MBUF) != 0)

/**
 * Returns TRUE (1) if given mbuf is direct, or FALSE (0) otherwise.
 *
 * @param mb
 *   Pointer to the mbuf; it is evaluated exactly once.
 */
#define RTE_MBUF_DIRECT(mb)     (!RTE_MBUF_INDIRECT(mb))
531
/**
 * Private data in case of pktmbuf pool.
 *
 * A structure that contains some pktmbuf_pool-specific data that are
 * appended after the mempool structure (in private data).
 */
struct rte_pktmbuf_pool_private {
    uint16_t mbuf_data_room_size; /**< Size of data space in each mbuf. */
    uint16_t mbuf_priv_size;      /**< Size of private area in each mbuf. */
};
542
/**
 * A macro that points to an offset into the data in the mbuf.
 *
 * The returned pointer is cast to type t. Before using this
 * macro, the user must ensure that the first segment is large
 * enough to accommodate its data.
 *
 * The address is computed in bytes:
 * buf_addr + data_off + o, then cast to t.
 *
 * @param m
 *   The packet mbuf. It is evaluated twice, so it must not have side
 *   effects.
 * @param t
 *   The type to cast the result into.
 * @param o
 *   The offset into the mbuf data.
 */
#define rte_pktmbuf_mtod_offset(m, t, o) \
        ((t)((char *)(m)->buf_addr + (m)->data_off + (o)))

/**
 * A macro that points to the start of the data in the mbuf.
 *
 * The returned pointer is cast to type t. Before using this
 * macro, the user must ensure that the first segment is large
 * enough to accommodate its data.
 *
 * @param m
 *   The packet mbuf.
 * @param t
 *   The type to cast the result into.
 */
#define rte_pktmbuf_mtod(m, t) rte_pktmbuf_mtod_offset(m, t, 0)
573
574 #ifdef __cplusplus
575 }
576 #endif
577
578 #endif /* _MBUF_H_ */