]> git.proxmox.com Git - ceph.git/blob - ceph/src/dpdk/lib/librte_mbuf/rte_mbuf.h
add subtree-ish sources for 12.0.3
[ceph.git] / ceph / src / dpdk / lib / librte_mbuf / rte_mbuf.h
1 /*-
2 * BSD LICENSE
3 *
4 * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
5 * Copyright 2014 6WIND S.A.
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 *
12 * * Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * * Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in
16 * the documentation and/or other materials provided with the
17 * distribution.
18 * * Neither the name of Intel Corporation nor the names of its
19 * contributors may be used to endorse or promote products derived
20 * from this software without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
25 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
26 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 */
34
35 #ifndef _RTE_MBUF_H_
36 #define _RTE_MBUF_H_
37
38 /**
39 * @file
40 * RTE Mbuf
41 *
42 * The mbuf library provides the ability to create and destroy buffers
43 * that may be used by the RTE application to store message
44 * buffers. The message buffers are stored in a mempool, using the
45 * RTE mempool library.
46 *
47 * This library provides an API to allocate/free packet mbufs, which are
48 * used to carry network packets.
49 *
50 * To understand the concepts of packet buffers or mbufs, you
51 * should read "TCP/IP Illustrated, Volume 2: The Implementation,
52 * Addison-Wesley, 1995, ISBN 0-201-63354-X from Richard Stevens"
53 * http://www.kohala.com/start/tcpipiv2.html
54 */
55
56 #include <stdint.h>
57 #include <rte_common.h>
58 #include <rte_mempool.h>
59 #include <rte_memory.h>
60 #include <rte_atomic.h>
61 #include <rte_prefetch.h>
62 #include <rte_branch_prediction.h>
63 #include <rte_mbuf_ptype.h>
64
65 #ifdef __cplusplus
66 extern "C" {
67 #endif
68
69 /*
70 * Packet Offload Features Flags. It also carry packet type information.
71 * Critical resources. Both rx/tx shared these bits. Be cautious on any change
72 *
73 * - RX flags start at bit position zero, and get added to the left of previous
74 * flags.
75 * - The most-significant 3 bits are reserved for generic mbuf flags
76 * - TX flags therefore start at bit position 60 (i.e. 63-3), and new flags get
77 * added to the right of the previously defined flags i.e. they should count
78 * downwards, not upwards.
79 *
80 * Keep these flags synchronized with rte_get_rx_ol_flag_name() and
81 * rte_get_tx_ol_flag_name().
82 */
83
84 /**
85 * RX packet is a 802.1q VLAN packet. This flag was set by PMDs when
86 * the packet is recognized as a VLAN, but the behavior between PMDs
87 * was not the same. This flag is kept for some time to avoid breaking
88 * applications and should be replaced by PKT_RX_VLAN_STRIPPED.
89 */
90 #define PKT_RX_VLAN_PKT (1ULL << 0)
91
92 #define PKT_RX_RSS_HASH (1ULL << 1) /**< RX packet with RSS hash result. */
93 #define PKT_RX_FDIR (1ULL << 2) /**< RX packet with FDIR match indicate. */
94
95 /**
96 * Deprecated.
97 * Checking this flag alone is deprecated: check the 2 bits of
98 * PKT_RX_L4_CKSUM_MASK.
99 * This flag was set when the L4 checksum of a packet was detected as
100 * wrong by the hardware.
101 */
102 #define PKT_RX_L4_CKSUM_BAD (1ULL << 3)
103
104 /**
105 * Deprecated.
106 * Checking this flag alone is deprecated: check the 2 bits of
107 * PKT_RX_IP_CKSUM_MASK.
108 * This flag was set when the IP checksum of a packet was detected as
109 * wrong by the hardware.
110 */
111 #define PKT_RX_IP_CKSUM_BAD (1ULL << 4)
112
113 #define PKT_RX_EIP_CKSUM_BAD (1ULL << 5) /**< External IP header checksum error. */
114
115 /**
116 * A vlan has been stripped by the hardware and its tci is saved in
117 * mbuf->vlan_tci. This can only happen if vlan stripping is enabled
118 * in the RX configuration of the PMD.
119 */
120 #define PKT_RX_VLAN_STRIPPED (1ULL << 6)
121
122 /**
123 * Mask of bits used to determine the status of RX IP checksum.
124 * - PKT_RX_IP_CKSUM_UNKNOWN: no information about the RX IP checksum
125 * - PKT_RX_IP_CKSUM_BAD: the IP checksum in the packet is wrong
126 * - PKT_RX_IP_CKSUM_GOOD: the IP checksum in the packet is valid
127 * - PKT_RX_IP_CKSUM_NONE: the IP checksum is not correct in the packet
128 * data, but the integrity of the IP header is verified.
129 */
130 #define PKT_RX_IP_CKSUM_MASK ((1ULL << 4) | (1ULL << 7))
131
132 #define PKT_RX_IP_CKSUM_UNKNOWN 0
133 #define PKT_RX_IP_CKSUM_BAD (1ULL << 4)
134 #define PKT_RX_IP_CKSUM_GOOD (1ULL << 7)
135 #define PKT_RX_IP_CKSUM_NONE ((1ULL << 4) | (1ULL << 7))
136
137 /**
138 * Mask of bits used to determine the status of RX L4 checksum.
139 * - PKT_RX_L4_CKSUM_UNKNOWN: no information about the RX L4 checksum
140 * - PKT_RX_L4_CKSUM_BAD: the L4 checksum in the packet is wrong
141 * - PKT_RX_L4_CKSUM_GOOD: the L4 checksum in the packet is valid
142 * - PKT_RX_L4_CKSUM_NONE: the L4 checksum is not correct in the packet
143 * data, but the integrity of the L4 data is verified.
144 */
145 #define PKT_RX_L4_CKSUM_MASK ((1ULL << 3) | (1ULL << 8))
146
147 #define PKT_RX_L4_CKSUM_UNKNOWN 0
148 #define PKT_RX_L4_CKSUM_BAD (1ULL << 3)
149 #define PKT_RX_L4_CKSUM_GOOD (1ULL << 8)
150 #define PKT_RX_L4_CKSUM_NONE ((1ULL << 3) | (1ULL << 8))
151
152 #define PKT_RX_IEEE1588_PTP (1ULL << 9) /**< RX IEEE1588 L2 Ethernet PT Packet. */
153 #define PKT_RX_IEEE1588_TMST (1ULL << 10) /**< RX IEEE1588 L2/L4 timestamped packet.*/
154 #define PKT_RX_FDIR_ID (1ULL << 13) /**< FD id reported if FDIR match. */
155 #define PKT_RX_FDIR_FLX (1ULL << 14) /**< Flexible bytes reported if FDIR match. */
156
157 /**
158 * The 2 vlans have been stripped by the hardware and their tci are
159 * saved in mbuf->vlan_tci (inner) and mbuf->vlan_tci_outer (outer).
160 * This can only happen if vlan stripping is enabled in the RX
161 * configuration of the PMD. If this flag is set, PKT_RX_VLAN_STRIPPED
162 * must also be set.
163 */
164 #define PKT_RX_QINQ_STRIPPED (1ULL << 15)
165
166 /**
167 * Deprecated.
168 * RX packet with double VLAN stripped.
169 * This flag is replaced by PKT_RX_QINQ_STRIPPED.
170 */
171 #define PKT_RX_QINQ_PKT PKT_RX_QINQ_STRIPPED
172
173 /**
174 * When packets are coalesced by a hardware or virtual driver, this flag
175 * can be set in the RX mbuf, meaning that the m->tso_segsz field is
176 * valid and is set to the segment size of original packets.
177 */
178 #define PKT_RX_LRO (1ULL << 16)
179
180 /* add new RX flags here */
181
182 /* add new TX flags here */
183
184 /**
185 * Bits 45:48 used for the tunnel type.
186 * When doing Tx offload like TSO or checksum, the HW needs to configure the
187 * tunnel type into the HW descriptors.
188 */
189 #define PKT_TX_TUNNEL_VXLAN (0x1ULL << 45)
190 #define PKT_TX_TUNNEL_GRE (0x2ULL << 45)
191 #define PKT_TX_TUNNEL_IPIP (0x3ULL << 45)
192 #define PKT_TX_TUNNEL_GENEVE (0x4ULL << 45)
193 /* add new TX TUNNEL type here */
194 #define PKT_TX_TUNNEL_MASK (0xFULL << 45)
195
196 /**
197 * Second VLAN insertion (QinQ) flag.
198 */
199 #define PKT_TX_QINQ_PKT (1ULL << 49) /**< TX packet with double VLAN inserted. */
200
201 /**
202 * TCP segmentation offload. To enable this offload feature for a
203 * packet to be transmitted on hardware supporting TSO:
204 * - set the PKT_TX_TCP_SEG flag in mbuf->ol_flags (this flag implies
205 * PKT_TX_TCP_CKSUM)
206 * - set the flag PKT_TX_IPV4 or PKT_TX_IPV6
207 * - if it's IPv4, set the PKT_TX_IP_CKSUM flag and write the IP checksum
208 * to 0 in the packet
209 * - fill the mbuf offload information: l2_len, l3_len, l4_len, tso_segsz
210 * - calculate the pseudo header checksum without taking ip_len in account,
211 * and set it in the TCP header. Refer to rte_ipv4_phdr_cksum() and
212 * rte_ipv6_phdr_cksum() that can be used as helpers.
213 */
214 #define PKT_TX_TCP_SEG (1ULL << 50)
215
216 #define PKT_TX_IEEE1588_TMST (1ULL << 51) /**< TX IEEE1588 packet to timestamp. */
217
218 /**
219 * Bits 52+53 used for L4 packet type with checksum enabled: 00: Reserved,
220 * 01: TCP checksum, 10: SCTP checksum, 11: UDP checksum. To use hardware
221 * L4 checksum offload, the user needs to:
222 * - fill l2_len and l3_len in mbuf
223 * - set the flags PKT_TX_TCP_CKSUM, PKT_TX_SCTP_CKSUM or PKT_TX_UDP_CKSUM
224 * - set the flag PKT_TX_IPV4 or PKT_TX_IPV6
225 * - calculate the pseudo header checksum and set it in the L4 header (only
226 * for TCP or UDP). See rte_ipv4_phdr_cksum() and rte_ipv6_phdr_cksum().
227 * For SCTP, set the crc field to 0.
228 */
229 #define PKT_TX_L4_NO_CKSUM (0ULL << 52) /**< Disable L4 cksum of TX pkt. */
230 #define PKT_TX_TCP_CKSUM (1ULL << 52) /**< TCP cksum of TX pkt. computed by NIC. */
231 #define PKT_TX_SCTP_CKSUM (2ULL << 52) /**< SCTP cksum of TX pkt. computed by NIC. */
232 #define PKT_TX_UDP_CKSUM (3ULL << 52) /**< UDP cksum of TX pkt. computed by NIC. */
233 #define PKT_TX_L4_MASK (3ULL << 52) /**< Mask for L4 cksum offload request. */
234
235 /**
236 * Offload the IP checksum in the hardware. The flag PKT_TX_IPV4 should
237 * also be set by the application, although a PMD will only check
238 * PKT_TX_IP_CKSUM.
239 * - set the IP checksum field in the packet to 0
240 * - fill the mbuf offload information: l2_len, l3_len
241 */
242 #define PKT_TX_IP_CKSUM (1ULL << 54)
243
244 /**
245 * Packet is IPv4. This flag must be set when using any offload feature
246 * (TSO, L3 or L4 checksum) to tell the NIC that the packet is an IPv4
247 * packet. If the packet is a tunneled packet, this flag is related to
248 * the inner headers.
249 */
250 #define PKT_TX_IPV4 (1ULL << 55)
251
252 /**
253 * Packet is IPv6. This flag must be set when using an offload feature
254 * (TSO or L4 checksum) to tell the NIC that the packet is an IPv6
255 * packet. If the packet is a tunneled packet, this flag is related to
256 * the inner headers.
257 */
258 #define PKT_TX_IPV6 (1ULL << 56)
259
260 #define PKT_TX_VLAN_PKT (1ULL << 57) /**< TX packet is a 802.1q VLAN packet. */
261
262 /**
263 * Offload the IP checksum of an external header in the hardware. The
 * flag PKT_TX_OUTER_IPV4 should also be set by the application, although
265 * a PMD will only check PKT_TX_IP_CKSUM. The IP checksum field in the
266 * packet must be set to 0.
267 * - set the outer IP checksum field in the packet to 0
268 * - fill the mbuf offload information: outer_l2_len, outer_l3_len
269 */
270 #define PKT_TX_OUTER_IP_CKSUM (1ULL << 58)
271
272 /**
273 * Packet outer header is IPv4. This flag must be set when using any
274 * outer offload feature (L3 or L4 checksum) to tell the NIC that the
275 * outer header of the tunneled packet is an IPv4 packet.
276 */
277 #define PKT_TX_OUTER_IPV4 (1ULL << 59)
278
279 /**
280 * Packet outer header is IPv6. This flag must be set when using any
281 * outer offload feature (L4 checksum) to tell the NIC that the outer
282 * header of the tunneled packet is an IPv6 packet.
283 */
284 #define PKT_TX_OUTER_IPV6 (1ULL << 60)
285
286 #define __RESERVED (1ULL << 61) /**< reserved for future mbuf use */
287
288 #define IND_ATTACHED_MBUF (1ULL << 62) /**< Indirect attached mbuf */
289
290 /* Use final bit of flags to indicate a control mbuf */
291 #define CTRL_MBUF_FLAG (1ULL << 63) /**< Mbuf contains control data */
292
293 /** Alignment constraint of mbuf private area. */
294 #define RTE_MBUF_PRIV_ALIGN 8
295
296 /**
297 * Get the name of a RX offload flag
298 *
299 * @param mask
300 * The mask describing the flag.
301 * @return
302 * The name of this flag, or NULL if it's not a valid RX flag.
303 */
304 const char *rte_get_rx_ol_flag_name(uint64_t mask);
305
306 /**
307 * Dump the list of RX offload flags in a buffer
308 *
309 * @param mask
310 * The mask describing the RX flags.
311 * @param buf
312 * The output buffer.
313 * @param buflen
314 * The length of the buffer.
315 * @return
316 * 0 on success, (-1) on error.
317 */
318 int rte_get_rx_ol_flag_list(uint64_t mask, char *buf, size_t buflen);
319
320 /**
321 * Get the name of a TX offload flag
322 *
323 * @param mask
324 * The mask describing the flag. Usually only one bit must be set.
325 * Several bits can be given if they belong to the same mask.
326 * Ex: PKT_TX_L4_MASK.
327 * @return
328 * The name of this flag, or NULL if it's not a valid TX flag.
329 */
330 const char *rte_get_tx_ol_flag_name(uint64_t mask);
331
332 /**
333 * Dump the list of TX offload flags in a buffer
334 *
335 * @param mask
336 * The mask describing the TX flags.
337 * @param buf
338 * The output buffer.
339 * @param buflen
340 * The length of the buffer.
341 * @return
342 * 0 on success, (-1) on error.
343 */
344 int rte_get_tx_ol_flag_list(uint64_t mask, char *buf, size_t buflen);
345
346 /**
347 * Some NICs need at least 2KB buffer to RX standard Ethernet frame without
348 * splitting it into multiple segments.
349 * So, for mbufs that planned to be involved into RX/TX, the recommended
350 * minimal buffer length is 2KB + RTE_PKTMBUF_HEADROOM.
351 */
352 #define RTE_MBUF_DEFAULT_DATAROOM 2048
353 #define RTE_MBUF_DEFAULT_BUF_SIZE \
354 (RTE_MBUF_DEFAULT_DATAROOM + RTE_PKTMBUF_HEADROOM)
355
356 /* define a set of marker types that can be used to refer to set points in the
357 * mbuf */
358 __extension__
359 typedef void *MARKER[0]; /**< generic marker for a point in a structure */
360 __extension__
361 typedef uint8_t MARKER8[0]; /**< generic marker with 1B alignment */
362 __extension__
363 typedef uint64_t MARKER64[0]; /**< marker that allows us to overwrite 8 bytes
364 * with a single assignment */
365
/**
 * The generic rte_mbuf, containing a packet mbuf.
 *
 * The structure is split over two cache lines: fields up to the
 * cacheline1 marker are the ones touched on the RX fast path; the
 * fields after it are only needed on the slow path or on TX.
 * NOTE(review): field order and the markers are ABI — do not reorder.
 */
struct rte_mbuf {
	MARKER cacheline0;	/* start of the first cache line */

	void *buf_addr;           /**< Virtual address of segment buffer. */
	phys_addr_t buf_physaddr; /**< Physical address of segment buffer. */

	uint16_t buf_len;         /**< Length of segment buffer. */

	/* next 6 bytes are initialised on RX descriptor rearm */
	MARKER8 rearm_data;
	uint16_t data_off;        /* offset of data start from buf_addr */

	/**
	 * 16-bit Reference counter.
	 * It should only be accessed using the following functions:
	 * rte_mbuf_refcnt_update(), rte_mbuf_refcnt_read(), and
	 * rte_mbuf_refcnt_set(). The functionality of these functions (atomic,
	 * or non-atomic) is controlled by the CONFIG_RTE_MBUF_REFCNT_ATOMIC
	 * config option.
	 */
	RTE_STD_C11
	union {
		rte_atomic16_t refcnt_atomic; /**< Atomically accessed refcnt */
		uint16_t refcnt;              /**< Non-atomically accessed refcnt */
	};
	uint8_t nb_segs;          /**< Number of segments. */
	uint8_t port;             /**< Input port. */

	uint64_t ol_flags;        /**< Offload features (PKT_RX_*/PKT_TX_*). */

	/* remaining bytes are set on RX when pulling packet from descriptor */
	MARKER rx_descriptor_fields1;

	/*
	 * The packet type, which is the combination of outer/inner L2, L3, L4
	 * and tunnel types. The packet_type is about data really present in the
	 * mbuf. Example: if vlan stripping is enabled, a received vlan packet
	 * would have RTE_PTYPE_L2_ETHER and not RTE_PTYPE_L2_VLAN because the
	 * vlan is stripped from the data.
	 */
	RTE_STD_C11
	union {
		uint32_t packet_type; /**< L2/L3/L4 and tunnel information. */
		struct {
			uint32_t l2_type:4; /**< (Outer) L2 type. */
			uint32_t l3_type:4; /**< (Outer) L3 type. */
			uint32_t l4_type:4; /**< (Outer) L4 type. */
			uint32_t tun_type:4; /**< Tunnel type. */
			uint32_t inner_l2_type:4; /**< Inner L2 type. */
			uint32_t inner_l3_type:4; /**< Inner L3 type. */
			uint32_t inner_l4_type:4; /**< Inner L4 type. */
		};
	};

	uint32_t pkt_len;         /**< Total pkt len: sum of all segments. */
	uint16_t data_len;        /**< Amount of data in segment buffer. */
	/** VLAN TCI (CPU order), valid if PKT_RX_VLAN_STRIPPED is set. */
	uint16_t vlan_tci;

	union {
		uint32_t rss;     /**< RSS hash result if RSS enabled */
		struct {
			RTE_STD_C11
			union {
				struct {
					uint16_t hash;
					uint16_t id;
				};
				uint32_t lo;
				/**< Second 4 flexible bytes */
			};
			uint32_t hi;
			/**< First 4 flexible bytes or FD ID, dependent on
			     PKT_RX_FDIR_* flag in ol_flags. */
		} fdir;           /**< Filter identifier if FDIR enabled */
		struct {
			uint32_t lo;
			uint32_t hi;
		} sched;          /**< Hierarchical scheduler */
		uint32_t usr;     /**< User defined tags. See rte_distributor_process() */
	} hash;                   /**< hash information */

	uint32_t seqn; /**< Sequence number. See also rte_reorder_insert() */

	/** Outer VLAN TCI (CPU order), valid if PKT_RX_QINQ_STRIPPED is set. */
	uint16_t vlan_tci_outer;

	/* second cache line - fields only used in slow path or on TX */
	MARKER cacheline1 __rte_cache_min_aligned;

	RTE_STD_C11
	union {
		void *userdata;   /**< Can be used for external metadata */
		uint64_t udata64; /**< Allow 8-byte userdata on 32-bit */
	};

	struct rte_mempool *pool; /**< Pool from which mbuf was allocated. */
	struct rte_mbuf *next;    /**< Next segment of scattered packet. */

	/* fields to support TX offloads */
	RTE_STD_C11
	union {
		uint64_t tx_offload;       /**< combined for easy fetch */
		__extension__
		struct {
			uint64_t l2_len:7;
			/**< L2 (MAC) Header Length for non-tunneling pkt.
			 * Outer_L4_len + ... + Inner_L2_len for tunneling pkt.
			 */
			uint64_t l3_len:9; /**< L3 (IP) Header Length. */
			uint64_t l4_len:8; /**< L4 (TCP/UDP) Header Length. */
			uint64_t tso_segsz:16; /**< TCP TSO segment size */

			/* fields for TX offloading of tunnels */
			uint64_t outer_l3_len:9; /**< Outer L3 (IP) Hdr Length. */
			uint64_t outer_l2_len:7; /**< Outer L2 (MAC) Hdr Length. */

			/* uint64_t unused:8; */
		};
	};

	/** Size of the application private data. In case of an indirect
	 * mbuf, it stores the direct mbuf private data size. */
	uint16_t priv_size;

	/** Timesync flags for use with IEEE1588. */
	uint16_t timesync;
} __rte_cache_aligned;
497
498 /**
499 * Prefetch the first part of the mbuf
500 *
501 * The first 64 bytes of the mbuf corresponds to fields that are used early
502 * in the receive path. If the cache line of the architecture is higher than
503 * 64B, the second part will also be prefetched.
504 *
505 * @param m
506 * The pointer to the mbuf.
507 */
508 static inline void
509 rte_mbuf_prefetch_part1(struct rte_mbuf *m)
510 {
511 rte_prefetch0(&m->cacheline0);
512 }
513
/**
 * Prefetch the second cache line of an mbuf.
 *
 * Covers the fields used on the transmit path (next 64 bytes). When the
 * architecture cache line is larger than 64B this is a no-op, since the
 * whole mbuf is expected to be in cache already after part1.
 *
 * @param m
 *   The pointer to the mbuf.
 */
static inline void
rte_mbuf_prefetch_part2(struct rte_mbuf *m)
{
#if RTE_CACHE_LINE_SIZE == 64
	void *second_part = &m->cacheline1;

	rte_prefetch0(second_part);
#else
	RTE_SET_USED(m);
#endif
}
534
535
536 static inline uint16_t rte_pktmbuf_priv_size(struct rte_mempool *mp);
537
538 /**
539 * Return the DMA address of the beginning of the mbuf data
540 *
541 * @param mb
542 * The pointer to the mbuf.
543 * @return
544 * The physical address of the beginning of the mbuf data
545 */
546 static inline phys_addr_t
547 rte_mbuf_data_dma_addr(const struct rte_mbuf *mb)
548 {
549 return mb->buf_physaddr + mb->data_off;
550 }
551
552 /**
553 * Return the default DMA address of the beginning of the mbuf data
554 *
555 * This function is used by drivers in their receive function, as it
556 * returns the location where data should be written by the NIC, taking
557 * the default headroom in account.
558 *
559 * @param mb
560 * The pointer to the mbuf.
561 * @return
562 * The physical address of the beginning of the mbuf data
563 */
564 static inline phys_addr_t
565 rte_mbuf_data_dma_addr_default(const struct rte_mbuf *mb)
566 {
567 return mb->buf_physaddr + RTE_PKTMBUF_HEADROOM;
568 }
569
570 /**
571 * Return the mbuf owning the data buffer address of an indirect mbuf.
572 *
573 * @param mi
574 * The pointer to the indirect mbuf.
575 * @return
576 * The address of the direct mbuf corresponding to buffer_addr.
577 */
578 static inline struct rte_mbuf *
579 rte_mbuf_from_indirect(struct rte_mbuf *mi)
580 {
581 return (struct rte_mbuf *)RTE_PTR_SUB(mi->buf_addr, sizeof(*mi) + mi->priv_size);
582 }
583
584 /**
585 * Return the buffer address embedded in the given mbuf.
586 *
587 * @param md
588 * The pointer to the mbuf.
589 * @return
590 * The address of the data buffer owned by the mbuf.
591 */
592 static inline char *
593 rte_mbuf_to_baddr(struct rte_mbuf *md)
594 {
595 char *buffer_addr;
596 buffer_addr = (char *)md + sizeof(*md) + rte_pktmbuf_priv_size(md->pool);
597 return buffer_addr;
598 }
599
/**
 * Returns TRUE if given mbuf is indirect, or FALSE otherwise.
 *
 * An indirect mbuf does not own its data buffer; it references the
 * buffer of another (direct) mbuf (see IND_ATTACHED_MBUF in ol_flags).
 */
#define RTE_MBUF_INDIRECT(mb)   ((mb)->ol_flags & IND_ATTACHED_MBUF)

/**
 * Returns TRUE if given mbuf is direct, or FALSE otherwise.
 */
#define RTE_MBUF_DIRECT(mb)     (!RTE_MBUF_INDIRECT(mb))
609
/**
 * Private data in case of pktmbuf pool.
 *
 * A structure that contains some pktmbuf_pool-specific data that are
 * appended after the mempool structure (in private data).
 * Read back via rte_pktmbuf_data_room_size() and rte_pktmbuf_priv_size().
 */
struct rte_pktmbuf_pool_private {
	uint16_t mbuf_data_room_size; /**< Size of data space in each mbuf. */
	uint16_t mbuf_priv_size;      /**< Size of private area in each mbuf. */
};
620
#ifdef RTE_LIBRTE_MBUF_DEBUG

/** check mbuf type in debug mode */
#define __rte_mbuf_sanity_check(m, is_h) rte_mbuf_sanity_check(m, is_h)

#else /*  RTE_LIBRTE_MBUF_DEBUG */

/** sanity checks compile to a no-op when mbuf debugging is disabled */
#define __rte_mbuf_sanity_check(m, is_h) do { } while (0)

#endif /*  RTE_LIBRTE_MBUF_DEBUG */
632
633 #ifdef RTE_MBUF_REFCNT_ATOMIC
634
635 /**
636 * Reads the value of an mbuf's refcnt.
637 * @param m
638 * Mbuf to read
639 * @return
640 * Reference count number.
641 */
642 static inline uint16_t
643 rte_mbuf_refcnt_read(const struct rte_mbuf *m)
644 {
645 return (uint16_t)(rte_atomic16_read(&m->refcnt_atomic));
646 }
647
648 /**
649 * Sets an mbuf's refcnt to a defined value.
650 * @param m
651 * Mbuf to update
652 * @param new_value
653 * Value set
654 */
655 static inline void
656 rte_mbuf_refcnt_set(struct rte_mbuf *m, uint16_t new_value)
657 {
658 rte_atomic16_set(&m->refcnt_atomic, new_value);
659 }
660
661 /**
662 * Adds given value to an mbuf's refcnt and returns its new value.
663 * @param m
664 * Mbuf to update
665 * @param value
666 * Value to add/subtract
667 * @return
668 * Updated value
669 */
670 static inline uint16_t
671 rte_mbuf_refcnt_update(struct rte_mbuf *m, int16_t value)
672 {
673 /*
674 * The atomic_add is an expensive operation, so we don't want to
675 * call it in the case where we know we are the uniq holder of
676 * this mbuf (i.e. ref_cnt == 1). Otherwise, an atomic
677 * operation has to be used because concurrent accesses on the
678 * reference counter can occur.
679 */
680 if (likely(rte_mbuf_refcnt_read(m) == 1)) {
681 rte_mbuf_refcnt_set(m, 1 + value);
682 return 1 + value;
683 }
684
685 return (uint16_t)(rte_atomic16_add_return(&m->refcnt_atomic, value));
686 }
687
688 #else /* ! RTE_MBUF_REFCNT_ATOMIC */
689
690 /**
691 * Adds given value to an mbuf's refcnt and returns its new value.
692 */
693 static inline uint16_t
694 rte_mbuf_refcnt_update(struct rte_mbuf *m, int16_t value)
695 {
696 m->refcnt = (uint16_t)(m->refcnt + value);
697 return m->refcnt;
698 }
699
700 /**
701 * Reads the value of an mbuf's refcnt.
702 */
703 static inline uint16_t
704 rte_mbuf_refcnt_read(const struct rte_mbuf *m)
705 {
706 return m->refcnt;
707 }
708
709 /**
710 * Sets an mbuf's refcnt to the defined value.
711 */
712 static inline void
713 rte_mbuf_refcnt_set(struct rte_mbuf *m, uint16_t new_value)
714 {
715 m->refcnt = new_value;
716 }
717
718 #endif /* RTE_MBUF_REFCNT_ATOMIC */
719
/** Prefetch an mbuf (when non-NULL) ahead of freeing it. */
#define RTE_MBUF_PREFETCH_TO_FREE(m) do {       \
	if ((m) != NULL)                        \
		rte_prefetch0(m);               \
} while (0)
725
726
727 /**
728 * Sanity checks on an mbuf.
729 *
730 * Check the consistency of the given mbuf. The function will cause a
731 * panic if corruption is detected.
732 *
733 * @param m
734 * The mbuf to be checked.
735 * @param is_header
736 * True if the mbuf is a packet header, false if it is a sub-segment
737 * of a packet (in this case, some fields like nb_segs are not checked)
738 */
739 void
740 rte_mbuf_sanity_check(const struct rte_mbuf *m, int is_header);
741
742 /**
743 * Allocate an unitialized mbuf from mempool *mp*.
744 *
745 * This function can be used by PMDs (especially in RX functions) to
746 * allocate an unitialized mbuf. The driver is responsible of
747 * initializing all the required fields. See rte_pktmbuf_reset().
748 * For standard needs, prefer rte_pktmbuf_alloc().
749 *
750 * @param mp
751 * The mempool from which mbuf is allocated.
752 * @return
753 * - The pointer to the new mbuf on success.
754 * - NULL if allocation failed.
755 */
756 static inline struct rte_mbuf *rte_mbuf_raw_alloc(struct rte_mempool *mp)
757 {
758 struct rte_mbuf *m;
759 void *mb = NULL;
760
761 if (rte_mempool_get(mp, &mb) < 0)
762 return NULL;
763 m = (struct rte_mbuf *)mb;
764 RTE_ASSERT(rte_mbuf_refcnt_read(m) == 0);
765 rte_mbuf_refcnt_set(m, 1);
766 __rte_mbuf_sanity_check(m, 0);
767
768 return m;
769 }
770
771 /**
772 * @internal Put mbuf back into its original mempool.
773 * The use of that function is reserved for RTE internal needs.
774 * Please use rte_pktmbuf_free().
775 *
776 * @param m
777 * The mbuf to be freed.
778 */
779 static inline void __attribute__((always_inline))
780 __rte_mbuf_raw_free(struct rte_mbuf *m)
781 {
782 RTE_ASSERT(rte_mbuf_refcnt_read(m) == 0);
783 rte_mempool_put(m->pool, m);
784 }
785
786 /* Operations on ctrl mbuf */
787
788 /**
789 * The control mbuf constructor.
790 *
791 * This function initializes some fields in an mbuf structure that are
792 * not modified by the user once created (mbuf type, origin pool, buffer
793 * start address, and so on). This function is given as a callback function
794 * to rte_mempool_create() at pool creation time.
795 *
796 * @param mp
797 * The mempool from which the mbuf is allocated.
798 * @param opaque_arg
799 * A pointer that can be used by the user to retrieve useful information
800 * for mbuf initialization. This pointer comes from the ``init_arg``
801 * parameter of rte_mempool_create().
802 * @param m
803 * The mbuf to initialize.
804 * @param i
805 * The index of the mbuf in the pool table.
806 */
807 void rte_ctrlmbuf_init(struct rte_mempool *mp, void *opaque_arg,
808 void *m, unsigned i);
809
810 /**
811 * Allocate a new mbuf (type is ctrl) from mempool *mp*.
812 *
813 * This new mbuf is initialized with data pointing to the beginning of
814 * buffer, and with a length of zero.
815 *
816 * @param mp
817 * The mempool from which the mbuf is allocated.
818 * @return
819 * - The pointer to the new mbuf on success.
820 * - NULL if allocation failed.
821 */
822 #define rte_ctrlmbuf_alloc(mp) rte_pktmbuf_alloc(mp)
823
824 /**
825 * Free a control mbuf back into its original mempool.
826 *
827 * @param m
828 * The control mbuf to be freed.
829 */
830 #define rte_ctrlmbuf_free(m) rte_pktmbuf_free(m)
831
832 /**
833 * A macro that returns the pointer to the carried data.
834 *
835 * The value that can be read or assigned.
836 *
837 * @param m
838 * The control mbuf.
839 */
840 #define rte_ctrlmbuf_data(m) ((char *)((m)->buf_addr) + (m)->data_off)
841
842 /**
843 * A macro that returns the length of the carried data.
844 *
845 * The value that can be read or assigned.
846 *
847 * @param m
848 * The control mbuf.
849 */
850 #define rte_ctrlmbuf_len(m) rte_pktmbuf_data_len(m)
851
852 /**
853 * Tests if an mbuf is a control mbuf
854 *
855 * @param m
856 * The mbuf to be tested
857 * @return
858 * - True (1) if the mbuf is a control mbuf
859 * - False(0) otherwise
860 */
861 static inline int
862 rte_is_ctrlmbuf(struct rte_mbuf *m)
863 {
864 return !!(m->ol_flags & CTRL_MBUF_FLAG);
865 }
866
867 /* Operations on pkt mbuf */
868
869 /**
870 * The packet mbuf constructor.
871 *
872 * This function initializes some fields in the mbuf structure that are
873 * not modified by the user once created (origin pool, buffer start
874 * address, and so on). This function is given as a callback function to
875 * rte_mempool_create() at pool creation time.
876 *
877 * @param mp
878 * The mempool from which mbufs originate.
879 * @param opaque_arg
880 * A pointer that can be used by the user to retrieve useful information
881 * for mbuf initialization. This pointer comes from the ``init_arg``
882 * parameter of rte_mempool_create().
883 * @param m
884 * The mbuf to initialize.
885 * @param i
886 * The index of the mbuf in the pool table.
887 */
888 void rte_pktmbuf_init(struct rte_mempool *mp, void *opaque_arg,
889 void *m, unsigned i);
890
891
892 /**
893 * A packet mbuf pool constructor.
894 *
895 * This function initializes the mempool private data in the case of a
896 * pktmbuf pool. This private data is needed by the driver. The
897 * function is given as a callback function to rte_mempool_create() at
898 * pool creation. It can be extended by the user, for example, to
899 * provide another packet size.
900 *
901 * @param mp
902 * The mempool from which mbufs originate.
903 * @param opaque_arg
904 * A pointer that can be used by the user to retrieve useful information
905 * for mbuf initialization. This pointer comes from the ``init_arg``
906 * parameter of rte_mempool_create().
907 */
908 void rte_pktmbuf_pool_init(struct rte_mempool *mp, void *opaque_arg);
909
910 /**
911 * Create a mbuf pool.
912 *
913 * This function creates and initializes a packet mbuf pool. It is
914 * a wrapper to rte_mempool_create() with the proper packet constructor
915 * and mempool constructor.
916 *
917 * @param name
918 * The name of the mbuf pool.
919 * @param n
920 * The number of elements in the mbuf pool. The optimum size (in terms
921 * of memory usage) for a mempool is when n is a power of two minus one:
922 * n = (2^q - 1).
923 * @param cache_size
924 * Size of the per-core object cache. See rte_mempool_create() for
925 * details.
926 * @param priv_size
 *   Size of application private area between the rte_mbuf structure
928 * and the data buffer. This value must be aligned to RTE_MBUF_PRIV_ALIGN.
929 * @param data_room_size
930 * Size of data buffer in each mbuf, including RTE_PKTMBUF_HEADROOM.
931 * @param socket_id
932 * The socket identifier where the memory should be allocated. The
933 * value can be *SOCKET_ID_ANY* if there is no NUMA constraint for the
934 * reserved zone.
935 * @return
936 * The pointer to the new allocated mempool, on success. NULL on error
937 * with rte_errno set appropriately. Possible rte_errno values include:
938 * - E_RTE_NO_CONFIG - function could not get pointer to rte_config structure
939 * - E_RTE_SECONDARY - function was called from a secondary process instance
940 * - EINVAL - cache size provided is too large, or priv_size is not aligned.
941 * - ENOSPC - the maximum number of memzones has already been allocated
942 * - EEXIST - a memzone with the same name already exists
943 * - ENOMEM - no appropriate memory area found in which to create memzone
944 */
945 struct rte_mempool *
946 rte_pktmbuf_pool_create(const char *name, unsigned n,
947 unsigned cache_size, uint16_t priv_size, uint16_t data_room_size,
948 int socket_id);
949
950 /**
951 * Get the data room size of mbufs stored in a pktmbuf_pool
952 *
953 * The data room size is the amount of data that can be stored in a
954 * mbuf including the headroom (RTE_PKTMBUF_HEADROOM).
955 *
956 * @param mp
957 * The packet mbuf pool.
958 * @return
959 * The data room size of mbufs stored in this mempool.
960 */
961 static inline uint16_t
962 rte_pktmbuf_data_room_size(struct rte_mempool *mp)
963 {
964 struct rte_pktmbuf_pool_private *mbp_priv;
965
966 mbp_priv = (struct rte_pktmbuf_pool_private *)rte_mempool_get_priv(mp);
967 return mbp_priv->mbuf_data_room_size;
968 }
969
970 /**
971 * Get the application private size of mbufs stored in a pktmbuf_pool
972 *
973 * The private size of mbuf is a zone located between the rte_mbuf
974 * structure and the data buffer where an application can store data
975 * associated to a packet.
976 *
977 * @param mp
978 * The packet mbuf pool.
979 * @return
980 * The private size of mbufs stored in this mempool.
981 */
982 static inline uint16_t
983 rte_pktmbuf_priv_size(struct rte_mempool *mp)
984 {
985 struct rte_pktmbuf_pool_private *mbp_priv;
986
987 mbp_priv = (struct rte_pktmbuf_pool_private *)rte_mempool_get_priv(mp);
988 return mbp_priv->mbuf_priv_size;
989 }
990
991 /**
992 * Reset the data_off field of a packet mbuf to its default value.
993 *
994 * The given mbuf must have only one segment, which should be empty.
995 *
996 * @param m
997 * The packet mbuf's data_off field has to be reset.
998 */
999 static inline void rte_pktmbuf_reset_headroom(struct rte_mbuf *m)
1000 {
1001 m->data_off = RTE_MIN(RTE_PKTMBUF_HEADROOM, (uint16_t)m->buf_len);
1002 }
1003
1004 /**
1005 * Reset the fields of a packet mbuf to their default values.
1006 *
1007 * The given mbuf must have only one segment.
1008 *
1009 * @param m
1010 * The packet mbuf to be resetted.
1011 */
1012 static inline void rte_pktmbuf_reset(struct rte_mbuf *m)
1013 {
1014 m->next = NULL;
1015 m->pkt_len = 0;
1016 m->tx_offload = 0;
1017 m->vlan_tci = 0;
1018 m->vlan_tci_outer = 0;
1019 m->nb_segs = 1;
1020 m->port = 0xff;
1021
1022 m->ol_flags = 0;
1023 m->packet_type = 0;
1024 rte_pktmbuf_reset_headroom(m);
1025
1026 m->data_len = 0;
1027 __rte_mbuf_sanity_check(m, 1);
1028 }
1029
1030 /**
1031 * Allocate a new mbuf from a mempool.
1032 *
1033 * This new mbuf contains one segment, which has a length of 0. The pointer
1034 * to data is initialized to have some bytes of headroom in the buffer
1035 * (if buffer size allows).
1036 *
1037 * @param mp
1038 * The mempool from which the mbuf is allocated.
1039 * @return
1040 * - The pointer to the new mbuf on success.
1041 * - NULL if allocation failed.
1042 */
1043 static inline struct rte_mbuf *rte_pktmbuf_alloc(struct rte_mempool *mp)
1044 {
1045 struct rte_mbuf *m;
1046 if ((m = rte_mbuf_raw_alloc(mp)) != NULL)
1047 rte_pktmbuf_reset(m);
1048 return m;
1049 }
1050
/**
 * Allocate a bulk of mbufs, initialize refcnt and reset the fields to default
 * values.
 *
 * @param pool
 *   The mempool from which mbufs are allocated.
 * @param mbufs
 *   Array of pointers to mbufs
 * @param count
 *   Array size
 * @return
 *   - 0: Success
 *   - Nonzero: the error code returned by rte_mempool_get_bulk().
 */
static inline int rte_pktmbuf_alloc_bulk(struct rte_mempool *pool,
	 struct rte_mbuf **mbufs, unsigned count)
{
	unsigned idx = 0;
	int rc;

	rc = rte_mempool_get_bulk(pool, (void **)mbufs, count);
	if (unlikely(rc))
		return rc;

	/* To understand duff's device on loop unwinding optimization, see
	 * https://en.wikipedia.org/wiki/Duff's_device.
	 * Here while() loop is used rather than do() while{} to avoid extra
	 * check if count is zero.
	 *
	 * NOTE: every case label below falls through INTENTIONALLY: the
	 * switch jumps into the middle of the while loop to process the
	 * (count % 4) leftover mbufs, then the loop body handles the rest
	 * four at a time.
	 */
	switch (count % 4) {
	case 0:
		while (idx != count) {
			RTE_ASSERT(rte_mbuf_refcnt_read(mbufs[idx]) == 0);
			rte_mbuf_refcnt_set(mbufs[idx], 1);
			rte_pktmbuf_reset(mbufs[idx]);
			idx++;
			/* fall through */
	case 3:
			RTE_ASSERT(rte_mbuf_refcnt_read(mbufs[idx]) == 0);
			rte_mbuf_refcnt_set(mbufs[idx], 1);
			rte_pktmbuf_reset(mbufs[idx]);
			idx++;
			/* fall through */
	case 2:
			RTE_ASSERT(rte_mbuf_refcnt_read(mbufs[idx]) == 0);
			rte_mbuf_refcnt_set(mbufs[idx], 1);
			rte_pktmbuf_reset(mbufs[idx]);
			idx++;
			/* fall through */
	case 1:
			RTE_ASSERT(rte_mbuf_refcnt_read(mbufs[idx]) == 0);
			rte_mbuf_refcnt_set(mbufs[idx], 1);
			rte_pktmbuf_reset(mbufs[idx]);
			idx++;
		}
	}
	return 0;
}
1105
1106 /**
1107 * Attach packet mbuf to another packet mbuf.
1108 *
1109 * After attachment we refer the mbuf we attached as 'indirect',
1110 * while mbuf we attached to as 'direct'.
1111 * The direct mbuf's reference counter is incremented.
1112 *
1113 * Right now, not supported:
1114 * - attachment for already indirect mbuf (e.g. - mi has to be direct).
1115 * - mbuf we trying to attach (mi) is used by someone else
1116 * e.g. it's reference counter is greater then 1.
1117 *
1118 * @param mi
1119 * The indirect packet mbuf.
1120 * @param m
1121 * The packet mbuf we're attaching to.
1122 */
1123 static inline void rte_pktmbuf_attach(struct rte_mbuf *mi, struct rte_mbuf *m)
1124 {
1125 struct rte_mbuf *md;
1126
1127 RTE_ASSERT(RTE_MBUF_DIRECT(mi) &&
1128 rte_mbuf_refcnt_read(mi) == 1);
1129
1130 /* if m is not direct, get the mbuf that embeds the data */
1131 if (RTE_MBUF_DIRECT(m))
1132 md = m;
1133 else
1134 md = rte_mbuf_from_indirect(m);
1135
1136 rte_mbuf_refcnt_update(md, 1);
1137 mi->priv_size = m->priv_size;
1138 mi->buf_physaddr = m->buf_physaddr;
1139 mi->buf_addr = m->buf_addr;
1140 mi->buf_len = m->buf_len;
1141
1142 mi->next = m->next;
1143 mi->data_off = m->data_off;
1144 mi->data_len = m->data_len;
1145 mi->port = m->port;
1146 mi->vlan_tci = m->vlan_tci;
1147 mi->vlan_tci_outer = m->vlan_tci_outer;
1148 mi->tx_offload = m->tx_offload;
1149 mi->hash = m->hash;
1150
1151 mi->next = NULL;
1152 mi->pkt_len = mi->data_len;
1153 mi->nb_segs = 1;
1154 mi->ol_flags = m->ol_flags | IND_ATTACHED_MBUF;
1155 mi->packet_type = m->packet_type;
1156
1157 __rte_mbuf_sanity_check(mi, 1);
1158 __rte_mbuf_sanity_check(m, 0);
1159 }
1160
1161 /**
1162 * Detach an indirect packet mbuf.
1163 *
1164 * - restore original mbuf address and length values.
1165 * - reset pktmbuf data and data_len to their default values.
1166 * - decrement the direct mbuf's reference counter. When the
1167 * reference counter becomes 0, the direct mbuf is freed.
1168 *
1169 * All other fields of the given packet mbuf will be left intact.
1170 *
1171 * @param m
1172 * The indirect attached packet mbuf.
1173 */
1174 static inline void rte_pktmbuf_detach(struct rte_mbuf *m)
1175 {
1176 struct rte_mbuf *md = rte_mbuf_from_indirect(m);
1177 struct rte_mempool *mp = m->pool;
1178 uint32_t mbuf_size, buf_len, priv_size;
1179
1180 priv_size = rte_pktmbuf_priv_size(mp);
1181 mbuf_size = sizeof(struct rte_mbuf) + priv_size;
1182 buf_len = rte_pktmbuf_data_room_size(mp);
1183
1184 m->priv_size = priv_size;
1185 m->buf_addr = (char *)m + mbuf_size;
1186 m->buf_physaddr = rte_mempool_virt2phy(mp, m) + mbuf_size;
1187 m->buf_len = (uint16_t)buf_len;
1188 rte_pktmbuf_reset_headroom(m);
1189 m->data_len = 0;
1190 m->ol_flags = 0;
1191
1192 if (rte_mbuf_refcnt_update(md, -1) == 0)
1193 __rte_mbuf_raw_free(md);
1194 }
1195
1196 static inline struct rte_mbuf* __attribute__((always_inline))
1197 __rte_pktmbuf_prefree_seg(struct rte_mbuf *m)
1198 {
1199 __rte_mbuf_sanity_check(m, 0);
1200
1201 if (likely(rte_mbuf_refcnt_update(m, -1) == 0)) {
1202 /* if this is an indirect mbuf, it is detached. */
1203 if (RTE_MBUF_INDIRECT(m))
1204 rte_pktmbuf_detach(m);
1205 return m;
1206 }
1207 return NULL;
1208 }
1209
1210 /**
1211 * Free a segment of a packet mbuf into its original mempool.
1212 *
1213 * Free an mbuf, without parsing other segments in case of chained
1214 * buffers.
1215 *
1216 * @param m
1217 * The packet mbuf segment to be freed.
1218 */
1219 static inline void __attribute__((always_inline))
1220 rte_pktmbuf_free_seg(struct rte_mbuf *m)
1221 {
1222 if (likely(NULL != (m = __rte_pktmbuf_prefree_seg(m)))) {
1223 m->next = NULL;
1224 __rte_mbuf_raw_free(m);
1225 }
1226 }
1227
1228 /**
1229 * Free a packet mbuf back into its original mempool.
1230 *
1231 * Free an mbuf, and all its segments in case of chained buffers. Each
1232 * segment is added back into its original mempool.
1233 *
1234 * @param m
1235 * The packet mbuf to be freed.
1236 */
1237 static inline void rte_pktmbuf_free(struct rte_mbuf *m)
1238 {
1239 struct rte_mbuf *m_next;
1240
1241 __rte_mbuf_sanity_check(m, 1);
1242
1243 while (m != NULL) {
1244 m_next = m->next;
1245 rte_pktmbuf_free_seg(m);
1246 m = m_next;
1247 }
1248 }
1249
/**
 * Creates a "clone" of the given packet mbuf.
 *
 * Walks through all segments of the given packet mbuf, and for each of them:
 *  - Creates a new packet mbuf from the given pool.
 *  - Attaches newly created mbuf to the segment.
 * Then updates pkt_len and nb_segs of the "clone" packet mbuf to match values
 * from the original packet mbuf.
 *
 * @param md
 *   The packet mbuf to be cloned.
 * @param mp
 *   The mempool from which the "clone" mbufs are allocated.
 * @return
 *   - The pointer to the new "clone" mbuf on success.
 *   - NULL if allocation fails.
 */
static inline struct rte_mbuf *rte_pktmbuf_clone(struct rte_mbuf *md,
		struct rte_mempool *mp)
{
	struct rte_mbuf *mc, *mi, **prev;
	uint32_t pktlen;
	uint8_t nseg;

	/* allocate the head segment of the clone up front */
	if (unlikely ((mc = rte_pktmbuf_alloc(mp)) == NULL))
		return NULL;

	mi = mc;
	prev = &mi->next;	/* slot where the next segment will be linked */
	pktlen = md->pkt_len;	/* saved: rte_pktmbuf_attach() overwrites pkt_len */
	nseg = 0;

	/* attach one freshly allocated indirect mbuf to each segment of md;
	 * the loop stops early (mi == NULL) if an allocation fails */
	do {
		nseg++;
		rte_pktmbuf_attach(mi, md);
		*prev = mi;
		prev = &mi->next;
	} while ((md = md->next) != NULL &&
	    (mi = rte_pktmbuf_alloc(mp)) != NULL);

	/* terminate the chain and fix up head totals before any cleanup,
	 * so rte_pktmbuf_free() below sees a consistent chain */
	*prev = NULL;
	mc->nb_segs = nseg;
	mc->pkt_len = pktlen;

	/* Allocation of new indirect segment failed */
	if (unlikely (mi == NULL)) {
		rte_pktmbuf_free(mc);
		return NULL;
	}

	__rte_mbuf_sanity_check(mc, 1);
	return mc;
}
1303
1304 /**
1305 * Adds given value to the refcnt of all packet mbuf segments.
1306 *
1307 * Walks through all segments of given packet mbuf and for each of them
1308 * invokes rte_mbuf_refcnt_update().
1309 *
1310 * @param m
1311 * The packet mbuf whose refcnt to be updated.
1312 * @param v
1313 * The value to add to the mbuf's segments refcnt.
1314 */
1315 static inline void rte_pktmbuf_refcnt_update(struct rte_mbuf *m, int16_t v)
1316 {
1317 __rte_mbuf_sanity_check(m, 1);
1318
1319 do {
1320 rte_mbuf_refcnt_update(m, v);
1321 } while ((m = m->next) != NULL);
1322 }
1323
1324 /**
1325 * Get the headroom in a packet mbuf.
1326 *
1327 * @param m
1328 * The packet mbuf.
1329 * @return
1330 * The length of the headroom.
1331 */
1332 static inline uint16_t rte_pktmbuf_headroom(const struct rte_mbuf *m)
1333 {
1334 __rte_mbuf_sanity_check(m, 1);
1335 return m->data_off;
1336 }
1337
1338 /**
1339 * Get the tailroom of a packet mbuf.
1340 *
1341 * @param m
1342 * The packet mbuf.
1343 * @return
1344 * The length of the tailroom.
1345 */
1346 static inline uint16_t rte_pktmbuf_tailroom(const struct rte_mbuf *m)
1347 {
1348 __rte_mbuf_sanity_check(m, 1);
1349 return (uint16_t)(m->buf_len - rte_pktmbuf_headroom(m) -
1350 m->data_len);
1351 }
1352
1353 /**
1354 * Get the last segment of the packet.
1355 *
1356 * @param m
1357 * The packet mbuf.
1358 * @return
1359 * The last segment of the given mbuf.
1360 */
1361 static inline struct rte_mbuf *rte_pktmbuf_lastseg(struct rte_mbuf *m)
1362 {
1363 struct rte_mbuf *m2 = (struct rte_mbuf *)m;
1364
1365 __rte_mbuf_sanity_check(m, 1);
1366 while (m2->next != NULL)
1367 m2 = m2->next;
1368 return m2;
1369 }
1370
/**
 * A macro that points to an offset into the data in the mbuf.
 *
 * The returned pointer is cast to type t. Before using this
 * macro, the user must ensure that the first segment is large
 * enough to accommodate its data.
 *
 * Note: as a macro, this evaluates 'm' more than once; avoid passing
 * expressions with side effects.
 *
 * @param m
 *   The packet mbuf.
 * @param o
 *   The offset into the mbuf data.
 * @param t
 *   The type to cast the result into.
 */
#define rte_pktmbuf_mtod_offset(m, t, o) \
	((t)((char *)(m)->buf_addr + (m)->data_off + (o)))
1387
/**
 * A macro that points to the start of the data in the mbuf.
 *
 * The returned pointer is cast to type t. Before using this
 * macro, the user must ensure that the first segment is large
 * enough to accommodate its data.
 *
 * Equivalent to rte_pktmbuf_mtod_offset(m, t, 0).
 *
 * @param m
 *   The packet mbuf.
 * @param t
 *   The type to cast the result into.
 */
#define rte_pktmbuf_mtod(m, t) rte_pktmbuf_mtod_offset(m, t, 0)
1401
/**
 * A macro that returns the physical address that points to an offset of the
 * start of the data in the mbuf
 *
 * Note: as a macro, this evaluates 'm' more than once; avoid passing
 * expressions with side effects.
 *
 * @param m
 *   The packet mbuf.
 * @param o
 *   The offset into the data to calculate address from.
 */
/* The expansion is now fully parenthesized so the cast cannot bind
 * unexpectedly when the macro is embedded in a larger expression
 * (CERT PRE02-C). */
#define rte_pktmbuf_mtophys_offset(m, o) \
	((phys_addr_t)((m)->buf_physaddr + (m)->data_off + (o)))
1413
/**
 * A macro that returns the physical address that points to the start of the
 * data in the mbuf.
 *
 * Equivalent to rte_pktmbuf_mtophys_offset(m, 0).
 *
 * @param m
 *   The packet mbuf.
 */
#define rte_pktmbuf_mtophys(m) rte_pktmbuf_mtophys_offset(m, 0)
1422
/**
 * A macro that returns the length of the packet.
 *
 * The value can be read or assigned (the macro expands to an lvalue).
 * pkt_len is only maintained in the first (head) segment of a chain.
 *
 * @param m
 *   The packet mbuf.
 */
#define rte_pktmbuf_pkt_len(m) ((m)->pkt_len)
1432
/**
 * A macro that returns the length of the segment.
 *
 * The value can be read or assigned (the macro expands to an lvalue).
 *
 * @param m
 *   The packet mbuf (a single segment).
 */
#define rte_pktmbuf_data_len(m) ((m)->data_len)
1442
1443 /**
1444 * Prepend len bytes to an mbuf data area.
1445 *
1446 * Returns a pointer to the new
1447 * data start address. If there is not enough headroom in the first
1448 * segment, the function will return NULL, without modifying the mbuf.
1449 *
1450 * @param m
1451 * The pkt mbuf.
1452 * @param len
1453 * The amount of data to prepend (in bytes).
1454 * @return
1455 * A pointer to the start of the newly prepended data, or
1456 * NULL if there is not enough headroom space in the first segment
1457 */
1458 static inline char *rte_pktmbuf_prepend(struct rte_mbuf *m,
1459 uint16_t len)
1460 {
1461 __rte_mbuf_sanity_check(m, 1);
1462
1463 if (unlikely(len > rte_pktmbuf_headroom(m)))
1464 return NULL;
1465
1466 m->data_off -= len;
1467 m->data_len = (uint16_t)(m->data_len + len);
1468 m->pkt_len = (m->pkt_len + len);
1469
1470 return (char *)m->buf_addr + m->data_off;
1471 }
1472
1473 /**
1474 * Append len bytes to an mbuf.
1475 *
1476 * Append len bytes to an mbuf and return a pointer to the start address
1477 * of the added data. If there is not enough tailroom in the last
1478 * segment, the function will return NULL, without modifying the mbuf.
1479 *
1480 * @param m
1481 * The packet mbuf.
1482 * @param len
1483 * The amount of data to append (in bytes).
1484 * @return
1485 * A pointer to the start of the newly appended data, or
1486 * NULL if there is not enough tailroom space in the last segment
1487 */
1488 static inline char *rte_pktmbuf_append(struct rte_mbuf *m, uint16_t len)
1489 {
1490 void *tail;
1491 struct rte_mbuf *m_last;
1492
1493 __rte_mbuf_sanity_check(m, 1);
1494
1495 m_last = rte_pktmbuf_lastseg(m);
1496 if (unlikely(len > rte_pktmbuf_tailroom(m_last)))
1497 return NULL;
1498
1499 tail = (char *)m_last->buf_addr + m_last->data_off + m_last->data_len;
1500 m_last->data_len = (uint16_t)(m_last->data_len + len);
1501 m->pkt_len = (m->pkt_len + len);
1502 return (char*) tail;
1503 }
1504
1505 /**
1506 * Remove len bytes at the beginning of an mbuf.
1507 *
1508 * Returns a pointer to the start address of the new data area. If the
1509 * length is greater than the length of the first segment, then the
1510 * function will fail and return NULL, without modifying the mbuf.
1511 *
1512 * @param m
1513 * The packet mbuf.
1514 * @param len
1515 * The amount of data to remove (in bytes).
1516 * @return
1517 * A pointer to the new start of the data.
1518 */
1519 static inline char *rte_pktmbuf_adj(struct rte_mbuf *m, uint16_t len)
1520 {
1521 __rte_mbuf_sanity_check(m, 1);
1522
1523 if (unlikely(len > m->data_len))
1524 return NULL;
1525
1526 m->data_len = (uint16_t)(m->data_len - len);
1527 m->data_off += len;
1528 m->pkt_len = (m->pkt_len - len);
1529 return (char *)m->buf_addr + m->data_off;
1530 }
1531
1532 /**
1533 * Remove len bytes of data at the end of the mbuf.
1534 *
1535 * If the length is greater than the length of the last segment, the
1536 * function will fail and return -1 without modifying the mbuf.
1537 *
1538 * @param m
1539 * The packet mbuf.
1540 * @param len
1541 * The amount of data to remove (in bytes).
1542 * @return
1543 * - 0: On success.
1544 * - -1: On error.
1545 */
1546 static inline int rte_pktmbuf_trim(struct rte_mbuf *m, uint16_t len)
1547 {
1548 struct rte_mbuf *m_last;
1549
1550 __rte_mbuf_sanity_check(m, 1);
1551
1552 m_last = rte_pktmbuf_lastseg(m);
1553 if (unlikely(len > m_last->data_len))
1554 return -1;
1555
1556 m_last->data_len = (uint16_t)(m_last->data_len - len);
1557 m->pkt_len = (m->pkt_len - len);
1558 return 0;
1559 }
1560
1561 /**
1562 * Test if mbuf data is contiguous.
1563 *
1564 * @param m
1565 * The packet mbuf.
1566 * @return
1567 * - 1, if all data is contiguous (one segment).
1568 * - 0, if there is several segments.
1569 */
1570 static inline int rte_pktmbuf_is_contiguous(const struct rte_mbuf *m)
1571 {
1572 __rte_mbuf_sanity_check(m, 1);
1573 return !!(m->nb_segs == 1);
1574 }
1575
1576 /**
1577 * @internal used by rte_pktmbuf_read().
1578 */
1579 const void *__rte_pktmbuf_read(const struct rte_mbuf *m, uint32_t off,
1580 uint32_t len, void *buf);
1581
1582 /**
1583 * Read len data bytes in a mbuf at specified offset.
1584 *
1585 * If the data is contiguous, return the pointer in the mbuf data, else
1586 * copy the data in the buffer provided by the user and return its
1587 * pointer.
1588 *
1589 * @param m
1590 * The pointer to the mbuf.
1591 * @param off
1592 * The offset of the data in the mbuf.
1593 * @param len
1594 * The amount of bytes to read.
1595 * @param buf
1596 * The buffer where data is copied if it is not contigous in mbuf
1597 * data. Its length should be at least equal to the len parameter.
1598 * @return
1599 * The pointer to the data, either in the mbuf if it is contiguous,
1600 * or in the user buffer. If mbuf is too small, NULL is returned.
1601 */
1602 static inline const void *rte_pktmbuf_read(const struct rte_mbuf *m,
1603 uint32_t off, uint32_t len, void *buf)
1604 {
1605 if (likely(off + len <= rte_pktmbuf_data_len(m)))
1606 return rte_pktmbuf_mtod_offset(m, char *, off);
1607 else
1608 return __rte_pktmbuf_read(m, off, len, buf);
1609 }
1610
1611 /**
1612 * Chain an mbuf to another, thereby creating a segmented packet.
1613 *
1614 * Note: The implementation will do a linear walk over the segments to find
1615 * the tail entry. For cases when there are many segments, it's better to
1616 * chain the entries manually.
1617 *
1618 * @param head
1619 * The head of the mbuf chain (the first packet)
1620 * @param tail
1621 * The mbuf to put last in the chain
1622 *
1623 * @return
1624 * - 0, on success.
1625 * - -EOVERFLOW, if the chain is full (256 entries)
1626 */
1627 static inline int rte_pktmbuf_chain(struct rte_mbuf *head, struct rte_mbuf *tail)
1628 {
1629 struct rte_mbuf *cur_tail;
1630
1631 /* Check for number-of-segments-overflow */
1632 if (head->nb_segs + tail->nb_segs >= 1 << (sizeof(head->nb_segs) * 8))
1633 return -EOVERFLOW;
1634
1635 /* Chain 'tail' onto the old tail */
1636 cur_tail = rte_pktmbuf_lastseg(head);
1637 cur_tail->next = tail;
1638
1639 /* accumulate number of segments and total length. */
1640 head->nb_segs = (uint8_t)(head->nb_segs + tail->nb_segs);
1641 head->pkt_len += tail->pkt_len;
1642
1643 /* pkt_len is only set in the head */
1644 tail->pkt_len = tail->data_len;
1645
1646 return 0;
1647 }
1648
1649 /**
1650 * Dump an mbuf structure to a file.
1651 *
1652 * Dump all fields for the given packet mbuf and all its associated
1653 * segments (in the case of a chained buffer).
1654 *
1655 * @param f
1656 * A pointer to a file for output
1657 * @param m
1658 * The packet mbuf.
1659 * @param dump_len
1660 * If dump_len != 0, also dump the "dump_len" first data bytes of
1661 * the packet.
1662 */
1663 void rte_pktmbuf_dump(FILE *f, const struct rte_mbuf *m, unsigned dump_len);
1664
1665 #ifdef __cplusplus
1666 }
1667 #endif
1668
1669 #endif /* _RTE_MBUF_H_ */