]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | /*- |
2 | * BSD LICENSE | |
3 | * | |
4 | * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. | |
5 | * All rights reserved. | |
6 | * | |
7 | * Redistribution and use in source and binary forms, with or without | |
8 | * modification, are permitted provided that the following conditions | |
9 | * are met: | |
10 | * | |
11 | * * Redistributions of source code must retain the above copyright | |
12 | * notice, this list of conditions and the following disclaimer. | |
13 | * * Redistributions in binary form must reproduce the above copyright | |
14 | * notice, this list of conditions and the following disclaimer in | |
15 | * the documentation and/or other materials provided with the | |
16 | * distribution. | |
17 | * * Neither the name of Intel Corporation nor the names of its | |
18 | * contributors may be used to endorse or promote products derived | |
19 | * from this software without specific prior written permission. | |
20 | * | |
21 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
22 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
23 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
24 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
25 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
26 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
27 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
28 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
29 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
30 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
31 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
32 | */ | |
33 | ||
34 | #ifndef _RTE_IP_FRAG_H_ | |
35 | #define _RTE_IP_FRAG_H_ | |
36 | ||
37 | /** | |
38 | * @file | |
39 | * RTE IP Fragmentation and Reassembly | |
40 | * | |
41 | * Implementation of IP packet fragmentation and reassembly. | |
42 | */ | |
43 | ||
44 | #ifdef __cplusplus | |
45 | extern "C" { | |
46 | #endif | |
47 | ||
48 | #include <stdint.h> | |
49 | #include <stdio.h> | |
50 | ||
51 | #include <rte_malloc.h> | |
52 | #include <rte_memory.h> | |
53 | #include <rte_ip.h> | |
54 | #include <rte_byteorder.h> | |
55 | ||
56 | struct rte_mbuf; | |
57 | ||
58 | enum { | |
59 | IP_LAST_FRAG_IDX, /**< frags[] index reserved for the last fragment (0) */ | |
60 | IP_FIRST_FRAG_IDX, /**< frags[] index reserved for the first fragment (1) */ | |
61 | IP_MIN_FRAG_NUM, /**< minimum number of fragments (2) */ | |
62 | IP_MAX_FRAG_NUM = RTE_LIBRTE_IP_FRAG_MAX_FRAG, | |
63 | /**< maximum number of fragments per packet */ | |
64 | }; | |
65 | ||
66 | /** @internal One fragment of a datagram: the mbuf holding it plus its offset and length. */ | |
67 | struct ip_frag { | |
68 | uint16_t ofs; /**< offset into the packet */ | |
69 | uint16_t len; /**< length of fragment */ | |
70 | struct rte_mbuf *mb; /**< fragment mbuf */ | |
71 | }; | |
72 | ||
73 | /** @internal <src addr, dst_addr, id> to uniquely identify a fragmented datagram. */ | |
74 | struct ip_frag_key { | |
75 | uint64_t src_dst[4]; /**< src and dst address, only first 8 bytes used for IPv4 */ | |
76 | uint32_t id; /**< packet id */ | |
77 | uint32_t key_len; /**< src/dst key length */ | |
78 | }; | |
79 | ||
80 | /** | |
81 | * @internal Fragmented packet to reassemble. | |
82 | * The first two entries of frags[] (IP_LAST_FRAG_IDX, IP_FIRST_FRAG_IDX) are reserved for the last and first fragments. | |
83 | */ | |
84 | struct ip_frag_pkt { | |
85 | TAILQ_ENTRY(ip_frag_pkt) lru; /**< LRU list linkage */ | |
86 | struct ip_frag_key key; /**< fragmentation key */ | |
87 | uint64_t start; /**< creation timestamp */ | |
88 | uint32_t total_size; /**< expected reassembled size */ | |
89 | uint32_t frag_size; /**< size of fragments received */ | |
90 | uint32_t last_idx; /**< index of next entry to fill */ | |
91 | struct ip_frag frags[IP_MAX_FRAG_NUM]; /**< fragments, at most IP_MAX_FRAG_NUM */ | |
92 | } __rte_cache_aligned; | |
93 | ||
94 | #define IP_FRAG_DEATH_ROW_LEN 32 /**< death row size (in packets) */ | |
95 | ||
96 | /** mbuf death row (mbufs of packets that are to be freed) */ | |
97 | struct rte_ip_frag_death_row { | |
98 | uint32_t cnt; /**< number of mbufs currently on death row */ | |
99 | struct rte_mbuf *row[IP_FRAG_DEATH_ROW_LEN * (IP_MAX_FRAG_NUM + 1)]; | |
100 | /**< mbufs to be freed; sized for IP_MAX_FRAG_NUM + 1 mbufs per death-row packet */ | |
101 | }; | |
102 | ||
103 | TAILQ_HEAD(ip_pkt_list, ip_frag_pkt); /**< @internal tailq head type for lists of ip_frag_pkt entries */ | |
104 | ||
105 | /** Fragmentation table statistics counters. */ | |
106 | struct ip_frag_tbl_stat { | |
107 | uint64_t find_num; /**< total # of find/insert attempts. */ | |
108 | uint64_t add_num; /**< # of add ops. */ | |
109 | uint64_t del_num; /**< # of del ops. */ | |
110 | uint64_t reuse_num; /**< # of reuse (del/add) ops. */ | |
111 | uint64_t fail_total; /**< total # of add failures. */ | |
112 | uint64_t fail_nospace; /**< # of 'no space' add failures. */ | |
113 | } __rte_cache_aligned; | |
114 | ||
115 | /** Fragmentation table: header fields followed by the hash table entries. */ | |
116 | struct rte_ip_frag_tbl { | |
117 | uint64_t max_cycles; /**< ttl for table entries. */ | |
118 | uint32_t entry_mask; /**< hash value mask. */ | |
119 | uint32_t max_entries; /**< max entries allowed. */ | |
120 | uint32_t use_entries; /**< entries in use. */ | |
121 | uint32_t bucket_entries; /**< hash associativity. */ | |
122 | uint32_t nb_entries; /**< total size of the table. */ | |
123 | uint32_t nb_buckets; /**< num of associativity lines. */ | |
124 | struct ip_frag_pkt *last; /**< last used entry. */ | |
125 | struct ip_pkt_list lru; /**< LRU list for table entries. */ | |
126 | struct ip_frag_tbl_stat stat; /**< statistics counters. */ | |
127 | __extension__ struct ip_frag_pkt pkt[0]; /**< hash table entries (zero-length trailing array). */ | |
128 | }; | |
129 | ||
130 | /** IPv6 fragment extension header: helpers for the MF flag (bit 0) and the fragment offset (bits 3 and up) */ | |
131 | #define RTE_IPV6_EHDR_MF_SHIFT 0 /**< bit position of the MF flag */ | |
132 | #define RTE_IPV6_EHDR_MF_MASK 1 /**< mask selecting the MF flag */ | |
133 | #define RTE_IPV6_EHDR_FO_SHIFT 3 /**< lowest bit of the fragment offset */ | |
134 | #define RTE_IPV6_EHDR_FO_MASK (~((1 << RTE_IPV6_EHDR_FO_SHIFT) - 1)) /**< every bit from FO_SHIFT upwards */ | |
135 | ||
136 | #define RTE_IPV6_FRAG_USED_MASK \ | |
137 | (RTE_IPV6_EHDR_MF_MASK | RTE_IPV6_EHDR_FO_MASK) | |
138 | ||
139 | #define RTE_IPV6_GET_MF(x) ((x) & RTE_IPV6_EHDR_MF_MASK) /**< extract the MF flag bit */ | |
140 | #define RTE_IPV6_GET_FO(x) ((x) >> RTE_IPV6_EHDR_FO_SHIFT) /**< drop the low FO_SHIFT bits */ | |
141 | ||
142 | #define RTE_IPV6_SET_FRAG_DATA(fo, mf) \ | |
143 | (((fo) & RTE_IPV6_EHDR_FO_MASK) | ((mf) & RTE_IPV6_EHDR_MF_MASK)) | |
144 | ||
145 | struct ipv6_extension_fragment { /* packed: 1 + 1 + 2 + 4 = 8 bytes, no padding */ | |
146 | uint8_t next_header; /**< Next header type */ | |
147 | uint8_t reserved; /**< Reserved */ | |
148 | uint16_t frag_data; /**< All fragmentation data */ | |
149 | uint32_t id; /**< Packet ID */ | |
150 | } __attribute__((__packed__)); | |
151 | ||
152 | ||
153 | ||
154 | /** | |
155 | * Create a new IP fragmentation table. | |
156 | * | |
157 | * @param bucket_num | |
158 | * Number of buckets in the hash table. | |
159 | * @param bucket_entries | |
160 | * Number of entries per bucket (i.e. hash associativity). | |
161 | * Should be a power of two. | |
162 | * @param max_entries | |
163 | * Maximum number of entries that could be stored in the table. | |
164 | * The value should be less than or equal to bucket_num * bucket_entries. | |
165 | * @param max_cycles | |
166 | * Maximum TTL in cycles for each fragmented packet. | |
167 | * @param socket_id | |
168 | * The *socket_id* argument is the socket identifier in the case of | |
169 | * NUMA. The value can be *SOCKET_ID_ANY* if there are no NUMA constraints. | |
170 | * @return | |
171 | * Pointer to the newly allocated fragmentation table on success, NULL on error. | |
172 | */ | |
173 | struct rte_ip_frag_tbl * rte_ip_frag_table_create(uint32_t bucket_num, | |
174 | uint32_t bucket_entries, uint32_t max_entries, | |
175 | uint64_t max_cycles, int socket_id); | |
176 | ||
177 | /** | |
178 | * Free an allocated IP fragmentation table. | |
179 | * | |
180 | * @param tbl | |
181 | * Fragmentation table to free. | |
182 | */ | |
183 | static inline void | |
184 | rte_ip_frag_table_destroy(struct rte_ip_frag_tbl *tbl) | |
185 | { | |
186 | rte_free(tbl); /* only the table itself is passed to rte_free(); entries are not visited here */ | |
187 | } | |
188 | ||
189 | /** | |
190 | * This function implements the fragmentation of IPv6 packets. | |
191 | * | |
192 | * @param pkt_in | |
193 | * The input packet. | |
194 | * @param pkts_out | |
195 | * Array storing the output fragments. | |
196 | * @param nb_pkts_out | |
197 | * Number of fragments. | |
198 | * @param mtu_size | |
199 | * Size in bytes of the Maximum Transfer Unit (MTU) for the outgoing IPv6 | |
200 | * datagrams. This value includes the size of the IPv6 header. | |
201 | * @param pool_direct | |
202 | * MBUF pool used for allocating direct buffers for the output fragments. | |
203 | * @param pool_indirect | |
204 | * MBUF pool used for allocating indirect buffers for the output fragments. | |
205 | * @return | |
206 | * Upon successful completion - number of output fragments placed | |
207 | * in the pkts_out array. | |
208 | * Otherwise - a negative errno value, i.e. (-1) * errno. | |
209 | */ | |
210 | int32_t | |
211 | rte_ipv6_fragment_packet(struct rte_mbuf *pkt_in, | |
212 | struct rte_mbuf **pkts_out, | |
213 | uint16_t nb_pkts_out, | |
214 | uint16_t mtu_size, | |
215 | struct rte_mempool *pool_direct, | |
216 | struct rte_mempool *pool_indirect); | |
217 | ||
218 | /** | |
219 | * This function implements reassembly of fragmented IPv6 packets. | |
220 | * Incoming mbuf should have its l2_len/l3_len fields set up correctly. | |
221 | * | |
222 | * @param tbl | |
223 | * Table where to lookup/add the fragmented packet. | |
224 | * @param dr | |
225 | * Death row to free buffers to. | |
226 | * @param mb | |
227 | * Incoming mbuf with IPv6 fragment. | |
228 | * @param tms | |
229 | * Fragment arrival timestamp. | |
230 | * @param ip_hdr | |
231 | * Pointer to the IPv6 header. | |
232 | * @param frag_hdr | |
233 | * Pointer to the IPv6 fragment extension header. | |
234 | * @return | |
235 | * Pointer to mbuf for reassembled packet, or NULL if: | |
236 | * - an error occurred. | |
237 | * - not all fragments of the packet are collected yet. | |
238 | */ | |
239 | struct rte_mbuf *rte_ipv6_frag_reassemble_packet(struct rte_ip_frag_tbl *tbl, | |
240 | struct rte_ip_frag_death_row *dr, | |
241 | struct rte_mbuf *mb, uint64_t tms, struct ipv6_hdr *ip_hdr, | |
242 | struct ipv6_extension_fragment *frag_hdr); | |
243 | ||
244 | /** | |
245 | * Locate the fragment extension header of an IPv6 packet. | |
246 | * Only the header placed immediately after the fixed IPv6 header is | |
247 | * examined; the rest of the extension header chain is not walked. | |
248 | * | |
249 | * @param hdr | |
250 | * Pointer to the IPv6 header. | |
251 | * @return | |
252 | * Pointer to the IPv6 fragment extension header, or NULL when the | |
253 | * proto field of the header is not IPPROTO_FRAGMENT. | |
254 | */ | |
255 | static inline struct ipv6_extension_fragment * | |
256 | rte_ipv6_frag_get_ipv6_fragment_header(struct ipv6_hdr *hdr) | |
257 | { | |
258 | /* anything other than a fragment header right behind the fixed header: give up */ | |
259 | if (hdr->proto != IPPROTO_FRAGMENT) | |
260 | return NULL; | |
261 | ||
262 | return (struct ipv6_extension_fragment *)(hdr + 1); | |
263 | } | |
264 | ||
265 | /** | |
266 | * IPv4 fragmentation. | |
267 | * | |
268 | * This function implements the fragmentation of IPv4 packets. | |
269 | * | |
270 | * @param pkt_in | |
271 | * The input packet. | |
272 | * @param pkts_out | |
273 | * Array storing the output fragments. | |
274 | * @param nb_pkts_out | |
275 | * Number of fragments. | |
276 | * @param mtu_size | |
277 | * Size in bytes of the Maximum Transfer Unit (MTU) for the outgoing IPv4 | |
278 | * datagrams. This value includes the size of the IPv4 header. | |
279 | * @param pool_direct | |
280 | * MBUF pool used for allocating direct buffers for the output fragments. | |
281 | * @param pool_indirect | |
282 | * MBUF pool used for allocating indirect buffers for the output fragments. | |
283 | * @return | |
284 | * Upon successful completion - number of output fragments placed | |
285 | * in the pkts_out array. | |
286 | * Otherwise - a negative errno value, i.e. (-1) * errno. | |
287 | */ | |
288 | int32_t rte_ipv4_fragment_packet(struct rte_mbuf *pkt_in, | |
289 | struct rte_mbuf **pkts_out, | |
290 | uint16_t nb_pkts_out, uint16_t mtu_size, | |
291 | struct rte_mempool *pool_direct, | |
292 | struct rte_mempool *pool_indirect); | |
293 | ||
294 | /** | |
295 | * This function implements reassembly of fragmented IPv4 packets. | |
296 | * Incoming mbuf should have its l2_len/l3_len fields set up correctly. | |
297 | * | |
298 | * @param tbl | |
299 | * Table where to lookup/add the fragmented packet. | |
300 | * @param dr | |
301 | * Death row to free buffers to. | |
302 | * @param mb | |
303 | * Incoming mbuf with IPv4 fragment. | |
304 | * @param tms | |
305 | * Fragment arrival timestamp. | |
306 | * @param ip_hdr | |
307 | * Pointer to the IPv4 header inside the fragment. | |
308 | * @return | |
309 | * Pointer to mbuf for reassembled packet, or NULL if: | |
310 | * - an error occurred. | |
311 | * - not all fragments of the packet are collected yet. | |
312 | */ | |
313 | struct rte_mbuf * rte_ipv4_frag_reassemble_packet(struct rte_ip_frag_tbl *tbl, | |
314 | struct rte_ip_frag_death_row *dr, | |
315 | struct rte_mbuf *mb, uint64_t tms, struct ipv4_hdr *ip_hdr); | |
316 | ||
317 | /** | |
318 | * Tell whether an IPv4 header belongs to a fragment: either its MF flag is set or its fragment offset is non-zero. | |
319 | * | |
320 | * @param hdr | |
321 | * IPv4 header of the packet. | |
322 | * @return | |
323 | * 1 if the packet is a fragment, 0 otherwise. | |
324 | */ | |
325 | static inline int | |
326 | rte_ipv4_frag_pkt_is_fragmented(const struct ipv4_hdr *hdr) | |
327 | { | |
328 | /* host-order copy of the 16-bit flags + fragment offset word */ | |
329 | const uint16_t frag_word = rte_be_to_cpu_16(hdr->fragment_offset); | |
330 | const uint16_t more_frags = (uint16_t)(frag_word & IPV4_HDR_MF_FLAG); | |
331 | const uint16_t offset = (uint16_t)(frag_word & IPV4_HDR_OFFSET_MASK); | |
332 | ||
333 | return more_frags != 0 || offset != 0; | |
334 | } | |
335 | ||
336 | /** | |
337 | * Free mbufs on a given death row. | |
338 | * | |
339 | * @param dr | |
340 | * Death row whose mbufs are to be freed. | |
341 | * @param prefetch | |
342 | * How many buffers to prefetch before freeing. | |
343 | */ | |
344 | void rte_ip_frag_free_death_row(struct rte_ip_frag_death_row *dr, | |
345 | uint32_t prefetch); | |
346 | ||
347 | ||
348 | /** | |
349 | * Dump fragmentation table statistics to file. | |
350 | * | |
351 | * @param f | |
352 | * File to dump statistics to. | |
353 | * @param tbl | |
354 | * Fragmentation table to dump statistics from. | |
355 | */ | |
356 | void | |
357 | rte_ip_frag_table_statistics_dump(FILE * f, const struct rte_ip_frag_tbl *tbl); | |
358 | ||
359 | #ifdef __cplusplus | |
360 | } | |
361 | #endif | |
362 | ||
363 | #endif /* _RTE_IP_FRAG_H_ */ |