/*
 * Copyright (c) 2019 Nicira, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <config.h>
#include <ctype.h>
#include <errno.h>
#include <sys/types.h>
#include <netinet/in.h>
#include <netinet/ip6.h>
#include <netinet/icmp6.h>
#include <string.h>

#include "coverage.h"
#include "csum.h"
#include "ipf.h"
#include "latch.h"
#include "openvswitch/hmap.h"
#include "openvswitch/poll-loop.h"
#include "openvswitch/vlog.h"
#include "ovs-atomic.h"
#include "packets.h"
#include "util.h"

VLOG_DEFINE_THIS_MODULE(ipf);
COVERAGE_DEFINE(ipf_stuck_frag_list_purged);

enum {
    IPV4_PACKET_MAX_HDR_SIZE = 60,
    IPV4_PACKET_MAX_SIZE = 65535,
    IPV6_PACKET_MAX_DATA = 65535,
};

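/* State of a fragment list. ipf_list_state_transition() advances the state
 * as fragments arrive; once both the first and last fragments have been
 * seen and the data bytes are contiguous, reassembly is attempted. */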
enum ipf_list_state {
    IPF_LIST_STATE_UNUSED,
    IPF_LIST_STATE_REASS_FAIL,
    IPF_LIST_STATE_OTHER_SEEN,
    IPF_LIST_STATE_FIRST_SEEN,
    IPF_LIST_STATE_LAST_SEEN,
    IPF_LIST_STATE_FIRST_LAST_SEEN,
    IPF_LIST_STATE_COMPLETED,
    IPF_LIST_STATE_NUM,
};

static char *ipf_state_name[IPF_LIST_STATE_NUM] =
    {"unused", "reassemble fail", "other frag", "first frag", "last frag",
     "first/last frag", "complete"};

enum ipf_list_type {
    IPF_FRAG_COMPLETED_LIST,
    IPF_FRAG_EXPIRY_LIST,
};

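/* Bounds and defaults for the tunables exposed through ipf_set_min_frag()
 * and ipf_set_max_nfrags(). */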
enum {
    IPF_INVALID_IDX = -1,
    IPF_V4_FRAG_SIZE_LBOUND = 400,
    IPF_V4_FRAG_SIZE_MIN_DEF = 1200,
    IPF_V6_FRAG_SIZE_LBOUND = 400, /* Useful for testing. */
    IPF_V6_FRAG_SIZE_MIN_DEF = 1280,
    IPF_MAX_FRAGS_DEFAULT = 1000,
    IPF_NFRAG_UBOUND = 5000,
};

enum ipf_counter_type {
    IPF_NFRAGS_ACCEPTED,
    IPF_NFRAGS_COMPL_SENT,
    IPF_NFRAGS_EXPD_SENT,
    IPF_NFRAGS_TOO_SMALL,
    IPF_NFRAGS_OVERLAP,
    IPF_NFRAGS_PURGED,
    IPF_NFRAGS_NUM_CNTS,
};

union ipf_addr {
    ovs_be32 ipv4;
    struct in6_addr ipv6;
};

/* Represents a single fragment; part of a list of fragments. */
struct ipf_frag {
    struct dp_packet *pkt;
    uint16_t start_data_byte;
    uint16_t end_data_byte;
    bool dnsteal; /* 'Do not steal': if true, ipf should not free the
                   * packet. */
};

/* The key for a collection of fragments potentially making up an
 * unfragmented packet. */
struct ipf_list_key {
    /* ipf_list_key_hash() requires 'src_addr' and 'dst_addr' to be the first
     * two members. */
    union ipf_addr src_addr;
    union ipf_addr dst_addr;
    uint32_t recirc_id;
    ovs_be32 ip_id;   /* V6 is 32 bits. */
    ovs_be16 dl_type;
    uint16_t zone;
    uint8_t nw_proto;
};

/* A collection of fragments potentially making up an unfragmented packet. */
struct ipf_list {
    struct hmap_node node;       /* In struct ipf's 'frag_lists'. */
    struct ovs_list list_node;   /* In struct ipf's 'frag_exp_list' or
                                  * 'frag_complete_list'. */
    struct ipf_frag *frag_list;  /* List of fragments for this list. */
    struct ipf_list_key key;     /* The key for the fragment list. */
    struct dp_packet *reass_execute_ctx; /* Reassembled packet. */
    long long expiration;        /* In milliseconds. */
    int last_sent_idx;           /* Last sent fragment idx. */
    int last_inuse_idx;          /* Last in-use fragment idx. */
    int size;                    /* Fragment list size. */
    uint8_t state;               /* Frag list state; see ipf_list_state. */
};

/* Represents a reassembled packet, which is typically passed through
 * conntrack. */
struct reassembled_pkt {
    struct ovs_list rp_list_node; /* In struct ipf's
                                   * 'reassembled_pkt_list'. */
    struct dp_packet *pkt;
    struct ipf_list *list;
};

struct ipf {
    /* The clean thread is used to clean up fragments in the 'ipf'
     * module if packet batches are no longer sent through its user. */
    pthread_t ipf_clean_thread;
    struct latch ipf_clean_thread_exit;

    int max_v4_frag_list_size;

    struct ovs_mutex ipf_lock; /* Protects all of the following. */
    /* These contain 'struct ipf_list's. */
    struct hmap frag_lists OVS_GUARDED;
    struct ovs_list frag_exp_list OVS_GUARDED;
    struct ovs_list frag_complete_list OVS_GUARDED;
    /* Contains 'struct reassembled_pkt's. */
    struct ovs_list reassembled_pkt_list OVS_GUARDED;

    /* Used to allow disabling fragment reassembly. */
    atomic_bool ifp_v4_enabled;
    atomic_bool ifp_v6_enabled;

    /* Will be clamped above 400 bytes; the value chosen should handle
     * ALG control packets of interest that use string encoding of mutable
     * IP fields; meaning, the control packets should not be fragmented. */
    atomic_uint min_v4_frag_size;
    atomic_uint min_v6_frag_size;

    /* Configurable maximum allowable fragments in process. */
    atomic_uint nfrag_max;

    /* Number of fragments in process. */
    atomic_count nfrag;

    atomic_uint64_t n4frag_cnt[IPF_NFRAGS_NUM_CNTS];
    atomic_uint64_t n6frag_cnt[IPF_NFRAGS_NUM_CNTS];
};

static void
ipf_print_reass_packet(const char *es, const void *pkt)
{
    static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(10, 10);
    if (!VLOG_DROP_WARN(&rl)) {
        struct ds ds = DS_EMPTY_INITIALIZER;
        ds_put_hex_dump(&ds, pkt, 128, 0, false);
        VLOG_WARN("%s\n%s", es, ds_cstr(&ds));
        ds_destroy(&ds);
    }
}

static void
ipf_count(struct ipf *ipf, bool v6, enum ipf_counter_type cntr)
{
    atomic_count_inc64(v6 ? &ipf->n6frag_cnt[cntr] : &ipf->n4frag_cnt[cntr]);
}

static bool
ipf_get_v4_enabled(struct ipf *ipf)
{
    bool ifp_v4_enabled_;
    atomic_read_relaxed(&ipf->ifp_v4_enabled, &ifp_v4_enabled_);
    return ifp_v4_enabled_;
}

static bool
ipf_get_v6_enabled(struct ipf *ipf)
{
    bool ifp_v6_enabled_;
    atomic_read_relaxed(&ipf->ifp_v6_enabled, &ifp_v6_enabled_);
    return ifp_v6_enabled_;
}

static bool
ipf_get_enabled(struct ipf *ipf)
{
    return ipf_get_v4_enabled(ipf) || ipf_get_v6_enabled(ipf);
}

static uint32_t
ipf_addr_hash_add(uint32_t hash, const union ipf_addr *addr)
{
    BUILD_ASSERT_DECL(sizeof *addr % 4 == 0);
    return hash_add_bytes32(hash, (const uint32_t *) addr, sizeof *addr);
}

/* Adds a list of fragments to the list tracking expiry of yet to be
 * completed reassembled packets, hence subject to expiry. */
static void
ipf_expiry_list_add(struct ovs_list *frag_exp_list, struct ipf_list *ipf_list,
                    long long now)
    /* OVS_REQUIRES(ipf->ipf_lock) */
{
    enum {
        IPF_FRAG_LIST_TIMEOUT = 15000,
    };

    ipf_list->expiration = now + IPF_FRAG_LIST_TIMEOUT;
    ovs_list_push_back(frag_exp_list, &ipf_list->list_node);
}

/* Adds a list of fragments to the list of completed packets, which will be
 * subsequently transmitted. */
static void
ipf_completed_list_add(struct ovs_list *frag_complete_list,
                       struct ipf_list *ipf_list)
    /* OVS_REQUIRES(ipf_lock) */
{
    ovs_list_push_back(frag_complete_list, &ipf_list->list_node);
}

/* Adds a reassembled packet to the list of reassembled packets, awaiting
 * some processing, such as being sent through conntrack. */
static void
ipf_reassembled_list_add(struct ovs_list *reassembled_pkt_list,
                         struct reassembled_pkt *rp)
    /* OVS_REQUIRES(ipf_lock) */
{
    ovs_list_push_back(reassembled_pkt_list, &rp->rp_list_node);
}

/* Removes a frag list from the tracking data structures and frees the
 * list's heap memory. */
static void
ipf_list_clean(struct hmap *frag_lists,
               struct ipf_list *ipf_list)
    /* OVS_REQUIRES(ipf_lock) */
{
    ovs_list_remove(&ipf_list->list_node);
    hmap_remove(frag_lists, &ipf_list->node);
    free(ipf_list->frag_list);
    free(ipf_list);
}

/* Removes a frag list sitting on the expiry list from the tracking
 * data structures and frees the list's heap memory. */
static void
ipf_expiry_list_clean(struct hmap *frag_lists,
                      struct ipf_list *ipf_list)
    /* OVS_REQUIRES(ipf_lock) */
{
    ipf_list_clean(frag_lists, ipf_list);
}

/* Removes a frag list sitting on the completed list from the tracking
 * data structures and frees the list's heap memory. */
static void
ipf_completed_list_clean(struct hmap *frag_lists,
                         struct ipf_list *ipf_list)
    /* OVS_REQUIRES(ipf_lock) */
{
    ipf_list_clean(frag_lists, ipf_list);
}

static void
ipf_expiry_list_remove(struct ipf_list *ipf_list)
    /* OVS_REQUIRES(ipf_lock) */
{
    ovs_list_remove(&ipf_list->list_node);
}

static void
ipf_reassembled_list_remove(struct reassembled_pkt *rp)
    /* OVS_REQUIRES(ipf_lock) */
{
    ovs_list_remove(&rp->rp_list_node);
}

/* Symmetric with respect to the source and destination addresses. */
static uint32_t
ipf_list_key_hash(const struct ipf_list_key *key, uint32_t basis)
{
    uint32_t hsrc, hdst, hash;
    hsrc = hdst = basis;
    hsrc = ipf_addr_hash_add(hsrc, &key->src_addr);
    hdst = ipf_addr_hash_add(hdst, &key->dst_addr);
    hash = hsrc ^ hdst;

    /* Hash the rest of the key. */
    return hash_words((uint32_t *) (&key->dst_addr + 1),
                      (uint32_t *) (key + 1) -
                          (uint32_t *) (&key->dst_addr + 1),
                      hash);
}

static bool
ipf_is_first_v4_frag(const struct dp_packet *pkt)
{
    const struct ip_header *l3 = dp_packet_l3(pkt);
    if (!(l3->ip_frag_off & htons(IP_FRAG_OFF_MASK)) &&
        l3->ip_frag_off & htons(IP_MORE_FRAGMENTS)) {
        return true;
    }
    return false;
}

static bool
ipf_is_last_v4_frag(const struct dp_packet *pkt)
{
    const struct ip_header *l3 = dp_packet_l3(pkt);
    if (l3->ip_frag_off & htons(IP_FRAG_OFF_MASK) &&
        !(l3->ip_frag_off & htons(IP_MORE_FRAGMENTS))) {
        return true;
    }
    return false;
}

static bool
ipf_is_v6_frag(ovs_be16 ip6f_offlg)
{
    if (ip6f_offlg & (IP6F_OFF_MASK | IP6F_MORE_FRAG)) {
        return true;
    }
    return false;
}

static bool
ipf_is_first_v6_frag(ovs_be16 ip6f_offlg)
{
    if (!(ip6f_offlg & IP6F_OFF_MASK) &&
        ip6f_offlg & IP6F_MORE_FRAG) {
        return true;
    }
    return false;
}

static bool
ipf_is_last_v6_frag(ovs_be16 ip6f_offlg)
{
    if ((ip6f_offlg & IP6F_OFF_MASK) &&
        !(ip6f_offlg & IP6F_MORE_FRAG)) {
        return true;
    }
    return false;
}

/* Checks whether a collection of fragments forms a complete packet, i.e.,
 * whether the sorted fragments are contiguous. */
static bool
ipf_list_complete(const struct ipf_list *ipf_list)
    /* OVS_REQUIRES(ipf_lock) */
{
    for (int i = 1; i <= ipf_list->last_inuse_idx; i++) {
        if (ipf_list->frag_list[i - 1].end_data_byte + 1
            != ipf_list->frag_list[i].start_data_byte) {
            return false;
        }
    }
    return true;
}

/* Insertion sort; runs in O(n) time for a sorted or almost sorted list. */
static void
ipf_sort(struct ipf_frag *frag_list, size_t last_idx)
    /* OVS_REQUIRES(ipf_lock) */
{
    for (int li = 1; li <= last_idx; li++) {
        struct ipf_frag ipf_frag = frag_list[li];
        int ci = li - 1;
        while (ci >= 0 &&
               frag_list[ci].start_data_byte > ipf_frag.start_data_byte) {
            frag_list[ci + 1] = frag_list[ci];
            ci--;
        }
        frag_list[ci + 1] = ipf_frag;
    }
}

/* Called on a sorted complete list of v4 fragments to reassemble them into
 * a single packet that can be processed, such as passing through conntrack.
 */
static struct dp_packet *
ipf_reassemble_v4_frags(struct ipf_list *ipf_list)
    /* OVS_REQUIRES(ipf_lock) */
{
    struct ipf_frag *frag_list = ipf_list->frag_list;
    struct dp_packet *pkt = dp_packet_clone(frag_list[0].pkt);
    dp_packet_set_size(pkt, dp_packet_size(pkt) - dp_packet_l2_pad_size(pkt));
    struct ip_header *l3 = dp_packet_l3(pkt);
    int len = ntohs(l3->ip_tot_len);

    int rest_len = frag_list[ipf_list->last_inuse_idx].end_data_byte -
                   frag_list[1].start_data_byte + 1;

    if (len + rest_len > IPV4_PACKET_MAX_SIZE) {
        ipf_print_reass_packet(
            "Unsupported big reassembled v4 packet; v4 hdr:", l3);
        dp_packet_delete(pkt);
        return NULL;
    }

    dp_packet_prealloc_tailroom(pkt, rest_len);

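    /* Fragment 0's data is already present from the clone; append the L4
     * payload of each remaining fragment in offset order. */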
    for (int i = 1; i <= ipf_list->last_inuse_idx; i++) {
        size_t add_len = frag_list[i].end_data_byte -
                         frag_list[i].start_data_byte + 1;
        const char *l4 = dp_packet_l4(frag_list[i].pkt);
        dp_packet_put(pkt, l4, add_len);
    }

    len += rest_len;
    l3 = dp_packet_l3(pkt);
    ovs_be16 new_ip_frag_off = l3->ip_frag_off & ~htons(IP_MORE_FRAGMENTS);
    if (!dp_packet_hwol_is_ipv4(pkt)) {
        l3->ip_csum = recalc_csum16(l3->ip_csum, l3->ip_frag_off,
                                    new_ip_frag_off);
        l3->ip_csum = recalc_csum16(l3->ip_csum, l3->ip_tot_len, htons(len));
    }
    l3->ip_tot_len = htons(len);
    l3->ip_frag_off = new_ip_frag_off;
    dp_packet_set_l2_pad_size(pkt, 0);

    return pkt;
}

/* Called on a sorted complete list of v6 fragments to reassemble them into
 * a single packet that can be processed, such as passing through conntrack.
 */
static struct dp_packet *
ipf_reassemble_v6_frags(struct ipf_list *ipf_list)
    /* OVS_REQUIRES(ipf_lock) */
{
    struct ipf_frag *frag_list = ipf_list->frag_list;
    struct dp_packet *pkt = dp_packet_clone(frag_list[0].pkt);
    dp_packet_set_size(pkt, dp_packet_size(pkt) - dp_packet_l2_pad_size(pkt));
    struct ovs_16aligned_ip6_hdr *l3 = dp_packet_l3(pkt);
    int pl = ntohs(l3->ip6_plen) - sizeof(struct ovs_16aligned_ip6_frag);

    int rest_len = frag_list[ipf_list->last_inuse_idx].end_data_byte -
                   frag_list[1].start_data_byte + 1;

    if (pl + rest_len > IPV6_PACKET_MAX_DATA) {
        ipf_print_reass_packet(
            "Unsupported big reassembled v6 packet; v6 hdr:", l3);
        dp_packet_delete(pkt);
        return NULL;
    }

    dp_packet_prealloc_tailroom(pkt, rest_len);

    for (int i = 1; i <= ipf_list->last_inuse_idx; i++) {
        size_t add_len = frag_list[i].end_data_byte -
                         frag_list[i].start_data_byte + 1;
        const char *l4 = dp_packet_l4(frag_list[i].pkt);
        dp_packet_put(pkt, l4, add_len);
    }

    pl += rest_len;
    l3 = dp_packet_l3(pkt);

    uint8_t nw_proto = l3->ip6_nxt;
    uint8_t nw_frag = 0;
    const void *data = l3 + 1;
    size_t datasize = pl;

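    /* Re-locate the fragment extension header in the reassembled packet so
     * that it can be zeroed and the payload length and next-header fields
     * fixed up. */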
    const struct ovs_16aligned_ip6_frag *frag_hdr = NULL;
    if (!parse_ipv6_ext_hdrs(&data, &datasize, &nw_proto, &nw_frag, &frag_hdr)
        || !nw_frag || !frag_hdr) {

        ipf_print_reass_packet("Unparsed reassembled v6 packet; v6 hdr:", l3);
        dp_packet_delete(pkt);
        return NULL;
    }

    struct ovs_16aligned_ip6_frag *fh =
        CONST_CAST(struct ovs_16aligned_ip6_frag *, frag_hdr);
    fh->ip6f_offlg = 0;
    l3->ip6_plen = htons(pl);
    l3->ip6_ctlun.ip6_un1.ip6_un1_nxt = nw_proto;
    dp_packet_set_l2_pad_size(pkt, 0);
    return pkt;
}

/* Called when a frag list state transitions to another state. This is
 * triggered by a new fragment for the list being received. */
static void
ipf_list_state_transition(struct ipf *ipf, struct ipf_list *ipf_list,
                          bool ff, bool lf, bool v6)
    OVS_REQUIRES(ipf->ipf_lock)
{
    enum ipf_list_state curr_state = ipf_list->state;
    enum ipf_list_state next_state;
    switch (curr_state) {
    case IPF_LIST_STATE_UNUSED:
    case IPF_LIST_STATE_OTHER_SEEN:
        if (ff) {
            next_state = IPF_LIST_STATE_FIRST_SEEN;
        } else if (lf) {
            next_state = IPF_LIST_STATE_LAST_SEEN;
        } else {
            next_state = IPF_LIST_STATE_OTHER_SEEN;
        }
        break;
    case IPF_LIST_STATE_FIRST_SEEN:
        if (lf) {
            next_state = IPF_LIST_STATE_FIRST_LAST_SEEN;
        } else {
            next_state = IPF_LIST_STATE_FIRST_SEEN;
        }
        break;
    case IPF_LIST_STATE_LAST_SEEN:
        if (ff) {
            next_state = IPF_LIST_STATE_FIRST_LAST_SEEN;
        } else {
            next_state = IPF_LIST_STATE_LAST_SEEN;
        }
        break;
    case IPF_LIST_STATE_FIRST_LAST_SEEN:
        next_state = IPF_LIST_STATE_FIRST_LAST_SEEN;
        break;
    case IPF_LIST_STATE_COMPLETED:
    case IPF_LIST_STATE_REASS_FAIL:
    case IPF_LIST_STATE_NUM:
    default:
        OVS_NOT_REACHED();
    }

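    /* Once both the first and last fragments have been seen, sort the list
     * and attempt reassembly; on success the list is moved off the expiry
     * list and marked completed, otherwise it is marked as a reassembly
     * failure. */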
    if (next_state == IPF_LIST_STATE_FIRST_LAST_SEEN) {
        ipf_sort(ipf_list->frag_list, ipf_list->last_inuse_idx);
        if (ipf_list_complete(ipf_list)) {
            struct dp_packet *reass_pkt = v6
                ? ipf_reassemble_v6_frags(ipf_list)
                : ipf_reassemble_v4_frags(ipf_list);
            if (reass_pkt) {
                struct reassembled_pkt *rp = xzalloc(sizeof *rp);
                rp->pkt = reass_pkt;
                rp->list = ipf_list;
                ipf_reassembled_list_add(&ipf->reassembled_pkt_list, rp);
                ipf_expiry_list_remove(ipf_list);
                next_state = IPF_LIST_STATE_COMPLETED;
            } else {
                next_state = IPF_LIST_STATE_REASS_FAIL;
            }
        }
    }
    ipf_list->state = next_state;
}

/* Some sanity checks are redundant, but prudent, in case code paths for
 * fragments change in the future. The processing cost for fragments is not
 * important. */
static bool
ipf_is_valid_v4_frag(struct ipf *ipf, struct dp_packet *pkt)
{
    if (OVS_UNLIKELY(dp_packet_ip_checksum_bad(pkt))) {
        goto invalid_pkt;
    }

    const struct eth_header *l2 = dp_packet_eth(pkt);
    const struct ip_header *l3 = dp_packet_l3(pkt);

    if (OVS_UNLIKELY(!l2 || !l3)) {
        goto invalid_pkt;
    }

    size_t l3_size = dp_packet_l3_size(pkt);
    if (OVS_UNLIKELY(l3_size < IP_HEADER_LEN)) {
        goto invalid_pkt;
    }

    if (!IP_IS_FRAGMENT(l3->ip_frag_off)) {
        return false;
    }

    uint16_t ip_tot_len = ntohs(l3->ip_tot_len);
    if (OVS_UNLIKELY(ip_tot_len != l3_size)) {
        goto invalid_pkt;
    }

    size_t ip_hdr_len = IP_IHL(l3->ip_ihl_ver) * 4;
    if (OVS_UNLIKELY(ip_hdr_len < IP_HEADER_LEN)) {
        goto invalid_pkt;
    }
    if (OVS_UNLIKELY(l3_size < ip_hdr_len)) {
        goto invalid_pkt;
    }

    if (OVS_UNLIKELY(!dp_packet_ip_checksum_valid(pkt)
                     && !dp_packet_hwol_is_ipv4(pkt)
                     && csum(l3, ip_hdr_len) != 0)) {
        goto invalid_pkt;
    }

    uint32_t min_v4_frag_size_;
    atomic_read_relaxed(&ipf->min_v4_frag_size, &min_v4_frag_size_);
    bool lf = ipf_is_last_v4_frag(pkt);
    if (OVS_UNLIKELY(!lf && dp_packet_l3_size(pkt) < min_v4_frag_size_)) {
        ipf_count(ipf, false, IPF_NFRAGS_TOO_SMALL);
        goto invalid_pkt;
    }
    return true;

invalid_pkt:
    pkt->md.ct_state = CS_INVALID;
    return false;
}

static bool
ipf_v4_key_extract(struct dp_packet *pkt, ovs_be16 dl_type, uint16_t zone,
                   struct ipf_list_key *key, uint16_t *start_data_byte,
                   uint16_t *end_data_byte, bool *ff, bool *lf)
{
    const struct ip_header *l3 = dp_packet_l3(pkt);
    uint16_t ip_tot_len = ntohs(l3->ip_tot_len);
    size_t ip_hdr_len = IP_IHL(l3->ip_ihl_ver) * 4;

    *start_data_byte = ntohs(l3->ip_frag_off & htons(IP_FRAG_OFF_MASK)) * 8;
    *end_data_byte = *start_data_byte + ip_tot_len - ip_hdr_len - 1;
    *ff = ipf_is_first_v4_frag(pkt);
    *lf = ipf_is_last_v4_frag(pkt);
    memset(key, 0, sizeof *key);
    key->ip_id = be16_to_be32(l3->ip_id);
    key->dl_type = dl_type;
    key->src_addr.ipv4 = get_16aligned_be32(&l3->ip_src);
    key->dst_addr.ipv4 = get_16aligned_be32(&l3->ip_dst);
    key->nw_proto = l3->ip_proto;
    key->zone = zone;
    key->recirc_id = pkt->md.recirc_id;
    return true;
}

/* Some sanity checks are redundant, but prudent, in case code paths for
 * fragments change in the future. The processing cost for fragments is not
 * important. */
static bool
ipf_is_valid_v6_frag(struct ipf *ipf, struct dp_packet *pkt)
{
    const struct eth_header *l2 = dp_packet_eth(pkt);
    const struct ovs_16aligned_ip6_hdr *l3 = dp_packet_l3(pkt);
    const char *l4 = dp_packet_l4(pkt);

    if (OVS_UNLIKELY(!l2 || !l3 || !l4)) {
        goto invalid_pkt;
    }

    size_t l3_size = dp_packet_l3_size(pkt);
    size_t l3_hdr_size = sizeof *l3;

    if (OVS_UNLIKELY(l3_size < l3_hdr_size)) {
        goto invalid_pkt;
    }

    uint8_t nw_frag = 0;
    uint8_t nw_proto = l3->ip6_nxt;
    const void *data = l3 + 1;
    size_t datasize = l3_size - l3_hdr_size;
    const struct ovs_16aligned_ip6_frag *frag_hdr = NULL;
    if (!parse_ipv6_ext_hdrs(&data, &datasize, &nw_proto, &nw_frag,
                             &frag_hdr) || !nw_frag || !frag_hdr) {
        return false;
    }

    int pl = ntohs(l3->ip6_plen);
    if (OVS_UNLIKELY(pl + l3_hdr_size != l3_size)) {
        goto invalid_pkt;
    }

    ovs_be16 ip6f_offlg = frag_hdr->ip6f_offlg;
    if (OVS_UNLIKELY(!ipf_is_v6_frag(ip6f_offlg))) {
        return false;
    }

    uint32_t min_v6_frag_size_;
    atomic_read_relaxed(&ipf->min_v6_frag_size, &min_v6_frag_size_);
    bool lf = ipf_is_last_v6_frag(ip6f_offlg);

    if (OVS_UNLIKELY(!lf && dp_packet_l3_size(pkt) < min_v6_frag_size_)) {
        ipf_count(ipf, true, IPF_NFRAGS_TOO_SMALL);
        goto invalid_pkt;
    }

    return true;

invalid_pkt:
    pkt->md.ct_state = CS_INVALID;
    return false;
}

static void
ipf_v6_key_extract(struct dp_packet *pkt, ovs_be16 dl_type, uint16_t zone,
                   struct ipf_list_key *key, uint16_t *start_data_byte,
                   uint16_t *end_data_byte, bool *ff, bool *lf)
{
    const struct ovs_16aligned_ip6_hdr *l3 = dp_packet_l3(pkt);
    uint8_t nw_frag = 0;
    uint8_t nw_proto = l3->ip6_nxt;
    const void *data = l3 + 1;
    size_t datasize = dp_packet_l3_size(pkt) - sizeof *l3;
    const struct ovs_16aligned_ip6_frag *frag_hdr = NULL;

    parse_ipv6_ext_hdrs(&data, &datasize, &nw_proto, &nw_frag, &frag_hdr);
    ovs_assert(nw_frag && frag_hdr);
    ovs_be16 ip6f_offlg = frag_hdr->ip6f_offlg;
    *start_data_byte = ntohs(ip6f_offlg & IP6F_OFF_MASK) +
                       sizeof (struct ovs_16aligned_ip6_frag);
    *end_data_byte = *start_data_byte + dp_packet_l4_size(pkt) - 1;
    *ff = ipf_is_first_v6_frag(ip6f_offlg);
    *lf = ipf_is_last_v6_frag(ip6f_offlg);
    memset(key, 0, sizeof *key);
    key->ip_id = get_16aligned_be32(&frag_hdr->ip6f_ident);
    key->dl_type = dl_type;
    memcpy(&key->src_addr.ipv6, &l3->ip6_src, sizeof key->src_addr.ipv6);
    /* Parsing the routing header to use as the dst address part of the
     * key is not supported. */
    memcpy(&key->dst_addr.ipv6, &l3->ip6_dst, sizeof key->dst_addr.ipv6);
    key->nw_proto = 0; /* Not used for the key for V6. */
    key->zone = zone;
    key->recirc_id = pkt->md.recirc_id;
}

static bool
ipf_list_key_eq(const struct ipf_list_key *key1,
                const struct ipf_list_key *key2)
    /* OVS_REQUIRES(ipf_lock) */
{
    if (!memcmp(&key1->src_addr, &key2->src_addr, sizeof key1->src_addr) &&
        !memcmp(&key1->dst_addr, &key2->dst_addr, sizeof key1->dst_addr) &&
        key1->dl_type == key2->dl_type &&
        key1->ip_id == key2->ip_id &&
        key1->zone == key2->zone &&
        key1->nw_proto == key2->nw_proto &&
        key1->recirc_id == key2->recirc_id) {
        return true;
    }
    return false;
}

static struct ipf_list *
ipf_list_key_lookup(struct ipf *ipf, const struct ipf_list_key *key,
                    uint32_t hash)
    OVS_REQUIRES(ipf->ipf_lock)
{
    struct ipf_list *ipf_list;
    HMAP_FOR_EACH_WITH_HASH (ipf_list, node, hash, &ipf->frag_lists) {
        if (ipf_list_key_eq(&ipf_list->key, key)) {
            return ipf_list;
        }
    }
    return NULL;
}

static bool
ipf_is_frag_duped(const struct ipf_frag *frag_list, int last_inuse_idx,
                  size_t start_data_byte, size_t end_data_byte)
    /* OVS_REQUIRES(ipf_lock) */
{
    for (int i = 0; i <= last_inuse_idx; i++) {
        if ((start_data_byte >= frag_list[i].start_data_byte &&
             start_data_byte <= frag_list[i].end_data_byte) ||
            (end_data_byte >= frag_list[i].start_data_byte &&
             end_data_byte <= frag_list[i].end_data_byte)) {
            return true;
        }
    }
    return false;
}

/* Adds a fragment to a list of fragments, if the fragment is not a
 * duplicate. If the fragment is a duplicate, it is marked invalid here,
 * saving conntrack the work of marking it invalid, which it would do in
 * all cases. */
static bool
ipf_process_frag(struct ipf *ipf, struct ipf_list *ipf_list,
                 struct dp_packet *pkt, uint16_t start_data_byte,
                 uint16_t end_data_byte, bool ff, bool lf, bool v6,
                 bool dnsteal)
    OVS_REQUIRES(ipf->ipf_lock)
{
    bool duped_frag = ipf_is_frag_duped(ipf_list->frag_list,
        ipf_list->last_inuse_idx, start_data_byte, end_data_byte);
    int last_inuse_idx = ipf_list->last_inuse_idx;

    if (!duped_frag) {
        if (last_inuse_idx < ipf_list->size - 1) {
            /* In the case of DPDK, it would be unfortunate if we had
             * to create a clone fragment outside the DPDK mp due to the
             * mempool size being too limited. We will otherwise need to
             * recommend not setting the mempool number of buffers too low
             * and also clamp the number of fragments. */
            struct ipf_frag *frag = &ipf_list->frag_list[last_inuse_idx + 1];
            frag->pkt = pkt;
            frag->start_data_byte = start_data_byte;
            frag->end_data_byte = end_data_byte;
            frag->dnsteal = dnsteal;
            ipf_list->last_inuse_idx++;
            atomic_count_inc(&ipf->nfrag);
            ipf_count(ipf, v6, IPF_NFRAGS_ACCEPTED);
            ipf_list_state_transition(ipf, ipf_list, ff, lf, v6);
        } else {
            OVS_NOT_REACHED();
        }
    } else {
        ipf_count(ipf, v6, IPF_NFRAGS_OVERLAP);
        pkt->md.ct_state = CS_INVALID;
        return false;
    }
    return true;
}

static void
ipf_list_init(struct ipf_list *ipf_list, struct ipf_list_key *key,
              int max_frag_list_size)
{
    ipf_list->key = *key;
    ipf_list->last_inuse_idx = IPF_INVALID_IDX;
    ipf_list->last_sent_idx = IPF_INVALID_IDX;
    ipf_list->reass_execute_ctx = NULL;
    ipf_list->state = IPF_LIST_STATE_UNUSED;
    ipf_list->size = max_frag_list_size;
    ipf_list->frag_list
        = xzalloc(ipf_list->size * sizeof *ipf_list->frag_list);
}

/* Generates a fragment list key from a well formed fragment and either
 * starts a new fragment list or grows the existing fragment list, while
 * checking that the maximum number of supported fragments is not exceeded
 * and that the list size is not impossibly big. Calls 'ipf_process_frag()'
 * to add a fragment to a list of fragments. */
static bool
ipf_handle_frag(struct ipf *ipf, struct dp_packet *pkt, ovs_be16 dl_type,
                uint16_t zone, long long now, uint32_t hash_basis,
                bool dnsteal)
    OVS_REQUIRES(ipf->ipf_lock)
{
    struct ipf_list_key key;
    /* Initialize 4 variables for some versions of GCC. */
    uint16_t start_data_byte = 0;
    uint16_t end_data_byte = 0;
    bool ff = false;
    bool lf = false;
    bool v6 = dl_type == htons(ETH_TYPE_IPV6);

    if (v6 && ipf_get_v6_enabled(ipf)) {
        ipf_v6_key_extract(pkt, dl_type, zone, &key, &start_data_byte,
                           &end_data_byte, &ff, &lf);
    } else if (!v6 && ipf_get_v4_enabled(ipf)) {
        ipf_v4_key_extract(pkt, dl_type, zone, &key, &start_data_byte,
                           &end_data_byte, &ff, &lf);
    } else {
        return false;
    }

    unsigned int nfrag_max;
    atomic_read_relaxed(&ipf->nfrag_max, &nfrag_max);
    if (atomic_count_get(&ipf->nfrag) >= nfrag_max) {
        return false;
    }

    uint32_t hash = ipf_list_key_hash(&key, hash_basis);
    struct ipf_list *ipf_list = ipf_list_key_lookup(ipf, &key, hash);
    enum {
        IPF_FRAG_LIST_MIN_INCREMENT = 4,
        IPF_IPV6_MAX_FRAG_LIST_SIZE = 65535,
    };

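    /* Fragment lists are grown in small increments, bounded by the maximum
     * list size for the address family. */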
    int max_frag_list_size;
    if (v6) {
        /* Because the calculation with extension headers is variable,
         * we don't calculate a hard maximum fragment list size upfront.
         * The fragment list size is practically limited by the code,
         * however. */
        max_frag_list_size = IPF_IPV6_MAX_FRAG_LIST_SIZE;
    } else {
        max_frag_list_size = ipf->max_v4_frag_list_size;
    }

    if (!ipf_list) {
        ipf_list = xmalloc(sizeof *ipf_list);
        ipf_list_init(ipf_list, &key,
                      MIN(max_frag_list_size, IPF_FRAG_LIST_MIN_INCREMENT));
        hmap_insert(&ipf->frag_lists, &ipf_list->node, hash);
        ipf_expiry_list_add(&ipf->frag_exp_list, ipf_list, now);
    } else if (ipf_list->state == IPF_LIST_STATE_REASS_FAIL ||
               ipf_list->state == IPF_LIST_STATE_COMPLETED) {
        /* Bail out as early as possible. */
        return false;
    } else if (ipf_list->last_inuse_idx + 1 >= ipf_list->size) {
        int increment = MIN(IPF_FRAG_LIST_MIN_INCREMENT,
                            max_frag_list_size - ipf_list->size);
        /* Enforce limit. */
        if (increment > 0) {
            ipf_list->frag_list =
                xrealloc(ipf_list->frag_list, (ipf_list->size + increment) *
                         sizeof *ipf_list->frag_list);
            ipf_list->size += increment;
        } else {
            return false;
        }
    }

    return ipf_process_frag(ipf, ipf_list, pkt, start_data_byte,
                            end_data_byte, ff, lf, v6, dnsteal);
}

/* Filters fragments out of a batch of packets and adjusts the batch. */
static void
ipf_extract_frags_from_batch(struct ipf *ipf, struct dp_packet_batch *pb,
                             ovs_be16 dl_type, uint16_t zone, long long now,
                             uint32_t hash_basis)
{
    const size_t pb_cnt = dp_packet_batch_size(pb);
    int pb_idx; /* Index in a packet batch. */
    struct dp_packet *pkt;

    DP_PACKET_BATCH_REFILL_FOR_EACH (pb_idx, pb_cnt, pkt, pb) {
        if (OVS_UNLIKELY((dl_type == htons(ETH_TYPE_IP) &&
                          ipf_is_valid_v4_frag(ipf, pkt))
                         ||
                         (dl_type == htons(ETH_TYPE_IPV6) &&
                          ipf_is_valid_v6_frag(ipf, pkt)))) {

            ovs_mutex_lock(&ipf->ipf_lock);
            if (!ipf_handle_frag(ipf, pkt, dl_type, zone, now, hash_basis,
                                 pb->do_not_steal)) {
                dp_packet_batch_refill(pb, pkt, pb_idx);
            }
            ovs_mutex_unlock(&ipf->ipf_lock);
        } else {
            dp_packet_batch_refill(pb, pkt, pb_idx);
        }
    }
}

/* In the case of DPDK, a memory source check is done, as DPDK memory pool
 * management has trouble dealing with multiple source types. The
 * 'check_source' parameter is used to indicate when this check is needed. */
static bool
ipf_dp_packet_batch_add(struct dp_packet_batch *pb, struct dp_packet *pkt,
                        bool check_source OVS_UNUSED)
{
#ifdef DPDK_NETDEV
    if ((dp_packet_batch_is_full(pb)) ||
        /* DPDK cannot handle multiple sources in a batch. */
        (check_source && !dp_packet_batch_is_empty(pb)
         && pb->packets[0]->source != pkt->source)) {
#else
    if (dp_packet_batch_is_full(pb)) {
#endif
        return false;
    }

    dp_packet_batch_add(pb, pkt);
    return true;
}

/* This is used in rare cases where a list cannot be sent. One known rare
 * reason is a mempool source check, which exists due to DPDK support,
 * where packets are no longer being received on any port with a source
 * matching the fragment. Another reason is a race where all conntrack
 * rules are unconfigured when some fragments are yet to be flushed.
 *
 * Returns true if the list was purged. */
static bool
ipf_purge_list_check(struct ipf *ipf, struct ipf_list *ipf_list,
                     long long now)
    OVS_REQUIRES(ipf->ipf_lock)
{
    enum {
        IPF_FRAG_LIST_PURGE_TIME_ADJ = 10000
    };

    if (now < ipf_list->expiration + IPF_FRAG_LIST_PURGE_TIME_ADJ) {
        return false;
    }

    while (ipf_list->last_sent_idx < ipf_list->last_inuse_idx) {
        struct dp_packet *pkt
            = ipf_list->frag_list[ipf_list->last_sent_idx + 1].pkt;
        dp_packet_delete(pkt);
        atomic_count_dec(&ipf->nfrag);
        COVERAGE_INC(ipf_stuck_frag_list_purged);
        ipf_count(ipf, ipf_list->key.dl_type == htons(ETH_TYPE_IPV6),
                  IPF_NFRAGS_PURGED);
        ipf_list->last_sent_idx++;
    }

    return true;
}

/* Does the packet batch management and common accounting work associated
 * with 'ipf_send_completed_frags()' and 'ipf_send_expired_frags()'. */
static bool
ipf_send_frags_in_list(struct ipf *ipf, struct ipf_list *ipf_list,
                       struct dp_packet_batch *pb,
                       enum ipf_list_type list_type, bool v6, long long now)
    OVS_REQUIRES(ipf->ipf_lock)
{
    if (ipf_purge_list_check(ipf, ipf_list, now)) {
        return true;
    }

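    /* Either every remaining fragment is added to the batch (return true),
     * or the batch fills up first (return false); the loop cannot fall
     * through. */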
    while (ipf_list->last_sent_idx < ipf_list->last_inuse_idx) {
        struct dp_packet *pkt
            = ipf_list->frag_list[ipf_list->last_sent_idx + 1].pkt;
        if (ipf_dp_packet_batch_add(pb, pkt, true)) {
            ipf_list->last_sent_idx++;
            atomic_count_dec(&ipf->nfrag);

            if (list_type == IPF_FRAG_COMPLETED_LIST) {
                ipf_count(ipf, v6, IPF_NFRAGS_COMPL_SENT);
            } else {
                ipf_count(ipf, v6, IPF_NFRAGS_EXPD_SENT);
                pkt->md.ct_state = CS_INVALID;
            }

            if (ipf_list->last_sent_idx == ipf_list->last_inuse_idx) {
                return true;
            }
        } else {
            return false;
        }
    }
    OVS_NOT_REACHED();
}

/* Adds fragments associated with a completed fragment list to a packet
 * batch to be processed by the calling application, typically conntrack.
 * Also cleans up the list context when it is empty. */
static void
ipf_send_completed_frags(struct ipf *ipf, struct dp_packet_batch *pb,
                         long long now, bool v6)
{
    if (ovs_list_is_empty(&ipf->frag_complete_list)) {
        return;
    }

    ovs_mutex_lock(&ipf->ipf_lock);
    struct ipf_list *ipf_list, *next;

    LIST_FOR_EACH_SAFE (ipf_list, next, list_node, &ipf->frag_complete_list) {
        if (ipf_send_frags_in_list(ipf, ipf_list, pb, IPF_FRAG_COMPLETED_LIST,
                                   v6, now)) {
            ipf_completed_list_clean(&ipf->frag_lists, ipf_list);
        } else {
            break;
        }
    }

    ovs_mutex_unlock(&ipf->ipf_lock);
}

/* Conservatively adds fragments associated with an expired fragment list
 * to a packet batch to be processed by the calling application, typically
 * conntrack. Also cleans up the list context when it is empty. */
static void
ipf_send_expired_frags(struct ipf *ipf, struct dp_packet_batch *pb,
                       long long now, bool v6)
{
    enum {
        /* Very conservative, due to DOS probability. */
        IPF_FRAG_LIST_MAX_EXPIRED = 1,
    };

    if (ovs_list_is_empty(&ipf->frag_exp_list)) {
        return;
    }

    ovs_mutex_lock(&ipf->ipf_lock);
    struct ipf_list *ipf_list, *next;
    size_t lists_removed = 0;

    LIST_FOR_EACH_SAFE (ipf_list, next, list_node, &ipf->frag_exp_list) {
        if (now <= ipf_list->expiration ||
            lists_removed >= IPF_FRAG_LIST_MAX_EXPIRED) {
            break;
        }

        if (ipf_send_frags_in_list(ipf, ipf_list, pb, IPF_FRAG_EXPIRY_LIST,
                                   v6, now)) {
            ipf_expiry_list_clean(&ipf->frag_lists, ipf_list);
            lists_removed++;
        } else {
            break;
        }
    }

    ovs_mutex_unlock(&ipf->ipf_lock);
}

/* Adds a reassembled packet to a packet batch to be processed by the
 * caller. */
static void
ipf_execute_reass_pkts(struct ipf *ipf, struct dp_packet_batch *pb)
{
    if (ovs_list_is_empty(&ipf->reassembled_pkt_list)) {
        return;
    }

    ovs_mutex_lock(&ipf->ipf_lock);
    struct reassembled_pkt *rp, *next;

    LIST_FOR_EACH_SAFE (rp, next, rp_list_node, &ipf->reassembled_pkt_list) {
        if (!rp->list->reass_execute_ctx &&
            ipf_dp_packet_batch_add(pb, rp->pkt, false)) {
            rp->list->reass_execute_ctx = rp->pkt;
        }
    }

    ovs_mutex_unlock(&ipf->ipf_lock);
}

/* Checks for reassembled packets post processing by conntrack and edits the
 * fragments if needed based on what conntrack decided. */
static void
ipf_post_execute_reass_pkts(struct ipf *ipf,
                            struct dp_packet_batch *pb, bool v6)
{
    if (ovs_list_is_empty(&ipf->reassembled_pkt_list)) {
        return;
    }

    ovs_mutex_lock(&ipf->ipf_lock);
    struct reassembled_pkt *rp, *next;

    LIST_FOR_EACH_SAFE (rp, next, rp_list_node, &ipf->reassembled_pkt_list) {
        const size_t pb_cnt = dp_packet_batch_size(pb);
        int pb_idx;
        struct dp_packet *pkt;
        /* Inner batch loop is constant time since batch size is <=
         * NETDEV_MAX_BURST. */
        DP_PACKET_BATCH_REFILL_FOR_EACH (pb_idx, pb_cnt, pkt, pb) {
            if (pkt == rp->list->reass_execute_ctx) {
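                /* Transfer the ct metadata that conntrack attached to the
                 * reassembled packet onto each fragment. */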
                for (int i = 0; i <= rp->list->last_inuse_idx; i++) {
                    rp->list->frag_list[i].pkt->md.ct_label = pkt->md.ct_label;
                    rp->list->frag_list[i].pkt->md.ct_mark = pkt->md.ct_mark;
                    rp->list->frag_list[i].pkt->md.ct_state = pkt->md.ct_state;
                    rp->list->frag_list[i].pkt->md.ct_zone = pkt->md.ct_zone;
                    rp->list->frag_list[i].pkt->md.ct_orig_tuple_ipv6 =
                        pkt->md.ct_orig_tuple_ipv6;
                    if (pkt->md.ct_orig_tuple_ipv6) {
                        rp->list->frag_list[i].pkt->md.ct_orig_tuple.ipv6 =
                            pkt->md.ct_orig_tuple.ipv6;
                    } else {
                        rp->list->frag_list[i].pkt->md.ct_orig_tuple.ipv4 =
                            pkt->md.ct_orig_tuple.ipv4;
                    }
                }

                const struct ipf_frag *frag_0 = &rp->list->frag_list[0];
                void *l4_frag = dp_packet_l4(frag_0->pkt);
                void *l4_reass = dp_packet_l4(pkt);
                memcpy(l4_frag, l4_reass, dp_packet_l4_size(frag_0->pkt));

                if (v6) {
                    struct ovs_16aligned_ip6_hdr *l3_frag
                        = dp_packet_l3(frag_0->pkt);
                    struct ovs_16aligned_ip6_hdr *l3_reass = dp_packet_l3(pkt);
                    l3_frag->ip6_src = l3_reass->ip6_src;
                    l3_frag->ip6_dst = l3_reass->ip6_dst;
                } else {
                    struct ip_header *l3_frag = dp_packet_l3(frag_0->pkt);
                    struct ip_header *l3_reass = dp_packet_l3(pkt);
                    if (!dp_packet_hwol_is_ipv4(frag_0->pkt)) {
                        ovs_be32 reass_ip =
                            get_16aligned_be32(&l3_reass->ip_src);
                        ovs_be32 frag_ip =
                            get_16aligned_be32(&l3_frag->ip_src);

                        l3_frag->ip_csum = recalc_csum32(l3_frag->ip_csum,
                                                         frag_ip, reass_ip);
                        reass_ip = get_16aligned_be32(&l3_reass->ip_dst);
                        frag_ip = get_16aligned_be32(&l3_frag->ip_dst);
                        l3_frag->ip_csum = recalc_csum32(l3_frag->ip_csum,
                                                         frag_ip, reass_ip);
                    }

                    l3_frag->ip_src = l3_reass->ip_src;
                    l3_frag->ip_dst = l3_reass->ip_dst;
                }

                ipf_completed_list_add(&ipf->frag_complete_list, rp->list);
                ipf_reassembled_list_remove(rp);
                dp_packet_delete(rp->pkt);
                free(rp);
            } else {
                dp_packet_batch_refill(pb, pkt, pb_idx);
            }
        }
    }

    ovs_mutex_unlock(&ipf->ipf_lock);
}

/* Extracts any fragments from the batch and reassembles them when a
 * complete packet is received. An attempt is then made to add completed
 * packets to the batch to be sent through conntrack. */
void
ipf_preprocess_conntrack(struct ipf *ipf, struct dp_packet_batch *pb,
                         long long now, ovs_be16 dl_type, uint16_t zone,
                         uint32_t hash_basis)
{
    if (ipf_get_enabled(ipf)) {
        ipf_extract_frags_from_batch(ipf, pb, dl_type, zone, now, hash_basis);
    }

    if (ipf_get_enabled(ipf) || atomic_count_get(&ipf->nfrag)) {
        ipf_execute_reass_pkts(ipf, pb);
    }
}

/* Updates fragments based on the processing of the reassembled packet sent
 * through conntrack and adds these fragments to any batches seen. Expired
 * fragments are marked as invalid and also added to the batches seen
 * with low priority. Reassembled packets are freed. */
void
ipf_postprocess_conntrack(struct ipf *ipf, struct dp_packet_batch *pb,
                          long long now, ovs_be16 dl_type)
{
    if (ipf_get_enabled(ipf) || atomic_count_get(&ipf->nfrag)) {
        bool v6 = dl_type == htons(ETH_TYPE_IPV6);
        ipf_post_execute_reass_pkts(ipf, pb, v6);
        ipf_send_completed_frags(ipf, pb, now, v6);
        ipf_send_expired_frags(ipf, pb, now, v6);
    }
}

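/* Background thread that periodically purges fragment lists which can no
 * longer be sent, e.g. because batches are no longer flowing through the
 * module; see 'ipf_clean_thread' in struct ipf. */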
static void *
ipf_clean_thread_main(void *f)
{
    struct ipf *ipf = f;

    enum {
        IPF_FRAG_LIST_CLEAN_TIMEOUT = 60000,
    };

    while (!latch_is_set(&ipf->ipf_clean_thread_exit)) {

        long long now = time_msec();

        if (!ovs_list_is_empty(&ipf->frag_exp_list) ||
            !ovs_list_is_empty(&ipf->frag_complete_list)) {

            ovs_mutex_lock(&ipf->ipf_lock);

            struct ipf_list *ipf_list, *next;
            LIST_FOR_EACH_SAFE (ipf_list, next, list_node,
                                &ipf->frag_exp_list) {
                if (ipf_purge_list_check(ipf, ipf_list, now)) {
                    ipf_expiry_list_clean(&ipf->frag_lists, ipf_list);
                }
            }

            LIST_FOR_EACH_SAFE (ipf_list, next, list_node,
                                &ipf->frag_complete_list) {
                if (ipf_purge_list_check(ipf, ipf_list, now)) {
                    ipf_completed_list_clean(&ipf->frag_lists, ipf_list);
                }
            }

            ovs_mutex_unlock(&ipf->ipf_lock);
        }

        poll_timer_wait_until(now + IPF_FRAG_LIST_CLEAN_TIMEOUT);
        latch_wait(&ipf->ipf_clean_thread_exit);
        poll_block();
    }

    return NULL;
}

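/* A minimal usage sketch of this module's public API, assuming a
 * hypothetical caller similar to a datapath's conntrack integration
 * (the caller's variable names here are illustrative only):
 *
 *     struct ipf *ipf = ipf_init();
 *     ...
 *     ipf_preprocess_conntrack(ipf, &batch, now, dl_type, zone, hash_basis);
 *     ... pass 'batch' through conntrack ...
 *     ipf_postprocess_conntrack(ipf, &batch, now, dl_type);
 *     ...
 *     ipf_destroy(ipf);
 */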
struct ipf *
ipf_init(void)
{
    struct ipf *ipf = xzalloc(sizeof *ipf);

    ovs_mutex_init_adaptive(&ipf->ipf_lock);
    ovs_mutex_lock(&ipf->ipf_lock);
    hmap_init(&ipf->frag_lists);
    ovs_list_init(&ipf->frag_exp_list);
    ovs_list_init(&ipf->frag_complete_list);
    ovs_list_init(&ipf->reassembled_pkt_list);
    atomic_init(&ipf->min_v4_frag_size, IPF_V4_FRAG_SIZE_MIN_DEF);
    atomic_init(&ipf->min_v6_frag_size, IPF_V6_FRAG_SIZE_MIN_DEF);
    ipf->max_v4_frag_list_size = DIV_ROUND_UP(
        IPV4_PACKET_MAX_SIZE - IPV4_PACKET_MAX_HDR_SIZE,
        ipf->min_v4_frag_size - IPV4_PACKET_MAX_HDR_SIZE);
    ovs_mutex_unlock(&ipf->ipf_lock);
    atomic_count_init(&ipf->nfrag, 0);
    for (size_t i = 0; i < IPF_NFRAGS_NUM_CNTS; i++) {
        atomic_init(&ipf->n4frag_cnt[i], 0);
        atomic_init(&ipf->n6frag_cnt[i], 0);
    }
    atomic_init(&ipf->nfrag_max, IPF_MAX_FRAGS_DEFAULT);
    atomic_init(&ipf->ifp_v4_enabled, true);
    atomic_init(&ipf->ifp_v6_enabled, true);
    latch_init(&ipf->ipf_clean_thread_exit);
    ipf->ipf_clean_thread = ovs_thread_create("ipf_clean",
                                              ipf_clean_thread_main, ipf);

    return ipf;
}

void
ipf_destroy(struct ipf *ipf)
{
    ovs_mutex_lock(&ipf->ipf_lock);
    latch_set(&ipf->ipf_clean_thread_exit);
    pthread_join(ipf->ipf_clean_thread, NULL);
    latch_destroy(&ipf->ipf_clean_thread_exit);

    struct ipf_list *ipf_list;
    HMAP_FOR_EACH_POP (ipf_list, node, &ipf->frag_lists) {
        while (ipf_list->last_sent_idx < ipf_list->last_inuse_idx) {
            struct dp_packet *pkt
                = ipf_list->frag_list[ipf_list->last_sent_idx + 1].pkt;
            if (!ipf_list->frag_list[ipf_list->last_sent_idx + 1].dnsteal) {
                dp_packet_delete(pkt);
            }
            atomic_count_dec(&ipf->nfrag);
            ipf_list->last_sent_idx++;
        }
        free(ipf_list->frag_list);
        free(ipf_list);
    }

    if (atomic_count_get(&ipf->nfrag)) {
        VLOG_WARN("ipf destroy with non-zero fragment count.");
    }

    struct reassembled_pkt *rp;
    LIST_FOR_EACH_POP (rp, rp_list_node, &ipf->reassembled_pkt_list) {
        dp_packet_delete(rp->pkt);
        free(rp);
    }

    hmap_destroy(&ipf->frag_lists);
    ovs_list_poison(&ipf->frag_exp_list);
    ovs_list_poison(&ipf->frag_complete_list);
    ovs_list_poison(&ipf->reassembled_pkt_list);
    ovs_mutex_unlock(&ipf->ipf_lock);
    ovs_mutex_destroy(&ipf->ipf_lock);
    free(ipf);
}

int
ipf_set_enabled(struct ipf *ipf, bool v6, bool enable)
{
    atomic_store_relaxed(v6 ? &ipf->ifp_v6_enabled : &ipf->ifp_v4_enabled,
                         enable);
    return 0;
}

int
ipf_set_min_frag(struct ipf *ipf, bool v6, uint32_t value)
{
    /* If the user specifies an unreasonably large number, fragmentation
     * will not work well but it will not blow up. */
    if (value < (v6 ? IPF_V6_FRAG_SIZE_LBOUND : IPF_V4_FRAG_SIZE_LBOUND)) {
        return 1;
    }

    ovs_mutex_lock(&ipf->ipf_lock);
    if (v6) {
        atomic_store_relaxed(&ipf->min_v6_frag_size, value);
    } else {
        atomic_store_relaxed(&ipf->min_v4_frag_size, value);
        ipf->max_v4_frag_list_size = DIV_ROUND_UP(
            IPV4_PACKET_MAX_SIZE - IPV4_PACKET_MAX_HDR_SIZE,
            ipf->min_v4_frag_size - IPV4_PACKET_MAX_HDR_SIZE);
    }
    ovs_mutex_unlock(&ipf->ipf_lock);
    return 0;
}

int
ipf_set_max_nfrags(struct ipf *ipf, uint32_t value)
{
    if (value > IPF_NFRAG_UBOUND) {
        return 1;
    }
    atomic_store_relaxed(&ipf->nfrag_max, value);
    return 0;
}

int
ipf_get_status(struct ipf *ipf, struct ipf_status *ipf_status)
{
    ipf_status->nfrag = atomic_count_get(&ipf->nfrag);
    atomic_read_relaxed(&ipf->nfrag_max, &ipf_status->nfrag_max);

    atomic_read_relaxed(&ipf->ifp_v4_enabled, &ipf_status->v4.enabled);
    atomic_read_relaxed(&ipf->min_v4_frag_size,
                        &ipf_status->v4.min_frag_size);
    atomic_read_relaxed(&ipf->n4frag_cnt[IPF_NFRAGS_ACCEPTED],
                        &ipf_status->v4.nfrag_accepted);
    atomic_read_relaxed(&ipf->n4frag_cnt[IPF_NFRAGS_COMPL_SENT],
                        &ipf_status->v4.nfrag_completed_sent);
    atomic_read_relaxed(&ipf->n4frag_cnt[IPF_NFRAGS_EXPD_SENT],
                        &ipf_status->v4.nfrag_expired_sent);
    atomic_read_relaxed(&ipf->n4frag_cnt[IPF_NFRAGS_TOO_SMALL],
                        &ipf_status->v4.nfrag_too_small);
    atomic_read_relaxed(&ipf->n4frag_cnt[IPF_NFRAGS_OVERLAP],
                        &ipf_status->v4.nfrag_overlap);
    atomic_read_relaxed(&ipf->n4frag_cnt[IPF_NFRAGS_PURGED],
                        &ipf_status->v4.nfrag_purged);

    atomic_read_relaxed(&ipf->ifp_v6_enabled, &ipf_status->v6.enabled);
    atomic_read_relaxed(&ipf->min_v6_frag_size,
                        &ipf_status->v6.min_frag_size);
    atomic_read_relaxed(&ipf->n6frag_cnt[IPF_NFRAGS_ACCEPTED],
                        &ipf_status->v6.nfrag_accepted);
    atomic_read_relaxed(&ipf->n6frag_cnt[IPF_NFRAGS_COMPL_SENT],
                        &ipf_status->v6.nfrag_completed_sent);
    atomic_read_relaxed(&ipf->n6frag_cnt[IPF_NFRAGS_EXPD_SENT],
                        &ipf_status->v6.nfrag_expired_sent);
    atomic_read_relaxed(&ipf->n6frag_cnt[IPF_NFRAGS_TOO_SMALL],
                        &ipf_status->v6.nfrag_too_small);
    atomic_read_relaxed(&ipf->n6frag_cnt[IPF_NFRAGS_OVERLAP],
                        &ipf_status->v6.nfrag_overlap);
    atomic_read_relaxed(&ipf->n6frag_cnt[IPF_NFRAGS_PURGED],
                        &ipf_status->v6.nfrag_purged);
    return 0;
}

struct ipf_dump_ctx {
    struct hmap_position bucket_pos;
};

/* Allocates an 'ipf_dump_ctx' to keep track of an hmap position. The
 * caller must call ipf_dump_done() when dumping is finished. */
int
ipf_dump_start(struct ipf_dump_ctx **ipf_dump_ctx)
{
    *ipf_dump_ctx = xzalloc(sizeof **ipf_dump_ctx);
    return 0;
}

/* Creates a string representation of the state of an 'ipf_list' and puts
 * it in 'ds'. */
static void
ipf_dump_create(const struct ipf_list *ipf_list, struct ds *ds)
{
    ds_put_cstr(ds, "(");
    if (ipf_list->key.dl_type == htons(ETH_TYPE_IP)) {
        ds_put_format(ds, "src="IP_FMT",dst="IP_FMT",",
                      IP_ARGS(ipf_list->key.src_addr.ipv4),
                      IP_ARGS(ipf_list->key.dst_addr.ipv4));
    } else {
        ds_put_cstr(ds, "src=");
        ipv6_format_addr(&ipf_list->key.src_addr.ipv6, ds);
        ds_put_cstr(ds, ",dst=");
        ipv6_format_addr(&ipf_list->key.dst_addr.ipv6, ds);
        ds_put_cstr(ds, ",");
    }

    ds_put_format(ds, "recirc_id=%u,ip_id=%u,dl_type=0x%x,zone=%u,nw_proto=%u",
                  ipf_list->key.recirc_id, ntohl(ipf_list->key.ip_id),
                  ntohs(ipf_list->key.dl_type), ipf_list->key.zone,
                  ipf_list->key.nw_proto);

    ds_put_format(ds, ",num_fragments=%u,state=%s",
                  ipf_list->last_inuse_idx + 1,
                  ipf_state_name[ipf_list->state]);

    ds_put_cstr(ds, ")");
}

/* Finds the next ipf list starting from 'ipf_dump_ctx->bucket_pos' and uses
 * ipf_dump_create() to create a string representation of the state of the
 * ipf list, to which 'dump' is pointed. Returns EOF when there are no
 * more ipf lists. */
int
ipf_dump_next(struct ipf *ipf, struct ipf_dump_ctx *ipf_dump_ctx, char **dump)
{
    ovs_mutex_lock(&ipf->ipf_lock);

    struct hmap_node *node = hmap_at_position(&ipf->frag_lists,
                                              &ipf_dump_ctx->bucket_pos);
    if (!node) {
        ovs_mutex_unlock(&ipf->ipf_lock);
        return EOF;
    } else {
        struct ipf_list *ipf_list_;
        INIT_CONTAINER(ipf_list_, node, node);
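        /* Copy the list under the lock, so that the string can be built
         * after dropping the lock. */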
        struct ipf_list ipf_list = *ipf_list_;
        ovs_mutex_unlock(&ipf->ipf_lock);
        struct ds ds = DS_EMPTY_INITIALIZER;
        ipf_dump_create(&ipf_list, &ds);
        *dump = ds_steal_cstr(&ds);
        return 0;
    }
}

/* Frees 'ipf_dump_ctx' allocated by ipf_dump_start(). */
int
ipf_dump_done(struct ipf_dump_ctx *ipf_dump_ctx)
{
    free(ipf_dump_ctx);
    return 0;
}