]>
Commit | Line | Data |
---|---|---|
1 | /* | |
2 | * Copyright (c) 2009, 2010, 2011, 2012, 2013 Nicira, Inc. | |
3 | * | |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); | |
5 | * you may not use this file except in compliance with the License. | |
6 | * You may obtain a copy of the License at: | |
7 | * | |
8 | * http://www.apache.org/licenses/LICENSE-2.0 | |
9 | * | |
10 | * Unless required by applicable law or agreed to in writing, software | |
11 | * distributed under the License is distributed on an "AS IS" BASIS, | |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
13 | * See the License for the specific language governing permissions and | |
14 | * limitations under the License. | |
15 | */ | |
16 | ||
17 | #include <config.h> | |
18 | #include "packets.h" | |
19 | #include <arpa/inet.h> | |
20 | #include <sys/socket.h> | |
21 | #include <netinet/in.h> | |
22 | #include <netinet/ip6.h> | |
23 | #include <stdlib.h> | |
24 | #include "byte-order.h" | |
25 | #include "csum.h" | |
26 | #include "crc32c.h" | |
27 | #include "flow.h" | |
28 | #include "hmap.h" | |
29 | #include "dynamic-string.h" | |
30 | #include "ofpbuf.h" | |
31 | #include "ovs-thread.h" | |
32 | ||
33 | const struct in6_addr in6addr_exact = IN6ADDR_EXACT_INIT; | |
34 | ||
/* Interprets 's' as a datapath ID written as exactly 16 hexadecimal digits.
 *
 * Returns true and stores the parsed value in '*dpidp' when 's' is well
 * formed and nonzero.  Otherwise (wrong length, a non-hexadecimal character,
 * or an all-zeros value) stores 0 in '*dpidp' and returns false. */
bool
dpid_from_string(const char *s, uint64_t *dpidp)
{
    static const char hex_digits[] = "0123456789abcdefABCDEF";
    uint64_t dpid = 0;

    if (strlen(s) == 16 && strspn(s, hex_digits) == 16) {
        dpid = strtoull(s, NULL, 16);
    }

    *dpidp = dpid;
    return dpid != 0;
}
48 | ||
49 | /* Returns true if 'ea' is a reserved address, that a bridge must never | |
50 | * forward, false otherwise. | |
51 | * | |
52 | * If you change this function's behavior, please update corresponding | |
53 | * documentation in vswitch.xml at the same time. */ | |
54 | bool | |
55 | eth_addr_is_reserved(const uint8_t ea[ETH_ADDR_LEN]) | |
56 | { | |
57 | struct eth_addr_node { | |
58 | struct hmap_node hmap_node; | |
59 | const uint64_t ea64; | |
60 | }; | |
61 | ||
62 | static struct eth_addr_node nodes[] = { | |
63 | /* STP, IEEE pause frames, and other reserved protocols. */ | |
64 | { HMAP_NODE_NULL_INITIALIZER, 0x0180c2000000ULL }, | |
65 | { HMAP_NODE_NULL_INITIALIZER, 0x0180c2000001ULL }, | |
66 | { HMAP_NODE_NULL_INITIALIZER, 0x0180c2000002ULL }, | |
67 | { HMAP_NODE_NULL_INITIALIZER, 0x0180c2000003ULL }, | |
68 | { HMAP_NODE_NULL_INITIALIZER, 0x0180c2000004ULL }, | |
69 | { HMAP_NODE_NULL_INITIALIZER, 0x0180c2000005ULL }, | |
70 | { HMAP_NODE_NULL_INITIALIZER, 0x0180c2000006ULL }, | |
71 | { HMAP_NODE_NULL_INITIALIZER, 0x0180c2000007ULL }, | |
72 | { HMAP_NODE_NULL_INITIALIZER, 0x0180c2000008ULL }, | |
73 | { HMAP_NODE_NULL_INITIALIZER, 0x0180c2000009ULL }, | |
74 | { HMAP_NODE_NULL_INITIALIZER, 0x0180c200000aULL }, | |
75 | { HMAP_NODE_NULL_INITIALIZER, 0x0180c200000bULL }, | |
76 | { HMAP_NODE_NULL_INITIALIZER, 0x0180c200000cULL }, | |
77 | { HMAP_NODE_NULL_INITIALIZER, 0x0180c200000dULL }, | |
78 | { HMAP_NODE_NULL_INITIALIZER, 0x0180c200000eULL }, | |
79 | { HMAP_NODE_NULL_INITIALIZER, 0x0180c200000fULL }, | |
80 | ||
81 | /* Extreme protocols. */ | |
82 | { HMAP_NODE_NULL_INITIALIZER, 0x00e02b000000ULL }, /* EDP. */ | |
83 | { HMAP_NODE_NULL_INITIALIZER, 0x00e02b000004ULL }, /* EAPS. */ | |
84 | { HMAP_NODE_NULL_INITIALIZER, 0x00e02b000006ULL }, /* EAPS. */ | |
85 | ||
86 | /* Cisco protocols. */ | |
87 | { HMAP_NODE_NULL_INITIALIZER, 0x01000c000000ULL }, /* ISL. */ | |
88 | { HMAP_NODE_NULL_INITIALIZER, 0x01000cccccccULL }, /* PAgP, UDLD, CDP, | |
89 | * DTP, VTP. */ | |
90 | { HMAP_NODE_NULL_INITIALIZER, 0x01000ccccccdULL }, /* PVST+. */ | |
91 | { HMAP_NODE_NULL_INITIALIZER, 0x01000ccdcdcdULL }, /* STP Uplink Fast, | |
92 | * FlexLink. */ | |
93 | ||
94 | /* Cisco CFM. */ | |
95 | { HMAP_NODE_NULL_INITIALIZER, 0x01000cccccc0ULL }, | |
96 | { HMAP_NODE_NULL_INITIALIZER, 0x01000cccccc1ULL }, | |
97 | { HMAP_NODE_NULL_INITIALIZER, 0x01000cccccc2ULL }, | |
98 | { HMAP_NODE_NULL_INITIALIZER, 0x01000cccccc3ULL }, | |
99 | { HMAP_NODE_NULL_INITIALIZER, 0x01000cccccc4ULL }, | |
100 | { HMAP_NODE_NULL_INITIALIZER, 0x01000cccccc5ULL }, | |
101 | { HMAP_NODE_NULL_INITIALIZER, 0x01000cccccc6ULL }, | |
102 | { HMAP_NODE_NULL_INITIALIZER, 0x01000cccccc7ULL }, | |
103 | }; | |
104 | ||
105 | static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER; | |
106 | struct eth_addr_node *node; | |
107 | static struct hmap addrs; | |
108 | uint64_t ea64; | |
109 | ||
110 | if (ovsthread_once_start(&once)) { | |
111 | hmap_init(&addrs); | |
112 | for (node = nodes; node < &nodes[ARRAY_SIZE(nodes)]; node++) { | |
113 | hmap_insert(&addrs, &node->hmap_node, | |
114 | hash_2words(node->ea64, node->ea64 >> 32)); | |
115 | } | |
116 | ovsthread_once_done(&once); | |
117 | } | |
118 | ||
119 | ea64 = eth_addr_to_uint64(ea); | |
120 | HMAP_FOR_EACH_IN_BUCKET (node, hmap_node, hash_2words(ea64, ea64 >> 32), | |
121 | &addrs) { | |
122 | if (node->ea64 == ea64) { | |
123 | return true; | |
124 | } | |
125 | } | |
126 | return false; | |
127 | } | |
128 | ||
129 | bool | |
130 | eth_addr_from_string(const char *s, uint8_t ea[ETH_ADDR_LEN]) | |
131 | { | |
132 | if (sscanf(s, ETH_ADDR_SCAN_FMT, ETH_ADDR_SCAN_ARGS(ea)) | |
133 | == ETH_ADDR_SCAN_COUNT) { | |
134 | return true; | |
135 | } else { | |
136 | memset(ea, 0, ETH_ADDR_LEN); | |
137 | return false; | |
138 | } | |
139 | } | |
140 | ||
141 | /* Fills 'b' with a Reverse ARP packet with Ethernet source address 'eth_src'. | |
142 | * This function is used by Open vSwitch to compose packets in cases where | |
143 | * context is important but content doesn't (or shouldn't) matter. | |
144 | * | |
145 | * The returned packet has enough headroom to insert an 802.1Q VLAN header if | |
146 | * desired. */ | |
147 | void | |
148 | compose_rarp(struct ofpbuf *b, const uint8_t eth_src[ETH_ADDR_LEN]) | |
149 | { | |
150 | struct eth_header *eth; | |
151 | struct arp_eth_header *arp; | |
152 | ||
153 | ofpbuf_clear(b); | |
154 | ofpbuf_prealloc_tailroom(b, ETH_HEADER_LEN + VLAN_HEADER_LEN | |
155 | + ARP_ETH_HEADER_LEN); | |
156 | ofpbuf_reserve(b, VLAN_HEADER_LEN); | |
157 | eth = ofpbuf_put_uninit(b, sizeof *eth); | |
158 | memcpy(eth->eth_dst, eth_addr_broadcast, ETH_ADDR_LEN); | |
159 | memcpy(eth->eth_src, eth_src, ETH_ADDR_LEN); | |
160 | eth->eth_type = htons(ETH_TYPE_RARP); | |
161 | ||
162 | arp = ofpbuf_put_uninit(b, sizeof *arp); | |
163 | arp->ar_hrd = htons(ARP_HRD_ETHERNET); | |
164 | arp->ar_pro = htons(ARP_PRO_IP); | |
165 | arp->ar_hln = sizeof arp->ar_sha; | |
166 | arp->ar_pln = sizeof arp->ar_spa; | |
167 | arp->ar_op = htons(ARP_OP_RARP); | |
168 | memcpy(arp->ar_sha, eth_src, ETH_ADDR_LEN); | |
169 | arp->ar_spa = htonl(0); | |
170 | memcpy(arp->ar_tha, eth_src, ETH_ADDR_LEN); | |
171 | arp->ar_tpa = htonl(0); | |
172 | } | |
173 | ||
174 | /* Insert VLAN header according to given TCI. Packet passed must be Ethernet | |
175 | * packet. Ignores the CFI bit of 'tci' using 0 instead. | |
176 | * | |
177 | * Also sets 'packet->l2' to point to the new Ethernet header. */ | |
178 | void | |
179 | eth_push_vlan(struct ofpbuf *packet, ovs_be16 tci) | |
180 | { | |
181 | struct eth_header *eh = packet->data; | |
182 | struct vlan_eth_header *veh; | |
183 | ||
184 | /* Insert new 802.1Q header. */ | |
185 | struct vlan_eth_header tmp; | |
186 | memcpy(tmp.veth_dst, eh->eth_dst, ETH_ADDR_LEN); | |
187 | memcpy(tmp.veth_src, eh->eth_src, ETH_ADDR_LEN); | |
188 | tmp.veth_type = htons(ETH_TYPE_VLAN); | |
189 | tmp.veth_tci = tci & htons(~VLAN_CFI); | |
190 | tmp.veth_next_type = eh->eth_type; | |
191 | ||
192 | veh = ofpbuf_push_uninit(packet, VLAN_HEADER_LEN); | |
193 | memcpy(veh, &tmp, sizeof tmp); | |
194 | ||
195 | packet->l2 = packet->data; | |
196 | } | |
197 | ||
198 | /* Removes outermost VLAN header (if any is present) from 'packet'. | |
199 | * | |
200 | * 'packet->l2_5' should initially point to 'packet''s outer-most MPLS header | |
201 | * or may be NULL if there are no MPLS headers. */ | |
202 | void | |
203 | eth_pop_vlan(struct ofpbuf *packet) | |
204 | { | |
205 | struct vlan_eth_header *veh = packet->l2; | |
206 | if (packet->size >= sizeof *veh | |
207 | && veh->veth_type == htons(ETH_TYPE_VLAN)) { | |
208 | struct eth_header tmp; | |
209 | ||
210 | memcpy(tmp.eth_dst, veh->veth_dst, ETH_ADDR_LEN); | |
211 | memcpy(tmp.eth_src, veh->veth_src, ETH_ADDR_LEN); | |
212 | tmp.eth_type = veh->veth_next_type; | |
213 | ||
214 | ofpbuf_pull(packet, VLAN_HEADER_LEN); | |
215 | packet->l2 = (char*)packet->l2 + VLAN_HEADER_LEN; | |
216 | memcpy(packet->data, &tmp, sizeof tmp); | |
217 | } | |
218 | } | |
219 | ||
220 | /* Return depth of mpls stack. | |
221 | * | |
222 | * 'packet->l2_5' should initially point to 'packet''s outer-most MPLS header | |
223 | * or may be NULL if there are no MPLS headers. */ | |
224 | uint16_t | |
225 | eth_mpls_depth(const struct ofpbuf *packet) | |
226 | { | |
227 | struct mpls_hdr *mh = packet->l2_5; | |
228 | uint16_t depth; | |
229 | ||
230 | if (!mh) { | |
231 | return 0; | |
232 | } | |
233 | ||
234 | depth = 0; | |
235 | while (packet->size >= ((char *)mh - (char *)packet->data) + sizeof *mh) { | |
236 | depth++; | |
237 | if (mh->mpls_lse & htonl(MPLS_BOS_MASK)) { | |
238 | break; | |
239 | } | |
240 | mh++; | |
241 | } | |
242 | ||
243 | return depth; | |
244 | } | |
245 | ||
246 | /* Set ethertype of the packet. */ | |
247 | void | |
248 | set_ethertype(struct ofpbuf *packet, ovs_be16 eth_type) | |
249 | { | |
250 | struct eth_header *eh = packet->data; | |
251 | ||
252 | if (eh->eth_type == htons(ETH_TYPE_VLAN)) { | |
253 | ovs_be16 *p; | |
254 | p = ALIGNED_CAST(ovs_be16 *, | |
255 | (char *)(packet->l2_5 ? packet->l2_5 : packet->l3) - 2); | |
256 | *p = eth_type; | |
257 | } else { | |
258 | eh->eth_type = eth_type; | |
259 | } | |
260 | } | |
261 | ||
262 | static bool is_mpls(struct ofpbuf *packet) | |
263 | { | |
264 | return packet->l2_5 != NULL; | |
265 | } | |
266 | ||
267 | /* Set time to live (TTL) of an MPLS label stack entry (LSE). */ | |
268 | void | |
269 | set_mpls_lse_ttl(ovs_be32 *lse, uint8_t ttl) | |
270 | { | |
271 | *lse &= ~htonl(MPLS_TTL_MASK); | |
272 | *lse |= htonl((ttl << MPLS_TTL_SHIFT) & MPLS_TTL_MASK); | |
273 | } | |
274 | ||
275 | /* Set traffic class (TC) of an MPLS label stack entry (LSE). */ | |
276 | void | |
277 | set_mpls_lse_tc(ovs_be32 *lse, uint8_t tc) | |
278 | { | |
279 | *lse &= ~htonl(MPLS_TC_MASK); | |
280 | *lse |= htonl((tc << MPLS_TC_SHIFT) & MPLS_TC_MASK); | |
281 | } | |
282 | ||
283 | /* Set label of an MPLS label stack entry (LSE). */ | |
284 | void | |
285 | set_mpls_lse_label(ovs_be32 *lse, ovs_be32 label) | |
286 | { | |
287 | *lse &= ~htonl(MPLS_LABEL_MASK); | |
288 | *lse |= htonl((ntohl(label) << MPLS_LABEL_SHIFT) & MPLS_LABEL_MASK); | |
289 | } | |
290 | ||
291 | /* Set bottom of stack (BoS) bit of an MPLS label stack entry (LSE). */ | |
292 | void | |
293 | set_mpls_lse_bos(ovs_be32 *lse, uint8_t bos) | |
294 | { | |
295 | *lse &= ~htonl(MPLS_BOS_MASK); | |
296 | *lse |= htonl((bos << MPLS_BOS_SHIFT) & MPLS_BOS_MASK); | |
297 | } | |
298 | ||
299 | /* Compose an MPLS label stack entry (LSE) from its components: | |
300 | * label, traffic class (TC), time to live (TTL) and | |
301 | * bottom of stack (BoS) bit. */ | |
302 | ovs_be32 | |
303 | set_mpls_lse_values(uint8_t ttl, uint8_t tc, uint8_t bos, ovs_be32 label) | |
304 | { | |
305 | ovs_be32 lse = htonl(0); | |
306 | set_mpls_lse_ttl(&lse, ttl); | |
307 | set_mpls_lse_tc(&lse, tc); | |
308 | set_mpls_lse_bos(&lse, bos); | |
309 | set_mpls_lse_label(&lse, label); | |
310 | return lse; | |
311 | } | |
312 | ||
313 | /* Push an new MPLS stack entry onto the MPLS stack and adjust 'packet->l2' and | |
314 | * 'packet->l2_5' accordingly. The new entry will be the outermost entry on | |
315 | * the stack. | |
316 | * | |
317 | * Previous to calling this function, 'packet->l2_5' must be set; if the MPLS | |
318 | * label to be pushed will be the first label in 'packet', then it should be | |
319 | * the same as 'packet->l3'. */ | |
320 | static void | |
321 | push_mpls_lse(struct ofpbuf *packet, struct mpls_hdr *mh) | |
322 | { | |
323 | char * header; | |
324 | size_t len; | |
325 | header = ofpbuf_push_uninit(packet, MPLS_HLEN); | |
326 | len = (char *)packet->l2_5 - (char *)packet->l2; | |
327 | memmove(header, packet->l2, len); | |
328 | memcpy(header + len, mh, sizeof *mh); | |
329 | packet->l2 = (char*)packet->l2 - MPLS_HLEN; | |
330 | packet->l2_5 = (char*)packet->l2_5 - MPLS_HLEN; | |
331 | } | |
332 | ||
333 | /* Set MPLS label stack entry to outermost MPLS header.*/ | |
334 | void | |
335 | set_mpls_lse(struct ofpbuf *packet, ovs_be32 mpls_lse) | |
336 | { | |
337 | struct mpls_hdr *mh = packet->l2_5; | |
338 | ||
339 | /* Packet type should be MPLS to set label stack entry. */ | |
340 | if (is_mpls(packet)) { | |
341 | /* Update mpls label stack entry. */ | |
342 | mh->mpls_lse = mpls_lse; | |
343 | } | |
344 | } | |
345 | ||
346 | /* Push MPLS label stack entry 'lse' onto 'packet' as the the outermost MPLS | |
347 | * header. If 'packet' does not already have any MPLS labels, then its | |
348 | * Ethertype is changed to 'ethtype' (which must be an MPLS Ethertype). */ | |
349 | void | |
350 | push_mpls(struct ofpbuf *packet, ovs_be16 ethtype, ovs_be32 lse) | |
351 | { | |
352 | struct mpls_hdr mh; | |
353 | ||
354 | if (!eth_type_mpls(ethtype)) { | |
355 | return; | |
356 | } | |
357 | ||
358 | if (!is_mpls(packet)) { | |
359 | /* Set ethtype and MPLS label stack entry. */ | |
360 | set_ethertype(packet, ethtype); | |
361 | packet->l2_5 = packet->l3; | |
362 | } | |
363 | ||
364 | /* Push new MPLS shim header onto packet. */ | |
365 | mh.mpls_lse = lse; | |
366 | push_mpls_lse(packet, &mh); | |
367 | } | |
368 | ||
369 | /* If 'packet' is an MPLS packet, removes its outermost MPLS label stack entry. | |
370 | * If the label that was removed was the only MPLS label, changes 'packet''s | |
371 | * Ethertype to 'ethtype' (which ordinarily should not be an MPLS | |
372 | * Ethertype). */ | |
373 | void | |
374 | pop_mpls(struct ofpbuf *packet, ovs_be16 ethtype) | |
375 | { | |
376 | struct mpls_hdr *mh = NULL; | |
377 | ||
378 | if (is_mpls(packet)) { | |
379 | size_t len; | |
380 | mh = packet->l2_5; | |
381 | len = (char*)packet->l2_5 - (char*)packet->l2; | |
382 | set_ethertype(packet, ethtype); | |
383 | if (mh->mpls_lse & htonl(MPLS_BOS_MASK)) { | |
384 | packet->l2_5 = NULL; | |
385 | } else { | |
386 | packet->l2_5 = (char*)packet->l2_5 + MPLS_HLEN; | |
387 | } | |
388 | /* Shift the l2 header forward. */ | |
389 | memmove((char*)packet->data + MPLS_HLEN, packet->data, len); | |
390 | packet->size -= MPLS_HLEN; | |
391 | packet->data = (char*)packet->data + MPLS_HLEN; | |
392 | packet->l2 = (char*)packet->l2 + MPLS_HLEN; | |
393 | } | |
394 | } | |
395 | ||
396 | /* Converts hex digits in 'hex' to an Ethernet packet in '*packetp'. The | |
397 | * caller must free '*packetp'. On success, returns NULL. On failure, returns | |
398 | * an error message and stores NULL in '*packetp'. */ | |
399 | const char * | |
400 | eth_from_hex(const char *hex, struct ofpbuf **packetp) | |
401 | { | |
402 | struct ofpbuf *packet; | |
403 | ||
404 | packet = *packetp = ofpbuf_new(strlen(hex) / 2); | |
405 | ||
406 | if (ofpbuf_put_hex(packet, hex, NULL)[0] != '\0') { | |
407 | ofpbuf_delete(packet); | |
408 | *packetp = NULL; | |
409 | return "Trailing garbage in packet data"; | |
410 | } | |
411 | ||
412 | if (packet->size < ETH_HEADER_LEN) { | |
413 | ofpbuf_delete(packet); | |
414 | *packetp = NULL; | |
415 | return "Packet data too short for Ethernet"; | |
416 | } | |
417 | ||
418 | return NULL; | |
419 | } | |
420 | ||
421 | void | |
422 | eth_format_masked(const uint8_t eth[ETH_ADDR_LEN], | |
423 | const uint8_t mask[ETH_ADDR_LEN], struct ds *s) | |
424 | { | |
425 | ds_put_format(s, ETH_ADDR_FMT, ETH_ADDR_ARGS(eth)); | |
426 | if (mask && !eth_mask_is_exact(mask)) { | |
427 | ds_put_format(s, "/"ETH_ADDR_FMT, ETH_ADDR_ARGS(mask)); | |
428 | } | |
429 | } | |
430 | ||
431 | void | |
432 | eth_addr_bitand(const uint8_t src[ETH_ADDR_LEN], | |
433 | const uint8_t mask[ETH_ADDR_LEN], | |
434 | uint8_t dst[ETH_ADDR_LEN]) | |
435 | { | |
436 | int i; | |
437 | ||
438 | for (i = 0; i < ETH_ADDR_LEN; i++) { | |
439 | dst[i] = src[i] & mask[i]; | |
440 | } | |
441 | } | |
442 | ||
443 | /* Given the IP netmask 'netmask', returns the number of bits of the IP address | |
444 | * that it specifies, that is, the number of 1-bits in 'netmask'. | |
445 | * | |
446 | * If 'netmask' is not a CIDR netmask (see ip_is_cidr()), the return value will | |
447 | * still be in the valid range but isn't otherwise meaningful. */ | |
448 | int | |
449 | ip_count_cidr_bits(ovs_be32 netmask) | |
450 | { | |
451 | return 32 - ctz(ntohl(netmask)); | |
452 | } | |
453 | ||
454 | void | |
455 | ip_format_masked(ovs_be32 ip, ovs_be32 mask, struct ds *s) | |
456 | { | |
457 | ds_put_format(s, IP_FMT, IP_ARGS(ip)); | |
458 | if (mask != htonl(UINT32_MAX)) { | |
459 | if (ip_is_cidr(mask)) { | |
460 | ds_put_format(s, "/%d", ip_count_cidr_bits(mask)); | |
461 | } else { | |
462 | ds_put_format(s, "/"IP_FMT, IP_ARGS(mask)); | |
463 | } | |
464 | } | |
465 | } | |
466 | ||
467 | ||
/* Stores the string representation of the IPv6 address 'addr' into the
 * character array 'addr_str', which must be at least INET6_ADDRSTRLEN
 * bytes long.
 *
 * If inet_ntop() reports failure, 'addr_str' is set to the empty string, so
 * that it is always a valid null-terminated string on return. */
void
format_ipv6_addr(char *addr_str, const struct in6_addr *addr)
{
    if (!inet_ntop(AF_INET6, addr, addr_str, INET6_ADDRSTRLEN)) {
        /* The buffer contents are unspecified after a failed conversion;
         * callers measure the result with strlen(), so terminate it. */
        addr_str[0] = '\0';
    }
}
476 | ||
477 | void | |
478 | print_ipv6_addr(struct ds *string, const struct in6_addr *addr) | |
479 | { | |
480 | char *dst; | |
481 | ||
482 | ds_reserve(string, string->length + INET6_ADDRSTRLEN); | |
483 | ||
484 | dst = string->string + string->length; | |
485 | format_ipv6_addr(dst, addr); | |
486 | string->length += strlen(dst); | |
487 | } | |
488 | ||
/* Appends IPv6 address 'addr' to 's'.  If 'mask' is nonnull and is not an
 * exact-match mask, also appends the mask: as "/N" (a prefix length) when
 * 'mask' is a CIDR netmask, or as "/" followed by the mask formatted as an
 * IPv6 address otherwise. */
void
print_ipv6_masked(struct ds *s, const struct in6_addr *addr,
                  const struct in6_addr *mask)
{
    print_ipv6_addr(s, addr);

    if (!mask || ipv6_mask_is_exact(mask)) {
        return;
    }

    if (!ipv6_is_cidr(mask)) {
        ds_put_char(s, '/');
        print_ipv6_addr(s, mask);
    } else {
        int prefix_len = ipv6_count_cidr_bits(mask);

        ds_put_format(s, "/%d", prefix_len);
    }
}
504 | ||
/* Returns the bitwise AND of the IPv6 addresses '*a' and '*b'.
 *
 * The AND is done 32 bits at a time where the platform exposes 's6_addr32',
 * and one byte at a time otherwise; the result is the same either way. */
struct in6_addr
ipv6_addr_bitand(const struct in6_addr *a, const struct in6_addr *b)
{
    struct in6_addr result;
    int idx;

#ifdef s6_addr32
    for (idx = 0; idx < 4; idx++) {
        result.s6_addr32[idx] = a->s6_addr32[idx] & b->s6_addr32[idx];
    }
#else
    for (idx = 0; idx < 16; idx++) {
        result.s6_addr[idx] = a->s6_addr[idx] & b->s6_addr[idx];
    }
#endif

    return result;
}
523 | ||
/* Returns an in6_addr consisting of 'mask' high-order 1-bits and 128 - 'mask'
 * low-order 0-bits.
 *
 * 'mask' is clamped to the range 0...128: a negative value yields the
 * all-zeros mask and a value above 128 yields the all-ones mask.  (Without
 * the clamp, an out-of-range 'mask' would write past the end of the 16-byte
 * address or shift by an invalid amount.) */
struct in6_addr
ipv6_create_mask(int mask)
{
    struct in6_addr netmask;
    int n_full_bytes;
    int n_extra_bits;

    if (mask < 0) {
        mask = 0;
    } else if (mask > 128) {
        mask = 128;
    }

    n_full_bytes = mask / 8;
    n_extra_bits = mask % 8;

    memset(&netmask, 0, sizeof netmask);
    memset(netmask.s6_addr, 0xff, n_full_bytes);
    if (n_extra_bits) {
        /* Partial byte: keep only its 'n_extra_bits' high-order bits. */
        netmask.s6_addr[n_full_bytes] = (uint8_t) (0xff << (8 - n_extra_bits));
    }

    return netmask;
}
545 | ||
/* Given the IPv6 netmask 'netmask', returns the number of bits of the IPv6
 * address that it specifies, that is, the number of 1-bits in 'netmask' when
 * 'netmask' is a CIDR netmask (see ipv6_is_cidr()).
 *
 * Whole 0xff bytes are counted 8 bits at a time from the front; the first
 * byte that is not 0xff is then shifted left until it becomes zero, adding
 * one per shift, and counting stops there.
 *
 * If 'netmask' is not a CIDR netmask, the return value will still be in the
 * valid range but isn't otherwise meaningful. */
int
ipv6_count_cidr_bits(const struct in6_addr *netmask)
{
    const uint8_t *bytes = &netmask->s6_addr[0];
    int n_bits = 0;
    int i = 0;

    while (i < 16 && bytes[i] == 0xff) {
        n_bits += 8;
        i++;
    }

    if (i < 16) {
        uint8_t partial = bytes[i];

        while (partial) {
            n_bits++;
            partial <<= 1;
        }
    }

    return n_bits;
}
575 | ||
/* Returns true if 'netmask' is a CIDR netmask, that is, if it consists of N
 * high-order 1-bits and 128-N low-order 0-bits. */
bool
ipv6_is_cidr(const struct in6_addr *netmask)
{
    const uint8_t *bytes = &netmask->s6_addr[0];
    uint8_t inverted;
    int i = 0;

    /* Skip the leading run of all-ones bytes. */
    while (i < 16 && bytes[i] == 0xff) {
        i++;
    }
    if (i == 16) {
        return true;
    }

    /* The first byte that is not 0xff must be 1-bits followed by 0-bits;
     * equivalently, its complement must be of the form 2**k - 1. */
    inverted = (uint8_t) ~bytes[i];
    if (inverted & (inverted + 1)) {
        return false;
    }

    /* Every byte after it must be zero. */
    for (i++; i < 16; i++) {
        if (bytes[i]) {
            return false;
        }
    }

    return true;
}
600 | ||
601 | /* Populates 'b' with an Ethernet II packet headed with the given 'eth_dst', | |
602 | * 'eth_src' and 'eth_type' parameters. A payload of 'size' bytes is allocated | |
603 | * in 'b' and returned. This payload may be populated with appropriate | |
604 | * information by the caller. Sets 'b''s 'l2' and 'l3' pointers to the | |
605 | * Ethernet header and payload respectively. | |
606 | * | |
607 | * The returned packet has enough headroom to insert an 802.1Q VLAN header if | |
608 | * desired. */ | |
609 | void * | |
610 | eth_compose(struct ofpbuf *b, const uint8_t eth_dst[ETH_ADDR_LEN], | |
611 | const uint8_t eth_src[ETH_ADDR_LEN], uint16_t eth_type, | |
612 | size_t size) | |
613 | { | |
614 | void *data; | |
615 | struct eth_header *eth; | |
616 | ||
617 | ofpbuf_clear(b); | |
618 | ||
619 | ofpbuf_prealloc_tailroom(b, ETH_HEADER_LEN + VLAN_HEADER_LEN + size); | |
620 | ofpbuf_reserve(b, VLAN_HEADER_LEN); | |
621 | eth = ofpbuf_put_uninit(b, ETH_HEADER_LEN); | |
622 | data = ofpbuf_put_uninit(b, size); | |
623 | ||
624 | memcpy(eth->eth_dst, eth_dst, ETH_ADDR_LEN); | |
625 | memcpy(eth->eth_src, eth_src, ETH_ADDR_LEN); | |
626 | eth->eth_type = htons(eth_type); | |
627 | ||
628 | b->l2 = eth; | |
629 | b->l3 = data; | |
630 | ||
631 | return data; | |
632 | } | |
633 | ||
634 | static void | |
635 | packet_set_ipv4_addr(struct ofpbuf *packet, ovs_be32 *addr, ovs_be32 new_addr) | |
636 | { | |
637 | struct ip_header *nh = packet->l3; | |
638 | ||
639 | if (nh->ip_proto == IPPROTO_TCP && packet->l7) { | |
640 | struct tcp_header *th = packet->l4; | |
641 | ||
642 | th->tcp_csum = recalc_csum32(th->tcp_csum, *addr, new_addr); | |
643 | } else if (nh->ip_proto == IPPROTO_UDP && packet->l7) { | |
644 | struct udp_header *uh = packet->l4; | |
645 | ||
646 | if (uh->udp_csum) { | |
647 | uh->udp_csum = recalc_csum32(uh->udp_csum, *addr, new_addr); | |
648 | if (!uh->udp_csum) { | |
649 | uh->udp_csum = htons(0xffff); | |
650 | } | |
651 | } | |
652 | } | |
653 | nh->ip_csum = recalc_csum32(nh->ip_csum, *addr, new_addr); | |
654 | *addr = new_addr; | |
655 | } | |
656 | ||
657 | /* Returns true, if packet contains at least one routing header where | |
658 | * segements_left > 0. | |
659 | * | |
660 | * This function assumes that L3 and L4 markers are set in the packet. */ | |
661 | static bool | |
662 | packet_rh_present(struct ofpbuf *packet) | |
663 | { | |
664 | const struct ip6_hdr *nh; | |
665 | int nexthdr; | |
666 | size_t len; | |
667 | size_t remaining; | |
668 | uint8_t *data = packet->l3; | |
669 | ||
670 | remaining = (uint8_t *)packet->l4 - (uint8_t *)packet->l3; | |
671 | ||
672 | if (remaining < sizeof *nh) { | |
673 | return false; | |
674 | } | |
675 | nh = ALIGNED_CAST(struct ip6_hdr *, data); | |
676 | data += sizeof *nh; | |
677 | remaining -= sizeof *nh; | |
678 | nexthdr = nh->ip6_nxt; | |
679 | ||
680 | while (1) { | |
681 | if ((nexthdr != IPPROTO_HOPOPTS) | |
682 | && (nexthdr != IPPROTO_ROUTING) | |
683 | && (nexthdr != IPPROTO_DSTOPTS) | |
684 | && (nexthdr != IPPROTO_AH) | |
685 | && (nexthdr != IPPROTO_FRAGMENT)) { | |
686 | /* It's either a terminal header (e.g., TCP, UDP) or one we | |
687 | * don't understand. In either case, we're done with the | |
688 | * packet, so use it to fill in 'nw_proto'. */ | |
689 | break; | |
690 | } | |
691 | ||
692 | /* We only verify that at least 8 bytes of the next header are | |
693 | * available, but many of these headers are longer. Ensure that | |
694 | * accesses within the extension header are within those first 8 | |
695 | * bytes. All extension headers are required to be at least 8 | |
696 | * bytes. */ | |
697 | if (remaining < 8) { | |
698 | return false; | |
699 | } | |
700 | ||
701 | if (nexthdr == IPPROTO_AH) { | |
702 | /* A standard AH definition isn't available, but the fields | |
703 | * we care about are in the same location as the generic | |
704 | * option header--only the header length is calculated | |
705 | * differently. */ | |
706 | const struct ip6_ext *ext_hdr = (struct ip6_ext *)data; | |
707 | ||
708 | nexthdr = ext_hdr->ip6e_nxt; | |
709 | len = (ext_hdr->ip6e_len + 2) * 4; | |
710 | } else if (nexthdr == IPPROTO_FRAGMENT) { | |
711 | const struct ip6_frag *frag_hdr = ALIGNED_CAST(struct ip6_frag *, | |
712 | data); | |
713 | ||
714 | nexthdr = frag_hdr->ip6f_nxt; | |
715 | len = sizeof *frag_hdr; | |
716 | } else if (nexthdr == IPPROTO_ROUTING) { | |
717 | const struct ip6_rthdr *rh = (struct ip6_rthdr *)data; | |
718 | ||
719 | if (rh->ip6r_segleft > 0) { | |
720 | return true; | |
721 | } | |
722 | ||
723 | nexthdr = rh->ip6r_nxt; | |
724 | len = (rh->ip6r_len + 1) * 8; | |
725 | } else { | |
726 | const struct ip6_ext *ext_hdr = (struct ip6_ext *)data; | |
727 | ||
728 | nexthdr = ext_hdr->ip6e_nxt; | |
729 | len = (ext_hdr->ip6e_len + 1) * 8; | |
730 | } | |
731 | ||
732 | if (remaining < len) { | |
733 | return false; | |
734 | } | |
735 | remaining -= len; | |
736 | data += len; | |
737 | } | |
738 | ||
739 | return false; | |
740 | } | |
741 | ||
742 | static void | |
743 | packet_update_csum128(struct ofpbuf *packet, uint8_t proto, | |
744 | ovs_be32 addr[4], const ovs_be32 new_addr[4]) | |
745 | { | |
746 | if (proto == IPPROTO_TCP && packet->l7) { | |
747 | struct tcp_header *th = packet->l4; | |
748 | ||
749 | th->tcp_csum = recalc_csum128(th->tcp_csum, addr, new_addr); | |
750 | } else if (proto == IPPROTO_UDP && packet->l7) { | |
751 | struct udp_header *uh = packet->l4; | |
752 | ||
753 | if (uh->udp_csum) { | |
754 | uh->udp_csum = recalc_csum128(uh->udp_csum, addr, new_addr); | |
755 | if (!uh->udp_csum) { | |
756 | uh->udp_csum = htons(0xffff); | |
757 | } | |
758 | } | |
759 | } | |
760 | } | |
761 | ||
762 | static void | |
763 | packet_set_ipv6_addr(struct ofpbuf *packet, uint8_t proto, | |
764 | struct in6_addr *addr, const ovs_be32 new_addr[4], | |
765 | bool recalculate_csum) | |
766 | { | |
767 | if (recalculate_csum) { | |
768 | packet_update_csum128(packet, proto, (ovs_be32 *)addr, new_addr); | |
769 | } | |
770 | memcpy(addr, new_addr, sizeof(*addr)); | |
771 | } | |
772 | ||
773 | static void | |
774 | packet_set_ipv6_flow_label(ovs_be32 *flow_label, ovs_be32 flow_key) | |
775 | { | |
776 | *flow_label = (*flow_label & htonl(~IPV6_LABEL_MASK)) | flow_key; | |
777 | } | |
778 | ||
779 | static void | |
780 | packet_set_ipv6_tc(ovs_be32 *flow_label, uint8_t tc) | |
781 | { | |
782 | *flow_label = (*flow_label & htonl(0xF00FFFFF)) | htonl(tc << 20); | |
783 | } | |
784 | ||
785 | /* Modifies the IPv4 header fields of 'packet' to be consistent with 'src', | |
786 | * 'dst', 'tos', and 'ttl'. Updates 'packet''s L4 checksums as appropriate. | |
787 | * 'packet' must contain a valid IPv4 packet with correctly populated l[347] | |
788 | * markers. */ | |
789 | void | |
790 | packet_set_ipv4(struct ofpbuf *packet, ovs_be32 src, ovs_be32 dst, | |
791 | uint8_t tos, uint8_t ttl) | |
792 | { | |
793 | struct ip_header *nh = packet->l3; | |
794 | ||
795 | if (nh->ip_src != src) { | |
796 | packet_set_ipv4_addr(packet, &nh->ip_src, src); | |
797 | } | |
798 | ||
799 | if (nh->ip_dst != dst) { | |
800 | packet_set_ipv4_addr(packet, &nh->ip_dst, dst); | |
801 | } | |
802 | ||
803 | if (nh->ip_tos != tos) { | |
804 | uint8_t *field = &nh->ip_tos; | |
805 | ||
806 | nh->ip_csum = recalc_csum16(nh->ip_csum, htons((uint16_t) *field), | |
807 | htons((uint16_t) tos)); | |
808 | *field = tos; | |
809 | } | |
810 | ||
811 | if (nh->ip_ttl != ttl) { | |
812 | uint8_t *field = &nh->ip_ttl; | |
813 | ||
814 | nh->ip_csum = recalc_csum16(nh->ip_csum, htons(*field << 8), | |
815 | htons(ttl << 8)); | |
816 | *field = ttl; | |
817 | } | |
818 | } | |
819 | ||
820 | /* Modifies the IPv6 header fields of 'packet' to be consistent with 'src', | |
821 | * 'dst', 'traffic class', and 'next hop'. Updates 'packet''s L4 checksums as | |
822 | * appropriate. 'packet' must contain a valid IPv6 packet with correctly | |
823 | * populated l[347] markers. */ | |
824 | void | |
825 | packet_set_ipv6(struct ofpbuf *packet, uint8_t proto, const ovs_be32 src[4], | |
826 | const ovs_be32 dst[4], uint8_t key_tc, ovs_be32 key_fl, | |
827 | uint8_t key_hl) | |
828 | { | |
829 | struct ip6_hdr *nh = packet->l3; | |
830 | ||
831 | if (memcmp(&nh->ip6_src, src, sizeof(ovs_be32[4]))) { | |
832 | packet_set_ipv6_addr(packet, proto, &nh->ip6_src, src, true); | |
833 | } | |
834 | ||
835 | if (memcmp(&nh->ip6_dst, dst, sizeof(ovs_be32[4]))) { | |
836 | packet_set_ipv6_addr(packet, proto, &nh->ip6_dst, dst, | |
837 | !packet_rh_present(packet)); | |
838 | } | |
839 | ||
840 | packet_set_ipv6_tc(&nh->ip6_flow, key_tc); | |
841 | ||
842 | packet_set_ipv6_flow_label(&nh->ip6_flow, key_fl); | |
843 | ||
844 | nh->ip6_hlim = key_hl; | |
845 | } | |
846 | ||
847 | static void | |
848 | packet_set_port(ovs_be16 *port, ovs_be16 new_port, ovs_be16 *csum) | |
849 | { | |
850 | if (*port != new_port) { | |
851 | *csum = recalc_csum16(*csum, *port, new_port); | |
852 | *port = new_port; | |
853 | } | |
854 | } | |
855 | ||
856 | /* Sets the TCP source and destination port ('src' and 'dst' respectively) of | |
857 | * the TCP header contained in 'packet'. 'packet' must be a valid TCP packet | |
858 | * with its l4 marker properly populated. */ | |
859 | void | |
860 | packet_set_tcp_port(struct ofpbuf *packet, ovs_be16 src, ovs_be16 dst) | |
861 | { | |
862 | struct tcp_header *th = packet->l4; | |
863 | ||
864 | packet_set_port(&th->tcp_src, src, &th->tcp_csum); | |
865 | packet_set_port(&th->tcp_dst, dst, &th->tcp_csum); | |
866 | } | |
867 | ||
868 | /* Sets the UDP source and destination port ('src' and 'dst' respectively) of | |
869 | * the UDP header contained in 'packet'. 'packet' must be a valid UDP packet | |
870 | * with its l4 marker properly populated. */ | |
871 | void | |
872 | packet_set_udp_port(struct ofpbuf *packet, ovs_be16 src, ovs_be16 dst) | |
873 | { | |
874 | struct udp_header *uh = packet->l4; | |
875 | ||
876 | if (uh->udp_csum) { | |
877 | packet_set_port(&uh->udp_src, src, &uh->udp_csum); | |
878 | packet_set_port(&uh->udp_dst, dst, &uh->udp_csum); | |
879 | ||
880 | if (!uh->udp_csum) { | |
881 | uh->udp_csum = htons(0xffff); | |
882 | } | |
883 | } else { | |
884 | uh->udp_src = src; | |
885 | uh->udp_dst = dst; | |
886 | } | |
887 | } | |
888 | ||
889 | /* Sets the SCTP source and destination port ('src' and 'dst' respectively) of | |
890 | * the SCTP header contained in 'packet'. 'packet' must be a valid SCTP packet | |
891 | * with its l4 marker properly populated. */ | |
892 | void | |
893 | packet_set_sctp_port(struct ofpbuf *packet, ovs_be16 src, ovs_be16 dst) | |
894 | { | |
895 | struct sctp_header *sh = packet->l4; | |
896 | ovs_be32 old_csum, old_correct_csum, new_csum; | |
897 | uint16_t tp_len = packet->size - ((uint8_t*)sh - (uint8_t*)packet->data); | |
898 | ||
899 | old_csum = sh->sctp_csum; | |
900 | sh->sctp_csum = 0; | |
901 | old_correct_csum = crc32c(packet->l4, tp_len); | |
902 | ||
903 | sh->sctp_src = src; | |
904 | sh->sctp_dst = dst; | |
905 | ||
906 | new_csum = crc32c(packet->l4, tp_len); | |
907 | sh->sctp_csum = old_csum ^ old_correct_csum ^ new_csum; | |
908 | } | |
909 | ||
910 | /* If 'packet' is a TCP packet, returns the TCP flags. Otherwise, returns 0. | |
911 | * | |
912 | * 'flow' must be the flow corresponding to 'packet' and 'packet''s header | |
913 | * pointers must be properly initialized (e.g. with flow_extract()). */ | |
914 | uint8_t | |
915 | packet_get_tcp_flags(const struct ofpbuf *packet, const struct flow *flow) | |
916 | { | |
917 | if (dl_type_is_ip_any(flow->dl_type) && | |
918 | flow->nw_proto == IPPROTO_TCP && packet->l7) { | |
919 | const struct tcp_header *tcp = packet->l4; | |
920 | return TCP_FLAGS(tcp->tcp_ctl); | |
921 | } else { | |
922 | return 0; | |
923 | } | |
924 | } | |
925 | ||
926 | /* Appends a string representation of the TCP flags value 'tcp_flags' | |
927 | * (e.g. obtained via packet_get_tcp_flags() or TCP_FLAGS) to 's', in the | |
928 | * format used by tcpdump. */ | |
929 | void | |
930 | packet_format_tcp_flags(struct ds *s, uint8_t tcp_flags) | |
931 | { | |
932 | if (!tcp_flags) { | |
933 | ds_put_cstr(s, "none"); | |
934 | return; | |
935 | } | |
936 | ||
937 | if (tcp_flags & TCP_SYN) { | |
938 | ds_put_char(s, 'S'); | |
939 | } | |
940 | if (tcp_flags & TCP_FIN) { | |
941 | ds_put_char(s, 'F'); | |
942 | } | |
943 | if (tcp_flags & TCP_PSH) { | |
944 | ds_put_char(s, 'P'); | |
945 | } | |
946 | if (tcp_flags & TCP_RST) { | |
947 | ds_put_char(s, 'R'); | |
948 | } | |
949 | if (tcp_flags & TCP_URG) { | |
950 | ds_put_char(s, 'U'); | |
951 | } | |
952 | if (tcp_flags & TCP_ACK) { | |
953 | ds_put_char(s, '.'); | |
954 | } | |
955 | if (tcp_flags & 0x40) { | |
956 | ds_put_cstr(s, "[40]"); | |
957 | } | |
958 | if (tcp_flags & 0x80) { | |
959 | ds_put_cstr(s, "[80]"); | |
960 | } | |
961 | } |