]> git.proxmox.com Git - mirror_ovs.git/blame - lib/packets.c
ofproto: Delete all groups and meters when (un)configuring a controller.
[mirror_ovs.git] / lib / packets.c
CommitLineData
b9e8b45a 1/*
6335d074 2 * Copyright (c) 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016 Nicira, Inc.
b9e8b45a
BP
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include <config.h>
18#include "packets.h"
b2befd5b
BP
19#include <sys/types.h>
20#include <netinet/in.h>
d31f1109 21#include <arpa/inet.h>
6ca00f6f 22#include <sys/socket.h>
bc7a5acd 23#include <netinet/ip6.h>
00894212 24#include <netinet/icmp6.h>
76343538 25#include <stdlib.h>
e463f310 26#include <netdb.h>
d31f1109 27#include "byte-order.h"
c97664b3 28#include "csum.h"
c6bcb685 29#include "crc32c.h"
12113c39 30#include "flow.h"
ee89ea7b 31#include "openvswitch/hmap.h"
3e8a2ad1 32#include "openvswitch/dynamic-string.h"
8c45d00f 33#include "ovs-thread.h"
b5e7e61a 34#include "odp-util.h"
cf62fa4c 35#include "dp-packet.h"
7c457c33 36#include "unaligned.h"
b9e8b45a 37
/* File-scope IPv6 address constants.  Each one is initialized from the
 * IN6ADDR_*_INIT macro of the matching name; 'in6addr_exact' is what the IPv6
 * parsers in this file store as the mask when the input has no mask. */
d31f1109 38const struct in6_addr in6addr_exact = IN6ADDR_EXACT_INIT;
06994f87 39const struct in6_addr in6addr_all_hosts = IN6ADDR_ALL_HOSTS_INIT;
b24ab67c 40const struct in6_addr in6addr_all_routers = IN6ADDR_ALL_ROUTERS_INIT;
d31f1109 41
ffe4c74f
JB
42struct in6_addr
43flow_tnl_dst(const struct flow_tnl *tnl)
44{
12d0ee08 45 return tnl->ip_dst ? in6_addr_mapped_ipv4(tnl->ip_dst) : tnl->ipv6_dst;
ffe4c74f
JB
46}
47
48struct in6_addr
49flow_tnl_src(const struct flow_tnl *tnl)
50{
12d0ee08 51 return tnl->ip_src ? in6_addr_mapped_ipv4(tnl->ip_src) : tnl->ipv6_src;
ffe4c74f
JB
52}
53
62705b81
BP
/* Returns true if every character of 's' is a hexadecimal digit (which is
 * trivially the case for an empty string), false otherwise. */
static bool
is_all_hex(const char *s)
{
    static const char hex_digits[] = "0123456789abcdefABCDEF";
    size_t n_hex = strspn(s, hex_digits);

    /* 's' is all hex exactly when the hex prefix runs up to the terminator. */
    return !s[n_hex];
}
60
093ca5b3
BP
/* Parses 's' as a hexadecimal datapath ID.  Two spellings are accepted:
 * exactly 16 hex digits, or "0x"/"0X" followed by hex digits with a total
 * length of at most 18 characters.
 *
 * On success stores the dpid into '*dpidp' and returns true, on failure stores
 * 0 into '*dpidp' and returns false.
 *
 * Rejects an all-zeros dpid as invalid. */
bool
dpid_from_string(const char *s, uint64_t *dpidp)
{
    size_t len = strlen(s);
    uint64_t dpid = 0;

    if ((len == 16 && is_all_hex(s))
        || (len <= 18 && s[0] == '0' && (s[1] == 'x' || s[1] == 'X')
            && is_all_hex(s + 2))) {
        dpid = strtoull(s, NULL, 16);
    }

    *dpidp = dpid;
    return dpid != 0;
}
77
7d48a4cc
BP
78/* Returns true if 'ea' is a reserved address, that a bridge must never
79 * forward, false otherwise.
05be4e2c
EJ
80 *
81 * If you change this function's behavior, please update corresponding
82 * documentation in vswitch.xml at the same time. */
83bool
74ff3298 84eth_addr_is_reserved(const struct eth_addr ea)
05be4e2c 85{
7d48a4cc
BP
86 struct eth_addr_node {
87 struct hmap_node hmap_node;
8c45d00f 88 const uint64_t ea64;
05be4e2c
EJ
89 };
90
7d48a4cc
BP
91 static struct eth_addr_node nodes[] = {
92 /* STP, IEEE pause frames, and other reserved protocols. */
f0ac9da9
BP
93 { HMAP_NODE_NULL_INITIALIZER, 0x0180c2000000ULL },
94 { HMAP_NODE_NULL_INITIALIZER, 0x0180c2000001ULL },
95 { HMAP_NODE_NULL_INITIALIZER, 0x0180c2000002ULL },
96 { HMAP_NODE_NULL_INITIALIZER, 0x0180c2000003ULL },
97 { HMAP_NODE_NULL_INITIALIZER, 0x0180c2000004ULL },
98 { HMAP_NODE_NULL_INITIALIZER, 0x0180c2000005ULL },
99 { HMAP_NODE_NULL_INITIALIZER, 0x0180c2000006ULL },
100 { HMAP_NODE_NULL_INITIALIZER, 0x0180c2000007ULL },
101 { HMAP_NODE_NULL_INITIALIZER, 0x0180c2000008ULL },
102 { HMAP_NODE_NULL_INITIALIZER, 0x0180c2000009ULL },
103 { HMAP_NODE_NULL_INITIALIZER, 0x0180c200000aULL },
104 { HMAP_NODE_NULL_INITIALIZER, 0x0180c200000bULL },
105 { HMAP_NODE_NULL_INITIALIZER, 0x0180c200000cULL },
106 { HMAP_NODE_NULL_INITIALIZER, 0x0180c200000dULL },
107 { HMAP_NODE_NULL_INITIALIZER, 0x0180c200000eULL },
108 { HMAP_NODE_NULL_INITIALIZER, 0x0180c200000fULL },
7d48a4cc
BP
109
110 /* Extreme protocols. */
111 { HMAP_NODE_NULL_INITIALIZER, 0x00e02b000000ULL }, /* EDP. */
112 { HMAP_NODE_NULL_INITIALIZER, 0x00e02b000004ULL }, /* EAPS. */
113 { HMAP_NODE_NULL_INITIALIZER, 0x00e02b000006ULL }, /* EAPS. */
114
115 /* Cisco protocols. */
116 { HMAP_NODE_NULL_INITIALIZER, 0x01000c000000ULL }, /* ISL. */
117 { HMAP_NODE_NULL_INITIALIZER, 0x01000cccccccULL }, /* PAgP, UDLD, CDP,
118 * DTP, VTP. */
119 { HMAP_NODE_NULL_INITIALIZER, 0x01000ccccccdULL }, /* PVST+. */
120 { HMAP_NODE_NULL_INITIALIZER, 0x01000ccdcdcdULL }, /* STP Uplink Fast,
121 * FlexLink. */
122
123 /* Cisco CFM. */
124 { HMAP_NODE_NULL_INITIALIZER, 0x01000cccccc0ULL },
125 { HMAP_NODE_NULL_INITIALIZER, 0x01000cccccc1ULL },
126 { HMAP_NODE_NULL_INITIALIZER, 0x01000cccccc2ULL },
127 { HMAP_NODE_NULL_INITIALIZER, 0x01000cccccc3ULL },
128 { HMAP_NODE_NULL_INITIALIZER, 0x01000cccccc4ULL },
129 { HMAP_NODE_NULL_INITIALIZER, 0x01000cccccc5ULL },
130 { HMAP_NODE_NULL_INITIALIZER, 0x01000cccccc6ULL },
131 { HMAP_NODE_NULL_INITIALIZER, 0x01000cccccc7ULL },
132 };
05be4e2c 133
8c45d00f 134 static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
7d48a4cc 135 struct eth_addr_node *node;
8c45d00f 136 static struct hmap addrs;
7d48a4cc 137 uint64_t ea64;
05be4e2c 138
8c45d00f
BP
139 if (ovsthread_once_start(&once)) {
140 hmap_init(&addrs);
7d48a4cc 141 for (node = nodes; node < &nodes[ARRAY_SIZE(nodes)]; node++) {
965607c8 142 hmap_insert(&addrs, &node->hmap_node, hash_uint64(node->ea64));
7d48a4cc 143 }
8c45d00f 144 ovsthread_once_done(&once);
7d48a4cc 145 }
05be4e2c 146
7d48a4cc 147 ea64 = eth_addr_to_uint64(ea);
965607c8 148 HMAP_FOR_EACH_IN_BUCKET (node, hmap_node, hash_uint64(ea64), &addrs) {
7d48a4cc 149 if (node->ea64 == ea64) {
05be4e2c
EJ
150 return true;
151 }
152 }
153 return false;
154}
155
ed4c95c0
BP
156/* Attempts to parse 's' as an Ethernet address. If successful, stores the
157 * address in 'ea' and returns true, otherwise zeros 'ea' and returns
10c3fcdf 158 * false. This function checks trailing characters. */
76343538 159bool
74ff3298 160eth_addr_from_string(const char *s, struct eth_addr *ea)
76343538 161{
10c3fcdf 162 int n = 0;
163 if (ovs_scan(s, ETH_ADDR_SCAN_FMT"%n", ETH_ADDR_SCAN_ARGS(*ea), &n)
164 && !s[n]) {
76343538
BP
165 return true;
166 } else {
74ff3298 167 *ea = eth_addr_zero;
76343538
BP
168 return false;
169 }
170}
171
38f7147c 172/* Fills 'b' with a Reverse ARP packet with Ethernet source address 'eth_src'.
b9e8b45a 173 * This function is used by Open vSwitch to compose packets in cases where
38f7147c
EJ
174 * context is important but content doesn't (or shouldn't) matter.
175 *
176 * The returned packet has enough headroom to insert an 802.1Q VLAN header if
177 * desired. */
b9e8b45a 178void
74ff3298 179compose_rarp(struct dp_packet *b, const struct eth_addr eth_src)
b9e8b45a 180{
38f7147c 181 struct eth_header *eth;
7cb57d10 182 struct arp_eth_header *arp;
b9e8b45a 183
cf62fa4c
PS
184 dp_packet_clear(b);
185 dp_packet_prealloc_tailroom(b, 2 + ETH_HEADER_LEN + VLAN_HEADER_LEN
7cb57d10 186 + ARP_ETH_HEADER_LEN);
cf62fa4c
PS
187 dp_packet_reserve(b, 2 + VLAN_HEADER_LEN);
188 eth = dp_packet_put_uninit(b, sizeof *eth);
74ff3298
JR
189 eth->eth_dst = eth_addr_broadcast;
190 eth->eth_src = eth_src;
38f7147c
EJ
191 eth->eth_type = htons(ETH_TYPE_RARP);
192
cf62fa4c 193 arp = dp_packet_put_uninit(b, sizeof *arp);
7cb57d10
EJ
194 arp->ar_hrd = htons(ARP_HRD_ETHERNET);
195 arp->ar_pro = htons(ARP_PRO_IP);
196 arp->ar_hln = sizeof arp->ar_sha;
197 arp->ar_pln = sizeof arp->ar_spa;
198 arp->ar_op = htons(ARP_OP_RARP);
74ff3298 199 arp->ar_sha = eth_src;
7c457c33 200 put_16aligned_be32(&arp->ar_spa, htonl(0));
74ff3298 201 arp->ar_tha = eth_src;
7c457c33 202 put_16aligned_be32(&arp->ar_tpa, htonl(0));
cf3b7538 203
82eb5b0a 204 dp_packet_reset_offsets(b);
cf62fa4c 205 dp_packet_set_l3(b, arp);
2482b0b0 206 b->packet_type = htonl(PT_ETH);
b9e8b45a 207}
d31f1109 208
d9065a90 209/* Insert VLAN header according to given TCI. Packet passed must be Ethernet
2f4ca41b 210 * packet. Ignores the CFI bit of 'tci' using 0 instead.
7c66b273 211 *
cf3b7538 212 * Also adjusts the layer offsets accordingly. */
7c66b273 213void
cf62fa4c 214eth_push_vlan(struct dp_packet *packet, ovs_be16 tpid, ovs_be16 tci)
7c66b273 215{
7c66b273
BP
216 struct vlan_eth_header *veh;
217
d9065a90 218 /* Insert new 802.1Q header. */
cf62fa4c 219 veh = dp_packet_resize_l2(packet, VLAN_HEADER_LEN);
437d0d22
JR
220 memmove(veh, (char *)veh + VLAN_HEADER_LEN, 2 * ETH_ADDR_LEN);
221 veh->veth_type = tpid;
222 veh->veth_tci = tci & htons(~VLAN_CFI);
7c66b273
BP
223}
224
f4ebc25e
BP
225/* Removes outermost VLAN header (if any is present) from 'packet'.
226 *
d6943394
TH
227 * 'packet->l2_5' should initially point to 'packet''s outer-most VLAN header
228 * or may be NULL if there are no VLAN headers. */
f4ebc25e 229void
cf62fa4c 230eth_pop_vlan(struct dp_packet *packet)
f4ebc25e 231{
2482b0b0 232 struct vlan_eth_header *veh = dp_packet_eth(packet);
437d0d22 233
cf62fa4c 234 if (veh && dp_packet_size(packet) >= sizeof *veh
d6943394 235 && eth_type_vlan(veh->veth_type)) {
f4ebc25e 236
437d0d22 237 memmove((char *)veh + VLAN_HEADER_LEN, veh, 2 * ETH_ADDR_LEN);
cf62fa4c 238 dp_packet_resize_l2(packet, -VLAN_HEADER_LEN);
f4ebc25e
BP
239 }
240}
241
88fc5281
JS
242/* Push Ethernet header onto 'packet' assuming it is layer 3 */
243void
244push_eth(struct dp_packet *packet, const struct eth_addr *dst,
245 const struct eth_addr *src)
246{
247 struct eth_header *eh;
248
249 ovs_assert(packet->packet_type != htonl(PT_ETH));
250 eh = dp_packet_resize_l2(packet, ETH_HEADER_LEN);
251 eh->eth_dst = *dst;
252 eh->eth_src = *src;
253 eh->eth_type = pt_ns_type_be(packet->packet_type);
254 packet->packet_type = htonl(PT_ETH);
255}
256
257/* Removes Ethernet header, including VLAN header, from 'packet'.
258 *
259 * Previous to calling this function, 'ofpbuf_l3(packet)' must not be NULL */
260void
261pop_eth(struct dp_packet *packet)
262{
263 char *l2_5 = dp_packet_l2_5(packet);
264 char *l3 = dp_packet_l3(packet);
265 ovs_be16 ethertype;
266 int increment;
267
268 ovs_assert(packet->packet_type == htonl(PT_ETH));
269 ovs_assert(l3 != NULL);
270
271 if (l2_5) {
272 increment = packet->l2_5_ofs;
273 ethertype = *(ALIGNED_CAST(ovs_be16 *, (l2_5 - 2)));
274 } else {
275 increment = packet->l3_ofs;
276 ethertype = *(ALIGNED_CAST(ovs_be16 *, (l3 - 2)));
277 }
278
279 dp_packet_resize_l2(packet, -increment);
280 packet->packet_type = PACKET_TYPE_BE(OFPHTN_ETHERTYPE, ntohs(ethertype));
281}
282
b02475c5 283/* Set ethertype of the packet. */
56b02633 284static void
cf62fa4c 285set_ethertype(struct dp_packet *packet, ovs_be16 eth_type)
b02475c5 286{
2482b0b0 287 struct eth_header *eh = dp_packet_eth(packet);
cf3b7538
JR
288
289 if (!eh) {
290 return;
291 }
b02475c5 292
d6943394 293 if (eth_type_vlan(eh->eth_type)) {
b02475c5 294 ovs_be16 *p;
cf62fa4c 295 char *l2_5 = dp_packet_l2_5(packet);
437d0d22 296
db5a1019 297 p = ALIGNED_CAST(ovs_be16 *,
cf62fa4c 298 (l2_5 ? l2_5 : (char *)dp_packet_l3(packet)) - 2);
b02475c5
SH
299 *p = eth_type;
300 } else {
301 eh->eth_type = eth_type;
302 }
303}
304
cf62fa4c 305static bool is_mpls(struct dp_packet *packet)
b02475c5 306{
437d0d22 307 return packet->l2_5_ofs != UINT16_MAX;
b02475c5
SH
308}
309
310/* Set time to live (TTL) of an MPLS label stack entry (LSE). */
b676167a 311void
b02475c5
SH
312set_mpls_lse_ttl(ovs_be32 *lse, uint8_t ttl)
313{
314 *lse &= ~htonl(MPLS_TTL_MASK);
315 *lse |= htonl((ttl << MPLS_TTL_SHIFT) & MPLS_TTL_MASK);
316}
317
318/* Set traffic class (TC) of an MPLS label stack entry (LSE). */
319void
320set_mpls_lse_tc(ovs_be32 *lse, uint8_t tc)
321{
322 *lse &= ~htonl(MPLS_TC_MASK);
323 *lse |= htonl((tc << MPLS_TC_SHIFT) & MPLS_TC_MASK);
324}
325
326/* Set label of an MPLS label stack entry (LSE). */
327void
328set_mpls_lse_label(ovs_be32 *lse, ovs_be32 label)
329{
330 *lse &= ~htonl(MPLS_LABEL_MASK);
331 *lse |= htonl((ntohl(label) << MPLS_LABEL_SHIFT) & MPLS_LABEL_MASK);
332}
333
334/* Set bottom of stack (BoS) bit of an MPLS label stack entry (LSE). */
335void
336set_mpls_lse_bos(ovs_be32 *lse, uint8_t bos)
337{
338 *lse &= ~htonl(MPLS_BOS_MASK);
339 *lse |= htonl((bos << MPLS_BOS_SHIFT) & MPLS_BOS_MASK);
340}
341
342/* Compose an MPLS label stack entry (LSE) from its components:
343 * label, traffic class (TC), time to live (TTL) and
344 * bottom of stack (BoS) bit. */
345ovs_be32
346set_mpls_lse_values(uint8_t ttl, uint8_t tc, uint8_t bos, ovs_be32 label)
347{
348 ovs_be32 lse = htonl(0);
349 set_mpls_lse_ttl(&lse, ttl);
350 set_mpls_lse_tc(&lse, tc);
351 set_mpls_lse_bos(&lse, bos);
352 set_mpls_lse_label(&lse, label);
353 return lse;
354}
355
b02475c5
SH
356/* Set MPLS label stack entry to outermost MPLS header.*/
357void
cf62fa4c 358set_mpls_lse(struct dp_packet *packet, ovs_be32 mpls_lse)
b02475c5 359{
b02475c5
SH
360 /* Packet type should be MPLS to set label stack entry. */
361 if (is_mpls(packet)) {
cf62fa4c 362 struct mpls_hdr *mh = dp_packet_l2_5(packet);
437d0d22 363
b02475c5 364 /* Update mpls label stack entry. */
5fa008d4 365 put_16aligned_be32(&mh->mpls_lse, mpls_lse);
b02475c5
SH
366 }
367}
368
898dcef1 369/* Push MPLS label stack entry 'lse' onto 'packet' as the outermost MPLS
b02475c5
SH
370 * header. If 'packet' does not already have any MPLS labels, then its
371 * Ethertype is changed to 'ethtype' (which must be an MPLS Ethertype). */
372void
cf62fa4c 373push_mpls(struct dp_packet *packet, ovs_be16 ethtype, ovs_be32 lse)
b02475c5 374{
437d0d22
JR
375 char * header;
376 size_t len;
b02475c5
SH
377
378 if (!eth_type_mpls(ethtype)) {
379 return;
380 }
381
382 if (!is_mpls(packet)) {
437d0d22
JR
383 /* Set MPLS label stack offset. */
384 packet->l2_5_ofs = packet->l3_ofs;
b02475c5
SH
385 }
386
437d0d22
JR
387 set_ethertype(packet, ethtype);
388
b02475c5 389 /* Push new MPLS shim header onto packet. */
437d0d22 390 len = packet->l2_5_ofs;
cf62fa4c 391 header = dp_packet_resize_l2_5(packet, MPLS_HLEN);
437d0d22
JR
392 memmove(header, header + MPLS_HLEN, len);
393 memcpy(header + len, &lse, sizeof lse);
b02475c5
SH
394}
395
396/* If 'packet' is an MPLS packet, removes its outermost MPLS label stack entry.
397 * If the label that was removed was the only MPLS label, changes 'packet''s
398 * Ethertype to 'ethtype' (which ordinarily should not be an MPLS
399 * Ethertype). */
400void
cf62fa4c 401pop_mpls(struct dp_packet *packet, ovs_be16 ethtype)
b02475c5 402{
b02475c5 403 if (is_mpls(packet)) {
cf62fa4c 404 struct mpls_hdr *mh = dp_packet_l2_5(packet);
437d0d22
JR
405 size_t len = packet->l2_5_ofs;
406
799a91bb 407 set_ethertype(packet, ethtype);
5fa008d4 408 if (get_16aligned_be32(&mh->mpls_lse) & htonl(MPLS_BOS_MASK)) {
cf62fa4c 409 dp_packet_set_l2_5(packet, NULL);
b02475c5
SH
410 }
411 /* Shift the l2 header forward. */
cf62fa4c
PS
412 memmove((char*)dp_packet_data(packet) + MPLS_HLEN, dp_packet_data(packet), len);
413 dp_packet_resize_l2_5(packet, -MPLS_HLEN);
b02475c5
SH
414 }
415}
416
1fc11c59
JS
417void
418encap_nsh(struct dp_packet *packet, const struct ovs_action_encap_nsh *encap)
419{
420 struct nsh_hdr *nsh;
421 size_t length = NSH_BASE_HDR_LEN + encap->mdlen;
422 uint8_t next_proto;
423
424 switch (ntohl(packet->packet_type)) {
425 case PT_ETH:
426 next_proto = NSH_P_ETHERNET;
427 break;
428 case PT_IPV4:
429 next_proto = NSH_P_IPV4;
430 break;
431 case PT_IPV6:
432 next_proto = NSH_P_IPV6;
433 break;
434 case PT_NSH:
435 next_proto = NSH_P_NSH;
436 break;
437 default:
438 OVS_NOT_REACHED();
439 }
440
441 nsh = (struct nsh_hdr *) dp_packet_push_uninit(packet, length);
9a180f2c
JS
442 nsh->ver_flags_ttl_len =
443 htons(((encap->flags << NSH_FLAGS_SHIFT) & NSH_FLAGS_MASK)
444 | (63 << NSH_TTL_SHIFT)
445 | ((length >> 2) << NSH_LEN_SHIFT));
446 nsh->md_type = (encap->mdtype << NSH_MDTYPE_SHIFT) & NSH_MDTYPE_MASK;
1fc11c59
JS
447 nsh->next_proto = next_proto;
448 put_16aligned_be32(&nsh->path_hdr, encap->path_hdr);
9a180f2c 449 switch (encap->mdtype) {
1fc11c59
JS
450 case NSH_M_TYPE1:
451 nsh->md1 = *ALIGNED_CAST(struct nsh_md1_ctx *, encap->metadata);
452 break;
453 case NSH_M_TYPE2: {
454 /* The MD2 metadata in encap is already padded to 4 bytes. */
7edef47b 455 memcpy(&nsh->md2, encap->metadata, encap->mdlen);
1fc11c59
JS
456 break;
457 }
458 default:
459 OVS_NOT_REACHED();
460 }
461
462 packet->packet_type = htonl(PT_NSH);
463 dp_packet_reset_offsets(packet);
464 packet->l3_ofs = 0;
465}
466
467bool
468decap_nsh(struct dp_packet *packet)
469{
470 struct nsh_hdr *nsh = (struct nsh_hdr *) dp_packet_l3(packet);
471 size_t length;
472 uint32_t next_pt;
473
474 if (packet->packet_type == htonl(PT_NSH) && nsh) {
475 switch (nsh->next_proto) {
476 case NSH_P_ETHERNET:
477 next_pt = PT_ETH;
478 break;
479 case NSH_P_IPV4:
480 next_pt = PT_IPV4;
481 break;
482 case NSH_P_IPV6:
483 next_pt = PT_IPV6;
484 break;
485 case NSH_P_NSH:
486 next_pt = PT_NSH;
487 break;
488 default:
489 /* Unknown inner packet type. Drop packet. */
490 return false;
491 }
492
493 length = nsh_hdr_len(nsh);
494 dp_packet_reset_packet(packet, length);
495 packet->packet_type = htonl(next_pt);
496 /* Packet must be recirculated for further processing. */
497 }
498 return true;
499}
500
e22f1753
BP
501/* Converts hex digits in 'hex' to an Ethernet packet in '*packetp'. The
502 * caller must free '*packetp'. On success, returns NULL. On failure, returns
bb622f82
BP
503 * an error message and stores NULL in '*packetp'.
504 *
505 * Aligns the L3 header of '*packetp' on a 32-bit boundary. */
e22f1753 506const char *
cf62fa4c 507eth_from_hex(const char *hex, struct dp_packet **packetp)
e22f1753 508{
cf62fa4c 509 struct dp_packet *packet;
e22f1753 510
bb622f82 511 /* Use 2 bytes of headroom to 32-bit align the L3 header. */
cf62fa4c 512 packet = *packetp = dp_packet_new_with_headroom(strlen(hex) / 2, 2);
e22f1753 513
cf62fa4c
PS
514 if (dp_packet_put_hex(packet, hex, NULL)[0] != '\0') {
515 dp_packet_delete(packet);
e22f1753
BP
516 *packetp = NULL;
517 return "Trailing garbage in packet data";
518 }
519
cf62fa4c
PS
520 if (dp_packet_size(packet) < ETH_HEADER_LEN) {
521 dp_packet_delete(packet);
e22f1753
BP
522 *packetp = NULL;
523 return "Packet data too short for Ethernet";
524 }
525
526 return NULL;
527}
528
3b4d8ad3 529void
74ff3298
JR
530eth_format_masked(const struct eth_addr eth,
531 const struct eth_addr *mask, struct ds *s)
3b4d8ad3
JS
532{
533 ds_put_format(s, ETH_ADDR_FMT, ETH_ADDR_ARGS(eth));
74ff3298
JR
534 if (mask && !eth_mask_is_exact(*mask)) {
535 ds_put_format(s, "/"ETH_ADDR_FMT, ETH_ADDR_ARGS(*mask));
3b4d8ad3
JS
536 }
537}
538
aad29cd1 539/* Given the IP netmask 'netmask', returns the number of bits of the IP address
c08201d6
BP
540 * that it specifies, that is, the number of 1-bits in 'netmask'.
541 *
542 * If 'netmask' is not a CIDR netmask (see ip_is_cidr()), the return value will
543 * still be in the valid range but isn't otherwise meaningful. */
aad29cd1
BP
544int
545ip_count_cidr_bits(ovs_be32 netmask)
546{
d578065e 547 return 32 - ctz32(ntohl(netmask));
aad29cd1
BP
548}
549
550void
551ip_format_masked(ovs_be32 ip, ovs_be32 mask, struct ds *s)
552{
ed36537e 553 ds_put_format(s, IP_FMT, IP_ARGS(ip));
b8266395 554 if (mask != OVS_BE32_MAX) {
aad29cd1
BP
555 if (ip_is_cidr(mask)) {
556 ds_put_format(s, "/%d", ip_count_cidr_bits(mask));
557 } else {
ed36537e 558 ds_put_format(s, "/"IP_FMT, IP_ARGS(mask));
aad29cd1
BP
559 }
560 }
561}
562
2b02db1b
BP
563/* Parses string 's', which must be an IP address. Stores the IP address into
564 * '*ip'. Returns true if successful, otherwise false. */
565bool
566ip_parse(const char *s, ovs_be32 *ip)
567{
568 return inet_pton(AF_INET, s, ip) == 1;
569}
570
e2bfcad6 571/* Parses string 's', which must be an IP address with a port number
572 * with ":" as a separator (e.g.: 192.168.1.2:80).
fab4e043 573 * Stores the IP address into '*ip' and port number to '*port'.
574 *
575 * Returns NULL if successful, otherwise an error message that the caller must
576 * free(). */
e2bfcad6 577char * OVS_WARN_UNUSED_RESULT
578ip_parse_port(const char *s, ovs_be32 *ip, ovs_be16 *port)
579{
580 int n = 0;
fab4e043 581 if (ovs_scan(s, IP_PORT_SCAN_FMT"%n", IP_PORT_SCAN_ARGS(ip, port), &n)
582 && !s[n]) {
583 return NULL;
e2bfcad6 584 }
585
fab4e043 586 return xasprintf("%s: invalid IP address or port number", s);
e2bfcad6 587}
588
61440451 589/* Parses string 's', which must be an IP address with an optional netmask or
7dc88496
NS
590 * CIDR prefix length. Stores the IP address into '*ip', netmask into '*mask',
591 * (255.255.255.255, if 's' lacks a netmask), and number of scanned characters
592 * into '*n'.
61440451
BP
593 *
594 * Returns NULL if successful, otherwise an error message that the caller must
595 * free(). */
596char * OVS_WARN_UNUSED_RESULT
7dc88496
NS
597ip_parse_masked_len(const char *s, int *n, ovs_be32 *ip,
598 ovs_be32 *mask)
61440451
BP
599{
600 int prefix;
601
7dc88496
NS
602 if (ovs_scan_len(s, n, IP_SCAN_FMT"/"IP_SCAN_FMT,
603 IP_SCAN_ARGS(ip), IP_SCAN_ARGS(mask))) {
61440451 604 /* OK. */
7dc88496
NS
605 } else if (ovs_scan_len(s, n, IP_SCAN_FMT"/%d",
606 IP_SCAN_ARGS(ip), &prefix)) {
4c9a736e
JP
607 if (prefix < 0 || prefix > 32) {
608 return xasprintf("%s: IPv4 network prefix bits not between 0 and "
609 "32, inclusive", s);
61440451
BP
610 }
611 *mask = be32_prefix_mask(prefix);
7dc88496 612 } else if (ovs_scan_len(s, n, IP_SCAN_FMT, IP_SCAN_ARGS(ip))) {
61440451
BP
613 *mask = OVS_BE32_MAX;
614 } else {
615 return xasprintf("%s: invalid IP address", s);
616 }
617 return NULL;
618}
aad29cd1 619
7dc88496
NS
620/* This function is similar to ip_parse_masked_len(), but doesn't return the
621 * number of scanned characters and expects 's' to end after the ip/(optional)
622 * mask.
623 *
624 * Returns NULL if successful, otherwise an error message that the caller must
625 * free(). */
2b02db1b 626char * OVS_WARN_UNUSED_RESULT
7dc88496
NS
627ip_parse_masked(const char *s, ovs_be32 *ip, ovs_be32 *mask)
628{
629 int n = 0;
630
631 char *error = ip_parse_masked_len(s, &n, ip, mask);
632 if (!error && s[n]) {
633 return xasprintf("%s: invalid IP address", s);
634 }
635 return error;
636}
637
638/* Similar to ip_parse_masked_len(), but the mask, if present, must be a CIDR
639 * mask and is returned as a prefix len in '*plen'. */
640char * OVS_WARN_UNUSED_RESULT
641ip_parse_cidr_len(const char *s, int *n, ovs_be32 *ip, unsigned int *plen)
2b02db1b
BP
642{
643 ovs_be32 mask;
644 char *error;
645
7dc88496 646 error = ip_parse_masked_len(s, n, ip, &mask);
2b02db1b
BP
647 if (error) {
648 return error;
649 }
650
651 if (!ip_is_cidr(mask)) {
652 return xasprintf("%s: CIDR network required", s);
653 }
654 *plen = ip_count_cidr_bits(mask);
655 return NULL;
656}
657
7dc88496
NS
658/* Similar to ip_parse_cidr_len(), but doesn't return the number of scanned
659 * characters and expects 's' to be NULL terminated at the end of the
660 * ip/(optional) cidr. */
661char * OVS_WARN_UNUSED_RESULT
662ip_parse_cidr(const char *s, ovs_be32 *ip, unsigned int *plen)
663{
664 int n = 0;
665
666 char *error = ip_parse_cidr_len(s, &n, ip, plen);
667 if (!error && s[n]) {
668 return xasprintf("%s: invalid IP address", s);
669 }
670 return error;
671}
672
e463f310
MM
673/* Parses the string into an IPv4 or IPv6 address.
674 * The port flags act as follows:
675 * * PORT_OPTIONAL: A port may be present but is not required
676 * * PORT_REQUIRED: A port must be present
677 * * PORT_FORBIDDEN: A port must not be present
678 */
679char * OVS_WARN_UNUSED_RESULT
680ipv46_parse(const char *s, enum port_flags flags, struct sockaddr_storage *ss)
681{
682 char *error = NULL;
683
684 char *copy;
685 copy = xstrdup(s);
686
687 char *addr;
688 char *port;
689 if (*copy == '[') {
690 char *end;
691
692 addr = copy + 1;
693 end = strchr(addr, ']');
694 if (!end) {
695 error = xasprintf("No closing bracket on address %s", s);
696 goto finish;
697 }
698 *end++ = '\0';
699 if (*end == ':') {
700 port = end + 1;
701 } else {
702 port = NULL;
703 }
704 } else {
705 addr = copy;
706 port = strchr(copy, ':');
707 if (port) {
708 if (strchr(port + 1, ':')) {
709 port = NULL;
710 } else {
711 *port++ = '\0';
712 }
713 }
714 }
715
716 if (port && !*port) {
717 error = xasprintf("Port is an empty string");
718 goto finish;
719 }
720
721 if (port && flags == PORT_FORBIDDEN) {
722 error = xasprintf("Port forbidden in address %s", s);
723 goto finish;
724 } else if (!port && flags == PORT_REQUIRED) {
725 error = xasprintf("Port required in address %s", s);
726 goto finish;
727 }
728
729 struct addrinfo hints = {
730 .ai_flags = AI_NUMERICHOST | AI_NUMERICSERV,
731 .ai_family = AF_UNSPEC,
732 };
733 struct addrinfo *res;
734 int status;
735 status = getaddrinfo(addr, port, &hints, &res);
736 if (status) {
737 error = xasprintf("Error parsing address %s: %s",
738 s, gai_strerror(status));
739 goto finish;
740 }
741 memcpy(ss, res->ai_addr, res->ai_addrlen);
742 freeaddrinfo(res);
743
744finish:
745 free(copy);
746 return error;
747}
748
2b02db1b
BP
/* Parses string 's', which must be an IPv6 address in the form accepted by
 * inet_pton(AF_INET6).  Stores the IPv6 address into '*ip'.  Returns true if
 * successful, otherwise false. */
bool
ipv6_parse(const char *s, struct in6_addr *ip)
{
    int rc = inet_pton(AF_INET6, s, ip);

    return rc == 1;
}
756
757/* Parses string 's', which must be an IPv6 address with an optional netmask or
758 * CIDR prefix length. Stores the IPv6 address into '*ip' and the netmask into
7dc88496
NS
759 * '*mask' (if 's' does not contain a netmask, all-one-bits is assumed), and
760 * number of scanned characters into '*n'.
2b02db1b
BP
761 *
762 * Returns NULL if successful, otherwise an error message that the caller must
763 * free(). */
764char * OVS_WARN_UNUSED_RESULT
7dc88496
NS
765ipv6_parse_masked_len(const char *s, int *n, struct in6_addr *ip,
766 struct in6_addr *mask)
2b02db1b
BP
767{
768 char ipv6_s[IPV6_SCAN_LEN + 1];
769 int prefix;
2b02db1b 770
7dc88496
NS
771 if (ovs_scan_len(s, n, " "IPV6_SCAN_FMT, ipv6_s)
772 && ipv6_parse(ipv6_s, ip)) {
773 if (ovs_scan_len(s, n, "/%d", &prefix)) {
4c9a736e 774 if (prefix < 0 || prefix > 128) {
2b02db1b 775 return xasprintf("%s: IPv6 network prefix bits not between 0 "
4c9a736e 776 "and 128, inclusive", s);
2b02db1b
BP
777 }
778 *mask = ipv6_create_mask(prefix);
7dc88496
NS
779 } else if (ovs_scan_len(s, n, "/"IPV6_SCAN_FMT, ipv6_s)) {
780 if (!ipv6_parse(ipv6_s, mask)) {
781 return xasprintf("%s: Invalid IPv6 mask", s);
782 }
2b02db1b
BP
783 /* OK. */
784 } else {
7dc88496
NS
785 /* OK. No mask. */
786 *mask = in6addr_exact;
2b02db1b
BP
787 }
788 return NULL;
789 }
790 return xasprintf("%s: invalid IPv6 address", s);
791}
792
7dc88496
NS
793/* This function is similar to ipv6_parse_masked_len(), but doesn't return the
794 * number of scanned characters and expects 's' to end following the
795 * ipv6/(optional) mask. */
796char * OVS_WARN_UNUSED_RESULT
797ipv6_parse_masked(const char *s, struct in6_addr *ip, struct in6_addr *mask)
798{
799 int n = 0;
800
801 char *error = ipv6_parse_masked_len(s, &n, ip, mask);
802 if (!error && s[n]) {
803 return xasprintf("%s: invalid IPv6 address", s);
804 }
805 return error;
806}
807
808/* Similar to ipv6_parse_masked_len(), but the mask, if present, must be a CIDR
2b02db1b
BP
809 * mask and is returned as a prefix length in '*plen'. */
810char * OVS_WARN_UNUSED_RESULT
7dc88496
NS
811ipv6_parse_cidr_len(const char *s, int *n, struct in6_addr *ip,
812 unsigned int *plen)
2b02db1b
BP
813{
814 struct in6_addr mask;
815 char *error;
816
7dc88496 817 error = ipv6_parse_masked_len(s, n, ip, &mask);
2b02db1b
BP
818 if (error) {
819 return error;
820 }
821
822 if (!ipv6_is_cidr(&mask)) {
823 return xasprintf("%s: IPv6 CIDR network required", s);
824 }
825 *plen = ipv6_count_cidr_bits(&mask);
826 return NULL;
827}
828
7dc88496
NS
829/* Similar to ipv6_parse_cidr_len(), but doesn't return the number of scanned
830 * characters and expects 's' to end after the ipv6/(optional) cidr. */
831char * OVS_WARN_UNUSED_RESULT
832ipv6_parse_cidr(const char *s, struct in6_addr *ip, unsigned int *plen)
833{
834 int n = 0;
835
836 char *error = ipv6_parse_cidr_len(s, &n, ip, plen);
837 if (!error && s[n]) {
838 return xasprintf("%s: invalid IPv6 address", s);
839 }
840 return error;
841}
842
2b02db1b
BP
843/* Stores the string representation of the IPv6 address 'addr' into the
844 * character array 'addr_str', which must be at least INET6_ADDRSTRLEN
845 * bytes long. */
d31f1109 846void
ac6d120f 847ipv6_format_addr(const struct in6_addr *addr, struct ds *s)
d31f1109 848{
aad29cd1
BP
849 char *dst;
850
ac6d120f 851 ds_reserve(s, s->length + INET6_ADDRSTRLEN);
aad29cd1 852
ac6d120f
JP
853 dst = s->string + s->length;
854 inet_ntop(AF_INET6, addr, dst, INET6_ADDRSTRLEN);
855 s->length += strlen(dst);
aad29cd1 856}
d31f1109 857
9ac0aada
JR
/* Same as ipv6_format_addr(), but encloses the address in square brackets
 * when 'bracket' is true. */
void
ipv6_format_addr_bracket(const struct in6_addr *addr, struct ds *s,
                         bool bracket)
{
    if (!bracket) {
        ipv6_format_addr(addr, s);
        return;
    }

    ds_put_char(s, '[');
    ipv6_format_addr(addr, s);
    ds_put_char(s, ']');
}
872
964a4d5f 873void
ac6d120f 874ipv6_format_mapped(const struct in6_addr *addr, struct ds *s)
964a4d5f
TLSC
875{
876 if (IN6_IS_ADDR_V4MAPPED(addr)) {
877 ds_put_format(s, IP_FMT, addr->s6_addr[12], addr->s6_addr[13],
878 addr->s6_addr[14], addr->s6_addr[15]);
879 } else {
ac6d120f 880 ipv6_format_addr(addr, s);
964a4d5f
TLSC
881 }
882}
883
/* Appends IPv6 address 'addr' to 's'.  If 'mask' is nonnull and is not an
 * exact-match mask, also appends it: as "/<prefix length>" when it is a CIDR
 * mask, otherwise as "/" followed by the mask formatted as an address. */
void
ipv6_format_masked(const struct in6_addr *addr, const struct in6_addr *mask,
                   struct ds *s)
{
    ipv6_format_addr(addr, s);

    if (!mask || ipv6_mask_is_exact(mask)) {
        return;
    }

    if (ipv6_is_cidr(mask)) {
        ds_put_format(s, "/%d", ipv6_count_cidr_bits(mask));
    } else {
        ds_put_char(s, '/');
        ipv6_format_addr(mask, s);
    }
}
899
bed610e8
TLSC
900/* Stores the string representation of the IPv6 address 'addr' into the
901 * character array 'addr_str', which must be at least INET6_ADDRSTRLEN
902 * bytes long. If addr is IPv4-mapped, store an IPv4 dotted-decimal string. */
903const char *
904ipv6_string_mapped(char *addr_str, const struct in6_addr *addr)
905{
906 ovs_be32 ip;
907 ip = in6_addr_get_mapped_ipv4(addr);
908 if (ip) {
909 return inet_ntop(AF_INET, &ip, addr_str, INET6_ADDRSTRLEN);
910 } else {
911 return inet_ntop(AF_INET6, addr, addr_str, INET6_ADDRSTRLEN);
912 }
913}
914
/* Choose the element width used to walk a struct in6_addr: when the platform
 * defines 's6_addr32', 's6_addrX' names that member and IPV6_FOR_EACH runs
 * over 4 elements; otherwise 's6_addrX' is the 's6_addr' byte array and
 * IPV6_FOR_EACH runs over 16 elements.  IPV6_FOR_EACH(VAR) declares 'int VAR'
 * as the loop index. */
d31f1109 915#ifdef s6_addr32
b0ad27f3
JP
916#define s6_addrX s6_addr32
917#define IPV6_FOR_EACH(VAR) for (int VAR = 0; VAR < 4; VAR++)
d31f1109 918#else
b0ad27f3
JP
919#define s6_addrX s6_addr
920#define IPV6_FOR_EACH(VAR) for (int VAR = 0; VAR < 16; VAR++)
d31f1109
JP
921#endif
922
b0ad27f3
JP
923struct in6_addr
924ipv6_addr_bitand(const struct in6_addr *a, const struct in6_addr *b)
925{
926 struct in6_addr dst;
927 IPV6_FOR_EACH (i) {
928 dst.s6_addrX[i] = a->s6_addrX[i] & b->s6_addrX[i];
929 }
930 return dst;
931}
932
933struct in6_addr
934ipv6_addr_bitxor(const struct in6_addr *a, const struct in6_addr *b)
935{
936 struct in6_addr dst;
937 IPV6_FOR_EACH (i) {
938 dst.s6_addrX[i] = a->s6_addrX[i] ^ b->s6_addrX[i];
939 }
940 return dst;
941}
942
943bool
944ipv6_is_zero(const struct in6_addr *a)
945{
946 IPV6_FOR_EACH (i) {
947 if (a->s6_addrX[i]) {
948 return false;
949 }
950 }
951 return true;
d31f1109
JP
952}
953
/* Returns an in6_addr consisting of 'mask' high-order 1-bits and 128 - 'mask'
 * low-order 0-bits.
 *
 * 'mask' is expected to be between 0 and 128, inclusive.  Values outside that
 * range are clamped to it (a negative value yields the all-zeros mask and a
 * value above 128 yields the all-ones mask), so that the 16-byte address is
 * never written out of bounds and no out-of-range shift is performed. */
struct in6_addr
ipv6_create_mask(int mask)
{
    struct in6_addr netmask;
    int n_bits = mask < 0 ? 0 : mask > 128 ? 128 : mask;
    int full_bytes = n_bits / 8;
    int rem_bits = n_bits % 8;

    memset(&netmask, 0, sizeof netmask);

    /* Whole bytes of 1-bits first... */
    memset(netmask.s6_addr, 0xff, full_bytes);

    /* ...then the partial byte, if any.  'rem_bits' is nonzero only when
     * 'full_bytes' is at most 15, so this index stays in bounds. */
    if (rem_bits) {
        netmask.s6_addr[full_bytes] = (uint8_t) (0xff << (8 - rem_bits));
    }

    return netmask;
}
975
aad29cd1
BP
/* Returns the prefix length described by the IPv6 netmask 'netmask', that
 * is, the number of leading 1-bits.  'netmask' should be a CIDR netmask (see
 * ipv6_is_cidr()).
 *
 * For a non-CIDR 'netmask' the result still lies between 0 and 128 but
 * carries no useful meaning. */
int
ipv6_count_cidr_bits(const struct in6_addr *netmask)
{
    const uint8_t *bytes = netmask->s6_addr;
    int n_bits = 0;

    for (int idx = 0; idx < 16; idx++) {
        uint8_t byte = bytes[idx];

        if (byte != 0xff) {
            /* First byte that is not all-ones: add one per left shift until
             * it is exhausted, then stop scanning. */
            while (byte) {
                n_bits++;
                byte <<= 1;
            }
            break;
        }
        n_bits += 8;
    }

    return n_bits;
}
1005
d31f1109
JP
/* Returns true if 'netmask' is a CIDR netmask, i.e. N high-order 1-bits
 * followed by 128-N low-order 0-bits for some N in 0...128. */
bool
ipv6_is_cidr(const struct in6_addr *netmask)
{
    const uint8_t *bytes = netmask->s6_addr;
    int pos = 0;

    /* Skip the leading run of all-ones bytes. */
    while (pos < 16 && bytes[pos] == 0xff) {
        pos++;
    }
    if (pos == 16) {
        return true;
    }

    /* The boundary byte must be 1-bits followed by 0-bits, which makes its
     * complement one less than a power of two. */
    uint8_t inverted = ~bytes[pos];
    if (inverted & (inverted + 1)) {
        return false;
    }

    /* Everything after the boundary byte must be zero. */
    for (pos++; pos < 16; pos++) {
        if (bytes[pos]) {
            return false;
        }
    }

    return true;
}
c25c91fd 1030
5de1bb5c
BP
1031/* Populates 'b' with an Ethernet II packet headed with the given 'eth_dst',
1032 * 'eth_src' and 'eth_type' parameters. A payload of 'size' bytes is allocated
1033 * in 'b' and returned. This payload may be populated with appropriate
cf3b7538
JR
1034 * information by the caller. Sets 'b''s 'frame' pointer and 'l3' offset to
1035 * the Ethernet header and payload respectively. Aligns b->l3 on a 32-bit
bb622f82 1036 * boundary.
eda1f38d
BP
1037 *
1038 * The returned packet has enough headroom to insert an 802.1Q VLAN header if
1039 * desired. */
40f78b38 1040void *
74ff3298
JR
1041eth_compose(struct dp_packet *b, const struct eth_addr eth_dst,
1042 const struct eth_addr eth_src, uint16_t eth_type,
5de1bb5c 1043 size_t size)
c25c91fd 1044{
40f78b38 1045 void *data;
c25c91fd 1046 struct eth_header *eth;
c25c91fd 1047
cf62fa4c 1048 dp_packet_clear(b);
c25c91fd 1049
bb622f82
BP
1050 /* The magic 2 here ensures that the L3 header (when it is added later)
1051 * will be 32-bit aligned. */
cf62fa4c
PS
1052 dp_packet_prealloc_tailroom(b, 2 + ETH_HEADER_LEN + VLAN_HEADER_LEN + size);
1053 dp_packet_reserve(b, 2 + VLAN_HEADER_LEN);
1054 eth = dp_packet_put_uninit(b, ETH_HEADER_LEN);
c4bee4cb 1055 data = dp_packet_put_zeros(b, size);
c25c91fd 1056
74ff3298
JR
1057 eth->eth_dst = eth_dst;
1058 eth->eth_src = eth_src;
40f78b38
EJ
1059 eth->eth_type = htons(eth_type);
1060
2482b0b0 1061 b->packet_type = htonl(PT_ETH);
82eb5b0a 1062 dp_packet_reset_offsets(b);
cf62fa4c 1063 dp_packet_set_l3(b, data);
75a4ead1 1064
40f78b38 1065 return data;
07a6cf77
EJ
1066}
1067
fc052306 1068void
cf62fa4c 1069packet_set_ipv4_addr(struct dp_packet *packet,
7c457c33 1070 ovs_16aligned_be32 *addr, ovs_be32 new_addr)
c97664b3 1071{
cf62fa4c 1072 struct ip_header *nh = dp_packet_l3(packet);
7c457c33 1073 ovs_be32 old_addr = get_16aligned_be32(addr);
cf62fa4c 1074 size_t l4_size = dp_packet_l4_size(packet);
c97664b3 1075
5a51b2cd 1076 if (nh->ip_proto == IPPROTO_TCP && l4_size >= TCP_HEADER_LEN) {
cf62fa4c 1077 struct tcp_header *th = dp_packet_l4(packet);
c97664b3 1078
7c457c33 1079 th->tcp_csum = recalc_csum32(th->tcp_csum, old_addr, new_addr);
5a51b2cd 1080 } else if (nh->ip_proto == IPPROTO_UDP && l4_size >= UDP_HEADER_LEN ) {
cf62fa4c 1081 struct udp_header *uh = dp_packet_l4(packet);
c97664b3
EJ
1082
1083 if (uh->udp_csum) {
7c457c33 1084 uh->udp_csum = recalc_csum32(uh->udp_csum, old_addr, new_addr);
c97664b3
EJ
1085 if (!uh->udp_csum) {
1086 uh->udp_csum = htons(0xffff);
1087 }
1088 }
1089 }
7c457c33
BP
1090 nh->ip_csum = recalc_csum32(nh->ip_csum, old_addr, new_addr);
1091 put_16aligned_be32(addr, new_addr);
c97664b3
EJ
1092}
1093
bc7a5acd
AA
1094/* Returns true, if packet contains at least one routing header where
1095 * segements_left > 0.
1096 *
437d0d22 1097 * This function assumes that L3 and L4 offsets are set in the packet. */
bc7a5acd 1098static bool
31a9a584 1099packet_rh_present(struct dp_packet *packet, uint8_t *nexthdr)
bc7a5acd 1100{
4528f34f 1101 const struct ovs_16aligned_ip6_hdr *nh;
bc7a5acd
AA
1102 size_t len;
1103 size_t remaining;
cf62fa4c 1104 uint8_t *data = dp_packet_l3(packet);
bc7a5acd 1105
437d0d22 1106 remaining = packet->l4_ofs - packet->l3_ofs;
bc7a5acd
AA
1107 if (remaining < sizeof *nh) {
1108 return false;
1109 }
4528f34f 1110 nh = ALIGNED_CAST(struct ovs_16aligned_ip6_hdr *, data);
bc7a5acd
AA
1111 data += sizeof *nh;
1112 remaining -= sizeof *nh;
31a9a584 1113 *nexthdr = nh->ip6_nxt;
bc7a5acd
AA
1114
1115 while (1) {
31a9a584
SH
1116 if ((*nexthdr != IPPROTO_HOPOPTS)
1117 && (*nexthdr != IPPROTO_ROUTING)
1118 && (*nexthdr != IPPROTO_DSTOPTS)
1119 && (*nexthdr != IPPROTO_AH)
1120 && (*nexthdr != IPPROTO_FRAGMENT)) {
bc7a5acd
AA
1121 /* It's either a terminal header (e.g., TCP, UDP) or one we
1122 * don't understand. In either case, we're done with the
1123 * packet, so use it to fill in 'nw_proto'. */
1124 break;
1125 }
1126
1127 /* We only verify that at least 8 bytes of the next header are
1128 * available, but many of these headers are longer. Ensure that
1129 * accesses within the extension header are within those first 8
1130 * bytes. All extension headers are required to be at least 8
1131 * bytes. */
1132 if (remaining < 8) {
1133 return false;
1134 }
1135
31a9a584 1136 if (*nexthdr == IPPROTO_AH) {
bc7a5acd
AA
1137 /* A standard AH definition isn't available, but the fields
1138 * we care about are in the same location as the generic
1139 * option header--only the header length is calculated
1140 * differently. */
1141 const struct ip6_ext *ext_hdr = (struct ip6_ext *)data;
1142
31a9a584 1143 *nexthdr = ext_hdr->ip6e_nxt;
bc7a5acd 1144 len = (ext_hdr->ip6e_len + 2) * 4;
31a9a584 1145 } else if (*nexthdr == IPPROTO_FRAGMENT) {
4528f34f
BP
1146 const struct ovs_16aligned_ip6_frag *frag_hdr
1147 = ALIGNED_CAST(struct ovs_16aligned_ip6_frag *, data);
bc7a5acd 1148
31a9a584 1149 *nexthdr = frag_hdr->ip6f_nxt;
bc7a5acd 1150 len = sizeof *frag_hdr;
31a9a584 1151 } else if (*nexthdr == IPPROTO_ROUTING) {
bc7a5acd
AA
1152 const struct ip6_rthdr *rh = (struct ip6_rthdr *)data;
1153
1154 if (rh->ip6r_segleft > 0) {
1155 return true;
1156 }
1157
31a9a584 1158 *nexthdr = rh->ip6r_nxt;
bc7a5acd
AA
1159 len = (rh->ip6r_len + 1) * 8;
1160 } else {
1161 const struct ip6_ext *ext_hdr = (struct ip6_ext *)data;
1162
31a9a584 1163 *nexthdr = ext_hdr->ip6e_nxt;
bc7a5acd
AA
1164 len = (ext_hdr->ip6e_len + 1) * 8;
1165 }
1166
1167 if (remaining < len) {
1168 return false;
1169 }
1170 remaining -= len;
1171 data += len;
1172 }
1173
1174 return false;
1175}
1176
1177static void
cf62fa4c 1178packet_update_csum128(struct dp_packet *packet, uint8_t proto,
932c96b7
JR
1179 ovs_16aligned_be32 addr[4],
1180 const struct in6_addr *new_addr)
bc7a5acd 1181{
cf62fa4c 1182 size_t l4_size = dp_packet_l4_size(packet);
5a51b2cd
JR
1183
1184 if (proto == IPPROTO_TCP && l4_size >= TCP_HEADER_LEN) {
cf62fa4c 1185 struct tcp_header *th = dp_packet_l4(packet);
bc7a5acd
AA
1186
1187 th->tcp_csum = recalc_csum128(th->tcp_csum, addr, new_addr);
5a51b2cd 1188 } else if (proto == IPPROTO_UDP && l4_size >= UDP_HEADER_LEN) {
cf62fa4c 1189 struct udp_header *uh = dp_packet_l4(packet);
bc7a5acd
AA
1190
1191 if (uh->udp_csum) {
1192 uh->udp_csum = recalc_csum128(uh->udp_csum, addr, new_addr);
1193 if (!uh->udp_csum) {
1194 uh->udp_csum = htons(0xffff);
1195 }
1196 }
5abf65d0
JG
1197 } else if (proto == IPPROTO_ICMPV6 &&
1198 l4_size >= sizeof(struct icmp6_header)) {
cf62fa4c 1199 struct icmp6_header *icmp = dp_packet_l4(packet);
00894212
JG
1200
1201 icmp->icmp6_cksum = recalc_csum128(icmp->icmp6_cksum, addr, new_addr);
bc7a5acd
AA
1202 }
1203}
1204
0e29d884 1205void
cf62fa4c 1206packet_set_ipv6_addr(struct dp_packet *packet, uint8_t proto,
932c96b7
JR
1207 ovs_16aligned_be32 addr[4],
1208 const struct in6_addr *new_addr,
bc7a5acd
AA
1209 bool recalculate_csum)
1210{
1211 if (recalculate_csum) {
4528f34f 1212 packet_update_csum128(packet, proto, addr, new_addr);
bc7a5acd 1213 }
4068403a 1214 memcpy(addr, new_addr, sizeof(ovs_be32[4]));
bc7a5acd
AA
1215}
1216
1217static void
4528f34f 1218packet_set_ipv6_flow_label(ovs_16aligned_be32 *flow_label, ovs_be32 flow_key)
bc7a5acd 1219{
4528f34f
BP
1220 ovs_be32 old_label = get_16aligned_be32(flow_label);
1221 ovs_be32 new_label = (old_label & htonl(~IPV6_LABEL_MASK)) | flow_key;
1222 put_16aligned_be32(flow_label, new_label);
bc7a5acd
AA
1223}
1224
1225static void
4528f34f 1226packet_set_ipv6_tc(ovs_16aligned_be32 *flow_label, uint8_t tc)
bc7a5acd 1227{
4528f34f
BP
1228 ovs_be32 old_label = get_16aligned_be32(flow_label);
1229 ovs_be32 new_label = (old_label & htonl(0xF00FFFFF)) | htonl(tc << 20);
1230 put_16aligned_be32(flow_label, new_label);
bc7a5acd
AA
1231}
1232
c97664b3
EJ
1233/* Modifies the IPv4 header fields of 'packet' to be consistent with 'src',
1234 * 'dst', 'tos', and 'ttl'. Updates 'packet''s L4 checksums as appropriate.
1235 * 'packet' must contain a valid IPv4 packet with correctly populated l[347]
1236 * markers. */
1237void
cf62fa4c 1238packet_set_ipv4(struct dp_packet *packet, ovs_be32 src, ovs_be32 dst,
c97664b3
EJ
1239 uint8_t tos, uint8_t ttl)
1240{
cf62fa4c 1241 struct ip_header *nh = dp_packet_l3(packet);
c97664b3 1242
7c457c33 1243 if (get_16aligned_be32(&nh->ip_src) != src) {
c97664b3
EJ
1244 packet_set_ipv4_addr(packet, &nh->ip_src, src);
1245 }
1246
7c457c33 1247 if (get_16aligned_be32(&nh->ip_dst) != dst) {
c97664b3
EJ
1248 packet_set_ipv4_addr(packet, &nh->ip_dst, dst);
1249 }
1250
1251 if (nh->ip_tos != tos) {
1252 uint8_t *field = &nh->ip_tos;
1253
1254 nh->ip_csum = recalc_csum16(nh->ip_csum, htons((uint16_t) *field),
1255 htons((uint16_t) tos));
1256 *field = tos;
1257 }
1258
1259 if (nh->ip_ttl != ttl) {
1260 uint8_t *field = &nh->ip_ttl;
1261
1262 nh->ip_csum = recalc_csum16(nh->ip_csum, htons(*field << 8),
1263 htons(ttl << 8));
1264 *field = ttl;
1265 }
1266}
1267
bc7a5acd
AA
1268/* Modifies the IPv6 header fields of 'packet' to be consistent with 'src',
1269 * 'dst', 'traffic class', and 'next hop'. Updates 'packet''s L4 checksums as
1270 * appropriate. 'packet' must contain a valid IPv6 packet with correctly
437d0d22 1271 * populated l[34] offsets. */
bc7a5acd 1272void
932c96b7
JR
1273packet_set_ipv6(struct dp_packet *packet, const struct in6_addr *src,
1274 const struct in6_addr *dst, uint8_t key_tc, ovs_be32 key_fl,
bc7a5acd
AA
1275 uint8_t key_hl)
1276{
cf62fa4c 1277 struct ovs_16aligned_ip6_hdr *nh = dp_packet_l3(packet);
31a9a584
SH
1278 uint8_t proto = 0;
1279 bool rh_present;
1280
1281 rh_present = packet_rh_present(packet, &proto);
bc7a5acd
AA
1282
1283 if (memcmp(&nh->ip6_src, src, sizeof(ovs_be32[4]))) {
4528f34f 1284 packet_set_ipv6_addr(packet, proto, nh->ip6_src.be32, src, true);
bc7a5acd
AA
1285 }
1286
1287 if (memcmp(&nh->ip6_dst, dst, sizeof(ovs_be32[4]))) {
4528f34f 1288 packet_set_ipv6_addr(packet, proto, nh->ip6_dst.be32, dst,
31a9a584 1289 !rh_present);
bc7a5acd
AA
1290 }
1291
1292 packet_set_ipv6_tc(&nh->ip6_flow, key_tc);
bc7a5acd 1293 packet_set_ipv6_flow_label(&nh->ip6_flow, key_fl);
bc7a5acd
AA
1294 nh->ip6_hlim = key_hl;
1295}
1296
c97664b3
EJ
1297static void
1298packet_set_port(ovs_be16 *port, ovs_be16 new_port, ovs_be16 *csum)
1299{
1300 if (*port != new_port) {
1301 *csum = recalc_csum16(*csum, *port, new_port);
1302 *port = new_port;
1303 }
1304}
1305
1306/* Sets the TCP source and destination port ('src' and 'dst' respectively) of
1307 * the TCP header contained in 'packet'. 'packet' must be a valid TCP packet
437d0d22 1308 * with its l4 offset properly populated. */
c97664b3 1309void
cf62fa4c 1310packet_set_tcp_port(struct dp_packet *packet, ovs_be16 src, ovs_be16 dst)
c97664b3 1311{
cf62fa4c 1312 struct tcp_header *th = dp_packet_l4(packet);
c97664b3
EJ
1313
1314 packet_set_port(&th->tcp_src, src, &th->tcp_csum);
1315 packet_set_port(&th->tcp_dst, dst, &th->tcp_csum);
1316}
1317
1318/* Sets the UDP source and destination port ('src' and 'dst' respectively) of
1319 * the UDP header contained in 'packet'. 'packet' must be a valid UDP packet
437d0d22 1320 * with its l4 offset properly populated. */
c97664b3 1321void
cf62fa4c 1322packet_set_udp_port(struct dp_packet *packet, ovs_be16 src, ovs_be16 dst)
c97664b3 1323{
cf62fa4c 1324 struct udp_header *uh = dp_packet_l4(packet);
c97664b3
EJ
1325
1326 if (uh->udp_csum) {
1327 packet_set_port(&uh->udp_src, src, &uh->udp_csum);
1328 packet_set_port(&uh->udp_dst, dst, &uh->udp_csum);
1329
1330 if (!uh->udp_csum) {
1331 uh->udp_csum = htons(0xffff);
1332 }
1333 } else {
1334 uh->udp_src = src;
1335 uh->udp_dst = dst;
1336 }
1337}
12113c39 1338
c6bcb685
JS
1339/* Sets the SCTP source and destination port ('src' and 'dst' respectively) of
1340 * the SCTP header contained in 'packet'. 'packet' must be a valid SCTP packet
437d0d22 1341 * with its l4 offset properly populated. */
c6bcb685 1342void
cf62fa4c 1343packet_set_sctp_port(struct dp_packet *packet, ovs_be16 src, ovs_be16 dst)
c6bcb685 1344{
cf62fa4c 1345 struct sctp_header *sh = dp_packet_l4(packet);
c6bcb685 1346 ovs_be32 old_csum, old_correct_csum, new_csum;
cf62fa4c 1347 uint16_t tp_len = dp_packet_l4_size(packet);
c6bcb685 1348
5fa008d4
BP
1349 old_csum = get_16aligned_be32(&sh->sctp_csum);
1350 put_16aligned_be32(&sh->sctp_csum, 0);
437d0d22 1351 old_correct_csum = crc32c((void *)sh, tp_len);
c6bcb685
JS
1352
1353 sh->sctp_src = src;
1354 sh->sctp_dst = dst;
1355
437d0d22 1356 new_csum = crc32c((void *)sh, tp_len);
5fa008d4 1357 put_16aligned_be32(&sh->sctp_csum, old_csum ^ old_correct_csum ^ new_csum);
c6bcb685
JS
1358}
1359
b8786b18
JP
1360/* Sets the ICMP type and code of the ICMP header contained in 'packet'.
1361 * 'packet' must be a valid ICMP packet with its l4 offset properly
1362 * populated. */
1363void
1364packet_set_icmp(struct dp_packet *packet, uint8_t type, uint8_t code)
1365{
1366 struct icmp_header *ih = dp_packet_l4(packet);
1367 ovs_be16 orig_tc = htons(ih->icmp_type << 8 | ih->icmp_code);
1368 ovs_be16 new_tc = htons(type << 8 | code);
1369
1370 if (orig_tc != new_tc) {
1371 ih->icmp_type = type;
1372 ih->icmp_code = code;
1373
1374 ih->icmp_csum = recalc_csum16(ih->icmp_csum, orig_tc, new_tc);
1375 }
1376}
1377
e60e935b 1378void
932c96b7 1379packet_set_nd(struct dp_packet *packet, const struct in6_addr *target,
c4bee4cb
PS
1380 const struct eth_addr sll, const struct eth_addr tll)
1381{
e60e935b 1382 struct ovs_nd_msg *ns;
86d46f3c 1383 struct ovs_nd_lla_opt *opt;
cf62fa4c 1384 int bytes_remain = dp_packet_l4_size(packet);
e60e935b
SRCSA
1385
1386 if (OVS_UNLIKELY(bytes_remain < sizeof(*ns))) {
1387 return;
1388 }
1389
cf62fa4c 1390 ns = dp_packet_l4(packet);
86d46f3c 1391 opt = &ns->options[0];
e60e935b
SRCSA
1392 bytes_remain -= sizeof(*ns);
1393
1394 if (memcmp(&ns->target, target, sizeof(ovs_be32[4]))) {
932c96b7
JR
1395 packet_set_ipv6_addr(packet, IPPROTO_ICMPV6, ns->target.be32, target,
1396 true);
e60e935b
SRCSA
1397 }
1398
86d46f3c
ZKL
1399 while (bytes_remain >= ND_LLA_OPT_LEN && opt->len != 0) {
1400 if (opt->type == ND_OPT_SOURCE_LINKADDR && opt->len == 1) {
1401 if (!eth_addr_equals(opt->mac, sll)) {
e60e935b
SRCSA
1402 ovs_be16 *csum = &(ns->icmph.icmp6_cksum);
1403
86d46f3c
ZKL
1404 *csum = recalc_csum48(*csum, opt->mac, sll);
1405 opt->mac = sll;
e60e935b
SRCSA
1406 }
1407
1408 /* A packet can only contain one SLL or TLL option */
1409 break;
86d46f3c
ZKL
1410 } else if (opt->type == ND_OPT_TARGET_LINKADDR && opt->len == 1) {
1411 if (!eth_addr_equals(opt->mac, tll)) {
e60e935b
SRCSA
1412 ovs_be16 *csum = &(ns->icmph.icmp6_cksum);
1413
86d46f3c
ZKL
1414 *csum = recalc_csum48(*csum, opt->mac, tll);
1415 opt->mac = tll;
e60e935b
SRCSA
1416 }
1417
1418 /* A packet can only contain one SLL or TLL option */
1419 break;
1420 }
1421
86d46f3c
ZKL
1422 opt += opt->len;
1423 bytes_remain -= opt->len * ND_LLA_OPT_LEN;
e60e935b
SRCSA
1424 }
1425}
1426
61bf6666
JR
1427const char *
1428packet_tcp_flag_to_string(uint32_t flag)
1429{
1430 switch (flag) {
1431 case TCP_FIN:
1432 return "fin";
1433 case TCP_SYN:
1434 return "syn";
1435 case TCP_RST:
1436 return "rst";
1437 case TCP_PSH:
1438 return "psh";
1439 case TCP_ACK:
1440 return "ack";
1441 case TCP_URG:
1442 return "urg";
1443 case TCP_ECE:
1444 return "ece";
1445 case TCP_CWR:
1446 return "cwr";
1447 case TCP_NS:
1448 return "ns";
1449 case 0x200:
1450 return "[200]";
1451 case 0x400:
1452 return "[400]";
1453 case 0x800:
1454 return "[800]";
1455 default:
1456 return NULL;
1457 }
1458}
1459
7393104d 1460/* Appends a string representation of the TCP flags value 'tcp_flags'
f41b5b3b 1461 * (e.g. from struct flow.tcp_flags or obtained via TCP_FLAGS) to 's', in the
7393104d
BP
1462 * format used by tcpdump. */
1463void
a66733a8 1464packet_format_tcp_flags(struct ds *s, uint16_t tcp_flags)
7393104d
BP
1465{
1466 if (!tcp_flags) {
1467 ds_put_cstr(s, "none");
1468 return;
1469 }
1470
1471 if (tcp_flags & TCP_SYN) {
1472 ds_put_char(s, 'S');
1473 }
1474 if (tcp_flags & TCP_FIN) {
1475 ds_put_char(s, 'F');
1476 }
1477 if (tcp_flags & TCP_PSH) {
1478 ds_put_char(s, 'P');
1479 }
1480 if (tcp_flags & TCP_RST) {
1481 ds_put_char(s, 'R');
1482 }
1483 if (tcp_flags & TCP_URG) {
1484 ds_put_char(s, 'U');
1485 }
1486 if (tcp_flags & TCP_ACK) {
1487 ds_put_char(s, '.');
1488 }
a66733a8
JR
1489 if (tcp_flags & TCP_ECE) {
1490 ds_put_cstr(s, "E");
7393104d 1491 }
a66733a8
JR
1492 if (tcp_flags & TCP_CWR) {
1493 ds_put_cstr(s, "C");
1494 }
1495 if (tcp_flags & TCP_NS) {
1496 ds_put_cstr(s, "N");
1497 }
1498 if (tcp_flags & 0x200) {
1499 ds_put_cstr(s, "[200]");
1500 }
1501 if (tcp_flags & 0x400) {
1502 ds_put_cstr(s, "[400]");
1503 }
1504 if (tcp_flags & 0x800) {
1505 ds_put_cstr(s, "[800]");
7393104d
BP
1506 }
1507}
a36de779
PS
1508
/* Buffer space needed for a composed ARP frame: the 2 + VLAN_HEADER_LEN
 * bytes of headroom that compose_arp__() reserves, plus the Ethernet and
 * ARP-over-Ethernet headers. */
#define ARP_PACKET_SIZE (2 + VLAN_HEADER_LEN + ETH_HEADER_LEN + \
                         ARP_ETH_HEADER_LEN)
1511
eb0b295e
BP
1512/* Clears 'b' and replaces its contents by an ARP frame with the specified
1513 * 'arp_op', 'arp_sha', 'arp_tha', 'arp_spa', and 'arp_tpa'. The outer
1514 * Ethernet frame is initialized with Ethernet source 'arp_sha' and destination
1515 * 'arp_tha', except that destination ff:ff:ff:ff:ff:ff is used instead if
6335d074 1516 * 'broadcast' is true. Points the L3 header to the ARP header. */
a36de779 1517void
eb0b295e 1518compose_arp(struct dp_packet *b, uint16_t arp_op,
74ff3298
JR
1519 const struct eth_addr arp_sha, const struct eth_addr arp_tha,
1520 bool broadcast, ovs_be32 arp_spa, ovs_be32 arp_tpa)
a36de779 1521{
6335d074
BP
1522 compose_arp__(b);
1523
2482b0b0 1524 struct eth_header *eth = dp_packet_eth(b);
6335d074
BP
1525 eth->eth_dst = broadcast ? eth_addr_broadcast : arp_tha;
1526 eth->eth_src = arp_sha;
1527
1528 struct arp_eth_header *arp = dp_packet_l3(b);
1529 arp->ar_op = htons(arp_op);
1530 arp->ar_sha = arp_sha;
1531 arp->ar_tha = arp_tha;
1532 put_16aligned_be32(&arp->ar_spa, arp_spa);
1533 put_16aligned_be32(&arp->ar_tpa, arp_tpa);
1534}
a36de779 1535
6335d074
BP
1536/* Clears 'b' and replaces its contents by an ARP frame. Sets the fields in
1537 * the Ethernet and ARP headers that are fixed for ARP frames to those fixed
1538 * values, and zeroes the other fields. Points the L3 header to the ARP
1539 * header. */
1540void
1541compose_arp__(struct dp_packet *b)
1542{
cf62fa4c
PS
1543 dp_packet_clear(b);
1544 dp_packet_prealloc_tailroom(b, ARP_PACKET_SIZE);
1545 dp_packet_reserve(b, 2 + VLAN_HEADER_LEN);
a36de779 1546
6335d074 1547 struct eth_header *eth = dp_packet_put_zeros(b, sizeof *eth);
a36de779
PS
1548 eth->eth_type = htons(ETH_TYPE_ARP);
1549
6335d074 1550 struct arp_eth_header *arp = dp_packet_put_zeros(b, sizeof *arp);
a36de779
PS
1551 arp->ar_hrd = htons(ARP_HRD_ETHERNET);
1552 arp->ar_pro = htons(ARP_PRO_IP);
1553 arp->ar_hln = sizeof arp->ar_sha;
1554 arp->ar_pln = sizeof arp->ar_spa;
a36de779 1555
82eb5b0a 1556 dp_packet_reset_offsets(b);
cf62fa4c 1557 dp_packet_set_l3(b, arp);
2482b0b0
JS
1558
1559 b->packet_type = htonl(PT_ETH);
a36de779 1560}
0292a0c9 1561
16187903 1562/* This function expects packet with ethernet header with correct
c4bee4cb
PS
1563 * l3 pointer set. */
1564static void *
16187903
JP
1565compose_ipv6(struct dp_packet *packet, uint8_t proto,
1566 const struct in6_addr *src, const struct in6_addr *dst,
1567 uint8_t key_tc, ovs_be32 key_fl, uint8_t key_hl, int size)
c4bee4cb
PS
1568{
1569 struct ip6_hdr *nh;
1570 void *data;
1571
1572 nh = dp_packet_l3(packet);
1573 nh->ip6_vfc = 0x60;
1574 nh->ip6_nxt = proto;
1575 nh->ip6_plen = htons(size);
1576 data = dp_packet_put_zeros(packet, size);
1577 dp_packet_set_l4(packet, data);
932c96b7 1578 packet_set_ipv6(packet, src, dst, key_tc, key_fl, key_hl);
c4bee4cb
PS
1579 return data;
1580}
1581
16187903 1582/* Compose an IPv6 Neighbor Discovery Neighbor Solicitation message. */
c2b878e0 1583void
16187903
JP
1584compose_nd_ns(struct dp_packet *b, const struct eth_addr eth_src,
1585 const struct in6_addr *ipv6_src, const struct in6_addr *ipv6_dst)
c2b878e0
TLSC
1586{
1587 struct in6_addr sn_addr;
1588 struct eth_addr eth_dst;
1589 struct ovs_nd_msg *ns;
86d46f3c 1590 struct ovs_nd_lla_opt *lla_opt;
c4bee4cb 1591 uint32_t icmp_csum;
c2b878e0
TLSC
1592
1593 in6_addr_solicited_node(&sn_addr, ipv6_dst);
1594 ipv6_multicast_to_ethernet(&eth_dst, &sn_addr);
1595
c4bee4cb 1596 eth_compose(b, eth_dst, eth_src, ETH_TYPE_IPV6, IPV6_HEADER_LEN);
16187903 1597 ns = compose_ipv6(b, IPPROTO_ICMPV6, ipv6_src, &sn_addr,
86d46f3c 1598 0, 0, 255, ND_MSG_LEN + ND_LLA_OPT_LEN);
c2b878e0
TLSC
1599
1600 ns->icmph.icmp6_type = ND_NEIGHBOR_SOLICIT;
1601 ns->icmph.icmp6_code = 0;
29d5e9a7 1602 put_16aligned_be32(&ns->rso_flags, htonl(0));
c2b878e0 1603
86d46f3c
ZKL
1604 lla_opt = &ns->options[0];
1605 lla_opt->type = ND_OPT_SOURCE_LINKADDR;
1606 lla_opt->len = 1;
c4bee4cb 1607
932c96b7 1608 packet_set_nd(b, ipv6_dst, eth_src, eth_addr_zero);
16187903 1609
c4bee4cb
PS
1610 ns->icmph.icmp6_cksum = 0;
1611 icmp_csum = packet_csum_pseudoheader6(dp_packet_l3(b));
86d46f3c
ZKL
1612 ns->icmph.icmp6_cksum = csum_finish(
1613 csum_continue(icmp_csum, ns, ND_MSG_LEN + ND_LLA_OPT_LEN));
c2b878e0
TLSC
1614}
1615
16187903 1616/* Compose an IPv6 Neighbor Discovery Neighbor Advertisement message. */
e75451fe 1617void
16187903
JP
1618compose_nd_na(struct dp_packet *b,
1619 const struct eth_addr eth_src, const struct eth_addr eth_dst,
1620 const struct in6_addr *ipv6_src, const struct in6_addr *ipv6_dst,
1621 ovs_be32 rso_flags)
e75451fe
ZKL
1622{
1623 struct ovs_nd_msg *na;
86d46f3c 1624 struct ovs_nd_lla_opt *lla_opt;
e75451fe
ZKL
1625 uint32_t icmp_csum;
1626
1627 eth_compose(b, eth_dst, eth_src, ETH_TYPE_IPV6, IPV6_HEADER_LEN);
16187903 1628 na = compose_ipv6(b, IPPROTO_ICMPV6, ipv6_src, ipv6_dst,
86d46f3c 1629 0, 0, 255, ND_MSG_LEN + ND_LLA_OPT_LEN);
e75451fe
ZKL
1630
1631 na->icmph.icmp6_type = ND_NEIGHBOR_ADVERT;
1632 na->icmph.icmp6_code = 0;
29d5e9a7 1633 put_16aligned_be32(&na->rso_flags, rso_flags);
e75451fe 1634
86d46f3c
ZKL
1635 lla_opt = &na->options[0];
1636 lla_opt->type = ND_OPT_TARGET_LINKADDR;
1637 lla_opt->len = 1;
e75451fe 1638
932c96b7 1639 packet_set_nd(b, ipv6_src, eth_addr_zero, eth_src);
16187903 1640
e75451fe
ZKL
1641 na->icmph.icmp6_cksum = 0;
1642 icmp_csum = packet_csum_pseudoheader6(dp_packet_l3(b));
86d46f3c
ZKL
1643 na->icmph.icmp6_cksum = csum_finish(csum_continue(
1644 icmp_csum, na, ND_MSG_LEN + ND_LLA_OPT_LEN));
e75451fe
ZKL
1645}
1646
b24ab67c
ZKL
1647/* Compose an IPv6 Neighbor Discovery Router Advertisement message with
1648 * Source Link-layer Address Option and MTU Option.
1649 * Caller can call packet_put_ra_prefix_opt to append Prefix Information
1650 * Options to composed messags in 'b'. */
1651void
1652compose_nd_ra(struct dp_packet *b,
1653 const struct eth_addr eth_src, const struct eth_addr eth_dst,
1654 const struct in6_addr *ipv6_src, const struct in6_addr *ipv6_dst,
1655 uint8_t cur_hop_limit, uint8_t mo_flags,
1656 ovs_be16 router_lt, ovs_be32 reachable_time,
4446661a 1657 ovs_be32 retrans_timer, uint32_t mtu)
b24ab67c
ZKL
1658{
1659 /* Don't compose Router Advertisement packet with MTU Option if mtu
1660 * value is 0. */
1661 bool with_mtu = mtu != 0;
1662 size_t mtu_opt_len = with_mtu ? ND_MTU_OPT_LEN : 0;
1663
1664 eth_compose(b, eth_dst, eth_src, ETH_TYPE_IPV6, IPV6_HEADER_LEN);
1665
1666 struct ovs_ra_msg *ra = compose_ipv6(
1667 b, IPPROTO_ICMPV6, ipv6_src, ipv6_dst, 0, 0, 255,
86d46f3c 1668 RA_MSG_LEN + ND_LLA_OPT_LEN + mtu_opt_len);
b24ab67c
ZKL
1669 ra->icmph.icmp6_type = ND_ROUTER_ADVERT;
1670 ra->icmph.icmp6_code = 0;
1671 ra->cur_hop_limit = cur_hop_limit;
1672 ra->mo_flags = mo_flags;
1673 ra->router_lifetime = router_lt;
1674 ra->reachable_time = reachable_time;
1675 ra->retrans_timer = retrans_timer;
1676
86d46f3c
ZKL
1677 struct ovs_nd_lla_opt *lla_opt = ra->options;
1678 lla_opt->type = ND_OPT_SOURCE_LINKADDR;
1679 lla_opt->len = 1;
1680 lla_opt->mac = eth_src;
b24ab67c
ZKL
1681
1682 if (with_mtu) {
86d46f3c 1683 /* ovs_nd_mtu_opt has the same size with ovs_nd_lla_opt. */
b24ab67c
ZKL
1684 struct ovs_nd_mtu_opt *mtu_opt
1685 = (struct ovs_nd_mtu_opt *)(lla_opt + 1);
1686 mtu_opt->type = ND_OPT_MTU;
1687 mtu_opt->len = 1;
1688 mtu_opt->reserved = 0;
4446661a 1689 put_16aligned_be32(&mtu_opt->mtu, htonl(mtu));
b24ab67c
ZKL
1690 }
1691
1692 ra->icmph.icmp6_cksum = 0;
1693 uint32_t icmp_csum = packet_csum_pseudoheader6(dp_packet_l3(b));
1694 ra->icmph.icmp6_cksum = csum_finish(csum_continue(
86d46f3c 1695 icmp_csum, ra, RA_MSG_LEN + ND_LLA_OPT_LEN + mtu_opt_len));
b24ab67c
ZKL
1696}
1697
1698/* Append an IPv6 Neighbor Discovery Prefix Information option to a
1699 * Router Advertisement message. */
1700void
1701packet_put_ra_prefix_opt(struct dp_packet *b,
1702 uint8_t plen, uint8_t la_flags,
1703 ovs_be32 valid_lifetime, ovs_be32 preferred_lifetime,
1704 const ovs_be128 prefix)
1705{
1706 size_t prev_l4_size = dp_packet_l4_size(b);
1707 struct ip6_hdr *nh = dp_packet_l3(b);
1708 nh->ip6_plen = htons(prev_l4_size + ND_PREFIX_OPT_LEN);
1709
1710 struct ovs_ra_msg *ra = dp_packet_l4(b);
481ada4d
NS
1711 struct ovs_nd_prefix_opt *prefix_opt =
1712 dp_packet_put_uninit(b, sizeof *prefix_opt);
b24ab67c
ZKL
1713 prefix_opt->type = ND_OPT_PREFIX_INFORMATION;
1714 prefix_opt->len = 4;
1715 prefix_opt->prefix_len = plen;
1716 prefix_opt->la_flags = la_flags;
1717 put_16aligned_be32(&prefix_opt->valid_lifetime, valid_lifetime);
1718 put_16aligned_be32(&prefix_opt->preferred_lifetime, preferred_lifetime);
1719 put_16aligned_be32(&prefix_opt->reserved, 0);
1720 memcpy(prefix_opt->prefix.be32, prefix.be32, sizeof(ovs_be32[4]));
1721
1722 ra->icmph.icmp6_cksum = 0;
1723 uint32_t icmp_csum = packet_csum_pseudoheader6(dp_packet_l3(b));
1724 ra->icmph.icmp6_cksum = csum_finish(csum_continue(
1725 icmp_csum, ra, prev_l4_size + ND_PREFIX_OPT_LEN));
1726}
1727
0292a0c9
JG
1728uint32_t
1729packet_csum_pseudoheader(const struct ip_header *ip)
1730{
1731 uint32_t partial = 0;
1732
1733 partial = csum_add32(partial, get_16aligned_be32(&ip->ip_src));
1734 partial = csum_add32(partial, get_16aligned_be32(&ip->ip_dst));
1735 partial = csum_add16(partial, htons(ip->ip_proto));
1736 partial = csum_add16(partial, htons(ntohs(ip->ip_tot_len) -
1737 IP_IHL(ip->ip_ihl_ver) * 4));
1738
1739 return partial;
1740}
07659514 1741
370e373b
TLSC
1742#ifndef __CHECKER__
1743uint32_t
1744packet_csum_pseudoheader6(const struct ovs_16aligned_ip6_hdr *ip6)
1745{
1746 uint32_t partial = 0;
1747
cfa354cb
BP
1748 partial = csum_continue(partial, &ip6->ip6_src, sizeof ip6->ip6_src);
1749 partial = csum_continue(partial, &ip6->ip6_dst, sizeof ip6->ip6_dst);
c4bee4cb 1750 partial = csum_add16(partial, htons(ip6->ip6_nxt));
370e373b 1751 partial = csum_add16(partial, ip6->ip6_plen);
370e373b
TLSC
1752
1753 return partial;
1754}
46445c63
EC
1755
1756/* Calculate the IPv6 upper layer checksum according to RFC2460. We pass the
1757 ip6_nxt and ip6_plen values, so it will also work if extension headers
1758 are present. */
1759uint16_t
1760packet_csum_upperlayer6(const struct ovs_16aligned_ip6_hdr *ip6,
1761 const void *data, uint8_t l4_protocol,
1762 uint16_t l4_size)
1763{
1764 uint32_t partial = 0;
1765
1766 partial = csum_continue(partial, &ip6->ip6_src, sizeof ip6->ip6_src);
1767 partial = csum_continue(partial, &ip6->ip6_dst, sizeof ip6->ip6_dst);
1768 partial = csum_add16(partial, htons(l4_protocol));
1769 partial = csum_add16(partial, htons(l4_size));
1770
1771 partial = csum_continue(partial, data, l4_size);
1772
1773 return csum_finish(partial);
1774}
370e373b 1775#endif
1bc3f0ed
PS
1776
1777void
1778IP_ECN_set_ce(struct dp_packet *pkt, bool is_ipv6)
1779{
1780 if (is_ipv6) {
1781 ovs_16aligned_be32 *ip6 = dp_packet_l3(pkt);
1782
1783 put_16aligned_be32(ip6, get_16aligned_be32(ip6) |
1784 htonl(IP_ECN_CE << 20));
1785 } else {
1786 struct ip_header *nh = dp_packet_l3(pkt);
1787 uint8_t tos = nh->ip_tos;
1788
1789 tos |= IP_ECN_CE;
1790 if (nh->ip_tos != tos) {
1791 nh->ip_csum = recalc_csum16(nh->ip_csum, htons(nh->ip_tos),
1792 htons((uint16_t) tos));
1793 nh->ip_tos = tos;
1794 }
1795 }
1796}