]>
Commit | Line | Data |
---|---|---|
2736b84e JG |
1 | /* |
2 | * Copyright (c) 2010 Nicira Networks. | |
3 | * Distributed under the terms of the GNU GPL version 2. | |
4 | * | |
5 | * Significant portions of this file may be copied from parts of the Linux | |
6 | * kernel, by Linus Torvalds and others. | |
7 | */ | |
8 | ||
9 | #include <linux/if_arp.h> | |
10 | #include <linux/if_ether.h> | |
11 | #include <linux/ip.h> | |
12 | #include <linux/if_tunnel.h> | |
13 | #include <linux/if_vlan.h> | |
14 | #include <linux/in.h> | |
15 | #include <linux/in_route.h> | |
16 | #include <linux/jhash.h> | |
17 | #include <linux/kernel.h> | |
18 | #include <linux/version.h> | |
19 | ||
20 | #include <net/dsfield.h> | |
21 | #include <net/dst.h> | |
22 | #include <net/icmp.h> | |
23 | #include <net/inet_ecn.h> | |
24 | #include <net/ip.h> | |
6f470982 | 25 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) |
2736b84e | 26 | #include <net/ipv6.h> |
6f470982 | 27 | #endif |
2736b84e JG |
28 | #include <net/protocol.h> |
29 | #include <net/route.h> | |
30 | #include <net/xfrm.h> | |
31 | ||
32 | #include "actions.h" | |
33 | #include "datapath.h" | |
34 | #include "openvswitch/gre.h" | |
35 | #include "table.h" | |
36 | #include "vport.h" | |
b19e8815 | 37 | #include "vport-generic.h" |
2736b84e JG |
38 | |
39 | /* The absolute minimum fragment size. Note that there are many other | |
40 | * definitions of the minimum MTU. */ | |
41 | #define IP_MIN_MTU 68 | |
42 | ||
43 | /* The GRE header is composed of a series of sections: a base and then a variable | |
44 | * number of options. */ | |
45 | #define GRE_HEADER_SECTION 4 | |
46 | ||
/* Fixed portion of a GRE header (RFC 2784/2890): flag bits followed by the
 * EtherType of the encapsulated payload.  Optional fields (checksum, key,
 * sequence number) follow this structure on the wire. */
struct gre_base_hdr {
	__be16 flags;
	__be16 protocol;
};
51 | ||
/* Per-port configuration that may change at runtime.  Updated as a unit via
 * RCU: writers publish a fresh copy with assign_config_rcu() and readers
 * access it through rcu_dereference(). */
struct mutable_config {
	struct rcu_head rcu;	/* Used to defer freeing of replaced copies. */

	unsigned char eth_addr[ETH_ALEN];	/* Source MAC for synthesized frames. */
	unsigned int mtu;
	struct gre_port_config port_config;	/* User-supplied tunnel parameters. */

	int tunnel_hlen; /* Tunnel header length. */
};
61 | ||
/* Private data for a GRE vport; embedded in the generic vport structure and
 * linked into the global port_table for (address, key) lookup. */
struct gre_vport {
	struct rcu_head rcu;	/* Defers freeing until RCU readers finish. */

	struct tbl_node tbl_node;	/* Linkage in port_table. */

	char name[IFNAMSIZ];

	/* Protected by RCU. */
	struct mutable_config *mutable;
};
71 | ||
/* Protected by RCU. */
static struct tbl *port_table;

/* These are just used as an optimization: they don't require any kind of
 * synchronization because we could have just as easily read the value before
 * the port change happened.  Each counts the ports configured with the
 * corresponding combination of key handling and local address. */
static unsigned int key_local_remote_ports;
static unsigned int key_remote_ports;
static unsigned int local_remote_ports;
static unsigned int remote_ports;
82 | ||
/* Returns the GRE-specific private area of 'vport'. */
static inline struct gre_vport *gre_vport_priv(const struct vport *vport)
{
	return vport_priv(vport);
}
87 | ||
/* Returns the generic vport that contains 'gre_vport'. */
static inline struct vport *gre_vport_to_vport(const struct gre_vport *gre_vport)
{
	return vport_from_priv(gre_vport);
}
92 | ||
/* Maps a port_table node back to its containing gre_vport. */
static inline struct gre_vport *gre_vport_table_cast(const struct tbl_node *node)
{
	return container_of(node, struct gre_vport, tbl_node);
}
97 | ||
/* RCU callback.  Frees a mutable_config once no readers can still hold a
 * reference to it (see assign_config_rcu()). */
static void free_config(struct rcu_head *rcu)
{
	struct mutable_config *c = container_of(rcu, struct mutable_config, rcu);
	kfree(c);
}
104 | ||
/* Publishes 'new_config' as the port's configuration and schedules the old
 * copy for freeing after an RCU grace period.
 * NOTE(review): there is no locking here, so concurrent updaters would race
 * on the pointer swap — presumably callers serialize updates externally;
 * confirm against the callers. */
static void assign_config_rcu(struct vport *vport,
			      struct mutable_config *new_config)
{
	struct gre_vport *gre_vport = gre_vport_priv(vport);
	struct mutable_config *old_config;

	old_config = rcu_dereference(gre_vport->mutable);
	rcu_assign_pointer(gre_vport->mutable, new_config);
	call_rcu(&old_config->rcu, free_config);
}
115 | ||
fceb2a5b | 116 | static unsigned int *find_port_pool(const struct mutable_config *mutable) |
2736b84e JG |
117 | { |
118 | if (mutable->port_config.flags & GRE_F_IN_KEY_MATCH) { | |
119 | if (mutable->port_config.saddr) | |
120 | return &local_remote_ports; | |
121 | else | |
122 | return &remote_ports; | |
123 | } else { | |
124 | if (mutable->port_config.saddr) | |
125 | return &key_local_remote_ports; | |
126 | else | |
127 | return &key_remote_ports; | |
128 | } | |
129 | } | |
130 | ||
/* Indices into port_lookup_key.vals[]; the order defines the hashed tuple
 * and must match between port_hash() and port_cmp(). */
enum lookup_key {
	LOOKUP_SADDR = 0,
	LOOKUP_DADDR = 1,
	LOOKUP_KEY = 2,
	LOOKUP_KEY_MATCH = 3
};

/* Hash-table lookup target: the tuple identifying a tunnel plus, after a
 * successful comparison, the RCU-dereferenced config it matched. */
struct port_lookup_key {
	u32 vals[4]; /* Contains enum lookup_key keys. */
	const struct mutable_config *mutable;
};
142 | ||
/* Modifies 'target' to store the rcu_dereferenced pointer that was used to do
 * the comparision. */
static int port_cmp(const struct tbl_node *node, void *target)
{
	const struct gre_vport *gre_vport = gre_vport_table_cast(node);
	struct port_lookup_key *lookup = target;

	lookup->mutable = rcu_dereference(gre_vport->mutable);

	/* Only the GRE_F_IN_KEY_MATCH bit participates in the match; callers
	 * store either 0 or GRE_F_IN_KEY_MATCH in vals[LOOKUP_KEY_MATCH]. */
	return ((lookup->mutable->port_config.flags & GRE_F_IN_KEY_MATCH) ==
			lookup->vals[LOOKUP_KEY_MATCH]) &&
	       lookup->mutable->port_config.daddr == lookup->vals[LOOKUP_DADDR] &&
	       lookup->mutable->port_config.in_key == lookup->vals[LOOKUP_KEY] &&
	       lookup->mutable->port_config.saddr == lookup->vals[LOOKUP_SADDR];
}
158 | ||
/* Hashes the lookup tuple; must stay in sync with the vals[] layout defined
 * by enum lookup_key. */
static u32 port_hash(struct port_lookup_key *lookup)
{
	return jhash2(lookup->vals, ARRAY_SIZE(lookup->vals), 0);
}
163 | ||
/* Inserts 'vport' into the global hash table, creating or expanding the
 * table as needed, and bumps the population counter for its port class.
 * Returns 0 or a negative errno.
 * NOTE(review): port_table is read and replaced without a lock here —
 * presumably updates are serialized by the caller; confirm. */
static int add_port(struct vport *vport)
{
	struct gre_vport *gre_vport = gre_vport_priv(vport);
	struct port_lookup_key lookup;
	int err;

	if (!port_table) {
		struct tbl *new_table;

		new_table = tbl_create(0);
		if (!new_table)
			return -ENOMEM;

		rcu_assign_pointer(port_table, new_table);

	} else if (tbl_count(port_table) > tbl_n_buckets(port_table)) {
		/* Grow once the load factor exceeds one entry per bucket;
		 * the old table is freed after an RCU grace period. */
		struct tbl *old_table = port_table;
		struct tbl *new_table;

		new_table = tbl_expand(old_table);
		if (IS_ERR(new_table))
			return PTR_ERR(new_table);

		rcu_assign_pointer(port_table, new_table);
		tbl_deferred_destroy(old_table, NULL);
	}

	/* Build the hash tuple from the current configuration; layout must
	 * match port_cmp()/port_hash(). */
	lookup.vals[LOOKUP_SADDR] = gre_vport->mutable->port_config.saddr;
	lookup.vals[LOOKUP_DADDR] = gre_vport->mutable->port_config.daddr;
	lookup.vals[LOOKUP_KEY] = gre_vport->mutable->port_config.in_key;
	lookup.vals[LOOKUP_KEY_MATCH] = gre_vport->mutable->port_config.flags & GRE_F_IN_KEY_MATCH;

	err = tbl_insert(port_table, &gre_vport->tbl_node, port_hash(&lookup));
	if (err)
		return err;

	(*find_port_pool(gre_vport->mutable))++;

	return 0;
}
204 | ||
/* Removes 'vport' from the hash table and decrements the population counter
 * for its port class.  Returns 0 or a negative errno from tbl_remove(). */
static int del_port(struct vport *vport)
{
	struct gre_vport *gre_vport = gre_vport_priv(vport);
	int err;

	err = tbl_remove(port_table, &gre_vport->tbl_node);
	if (err)
		return err;

	(*find_port_pool(gre_vport->mutable))--;

	return 0;
}
218 | ||
/* Flags for find_port(): which classes of ports to search. */
#define FIND_PORT_KEY (1 << 0)		/* Ports with a fixed configured in_key. */
#define FIND_PORT_MATCH (1 << 1)	/* Ports using flow-based key matching. */
#define FIND_PORT_ANY (FIND_PORT_KEY | FIND_PORT_MATCH)
222 | ||
/* Looks up the tunnel port matching (saddr, daddr, key).  'port_type'
 * selects which classes are searched (see FIND_PORT_*).  Within each class
 * an exact local-address match is tried before a wildcarded (saddr == 0)
 * one.  On success stores the matched, RCU-dereferenced configuration in
 * '*mutable' and returns the vport; otherwise returns NULL.
 * NOTE(review): uses rcu_dereference(), so callers presumably hold
 * rcu_read_lock — confirm. */
static struct vport *find_port(__be32 saddr, __be32 daddr, __be32 key,
			       int port_type,
			       const struct mutable_config **mutable)
{
	struct port_lookup_key lookup;
	struct tbl *table = rcu_dereference(port_table);
	struct tbl_node *tbl_node;

	if (!table)
		return NULL;

	lookup.vals[LOOKUP_SADDR] = saddr;
	lookup.vals[LOOKUP_DADDR] = daddr;

	if (port_type & FIND_PORT_KEY) {
		lookup.vals[LOOKUP_KEY] = key;
		lookup.vals[LOOKUP_KEY_MATCH] = 0;

		/* The population counters let us skip hash lookups for
		 * combinations with no configured ports. */
		if (key_local_remote_ports) {
			tbl_node = tbl_lookup(table, &lookup, port_hash(&lookup), port_cmp);
			if (tbl_node)
				goto found;
		}

		if (key_remote_ports) {
			lookup.vals[LOOKUP_SADDR] = 0;

			tbl_node = tbl_lookup(table, &lookup, port_hash(&lookup), port_cmp);
			if (tbl_node)
				goto found;

			/* Restore for the FIND_PORT_MATCH pass below. */
			lookup.vals[LOOKUP_SADDR] = saddr;
		}
	}

	if (port_type & FIND_PORT_MATCH) {
		lookup.vals[LOOKUP_KEY] = 0;
		lookup.vals[LOOKUP_KEY_MATCH] = GRE_F_IN_KEY_MATCH;

		if (local_remote_ports) {
			tbl_node = tbl_lookup(table, &lookup, port_hash(&lookup), port_cmp);
			if (tbl_node)
				goto found;
		}

		if (remote_ports) {
			lookup.vals[LOOKUP_SADDR] = 0;

			tbl_node = tbl_lookup(table, &lookup, port_hash(&lookup), port_cmp);
			if (tbl_node)
				goto found;
		}
	}

	return NULL;

found:
	*mutable = lookup.mutable;
	return gre_vport_to_vport(gre_vport_table_cast(tbl_node));
}
283 | ||
fceb2a5b | 284 | static bool check_ipv4_address(__be32 addr) |
2736b84e JG |
285 | { |
286 | if (ipv4_is_multicast(addr) || ipv4_is_lbcast(addr) | |
287 | || ipv4_is_loopback(addr) || ipv4_is_zeronet(addr)) | |
288 | return false; | |
289 | ||
290 | return true; | |
291 | } | |
292 | ||
/* Decides whether it is appropriate to synthesize an ICMP "fragmentation
 * needed" reply for the IPv4 packet in 'skb': suppresses replies to L2/L3
 * broadcasts, non-initial fragments and (most) ICMP messages. */
static bool ipv4_should_icmp(struct sk_buff *skb)
{
	struct iphdr *old_iph = ip_hdr(skb);

	/* Don't respond to L2 broadcast. */
	if (is_multicast_ether_addr(eth_hdr(skb)->h_dest))
		return false;

	/* Don't respond to L3 broadcast or invalid addresses. */
	if (!check_ipv4_address(old_iph->daddr) ||
	    !check_ipv4_address(old_iph->saddr))
		return false;

	/* Only respond to the first fragment. */
	if (old_iph->frag_off & htons(IP_OFFSET))
		return false;

	/* Don't respond to ICMP error messages. */
	if (old_iph->protocol == IPPROTO_ICMP) {
		u8 icmp_type, *icmp_typep;

		/* Offset of the ICMP type field relative to skb->data;
		 * old_iph need not be at skb->data, hence the pointer
		 * arithmetic. */
		icmp_typep = skb_header_pointer(skb, (u8 *)old_iph +
						(old_iph->ihl << 2) +
						offsetof(struct icmphdr, type) -
						skb->data, sizeof(icmp_type),
						&icmp_type);

		if (!icmp_typep)
			return false;

		/* Reply only to echo/echo-reply and the informational types
		 * above ICMP_PARAMETERPROB; anything else may be an error
		 * message, which must never trigger another error. */
		if (*icmp_typep > NR_ICMP_TYPES
			|| (*icmp_typep <= ICMP_PARAMETERPROB
			&& *icmp_typep != ICMP_ECHOREPLY
			&& *icmp_typep != ICMP_ECHO))
			return false;
	}

	return true;
}
332 | ||
/* Fills 'nskb' with an ICMP "fragmentation needed" message addressed back to
 * the sender of 'skb', advertising 'mtu' and quoting 'payload_length' bytes
 * of the original IP packet.  'nskb' must have room for IP + ICMP headers
 * plus the payload. */
static void ipv4_build_icmp(struct sk_buff *skb, struct sk_buff *nskb,
			    unsigned int mtu, unsigned int payload_length)
{
	struct iphdr *iph, *old_iph = ip_hdr(skb);
	struct icmphdr *icmph;
	u8 *payload;

	iph = (struct iphdr *)skb_put(nskb, sizeof(struct iphdr));
	icmph = (struct icmphdr *)skb_put(nskb, sizeof(struct icmphdr));
	payload = skb_put(nskb, payload_length);

	/* IP */
	iph->version = 4;
	iph->ihl = sizeof(struct iphdr) >> 2;
	iph->tos = (old_iph->tos & IPTOS_TOS_MASK) |
		   IPTOS_PREC_INTERNETCONTROL;
	iph->tot_len = htons(sizeof(struct iphdr)
			     + sizeof(struct icmphdr)
			     + payload_length);
	get_random_bytes(&iph->id, sizeof(iph->id));
	iph->frag_off = 0;
	iph->ttl = IPDEFTTL;
	iph->protocol = IPPROTO_ICMP;
	/* Swap addresses: the reply goes back to the original source. */
	iph->daddr = old_iph->saddr;
	iph->saddr = old_iph->daddr;

	ip_send_check(iph);

	/* ICMP */
	icmph->type = ICMP_DEST_UNREACH;
	icmph->code = ICMP_FRAG_NEEDED;
	icmph->un.gateway = htonl(mtu);
	icmph->checksum = 0;

	/* Checksum the ICMP header, then fold in the quoted payload as it is
	 * copied from the original packet. */
	nskb->csum = csum_partial((u8 *)icmph, sizeof(struct icmphdr), 0);
	nskb->csum = skb_copy_and_csum_bits(skb, (u8 *)old_iph - skb->data,
					    payload, payload_length,
					    nskb->csum);
	icmph->checksum = csum_fold(nskb->csum);
}
373 | ||
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
/* IPv6 analogue of ipv4_should_icmp(): decides whether a "packet too big"
 * reply may be synthesized for 'skb'. */
static bool ipv6_should_icmp(struct sk_buff *skb)
{
	struct ipv6hdr *old_ipv6h = ipv6_hdr(skb);
	int addr_type;
	int payload_off = (u8 *)(old_ipv6h + 1) - skb->data;
	u8 nexthdr = ipv6_hdr(skb)->nexthdr;

	/* Check source address is valid. */
	addr_type = ipv6_addr_type(&old_ipv6h->saddr);
	if (addr_type & IPV6_ADDR_MULTICAST || addr_type == IPV6_ADDR_ANY)
		return false;

	/* Don't reply to unspecified addresses. */
	if (ipv6_addr_type(&old_ipv6h->daddr) == IPV6_ADDR_ANY)
		return false;

	/* Don't respond to ICMP error messages. */
	payload_off = ipv6_skip_exthdr(skb, payload_off, &nexthdr);
	if (payload_off < 0)
		return false;

	if (nexthdr == NEXTHDR_ICMP) {
		u8 icmp_type, *icmp_typep;

		icmp_typep = skb_header_pointer(skb, payload_off +
						offsetof(struct icmp6hdr,
							icmp6_type),
						sizeof(icmp_type), &icmp_type);

		/* Only informational ICMPv6 types (high bit set) may elicit
		 * a reply; errors must not beget errors. */
		if (!icmp_typep || !(*icmp_typep & ICMPV6_INFOMSG_MASK))
			return false;
	}

	return true;
}
410 | ||
/* Fills 'nskb' with an ICMPv6 "packet too big" message addressed back to the
 * sender of 'skb', advertising 'mtu' and quoting 'payload_length' bytes of
 * the original packet. */
static void ipv6_build_icmp(struct sk_buff *skb, struct sk_buff *nskb,
			    unsigned int mtu, unsigned int payload_length)
{
	struct ipv6hdr *ipv6h, *old_ipv6h = ipv6_hdr(skb);
	struct icmp6hdr *icmp6h;
	u8 *payload;

	ipv6h = (struct ipv6hdr *)skb_put(nskb, sizeof(struct ipv6hdr));
	icmp6h = (struct icmp6hdr *)skb_put(nskb, sizeof(struct icmp6hdr));
	payload = skb_put(nskb, payload_length);

	/* IPv6 */
	ipv6h->version = 6;
	ipv6h->priority = 0;
	memset(&ipv6h->flow_lbl, 0, sizeof(ipv6h->flow_lbl));
	ipv6h->payload_len = htons(sizeof(struct icmp6hdr)
				   + payload_length);
	ipv6h->nexthdr = NEXTHDR_ICMP;
	ipv6h->hop_limit = IPV6_DEFAULT_HOPLIMIT;
	/* Swap addresses: the reply goes back to the original source. */
	ipv6_addr_copy(&ipv6h->daddr, &old_ipv6h->saddr);
	ipv6_addr_copy(&ipv6h->saddr, &old_ipv6h->daddr);

	/* ICMPv6 */
	icmp6h->icmp6_type = ICMPV6_PKT_TOOBIG;
	icmp6h->icmp6_code = 0;
	icmp6h->icmp6_cksum = 0;
	icmp6h->icmp6_mtu = htonl(mtu);

	/* Unlike ICMPv4, the ICMPv6 checksum covers a pseudo-header, hence
	 * the csum_ipv6_magic() fold at the end. */
	nskb->csum = csum_partial((u8 *)icmp6h, sizeof(struct icmp6hdr), 0);
	nskb->csum = skb_copy_and_csum_bits(skb, (u8 *)old_ipv6h - skb->data,
					    payload, payload_length,
					    nskb->csum);
	icmp6h->icmp6_cksum = csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr,
					      sizeof(struct icmp6hdr)
					      + payload_length,
					      ipv6h->nexthdr, nskb->csum);
}
#endif /* IPv6 */
2736b84e | 449 | |
/* Synthesizes an ICMP "fragmentation needed" / ICMPv6 "packet too big"
 * message for the over-sized inner packet 'skb' and injects it back into the
 * datapath as if received on 'vport', implementing PMTUD for tunneled
 * traffic.  Returns true if the situation was handled (reply sent, or reply
 * deliberately suppressed); false means the caller should fall back to
 * other handling (e.g. fragmentation). */
static bool send_frag_needed(struct vport *vport,
			     const struct mutable_config *mutable,
			     struct sk_buff *skb, unsigned int mtu,
			     __be32 flow_key)
{
	unsigned int eth_hdr_len = ETH_HLEN;
	unsigned int total_length = 0, header_length = 0, payload_length;
	struct ethhdr *eh, *old_eh = eth_hdr(skb);
	struct sk_buff *nskb;

	/* Sanity check */
	if (skb->protocol == htons(ETH_P_IP)) {
		if (mtu < IP_MIN_MTU)
			return false;

		if (!ipv4_should_icmp(skb))
			return true;
	}
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
	else if (skb->protocol == htons(ETH_P_IPV6)) {
		if (mtu < IPV6_MIN_MTU)
			return false;

		/* In theory we should do PMTUD on IPv6 multicast messages but
		 * we don't have an address to send from so just fragment. */
		if (ipv6_addr_type(&ipv6_hdr(skb)->daddr) & IPV6_ADDR_MULTICAST)
			return false;

		if (!ipv6_should_icmp(skb))
			return true;
	}
#endif
	else
		return false;

	/* Allocate */
	if (old_eh->h_proto == htons(ETH_P_8021Q))
		eth_hdr_len = VLAN_ETH_HLEN;

	payload_length = skb->len - eth_hdr_len;
	if (skb->protocol == htons(ETH_P_IP)) {
		header_length = sizeof(struct iphdr) + sizeof(struct icmphdr);
		/* 576 is the IPv4 minimum reassembly buffer size. */
		total_length = min_t(unsigned int, header_length +
						   payload_length, 576);
	}
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
	else {
		header_length = sizeof(struct ipv6hdr) +
				sizeof(struct icmp6hdr);
		total_length = min_t(unsigned int, header_length +
						  payload_length, IPV6_MIN_MTU);
	}
#endif

	/* Never exceed the port MTU; quote only as much payload as fits. */
	total_length = min(total_length, mutable->mtu);
	payload_length = total_length - header_length;

	nskb = dev_alloc_skb(NET_IP_ALIGN + eth_hdr_len + header_length +
			     payload_length);
	if (!nskb)
		return false;

	skb_reserve(nskb, NET_IP_ALIGN);

	/* Ethernet / VLAN: reply to the original frame's source MAC, from
	 * the port's own address, preserving any VLAN tag. */
	eh = (struct ethhdr *)skb_put(nskb, eth_hdr_len);
	memcpy(eh->h_dest, old_eh->h_source, ETH_ALEN);
	memcpy(eh->h_source, mutable->eth_addr, ETH_ALEN);
	nskb->protocol = eh->h_proto = old_eh->h_proto;
	if (old_eh->h_proto == htons(ETH_P_8021Q)) {
		struct vlan_ethhdr *vh = (struct vlan_ethhdr *)eh;

		vh->h_vlan_TCI = vlan_eth_hdr(skb)->h_vlan_TCI;
		vh->h_vlan_encapsulated_proto = skb->protocol;
	}
	skb_reset_mac_header(nskb);

	/* Protocol */
	if (skb->protocol == htons(ETH_P_IP))
		ipv4_build_icmp(skb, nskb, mtu, payload_length);
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
	else
		ipv6_build_icmp(skb, nskb, mtu, payload_length);
#endif

	/* Assume that flow based keys are symmetric with respect to input
	 * and output and use the key that we were going to put on the
	 * outgoing packet for the fake received packet. If the keys are
	 * not symmetric then PMTUD needs to be disabled since we won't have
	 * any way of synthesizing packets. */
	if (mutable->port_config.flags & GRE_F_IN_KEY_MATCH &&
	    mutable->port_config.flags & GRE_F_OUT_KEY_ACTION)
		OVS_CB(nskb)->tun_id = flow_key;

	compute_ip_summed(nskb, false);
	vport_receive(vport, nskb);

	return true;
}
549 | ||
/* Ensures 'skb' has at least 'headroom' bytes of headroom and an unshared
 * header area, reallocating if necessary.  On allocation failure, frees
 * 'skb' and returns ERR_PTR(-ENOMEM); otherwise returns the (possibly new)
 * skb.  Callers must use the returned pointer. */
static struct sk_buff *check_headroom(struct sk_buff *skb, int headroom)
{
	if (skb_headroom(skb) < headroom || skb_header_cloned(skb)) {
		/* A little extra headroom avoids an immediate second
		 * reallocation if slightly more is needed later. */
		struct sk_buff *nskb = skb_realloc_headroom(skb, headroom + 16);
		if (!nskb) {
			kfree_skb(skb);
			return ERR_PTR(-ENOMEM);
		}

		set_skb_csum_bits(skb, nskb);

		if (skb->sk)
			skb_set_owner_w(nskb, skb->sk);

		dev_kfree_skb(skb);
		return nskb;
	}

	return skb;
}
570 | ||
/* Writes the GRE header (base plus optional key and checksum fields)
 * immediately after the IP header of 'skb'.  The caller must already have
 * laid out mutable->tunnel_hlen bytes of header space. */
static void create_gre_header(struct sk_buff *skb,
			      const struct mutable_config *mutable)
{
	struct iphdr *iph = ip_hdr(skb);
	struct gre_base_hdr *greh = (struct gre_base_hdr *)(iph + 1);
	/* 'options' starts at the last 4-byte section of the tunnel header
	 * and is walked backwards. */
	__be32 *options = (__be32 *)((u8 *)iph + mutable->tunnel_hlen
					       - GRE_HEADER_SECTION);

	greh->protocol = htons(ETH_P_TEB);
	greh->flags = 0;

	/* Work backwards over the options so the checksum is last. */
	if (mutable->port_config.out_key ||
	    mutable->port_config.flags & GRE_F_OUT_KEY_ACTION) {
		greh->flags |= GRE_KEY;

		/* A flow-based key comes from the action (tun_id); otherwise
		 * use the statically configured out_key. */
		if (mutable->port_config.flags & GRE_F_OUT_KEY_ACTION)
			*options = OVS_CB(skb)->tun_id;
		else
			*options = mutable->port_config.out_key;

		options--;
	}

	if (mutable->port_config.flags & GRE_F_OUT_CSUM) {
		greh->flags |= GRE_CSUM;

		/* The checksum field must be zero while the sum over the GRE
		 * header and payload is computed. */
		*options = 0;
		*(__sum16 *)options = csum_fold(skb_checksum(skb,
						sizeof(struct iphdr),
						skb->len - sizeof(struct iphdr),
						0));
	}
}
605 | ||
/* Validates the optional GRE checksum on a received packet.  Returns
 * non-zero if the checksum is absent or verifies correctly. */
static int check_checksum(struct sk_buff *skb)
{
	struct iphdr *iph = ip_hdr(skb);
	/* The GRE flags word is the first 16 bits after the IP header. */
	__be16 flags = *(__be16 *)(iph + 1);
	__sum16 csum = 0;

	if (flags & GRE_CSUM) {
		switch (skb->ip_summed) {
		case CHECKSUM_COMPLETE:
			csum = csum_fold(skb->csum);

			if (!csum)
				break;
			/* Fall through. */

		case CHECKSUM_NONE:
			/* No usable hardware sum (or it disagreed): compute
			 * the full checksum in software and cache it. */
			skb->csum = 0;
			csum = __skb_checksum_complete(skb);
			skb->ip_summed = CHECKSUM_COMPLETE;
			break;
		}
	}

	return (csum == 0);
}
631 | ||
/* Parses the GRE header that follows 'iph'.  Returns the GRE header length
 * in bytes, or -EINVAL for versions/payloads we do not handle.  Stores the
 * raw flag bits in '*flags' and the key (0 if absent) in '*key'.
 * NOTE(review): assumes the caller has already pulled enough linear data to
 * cover the header — confirm at each call site. */
static int parse_gre_header(struct iphdr *iph, __be16 *flags, __be32 *key)
{
	/* IP and ICMP protocol handlers check that the IHL is valid. */
	struct gre_base_hdr *greh = (struct gre_base_hdr *)((u8 *)iph + (iph->ihl << 2));
	__be32 *options = (__be32 *)(greh + 1);
	int hdr_len;

	*flags = greh->flags;

	/* Only GRE version 0 without source routing is supported. */
	if (greh->flags & (GRE_VERSION | GRE_ROUTING))
		return -EINVAL;

	/* Only Ethernet-in-GRE (Transparent Ethernet Bridging) payloads. */
	if (greh->protocol != htons(ETH_P_TEB))
		return -EINVAL;

	hdr_len = GRE_HEADER_SECTION;

	/* Optional fields appear in flag order: checksum, key, sequence. */
	if (greh->flags & GRE_CSUM) {
		hdr_len += GRE_HEADER_SECTION;
		options++;
	}

	if (greh->flags & GRE_KEY) {
		hdr_len += GRE_HEADER_SECTION;

		*key = *options;
		options++;
	} else
		*key = 0;

	if (greh->flags & GRE_SEQ)
		hdr_len += GRE_HEADER_SECTION;

	return hdr_len;
}
667 | ||
fceb2a5b | 668 | static inline u8 ecn_encapsulate(u8 tos, struct sk_buff *skb) |
2736b84e JG |
669 | { |
670 | u8 inner; | |
671 | ||
672 | if (skb->protocol == htons(ETH_P_IP)) | |
673 | inner = ((struct iphdr *)skb_network_header(skb))->tos; | |
6f470982 | 674 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) |
2736b84e JG |
675 | else if (skb->protocol == htons(ETH_P_IPV6)) |
676 | inner = ipv6_get_dsfield((struct ipv6hdr *)skb_network_header(skb)); | |
6f470982 | 677 | #endif |
2736b84e JG |
678 | else |
679 | inner = 0; | |
680 | ||
681 | return INET_ECN_encapsulate(tos, inner); | |
682 | } | |
683 | ||
/* Propagates congestion marking from the outer IP header ('tos') to the
 * inner packet: if the outer header carried CE, sets CE on the inner
 * IPv4/IPv6 header, skipping over a VLAN tag if present.  Silently does
 * nothing if the inner headers cannot be pulled. */
static inline void ecn_decapsulate(u8 tos, struct sk_buff *skb)
{
	if (INET_ECN_is_ce(tos)) {
		__be16 protocol = skb->protocol;
		unsigned int nw_header = skb_network_header(skb) - skb->data;

		if (skb->protocol == htons(ETH_P_8021Q)) {
			if (unlikely(!pskb_may_pull(skb, VLAN_ETH_HLEN)))
				return;

			protocol = vlan_eth_hdr(skb)->h_vlan_encapsulated_proto;
			nw_header += VLAN_HLEN;
		}

		if (protocol == htons(ETH_P_IP)) {
			if (unlikely(!pskb_may_pull(skb, nw_header
						    + sizeof(struct iphdr))))
				return;

			IP_ECN_set_ce((struct iphdr *)(nw_header + skb->data));
		}
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
		else if (protocol == htons(ETH_P_IPV6)) {
			if (unlikely(!pskb_may_pull(skb, nw_header
						    + sizeof(struct ipv6hdr))))
				return;

			IP6_ECN_set_ce((struct ipv6hdr *)(nw_header
							  + skb->data));
		}
#endif
	}
}
717 | ||
/* Segments a GSO skb into a list of MTU-sized skbs, consuming the
 * original.  Non-GSO skbs pass through untouched.  May return an
 * ERR_PTR from skb_gso_segment(). */
static struct sk_buff *handle_gso(struct sk_buff *skb)
{
	struct sk_buff *segs;

	if (!skb_is_gso(skb))
		return skb;

	segs = skb_gso_segment(skb, 0);
	dev_kfree_skb(skb);
	return segs;
}
729 | ||
fceb2a5b | 730 | static int handle_csum_offload(struct sk_buff *skb) |
2736b84e JG |
731 | { |
732 | if (skb->ip_summed == CHECKSUM_PARTIAL) | |
733 | return skb_checksum_help(skb); | |
1c6d11a8 JG |
734 | else { |
735 | skb->ip_summed = CHECKSUM_NONE; | |
2736b84e | 736 | return 0; |
1c6d11a8 | 737 | } |
2736b84e JG |
738 | } |
739 | ||
/* Called with rcu_read_lock. */
/* ICMP error handler for GRE: reacts to "fragmentation needed" errors on
 * packets we previously encapsulated by synthesizing a PMTUD message for
 * the inner flow and feeding it back through the tunnel port.  Header
 * offsets and skb->protocol are restored before returning, since 'skb' is
 * shared with other error handlers. */
static void gre_err(struct sk_buff *skb, u32 info)
{
	struct vport *vport;
	const struct mutable_config *mutable;
	const int type = icmp_hdr(skb)->type;
	const int code = icmp_hdr(skb)->code;
	int mtu = ntohs(icmp_hdr(skb)->un.frag.mtu);

	struct iphdr *iph;
	__be16 flags;
	__be32 key;
	int tunnel_hdr_len, tot_hdr_len;
	unsigned int orig_mac_header;
	unsigned int orig_nw_header;

	if (type != ICMP_DEST_UNREACH || code != ICMP_FRAG_NEEDED)
		return;

	/* The mimimum size packet that we would actually be able to process:
	 * encapsulating IP header, minimum GRE header, Ethernet header,
	 * inner IPv4 header. */
	if (!pskb_may_pull(skb, sizeof(struct iphdr) + GRE_HEADER_SECTION +
			   ETH_HLEN + sizeof(struct iphdr)))
		return;

	/* skb->data points at the quoted copy of the outer IP header of the
	 * packet that triggered the error. */
	iph = (struct iphdr *)skb->data;

	tunnel_hdr_len = parse_gre_header(iph, &flags, &key);
	if (tunnel_hdr_len < 0)
		return;

	vport = find_port(iph->saddr, iph->daddr, key, FIND_PORT_ANY, &mutable);
	if (!vport)
		return;

	/* Packets received by this function were previously sent by us, so
	 * any comparisons should be to the output values, not the input.
	 * However, it's not really worth it to have a hash table based on
	 * output keys (especially since ICMP error handling of tunneled packets
	 * isn't that reliable anyways). Therefore, we do a lookup based on the
	 * out key as if it were the in key and then check to see if the input
	 * and output keys are the same. */
	if (mutable->port_config.in_key != mutable->port_config.out_key)
		return;

	if (!!(mutable->port_config.flags & GRE_F_IN_KEY_MATCH) !=
	    !!(mutable->port_config.flags & GRE_F_OUT_KEY_ACTION))
		return;

	if ((mutable->port_config.flags & GRE_F_OUT_CSUM) && !(flags & GRE_CSUM))
		return;

	tunnel_hdr_len += iph->ihl << 2;

	/* Save the current layout so it can be restored on exit. */
	orig_mac_header = skb_mac_header(skb) - skb->data;
	orig_nw_header = skb_network_header(skb) - skb->data;
	skb_set_mac_header(skb, tunnel_hdr_len);

	tot_hdr_len = tunnel_hdr_len + ETH_HLEN;

	skb->protocol = eth_hdr(skb)->h_proto;
	if (skb->protocol == htons(ETH_P_8021Q)) {
		tot_hdr_len += VLAN_HLEN;
		skb->protocol = vlan_eth_hdr(skb)->h_vlan_encapsulated_proto;
	}

	skb_set_network_header(skb, tot_hdr_len);
	mtu -= tot_hdr_len;

	if (skb->protocol == htons(ETH_P_IP))
		tot_hdr_len += sizeof(struct iphdr);
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
	else if (skb->protocol == htons(ETH_P_IPV6))
		tot_hdr_len += sizeof(struct ipv6hdr);
#endif
	else
		goto out;

	if (!pskb_may_pull(skb, tot_hdr_len))
		goto out;

	/* Clamp bogus advertised MTUs up to the protocol minimum when the
	 * inner packet is large enough that progress is still possible. */
	if (skb->protocol == htons(ETH_P_IP)) {
		if (mtu < IP_MIN_MTU) {
			if (ntohs(ip_hdr(skb)->tot_len) >= IP_MIN_MTU)
				mtu = IP_MIN_MTU;
			else
				goto out;
		}

	}
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
	else if (skb->protocol == htons(ETH_P_IPV6)) {
		if (mtu < IPV6_MIN_MTU) {
			unsigned int packet_length = sizeof(struct ipv6hdr) +
					      ntohs(ipv6_hdr(skb)->payload_len);

			if (packet_length >= IPV6_MIN_MTU
			    || ntohs(ipv6_hdr(skb)->payload_len) == 0)
				mtu = IPV6_MIN_MTU;
			else
				goto out;
		}
	}
#endif

	/* Temporarily expose the inner frame, synthesize the PMTUD reply,
	 * then restore the original layout. */
	__pskb_pull(skb, tunnel_hdr_len);
	send_frag_needed(vport, mutable, skb, mtu, key);
	skb_push(skb, tunnel_hdr_len);

out:
	skb_set_mac_header(skb, orig_mac_header);
	skb_set_network_header(skb, orig_nw_header);
	skb->protocol = htons(ETH_P_IP);
}
855 | ||
/* Called with rcu_read_lock. */
/* Receive handler for GRE packets: validates the checksum, parses the GRE
 * header, finds the destination vport, strips the tunnel encapsulation and
 * hands the inner Ethernet frame to the datapath.  Always consumes 'skb'
 * and always returns 0. */
static int gre_rcv(struct sk_buff *skb)
{
	struct vport *vport;
	const struct mutable_config *mutable;
	int hdr_len;
	struct iphdr *iph;
	__be16 flags;
	__be32 key;

	if (!pskb_may_pull(skb, GRE_HEADER_SECTION + ETH_HLEN))
		goto error;

	if (!check_checksum(skb))
		goto error;

	iph = ip_hdr(skb);

	hdr_len = parse_gre_header(iph, &flags, &key);
	if (hdr_len < 0)
		goto error;

	/* On receive our local address is the packet's daddr and the remote
	 * end its saddr, hence the swapped argument order vs. transmit. */
	vport = find_port(iph->daddr, iph->saddr, key, FIND_PORT_ANY, &mutable);
	if (!vport) {
		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
		goto error;
	}

	/* Enforce mandatory inbound checksums when configured. */
	if ((mutable->port_config.flags & GRE_F_IN_CSUM) && !(flags & GRE_CSUM)) {
		vport_record_error(vport, VPORT_E_RX_CRC);
		goto error;
	}

	/* Strip the GRE header, leaving the inner Ethernet frame. */
	if (!pskb_pull(skb, hdr_len) || !pskb_may_pull(skb, ETH_HLEN)) {
		vport_record_error(vport, VPORT_E_RX_ERROR);
		goto error;
	}

	skb->pkt_type = PACKET_HOST;
	skb->protocol = eth_type_trans(skb, skb->dev);
	skb_postpull_rcsum(skb, skb_transport_header(skb), hdr_len + ETH_HLEN);

	/* Scrub state inherited from the outer packet. */
	skb_dst_drop(skb);
	nf_reset(skb);
	secpath_reset(skb);
	skb_reset_network_header(skb);

	ecn_decapsulate(iph->tos, skb);

	/* Flow-based ports expose the received key to the flow table. */
	if (mutable->port_config.flags & GRE_F_IN_KEY_MATCH)
		OVS_CB(skb)->tun_id = key;
	else
		OVS_CB(skb)->tun_id = 0;

	skb_push(skb, ETH_HLEN);
	compute_ip_summed(skb, false);

	vport_receive(vport, skb);

	return 0;

error:
	kfree_skb(skb);
	return 0;
}
921 | ||
fceb2a5b JG |
/* Encapsulates one (possibly GSO-segmented) frame in the configured outer
 * IP + GRE headers and transmits it through the local IP stack.
 *
 * 'iph' is a template outer IP header built by gre_send(); 'rt' is the
 * route to the tunnel endpoint.  Consumes 'skb'.  Returns the number of
 * payload bytes sent on success, or 0 after recording an error against
 * 'vport'. */
static int build_packet(struct vport *vport, const struct mutable_config *mutable,
			struct iphdr *iph, struct rtable *rt, int max_headroom,
			int mtu, struct sk_buff *skb)
{
	int err;
	struct iphdr *new_iph;
	int orig_len = skb->len;
	__be16 frag_off = iph->frag_off;

	skb = check_headroom(skb, max_headroom);
	if (unlikely(IS_ERR(skb)))
		goto error;

	err = handle_csum_offload(skb);
	if (err)
		goto error_free;

	if (skb->protocol == htons(ETH_P_IP)) {
		struct iphdr *old_iph = ip_hdr(skb);

		/* Inner IPv4 with DF set that doesn't fit the tunnel MTU:
		 * send ICMP fragmentation-needed instead of transmitting. */
		if ((old_iph->frag_off & htons(IP_DF)) &&
		    mtu < ntohs(old_iph->tot_len)) {
			if (send_frag_needed(vport, mutable, skb, mtu, OVS_CB(skb)->tun_id))
				goto error_free;
		}

	}
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
	else if (skb->protocol == htons(ETH_P_IPV6)) {
		/* Inner payload length, excluding the Ethernet (and any
		 * VLAN) header. */
		unsigned int packet_length = skb->len - ETH_HLEN
			- (eth_hdr(skb)->h_proto == htons(ETH_P_8021Q) ? VLAN_HLEN : 0);

		/* IPv6 requires PMTUD if the packet is above the minimum MTU. */
		if (packet_length > IPV6_MIN_MTU)
			frag_off = htons(IP_DF);

		if (mtu < packet_length) {
			if (send_frag_needed(vport, mutable, skb, mtu, OVS_CB(skb)->tun_id))
				goto error_free;
		}
	}
#endif

	/* Prepend the full outer header (IP + GRE) in front of the inner
	 * frame and fill in the IP portion from the caller's template. */
	skb_reset_transport_header(skb);
	new_iph = (struct iphdr *)skb_push(skb, mutable->tunnel_hlen);
	skb_reset_network_header(skb);

	memcpy(new_iph, iph, sizeof(struct iphdr));
	new_iph->frag_off = frag_off;
	ip_select_ident(new_iph, &rt->u.dst, NULL);

	create_gre_header(skb, mutable);

	/* Allow our local IP stack to fragment the outer packet even if the
	 * DF bit is set as a last resort. */
	skb->local_df = 1;

	/* Clear inherited IP control-block state before re-entering the
	 * local output path. */
	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
	IPCB(skb)->flags = 0;

	err = ip_local_out(skb);
	if (likely(net_xmit_eval(err) == 0))
		return orig_len;
	else {
		vport_record_error(vport, VPORT_E_TX_ERROR);
		return 0;
	}

error_free:
	kfree_skb(skb);
error:
	vport_record_error(vport, VPORT_E_TX_DROPPED);

	return 0;
}
997 | ||
/* Transmit entry point for the GRE vport: builds the outer IP header
 * template, routes to the remote endpoint, applies ToS/TTL/PMTU policy,
 * then encapsulates and sends each (possibly GSO-segmented) packet via
 * build_packet().  Returns the number of bytes sent, or 0 on error.
 * Reads the port config under RCU (rcu_dereference). */
static int gre_send(struct vport *vport, struct sk_buff *skb)
{
	struct gre_vport *gre_vport = gre_vport_priv(vport);
	const struct mutable_config *mutable = rcu_dereference(gre_vport->mutable);

	struct iphdr *old_iph;
	int orig_len;
	struct iphdr iph;	/* Template for the outer header. */
	struct rtable *rt;
	int max_headroom;
	int mtu;

	/* Validate the protocol headers before we try to use them. */
	if (skb->protocol == htons(ETH_P_8021Q)) {
		if (unlikely(!pskb_may_pull(skb, VLAN_ETH_HLEN)))
			goto error_free;

		skb->protocol = vlan_eth_hdr(skb)->h_vlan_encapsulated_proto;
		skb_set_network_header(skb, VLAN_ETH_HLEN);
	}

	/* If the inner L3 header isn't fully present, clear skb->protocol so
	 * the inherit/PMTU logic below ignores it. */
	if (skb->protocol == htons(ETH_P_IP)) {
		if (unlikely(!pskb_may_pull(skb, skb_network_header(skb)
		    + sizeof(struct iphdr) - skb->data)))
			skb->protocol = 0;
	}
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
	else if (skb->protocol == htons(ETH_P_IPV6)) {
		if (unlikely(!pskb_may_pull(skb, skb_network_header(skb)
		    + sizeof(struct ipv6hdr) - skb->data)))
			skb->protocol = 0;
	}
#endif
	old_iph = ip_hdr(skb);

	/* Outer ToS: configured value, optionally inherited from the inner
	 * packet, then ECN-adjusted for encapsulation. */
	iph.tos = mutable->port_config.tos;
	if (mutable->port_config.flags & GRE_F_TOS_INHERIT) {
		if (skb->protocol == htons(ETH_P_IP))
			iph.tos = old_iph->tos;
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
		else if (skb->protocol == htons(ETH_P_IPV6))
			iph.tos = ipv6_get_dsfield(ipv6_hdr(skb));
#endif
	}
	iph.tos = ecn_encapsulate(iph.tos, skb);

	/* Route to the configured remote endpoint. */
	{
		struct flowi fl = { .nl_u = { .ip4_u =
					      { .daddr = mutable->port_config.daddr,
						.saddr = mutable->port_config.saddr,
						.tos = RT_TOS(iph.tos) } },
				    .proto = IPPROTO_GRE };

		if (ip_route_output_key(&init_net, &rt, &fl))
			goto error_free;
	}

	/* Outer TTL: configured value, optionally inherited, otherwise
	 * taken from the route metric. */
	iph.ttl = mutable->port_config.ttl;
	if (mutable->port_config.flags & GRE_F_TTL_INHERIT) {
		if (skb->protocol == htons(ETH_P_IP))
			iph.ttl = old_iph->ttl;
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
		else if (skb->protocol == htons(ETH_P_IPV6))
			iph.ttl = ipv6_hdr(skb)->hop_limit;
#endif
	}
	if (!iph.ttl)
		iph.ttl = dst_metric(&rt->u.dst, RTAX_HOPLIMIT);

	/* With PMTUD enabled, the usable MTU is the path MTU minus all
	 * encapsulation overhead; otherwise use the configured port MTU. */
	iph.frag_off = (mutable->port_config.flags & GRE_F_PMTUD) ? htons(IP_DF) : 0;
	if (iph.frag_off)
		mtu = dst_mtu(&rt->u.dst)
			- ETH_HLEN
			- mutable->tunnel_hlen
			- (eth_hdr(skb)->h_proto == htons(ETH_P_8021Q) ? VLAN_HLEN : 0);
	else
		mtu = mutable->mtu;

	/* Propagate the inner DF bit and clamp to per-family minimums. */
	if (skb->protocol == htons(ETH_P_IP)) {
		iph.frag_off |= old_iph->frag_off & htons(IP_DF);
		mtu = max(mtu, IP_MIN_MTU);
	}
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
	else if (skb->protocol == htons(ETH_P_IPV6))
		mtu = max(mtu, IPV6_MIN_MTU);
#endif

	/* Remaining fixed fields of the outer IP header template. */
	iph.version = 4;
	iph.ihl = sizeof(struct iphdr) >> 2;
	iph.protocol = IPPROTO_GRE;
	iph.daddr = rt->rt_dst;
	iph.saddr = rt->rt_src;

	/* Reset state from the inner trip and attach the new route. */
	nf_reset(skb);
	secpath_reset(skb);
	skb_dst_drop(skb);
	skb_dst_set(skb, &rt->u.dst);

	/* If we are doing GSO on a pskb it is better to make sure that the
	 * headroom is correct now. We will only have to copy the portion in
	 * the linear data area and GSO will preserve headroom when it creates
	 * the segments. This is particularly beneficial on Xen where we get
	 * lots of GSO pskbs. Conversely, we delay copying if it is just to
	 * get our own writable clone because GSO may do the copy for us. */
	max_headroom = LL_RESERVED_SPACE(rt->u.dst.dev) + rt->u.dst.header_len
			+ mutable->tunnel_hlen;

	if (skb_headroom(skb) < max_headroom) {
		skb = check_headroom(skb, max_headroom);
		if (unlikely(IS_ERR(skb))) {
			vport_record_error(vport, VPORT_E_TX_DROPPED);
			goto error;
		}
	}

	forward_ip_summed(skb);

	if (unlikely(vswitch_skb_checksum_setup(skb)))
		goto error_free;

	skb = handle_gso(skb);
	if (unlikely(IS_ERR(skb))) {
		vport_record_error(vport, VPORT_E_TX_DROPPED);
		goto error;
	}

	/* Process GSO segments. Try to do any work for the entire packet that
	 * doesn't involve actually writing to it before this point. */
	orig_len = 0;
	do {
		struct sk_buff *next_skb = skb->next;
		skb->next = NULL;

		orig_len += build_packet(vport, mutable, &iph, rt, max_headroom, mtu, skb);

		skb = next_skb;
	} while (skb);

	return orig_len;

error_free:
	kfree_skb(skb);
	vport_record_error(vport, VPORT_E_TX_ERROR);
error:
	return 0;
}
1144 | ||
/* IP-stack hooks for IPPROTO_GRE: gre_rcv() receives encapsulated packets;
 * gre_err (defined earlier in this file) is the error handler. */
static struct net_protocol gre_protocol_handlers = {
	.handler = gre_rcv,
	.err_handler = gre_err,
};
1149 | ||
fceb2a5b | 1150 | static int gre_init(void) |
2736b84e JG |
1151 | { |
1152 | int err; | |
1153 | ||
1154 | err = inet_add_protocol(&gre_protocol_handlers, IPPROTO_GRE); | |
1155 | if (err) | |
1156 | printk(KERN_WARNING "openvswitch: cannot register gre protocol handler\n"); | |
1157 | ||
1158 | return err; | |
1159 | } | |
1160 | ||
fceb2a5b | 1161 | static void gre_exit(void) |
2736b84e JG |
1162 | { |
1163 | tbl_destroy(port_table, NULL); | |
1164 | inet_del_protocol(&gre_protocol_handlers, IPPROTO_GRE); | |
1165 | } | |
1166 | ||
fceb2a5b JG |
1167 | static int set_config(const struct vport *cur_vport, |
1168 | struct mutable_config *mutable, const void __user *uconfig) | |
2736b84e JG |
1169 | { |
1170 | const struct vport *old_vport; | |
1171 | const struct mutable_config *old_mutable; | |
1172 | int port_type; | |
1173 | ||
1174 | if (copy_from_user(&mutable->port_config, uconfig, sizeof(struct gre_port_config))) | |
1175 | return -EFAULT; | |
1176 | ||
1177 | if (mutable->port_config.daddr == 0) | |
1178 | return -EINVAL; | |
1179 | ||
1180 | if (mutable->port_config.flags & GRE_F_IN_KEY_MATCH) { | |
1181 | port_type = FIND_PORT_MATCH; | |
1182 | mutable->port_config.in_key = 0; | |
1183 | } else | |
1184 | port_type = FIND_PORT_KEY; | |
1185 | ||
1186 | old_vport = find_port(mutable->port_config.saddr, | |
1187 | mutable->port_config.daddr, | |
1188 | mutable->port_config.in_key, port_type, | |
1189 | &old_mutable); | |
1190 | ||
1191 | if (old_vport && old_vport != cur_vport) | |
1192 | return -EEXIST; | |
1193 | ||
eea2aafb JG |
1194 | if (mutable->port_config.flags & GRE_F_OUT_KEY_ACTION) |
1195 | mutable->port_config.out_key = 0; | |
1196 | ||
2736b84e JG |
1197 | mutable->tunnel_hlen = sizeof(struct iphdr) + GRE_HEADER_SECTION; |
1198 | ||
1199 | if (mutable->port_config.flags & GRE_F_OUT_CSUM) | |
1200 | mutable->tunnel_hlen += GRE_HEADER_SECTION; | |
1201 | ||
1202 | if (mutable->port_config.out_key || | |
1203 | mutable->port_config.flags & GRE_F_OUT_KEY_ACTION) | |
1204 | mutable->tunnel_hlen += GRE_HEADER_SECTION; | |
1205 | ||
1206 | return 0; | |
1207 | } | |
1208 | ||
fceb2a5b | 1209 | static struct vport *gre_create(const char *name, const void __user *config) |
2736b84e JG |
1210 | { |
1211 | struct vport *vport; | |
1212 | struct gre_vport *gre_vport; | |
1213 | int err; | |
1214 | ||
1215 | vport = vport_alloc(sizeof(struct gre_vport), &gre_vport_ops); | |
1216 | if (IS_ERR(vport)) { | |
1217 | err = PTR_ERR(vport); | |
1218 | goto error; | |
1219 | } | |
1220 | ||
1221 | gre_vport = gre_vport_priv(vport); | |
1222 | ||
1223 | strcpy(gre_vport->name, name); | |
1224 | ||
1225 | gre_vport->mutable = kmalloc(sizeof(struct mutable_config), GFP_KERNEL); | |
1226 | if (!gre_vport->mutable) { | |
1227 | err = -ENOMEM; | |
1228 | goto error_free_vport; | |
1229 | } | |
1230 | ||
b19e8815 | 1231 | vport_gen_rand_ether_addr(gre_vport->mutable->eth_addr); |
2736b84e JG |
1232 | gre_vport->mutable->mtu = ETH_DATA_LEN; |
1233 | ||
1234 | err = set_config(NULL, gre_vport->mutable, config); | |
1235 | if (err) | |
1236 | goto error_free_mutable; | |
1237 | ||
1238 | err = add_port(vport); | |
1239 | if (err) | |
1240 | goto error_free_mutable; | |
1241 | ||
1242 | return vport; | |
1243 | ||
1244 | error_free_mutable: | |
1245 | kfree(gre_vport->mutable); | |
1246 | error_free_vport: | |
1247 | vport_free(vport); | |
1248 | error: | |
1249 | return ERR_PTR(err); | |
1250 | } | |
1251 | ||
fceb2a5b | 1252 | static int gre_modify(struct vport *vport, const void __user *config) |
2736b84e JG |
1253 | { |
1254 | struct gre_vport *gre_vport = gre_vport_priv(vport); | |
1255 | struct mutable_config *mutable; | |
1256 | int err; | |
1257 | int update_hash = 0; | |
1258 | ||
1259 | mutable = kmemdup(gre_vport->mutable, sizeof(struct mutable_config), GFP_KERNEL); | |
1260 | if (!mutable) { | |
1261 | err = -ENOMEM; | |
1262 | goto error; | |
1263 | } | |
1264 | ||
1265 | err = set_config(vport, mutable, config); | |
1266 | if (err) | |
1267 | goto error_free; | |
1268 | ||
1269 | /* Only remove the port from the hash table if something that would | |
1270 | * affect the lookup has changed. */ | |
1271 | if (gre_vport->mutable->port_config.saddr != mutable->port_config.saddr || | |
1272 | gre_vport->mutable->port_config.daddr != mutable->port_config.daddr || | |
1273 | gre_vport->mutable->port_config.in_key != mutable->port_config.in_key || | |
1274 | (gre_vport->mutable->port_config.flags & GRE_F_IN_KEY_MATCH) != | |
1275 | (mutable->port_config.flags & GRE_F_IN_KEY_MATCH)) | |
1276 | update_hash = 1; | |
1277 | ||
1278 | ||
1279 | /* This update is not atomic but the lookup uses the config, which | |
1280 | * serves as an inherent double check. */ | |
1281 | if (update_hash) { | |
1282 | err = del_port(vport); | |
1283 | if (err) | |
1284 | goto error_free; | |
1285 | } | |
1286 | ||
1287 | assign_config_rcu(vport, mutable); | |
1288 | ||
1289 | if (update_hash) { | |
1290 | err = add_port(vport); | |
1291 | if (err) | |
1292 | goto error_free; | |
1293 | } | |
1294 | ||
1295 | return 0; | |
1296 | ||
1297 | error_free: | |
1298 | kfree(mutable); | |
1299 | error: | |
1300 | return err; | |
1301 | } | |
1302 | ||
2848cb49 JG |
1303 | static void free_port(struct rcu_head *rcu) |
1304 | { | |
1305 | struct gre_vport *gre_vport = container_of(rcu, struct gre_vport, rcu); | |
1306 | ||
1307 | kfree(gre_vport->mutable); | |
1308 | vport_free(gre_vport_to_vport(gre_vport)); | |
1309 | } | |
1310 | ||
fceb2a5b | 1311 | static int gre_destroy(struct vport *vport) |
2736b84e JG |
1312 | { |
1313 | struct gre_vport *gre_vport = gre_vport_priv(vport); | |
1314 | int port_type; | |
1315 | const struct mutable_config *old_mutable; | |
1316 | ||
1317 | /* Do a hash table lookup to make sure that the port exists. It should | |
1318 | * exist but might not if a modify failed earlier. */ | |
1319 | if (gre_vport->mutable->port_config.flags & GRE_F_IN_KEY_MATCH) | |
1320 | port_type = FIND_PORT_MATCH; | |
1321 | else | |
1322 | port_type = FIND_PORT_KEY; | |
1323 | ||
1324 | if (vport == find_port(gre_vport->mutable->port_config.saddr, | |
1325 | gre_vport->mutable->port_config.daddr, | |
1326 | gre_vport->mutable->port_config.in_key, port_type, &old_mutable)) | |
1327 | del_port(vport); | |
1328 | ||
2848cb49 | 1329 | call_rcu(&gre_vport->rcu, free_port); |
2736b84e JG |
1330 | |
1331 | return 0; | |
1332 | } | |
1333 | ||
fceb2a5b | 1334 | static int gre_set_mtu(struct vport *vport, int mtu) |
2736b84e JG |
1335 | { |
1336 | struct gre_vport *gre_vport = gre_vport_priv(vport); | |
1337 | struct mutable_config *mutable; | |
2736b84e JG |
1338 | |
1339 | mutable = kmemdup(gre_vport->mutable, sizeof(struct mutable_config), GFP_KERNEL); | |
1340 | if (!mutable) | |
1341 | return -ENOMEM; | |
1342 | ||
1343 | mutable->mtu = mtu; | |
1344 | assign_config_rcu(vport, mutable); | |
1345 | ||
2736b84e JG |
1346 | return 0; |
1347 | } | |
1348 | ||
fceb2a5b | 1349 | static int gre_set_addr(struct vport *vport, const unsigned char *addr) |
2736b84e JG |
1350 | { |
1351 | struct gre_vport *gre_vport = gre_vport_priv(vport); | |
1352 | struct mutable_config *mutable; | |
1353 | ||
1354 | mutable = kmemdup(gre_vport->mutable, sizeof(struct mutable_config), GFP_KERNEL); | |
1355 | if (!mutable) | |
1356 | return -ENOMEM; | |
1357 | ||
1358 | memcpy(mutable->eth_addr, addr, ETH_ALEN); | |
1359 | assign_config_rcu(vport, mutable); | |
1360 | ||
1361 | return 0; | |
1362 | } | |
1363 | ||
1364 | ||
fceb2a5b | 1365 | static const char *gre_get_name(const struct vport *vport) |
2736b84e JG |
1366 | { |
1367 | const struct gre_vport *gre_vport = gre_vport_priv(vport); | |
1368 | return gre_vport->name; | |
1369 | } | |
1370 | ||
fceb2a5b | 1371 | static const unsigned char *gre_get_addr(const struct vport *vport) |
2736b84e JG |
1372 | { |
1373 | const struct gre_vport *gre_vport = gre_vport_priv(vport); | |
1374 | return rcu_dereference(gre_vport->mutable)->eth_addr; | |
1375 | } | |
1376 | ||
fceb2a5b | 1377 | static int gre_get_mtu(const struct vport *vport) |
2736b84e JG |
1378 | { |
1379 | const struct gre_vport *gre_vport = gre_vport_priv(vport); | |
1380 | return rcu_dereference(gre_vport->mutable)->mtu; | |
1381 | } | |
1382 | ||
/* Operations table registered with the vport layer for "gre" ports.
 * VPORT_F_GEN_STATS requests generic stats handling from the framework;
 * VPORT_F_TUN_ID marks this port type as carrying per-packet tunnel IDs.
 * Device flags / running state / operstate use the generic helpers from
 * vport-generic. */
struct vport_ops gre_vport_ops = {
	.type = "gre",
	.flags = VPORT_F_GEN_STATS | VPORT_F_TUN_ID,
	.init = gre_init,
	.exit = gre_exit,
	.create = gre_create,
	.modify = gre_modify,
	.destroy = gre_destroy,
	.set_mtu = gre_set_mtu,
	.set_addr = gre_set_addr,
	.get_name = gre_get_name,
	.get_addr = gre_get_addr,
	.get_dev_flags = vport_gen_get_dev_flags,
	.is_running = vport_gen_is_running,
	.get_operstate = vport_gen_get_operstate,
	.get_mtu = gre_get_mtu,
	.send = gre_send,
};