]>
Commit | Line | Data |
---|---|---|
2736b84e JG |
1 | /* |
2 | * Copyright (c) 2010 Nicira Networks. | |
3 | * Distributed under the terms of the GNU GPL version 2. | |
4 | * | |
5 | * Significant portions of this file may be copied from parts of the Linux | |
6 | * kernel, by Linus Torvalds and others. | |
7 | */ | |
8 | ||
9 | #include <linux/if_arp.h> | |
10 | #include <linux/if_ether.h> | |
11 | #include <linux/ip.h> | |
12 | #include <linux/if_tunnel.h> | |
13 | #include <linux/if_vlan.h> | |
14 | #include <linux/in.h> | |
15 | #include <linux/in_route.h> | |
16 | #include <linux/jhash.h> | |
17 | #include <linux/kernel.h> | |
18 | #include <linux/version.h> | |
19 | ||
20 | #include <net/dsfield.h> | |
21 | #include <net/dst.h> | |
22 | #include <net/icmp.h> | |
23 | #include <net/inet_ecn.h> | |
24 | #include <net/ip.h> | |
6f470982 | 25 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) |
2736b84e | 26 | #include <net/ipv6.h> |
6f470982 | 27 | #endif |
2736b84e JG |
28 | #include <net/protocol.h> |
29 | #include <net/route.h> | |
30 | #include <net/xfrm.h> | |
31 | ||
32 | #include "actions.h" | |
33 | #include "datapath.h" | |
34 | #include "openvswitch/gre.h" | |
35 | #include "table.h" | |
36 | #include "vport.h" | |
b19e8815 | 37 | #include "vport-generic.h" |
2736b84e JG |
38 | |
/* The absolute minimum fragment size. Note that there are many other
 * definitions of the minimum MTU. */
#define IP_MIN_MTU 68

/* The GRE header is composed of a series of sections: a base and then a variable
 * number of options. */
#define GRE_HEADER_SECTION 4

/* The dst entry moved from rtable's union into a plain member in 2.6.36;
 * this compat macro hides the difference. */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,36)
#define rt_dst(rt) (rt->dst)
#else
#define rt_dst(rt) (rt->u.dst)
#endif
52 | ||
27b6cec0 JG |
/* Mandatory part of a GRE header: flags/version word plus the
 * encapsulated protocol.  Optional fields (checksum, key, sequence)
 * follow as 32-bit sections (see GRE_HEADER_SECTION). */
struct gre_base_hdr {
	__be16 flags;
	__be16 protocol;
};

/* Per-port configuration, replaced as a unit under RCU (see
 * assign_config_rcu()). */
struct mutable_config {
	struct rcu_head rcu;		/* For deferred freeing in free_config(). */

	unsigned char eth_addr[ETH_ALEN];	/* Source MAC for synthesized ICMP replies. */
	unsigned int mtu;
	struct gre_port_config port_config;

	int tunnel_hlen; /* Tunnel header length. */
};

/* Private data of a GRE vport; obtained via gre_vport_priv(). */
struct gre_vport {
	struct rcu_head rcu;

	struct tbl_node tbl_node;	/* Linkage in the global port_table. */

	char name[IFNAMSIZ];

	/* Protected by RCU. */
	struct mutable_config *mutable;
};

/* Protected by RCU. */
static struct tbl *port_table;

/* These are just used as an optimization: they don't require any kind of
 * synchronization because we could have just as easily read the value before
 * the port change happened. */
static unsigned int key_local_remote_ports;
static unsigned int key_remote_ports;
static unsigned int local_remote_ports;
static unsigned int remote_ports;
/* Returns the GRE-specific private area embedded in 'vport'. */
static inline struct gre_vport *gre_vport_priv(const struct vport *vport)
{
	struct gre_vport *gre_vport = vport_priv(vport);

	return gre_vport;
}
93 | ||
/* Maps a GRE private area back to its enclosing generic vport. */
static inline struct vport *gre_vport_to_vport(const struct gre_vport *gre_vport)
{
	struct vport *vport = vport_from_priv(gre_vport);

	return vport;
}
98 | ||
/* Recovers the gre_vport that embeds the given port_table node. */
static inline struct gre_vport *gre_vport_table_cast(const struct tbl_node *node)
{
	return container_of(node, struct gre_vport, tbl_node);
}
103 | ||
104 | /* RCU callback. */ | |
fceb2a5b | 105 | static void free_config(struct rcu_head *rcu) |
2736b84e JG |
106 | { |
107 | struct mutable_config *c = container_of(rcu, struct mutable_config, rcu); | |
108 | kfree(c); | |
109 | } | |
110 | ||
fceb2a5b JG |
111 | static void assign_config_rcu(struct vport *vport, |
112 | struct mutable_config *new_config) | |
2736b84e JG |
113 | { |
114 | struct gre_vport *gre_vport = gre_vport_priv(vport); | |
115 | struct mutable_config *old_config; | |
116 | ||
117 | old_config = rcu_dereference(gre_vport->mutable); | |
118 | rcu_assign_pointer(gre_vport->mutable, new_config); | |
119 | call_rcu(&old_config->rcu, free_config); | |
120 | } | |
121 | ||
fceb2a5b | 122 | static unsigned int *find_port_pool(const struct mutable_config *mutable) |
2736b84e JG |
123 | { |
124 | if (mutable->port_config.flags & GRE_F_IN_KEY_MATCH) { | |
125 | if (mutable->port_config.saddr) | |
126 | return &local_remote_ports; | |
127 | else | |
128 | return &remote_ports; | |
129 | } else { | |
130 | if (mutable->port_config.saddr) | |
131 | return &key_local_remote_ports; | |
132 | else | |
133 | return &key_remote_ports; | |
134 | } | |
135 | } | |
136 | ||
/* Indexes into port_lookup_key.vals[].  The order fixes the layout of
 * the jhash input, so insertion (add_port) and lookup (find_port) must
 * agree on it. */
enum lookup_key {
	LOOKUP_SADDR = 0,
	LOOKUP_DADDR = 1,
	LOOKUP_KEY = 2,
	LOOKUP_KEY_MATCH = 3
};

/* Hash-table lookup target: four u32 match fields plus the config
 * pointer that port_cmp() fills in as a side effect. */
struct port_lookup_key {
	u32 vals[4]; /* Contains enum lookup_key keys. */
	const struct mutable_config *mutable;
};
148 | ||
/* Hash-table comparison callback.  Modifies 'target' to store the
 * rcu_dereferenced pointer that was used to do the comparison. */
static int port_cmp(const struct tbl_node *node, void *target)
{
	const struct gre_vport *gre_vport = gre_vport_table_cast(node);
	struct port_lookup_key *lookup = target;

	/* Stash the dereferenced config so that a successful lookup can
	 * hand back exactly the snapshot that was compared against. */
	lookup->mutable = rcu_dereference(gre_vport->mutable);

	return ((lookup->mutable->port_config.flags & GRE_F_IN_KEY_MATCH) ==
			lookup->vals[LOOKUP_KEY_MATCH]) &&
	       lookup->mutable->port_config.daddr == lookup->vals[LOOKUP_DADDR] &&
	       lookup->mutable->port_config.in_key == lookup->vals[LOOKUP_KEY] &&
	       lookup->mutable->port_config.saddr == lookup->vals[LOOKUP_SADDR];
}
164 | ||
/* Hashes the four lookup fields; must stay in sync with enum lookup_key. */
static u32 port_hash(struct port_lookup_key *lookup)
{
	return jhash2(lookup->vals, ARRAY_SIZE(lookup->vals), 0);
}
169 | ||
/* Inserts 'vport' into the global port table, creating the table on
 * first use or expanding it when the load factor exceeds one, then bumps
 * the matching port-pool counter.
 * NOTE(review): updates appear to rely on an external update-side lock
 * (e.g. RTNL) for mutual exclusion — confirm against callers. */
static int add_port(struct vport *vport)
{
	struct gre_vport *gre_vport = gre_vport_priv(vport);
	struct port_lookup_key lookup;
	int err;

	if (!port_table) {
		struct tbl *new_table;

		new_table = tbl_create(0);
		if (!new_table)
			return -ENOMEM;

		rcu_assign_pointer(port_table, new_table);

	} else if (tbl_count(port_table) > tbl_n_buckets(port_table)) {
		/* More entries than buckets: grow, then retire the old
		 * table after a grace period. */
		struct tbl *old_table = port_table;
		struct tbl *new_table;

		new_table = tbl_expand(old_table);
		if (IS_ERR(new_table))
			return PTR_ERR(new_table);

		rcu_assign_pointer(port_table, new_table);
		tbl_deferred_destroy(old_table, NULL);
	}

	/* The hash input layout must mirror port_cmp()/find_port(). */
	lookup.vals[LOOKUP_SADDR] = gre_vport->mutable->port_config.saddr;
	lookup.vals[LOOKUP_DADDR] = gre_vport->mutable->port_config.daddr;
	lookup.vals[LOOKUP_KEY] = gre_vport->mutable->port_config.in_key;
	lookup.vals[LOOKUP_KEY_MATCH] = gre_vport->mutable->port_config.flags & GRE_F_IN_KEY_MATCH;

	err = tbl_insert(port_table, &gre_vport->tbl_node, port_hash(&lookup));
	if (err)
		return err;

	(*find_port_pool(gre_vport->mutable))++;

	return 0;
}
210 | ||
fceb2a5b | 211 | static int del_port(struct vport *vport) |
2736b84e JG |
212 | { |
213 | struct gre_vport *gre_vport = gre_vport_priv(vport); | |
214 | int err; | |
215 | ||
216 | err = tbl_remove(port_table, &gre_vport->tbl_node); | |
217 | if (err) | |
218 | return err; | |
219 | ||
220 | (*find_port_pool(gre_vport->mutable))--; | |
221 | ||
222 | return 0; | |
223 | } | |
224 | ||
/* Flags selecting which classes of port find_port() may return. */
#define FIND_PORT_KEY (1 << 0)
#define FIND_PORT_MATCH (1 << 1)
#define FIND_PORT_ANY (FIND_PORT_KEY | FIND_PORT_MATCH)

/* Looks up the vport for a tunnel with the given addresses and key,
 * trying progressively less specific matches: keyed port with a local
 * address, keyed port with any local address, then the same pair for
 * key-match ("flow based") ports.  The global counters are purely an
 * optimization to skip hash lookups for classes known to be empty.
 *
 * On success stores the rcu_dereferenced config (captured by port_cmp())
 * in '*mutable' and returns the vport; otherwise returns NULL. */
static struct vport *find_port(__be32 saddr, __be32 daddr, __be32 key,
			       int port_type,
			       const struct mutable_config **mutable)
{
	struct port_lookup_key lookup;
	struct tbl *table = rcu_dereference(port_table);
	struct tbl_node *tbl_node;

	if (!table)
		return NULL;

	lookup.vals[LOOKUP_SADDR] = saddr;
	lookup.vals[LOOKUP_DADDR] = daddr;

	if (port_type & FIND_PORT_KEY) {
		lookup.vals[LOOKUP_KEY] = key;
		lookup.vals[LOOKUP_KEY_MATCH] = 0;

		if (key_local_remote_ports) {
			tbl_node = tbl_lookup(table, &lookup, port_hash(&lookup), port_cmp);
			if (tbl_node)
				goto found;
		}

		if (key_remote_ports) {
			/* Retry with a wildcarded local address. */
			lookup.vals[LOOKUP_SADDR] = 0;

			tbl_node = tbl_lookup(table, &lookup, port_hash(&lookup), port_cmp);
			if (tbl_node)
				goto found;

			/* Restore for the key-match pass below. */
			lookup.vals[LOOKUP_SADDR] = saddr;
		}
	}

	if (port_type & FIND_PORT_MATCH) {
		lookup.vals[LOOKUP_KEY] = 0;
		lookup.vals[LOOKUP_KEY_MATCH] = GRE_F_IN_KEY_MATCH;

		if (local_remote_ports) {
			tbl_node = tbl_lookup(table, &lookup, port_hash(&lookup), port_cmp);
			if (tbl_node)
				goto found;
		}

		if (remote_ports) {
			lookup.vals[LOOKUP_SADDR] = 0;

			tbl_node = tbl_lookup(table, &lookup, port_hash(&lookup), port_cmp);
			if (tbl_node)
				goto found;
		}
	}

	return NULL;

found:
	*mutable = lookup.mutable;
	return gre_vport_to_vport(gre_vport_table_cast(tbl_node));
}
289 | ||
fceb2a5b | 290 | static bool check_ipv4_address(__be32 addr) |
2736b84e JG |
291 | { |
292 | if (ipv4_is_multicast(addr) || ipv4_is_lbcast(addr) | |
293 | || ipv4_is_loopback(addr) || ipv4_is_zeronet(addr)) | |
294 | return false; | |
295 | ||
296 | return true; | |
297 | } | |
298 | ||
/* Decides whether we may generate an ICMP "fragmentation needed" reply
 * for this IPv4 packet, following the usual suppression rules: no
 * replies to broadcasts, non-initial fragments, or ICMP errors. */
static bool ipv4_should_icmp(struct sk_buff *skb)
{
	struct iphdr *old_iph = ip_hdr(skb);

	/* Don't respond to L2 broadcast. */
	if (is_multicast_ether_addr(eth_hdr(skb)->h_dest))
		return false;

	/* Don't respond to L3 broadcast or invalid addresses. */
	if (!check_ipv4_address(old_iph->daddr) ||
	    !check_ipv4_address(old_iph->saddr))
		return false;

	/* Only respond to the first fragment. */
	if (old_iph->frag_off & htons(IP_OFFSET))
		return false;

	/* Don't respond to ICMP error messages. */
	if (old_iph->protocol == IPPROTO_ICMP) {
		u8 icmp_type, *icmp_typep;

		/* The ICMP type sits just past the variable-length IP
		 * header; compute its offset relative to skb->data. */
		icmp_typep = skb_header_pointer(skb, (u8 *)old_iph +
						(old_iph->ihl << 2) +
						offsetof(struct icmphdr, type) -
						skb->data, sizeof(icmp_type),
						&icmp_type);

		if (!icmp_typep)
			return false;

		/* Allow only ICMP queries (echo etc.), never errors. */
		if (*icmp_typep > NR_ICMP_TYPES
			|| (*icmp_typep <= ICMP_PARAMETERPROB
			&& *icmp_typep != ICMP_ECHOREPLY
			&& *icmp_typep != ICMP_ECHO))
			return false;
	}

	return true;
}
338 | ||
/* Fills 'nskb' with an IPv4 ICMP "fragmentation needed" message whose
 * payload is the first 'payload_length' bytes of the offending packet
 * 'skb'; 'mtu' is advertised in the ICMP MTU field. */
static void ipv4_build_icmp(struct sk_buff *skb, struct sk_buff *nskb,
			    unsigned int mtu, unsigned int payload_length)
{
	struct iphdr *iph, *old_iph = ip_hdr(skb);
	struct icmphdr *icmph;
	u8 *payload;

	iph = (struct iphdr *)skb_put(nskb, sizeof(struct iphdr));
	icmph = (struct icmphdr *)skb_put(nskb, sizeof(struct icmphdr));
	payload = skb_put(nskb, payload_length);

	/* IP */
	iph->version = 4;
	iph->ihl = sizeof(struct iphdr) >> 2;
	iph->tos = (old_iph->tos & IPTOS_TOS_MASK) |
		   IPTOS_PREC_INTERNETCONTROL;
	iph->tot_len = htons(sizeof(struct iphdr)
			     + sizeof(struct icmphdr)
			     + payload_length);
	get_random_bytes(&iph->id, sizeof(iph->id));
	iph->frag_off = 0;
	iph->ttl = IPDEFTTL;
	iph->protocol = IPPROTO_ICMP;
	/* Addresses swapped: the reply goes back toward the sender. */
	iph->daddr = old_iph->saddr;
	iph->saddr = old_iph->daddr;

	ip_send_check(iph);

	/* ICMP */
	icmph->type = ICMP_DEST_UNREACH;
	icmph->code = ICMP_FRAG_NEEDED;
	icmph->un.gateway = htonl(mtu);
	icmph->checksum = 0;

	/* Checksum the ICMP header, then fold in the quoted payload while
	 * copying it from the original packet in a single pass. */
	nskb->csum = csum_partial((u8 *)icmph, sizeof(struct icmphdr), 0);
	nskb->csum = skb_copy_and_csum_bits(skb, (u8 *)old_iph - skb->data,
					    payload, payload_length,
					    nskb->csum);
	icmph->checksum = csum_fold(nskb->csum);
}
379 | ||
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
/* IPv6 analogue of ipv4_should_icmp(): decides whether an ICMPv6
 * "packet too big" may be sent in response to this packet. */
static bool ipv6_should_icmp(struct sk_buff *skb)
{
	struct ipv6hdr *old_ipv6h = ipv6_hdr(skb);
	int addr_type;
	int payload_off = (u8 *)(old_ipv6h + 1) - skb->data;
	u8 nexthdr = ipv6_hdr(skb)->nexthdr;

	/* Check source address is valid. */
	addr_type = ipv6_addr_type(&old_ipv6h->saddr);
	if (addr_type & IPV6_ADDR_MULTICAST || addr_type == IPV6_ADDR_ANY)
		return false;

	/* Don't reply to unspecified addresses. */
	if (ipv6_addr_type(&old_ipv6h->daddr) == IPV6_ADDR_ANY)
		return false;

	/* Don't respond to ICMP error messages: skip any extension
	 * headers to find the upper-layer protocol. */
	payload_off = ipv6_skip_exthdr(skb, payload_off, &nexthdr);
	if (payload_off < 0)
		return false;

	if (nexthdr == NEXTHDR_ICMP) {
		u8 icmp_type, *icmp_typep;

		icmp_typep = skb_header_pointer(skb, payload_off +
						offsetof(struct icmp6hdr,
							icmp6_type),
						sizeof(icmp_type), &icmp_type);

		/* Informational messages only; never reply to an error. */
		if (!icmp_typep || !(*icmp_typep & ICMPV6_INFOMSG_MASK))
			return false;
	}

	return true;
}
416 | ||
/* Fills 'nskb' with an ICMPv6 "packet too big" message quoting the first
 * 'payload_length' bytes of the original packet and advertising 'mtu'.
 * Counterpart of ipv4_build_icmp(). */
static void ipv6_build_icmp(struct sk_buff *skb, struct sk_buff *nskb,
			    unsigned int mtu, unsigned int payload_length)
{
	struct ipv6hdr *ipv6h, *old_ipv6h = ipv6_hdr(skb);
	struct icmp6hdr *icmp6h;
	u8 *payload;

	ipv6h = (struct ipv6hdr *)skb_put(nskb, sizeof(struct ipv6hdr));
	icmp6h = (struct icmp6hdr *)skb_put(nskb, sizeof(struct icmp6hdr));
	payload = skb_put(nskb, payload_length);

	/* IPv6 */
	ipv6h->version = 6;
	ipv6h->priority = 0;
	memset(&ipv6h->flow_lbl, 0, sizeof(ipv6h->flow_lbl));
	ipv6h->payload_len = htons(sizeof(struct icmp6hdr)
				   + payload_length);
	ipv6h->nexthdr = NEXTHDR_ICMP;
	ipv6h->hop_limit = IPV6_DEFAULT_HOPLIMIT;
	/* Addresses swapped: the reply goes back toward the sender. */
	ipv6_addr_copy(&ipv6h->daddr, &old_ipv6h->saddr);
	ipv6_addr_copy(&ipv6h->saddr, &old_ipv6h->daddr);

	/* ICMPv6 */
	icmp6h->icmp6_type = ICMPV6_PKT_TOOBIG;
	icmp6h->icmp6_code = 0;
	icmp6h->icmp6_cksum = 0;
	icmp6h->icmp6_mtu = htonl(mtu);

	/* Partial sum over the ICMPv6 header, fold in the payload while
	 * copying it, then finish with the IPv6 pseudo-header. */
	nskb->csum = csum_partial((u8 *)icmp6h, sizeof(struct icmp6hdr), 0);
	nskb->csum = skb_copy_and_csum_bits(skb, (u8 *)old_ipv6h - skb->data,
					    payload, payload_length,
					    nskb->csum);
	icmp6h->icmp6_cksum = csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr,
					      sizeof(struct icmp6hdr)
					      + payload_length,
					      ipv6h->nexthdr, nskb->csum);
}
#endif /* IPv6 */
2736b84e | 455 | |
/* Synthesizes a PMTUD error (ICMP "frag needed" or ICMPv6 "packet too
 * big") for 'skb' and injects it back into the datapath as if it had
 * been received on 'vport'.  Returns true if the original packet should
 * be treated as handled (an error was sent, or deliberately
 * suppressed); false means the caller should fall back to other
 * behavior (e.g. fragmenting). */
static bool send_frag_needed(struct vport *vport,
			     const struct mutable_config *mutable,
			     struct sk_buff *skb, unsigned int mtu,
			     __be32 flow_key)
{
	unsigned int eth_hdr_len = ETH_HLEN;
	unsigned int total_length = 0, header_length = 0, payload_length;
	struct ethhdr *eh, *old_eh = eth_hdr(skb);
	struct sk_buff *nskb;

	/* Sanity check */
	if (skb->protocol == htons(ETH_P_IP)) {
		if (mtu < IP_MIN_MTU)
			return false;

		if (!ipv4_should_icmp(skb))
			return true;
	}
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
	else if (skb->protocol == htons(ETH_P_IPV6)) {
		if (mtu < IPV6_MIN_MTU)
			return false;

		/* In theory we should do PMTUD on IPv6 multicast messages but
		 * we don't have an address to send from so just fragment. */
		if (ipv6_addr_type(&ipv6_hdr(skb)->daddr) & IPV6_ADDR_MULTICAST)
			return false;

		if (!ipv6_should_icmp(skb))
			return true;
	}
#endif
	else
		return false;

	/* Allocate */
	if (old_eh->h_proto == htons(ETH_P_8021Q))
		eth_hdr_len = VLAN_ETH_HLEN;

	payload_length = skb->len - eth_hdr_len;
	if (skb->protocol == htons(ETH_P_IP)) {
		header_length = sizeof(struct iphdr) + sizeof(struct icmphdr);
		/* 576: presumably the classic IPv4 ICMP size bound (RFC
		 * 1812 4.3.2.3) — confirm. */
		total_length = min_t(unsigned int, header_length +
						   payload_length, 576);
	}
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
	else {
		header_length = sizeof(struct ipv6hdr) +
				sizeof(struct icmp6hdr);
		total_length = min_t(unsigned int, header_length +
						  payload_length, IPV6_MIN_MTU);
	}
#endif

	total_length = min(total_length, mutable->mtu);
	payload_length = total_length - header_length;

	nskb = dev_alloc_skb(NET_IP_ALIGN + eth_hdr_len + header_length +
			     payload_length);
	if (!nskb)
		return false;

	skb_reserve(nskb, NET_IP_ALIGN);

	/* Ethernet / VLAN: reply is addressed back to the original
	 * source, from this port's configured MAC. */
	eh = (struct ethhdr *)skb_put(nskb, eth_hdr_len);
	memcpy(eh->h_dest, old_eh->h_source, ETH_ALEN);
	memcpy(eh->h_source, mutable->eth_addr, ETH_ALEN);
	nskb->protocol = eh->h_proto = old_eh->h_proto;
	if (old_eh->h_proto == htons(ETH_P_8021Q)) {
		struct vlan_ethhdr *vh = (struct vlan_ethhdr *)eh;

		vh->h_vlan_TCI = vlan_eth_hdr(skb)->h_vlan_TCI;
		vh->h_vlan_encapsulated_proto = skb->protocol;
	}
	skb_reset_mac_header(nskb);

	/* Protocol */
	if (skb->protocol == htons(ETH_P_IP))
		ipv4_build_icmp(skb, nskb, mtu, payload_length);
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
	else
		ipv6_build_icmp(skb, nskb, mtu, payload_length);
#endif

	/* Assume that flow based keys are symmetric with respect to input
	 * and output and use the key that we were going to put on the
	 * outgoing packet for the fake received packet. If the keys are
	 * not symmetric then PMTUD needs to be disabled since we won't have
	 * any way of synthesizing packets. */
	if (mutable->port_config.flags & GRE_F_IN_KEY_MATCH &&
	    mutable->port_config.flags & GRE_F_OUT_KEY_ACTION)
		OVS_CB(nskb)->tun_id = flow_key;

	compute_ip_summed(nskb, false);
	vport_receive(vport, nskb);

	return true;
}
555 | ||
/* Ensures 'skb' has at least 'headroom' bytes of headroom and a private
 * (non-cloned) header area.  May replace the skb: returns the possibly
 * new skb, or an ERR_PTR on allocation failure, in which case the
 * original skb has been freed. */
static struct sk_buff *check_headroom(struct sk_buff *skb, int headroom)
{
	if (skb_headroom(skb) < headroom || skb_header_cloned(skb)) {
		/* +16: presumably slack to avoid an immediate second
		 * reallocation — confirm intent. */
		struct sk_buff *nskb = skb_realloc_headroom(skb, headroom + 16);
		if (!nskb) {
			kfree_skb(skb);
			return ERR_PTR(-ENOMEM);
		}

		set_skb_csum_bits(skb, nskb);

		if (skb->sk)
			skb_set_owner_w(nskb, skb->sk);

		dev_kfree_skb(skb);
		return nskb;
	}

	return skb;
}
576 | ||
/* Writes the GRE header and its optional key/checksum fields into the
 * space immediately after the IP header already on 'skb'.  The option
 * area was sized in advance as mutable->tunnel_hlen. */
static void create_gre_header(struct sk_buff *skb,
			      const struct mutable_config *mutable)
{
	struct iphdr *iph = ip_hdr(skb);
	struct gre_base_hdr *greh = (struct gre_base_hdr *)(iph + 1);
	/* Points at the last 32-bit option slot of the tunnel header. */
	__be32 *options = (__be32 *)((u8 *)iph + mutable->tunnel_hlen
					       - GRE_HEADER_SECTION);

	greh->protocol = htons(ETH_P_TEB);
	greh->flags = 0;

	/* Work backwards over the options so the checksum is last. */
	if (mutable->port_config.out_key ||
	    mutable->port_config.flags & GRE_F_OUT_KEY_ACTION) {
		greh->flags |= GRE_KEY;

		/* Flow-based out key takes the per-packet tunnel ID. */
		if (mutable->port_config.flags & GRE_F_OUT_KEY_ACTION)
			*options = OVS_CB(skb)->tun_id;
		else
			*options = mutable->port_config.out_key;

		options--;
	}

	if (mutable->port_config.flags & GRE_F_CSUM) {
		greh->flags |= GRE_CSUM;

		/* Zero the slot first; the checksum covers everything
		 * after the IP header (GRE header plus payload). */
		*options = 0;
		*(__sum16 *)options = csum_fold(skb_checksum(skb,
						sizeof(struct iphdr),
						skb->len - sizeof(struct iphdr),
						0));
	}
}
611 | ||
/* Validates the GRE checksum if the GRE_CSUM flag is present.  Returns
 * non-zero when the checksum is absent or verifies correctly. */
static int check_checksum(struct sk_buff *skb)
{
	struct iphdr *iph = ip_hdr(skb);
	__be16 flags = *(__be16 *)(iph + 1);
	__sum16 csum = 0;

	if (flags & GRE_CSUM) {
		switch (skb->ip_summed) {
		case CHECKSUM_COMPLETE:
			/* Hardware already summed the packet; folding to
			 * zero means the checksum is good. */
			csum = csum_fold(skb->csum);

			if (!csum)
				break;
			/* Fall through. */

		case CHECKSUM_NONE:
			/* Recompute the full checksum in software. */
			skb->csum = 0;
			csum = __skb_checksum_complete(skb);
			skb->ip_summed = CHECKSUM_COMPLETE;
			break;
		}
	}

	return (csum == 0);
}
637 | ||
/* Parses the GRE header that follows 'iph'.  Stores the GRE flags in
 * '*flags' and the key (0 if absent) in '*key', and returns the GRE
 * header length in bytes, or -EINVAL for headers we cannot handle. */
static int parse_gre_header(struct iphdr *iph, __be16 *flags, __be32 *key)
{
	/* IP and ICMP protocol handlers check that the IHL is valid. */
	struct gre_base_hdr *greh = (struct gre_base_hdr *)((u8 *)iph + (iph->ihl << 2));
	__be32 *options = (__be32 *)(greh + 1);
	int hdr_len;

	*flags = greh->flags;

	/* Only GRE version 0 without routing is supported. */
	if (greh->flags & (GRE_VERSION | GRE_ROUTING))
		return -EINVAL;

	/* Only Ethernet-over-GRE (Transparent Ethernet Bridging). */
	if (greh->protocol != htons(ETH_P_TEB))
		return -EINVAL;

	hdr_len = GRE_HEADER_SECTION;

	/* Options appear in checksum, key, sequence order; step over
	 * each present option to find the key. */
	if (greh->flags & GRE_CSUM) {
		hdr_len += GRE_HEADER_SECTION;
		options++;
	}

	if (greh->flags & GRE_KEY) {
		hdr_len += GRE_HEADER_SECTION;

		*key = *options;
		options++;
	} else
		*key = 0;

	if (greh->flags & GRE_SEQ)
		hdr_len += GRE_HEADER_SECTION;

	return hdr_len;
}
673 | ||
/* Computes the outer-IP TOS for an encapsulated packet by combining the
 * requested 'tos' with the inner packet's ECN bits. */
static inline u8 ecn_encapsulate(u8 tos, struct sk_buff *skb)
{
	u8 inner;

	if (skb->protocol == htons(ETH_P_IP))
		inner = ((struct iphdr *)skb_network_header(skb))->tos;
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
	else if (skb->protocol == htons(ETH_P_IPV6))
		inner = ipv6_get_dsfield((struct ipv6hdr *)skb_network_header(skb));
#endif
	else
		inner = 0;	/* Non-IP payload carries no ECN state. */

	return INET_ECN_encapsulate(tos, inner);
}
689 | ||
/* Propagates a Congestion Experienced mark from the outer header ('tos')
 * to the inner IPv4/IPv6 header after decapsulation, looking through a
 * single VLAN tag if present.  No-op when the outer header had no CE. */
static inline void ecn_decapsulate(u8 tos, struct sk_buff *skb)
{
	if (INET_ECN_is_ce(tos)) {
		__be16 protocol = skb->protocol;
		unsigned int nw_header = skb_network_header(skb) - skb->data;

		if (skb->protocol == htons(ETH_P_8021Q)) {
			if (unlikely(!pskb_may_pull(skb, VLAN_ETH_HLEN)))
				return;

			protocol = vlan_eth_hdr(skb)->h_vlan_encapsulated_proto;
			nw_header += VLAN_HLEN;
		}

		if (protocol == htons(ETH_P_IP)) {
			if (unlikely(!pskb_may_pull(skb, nw_header
						    + sizeof(struct iphdr))))
				return;

			IP_ECN_set_ce((struct iphdr *)(nw_header + skb->data));
		}
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
		else if (protocol == htons(ETH_P_IPV6)) {
			if (unlikely(!pskb_may_pull(skb, nw_header
						    + sizeof(struct ipv6hdr))))
				return;

			IP6_ECN_set_ce((struct ipv6hdr *)(nw_header
							  + skb->data));
		}
#endif
	}
}
723 | ||
/* Segments a GSO skb into a list of linear skbs.  Consumes 'skb' and
 * returns the segment list, or the original skb when no segmentation is
 * needed.
 * NOTE(review): skb_gso_segment() can return an ERR_PTR, which is
 * propagated unchecked here — confirm all callers test IS_ERR(). */
static struct sk_buff *handle_gso(struct sk_buff *skb)
{
	if (skb_is_gso(skb)) {
		struct sk_buff *nskb = skb_gso_segment(skb, 0);

		dev_kfree_skb(skb);
		return nskb;
	}

	return skb;
}
735 | ||
fceb2a5b | 736 | static int handle_csum_offload(struct sk_buff *skb) |
2736b84e JG |
737 | { |
738 | if (skb->ip_summed == CHECKSUM_PARTIAL) | |
739 | return skb_checksum_help(skb); | |
1c6d11a8 JG |
740 | else { |
741 | skb->ip_summed = CHECKSUM_NONE; | |
2736b84e | 742 | return 0; |
1c6d11a8 | 743 | } |
2736b84e JG |
744 | } |
745 | ||
/* ICMP error handler for the GRE protocol: processes "fragmentation
 * needed" errors elicited by packets we previously encapsulated and
 * turns them into PMTUD errors for the inner flow via
 * send_frag_needed().  The skb is restored before returning since the
 * IP stack still owns it.
 * Called with rcu_read_lock. */
static void gre_err(struct sk_buff *skb, u32 info)
{
	struct vport *vport;
	const struct mutable_config *mutable;
	const int type = icmp_hdr(skb)->type;
	const int code = icmp_hdr(skb)->code;
	int mtu = ntohs(icmp_hdr(skb)->un.frag.mtu);

	struct iphdr *iph;
	__be16 flags;
	__be32 key;
	int tunnel_hdr_len, tot_hdr_len;
	unsigned int orig_mac_header;
	unsigned int orig_nw_header;

	if (type != ICMP_DEST_UNREACH || code != ICMP_FRAG_NEEDED)
		return;

	/* The minimum size packet that we would actually be able to process:
	 * encapsulating IP header, minimum GRE header, Ethernet header,
	 * inner IPv4 header. */
	if (!pskb_may_pull(skb, sizeof(struct iphdr) + GRE_HEADER_SECTION +
			   ETH_HLEN + sizeof(struct iphdr)))
		return;

	/* skb->data points at the quoted outer IP header. */
	iph = (struct iphdr *)skb->data;

	tunnel_hdr_len = parse_gre_header(iph, &flags, &key);
	if (tunnel_hdr_len < 0)
		return;

	vport = find_port(iph->saddr, iph->daddr, key, FIND_PORT_ANY, &mutable);
	if (!vport)
		return;

	/* Packets received by this function were previously sent by us, so
	 * any comparisons should be to the output values, not the input.
	 * However, it's not really worth it to have a hash table based on
	 * output keys (especially since ICMP error handling of tunneled packets
	 * isn't that reliable anyways). Therefore, we do a lookup based on the
	 * out key as if it were the in key and then check to see if the input
	 * and output keys are the same. */
	if (mutable->port_config.in_key != mutable->port_config.out_key)
		return;

	if (!!(mutable->port_config.flags & GRE_F_IN_KEY_MATCH) !=
	    !!(mutable->port_config.flags & GRE_F_OUT_KEY_ACTION))
		return;

	/* We always emit a checksum when configured to, so its absence
	 * means the quoted packet is not ours. */
	if ((mutable->port_config.flags & GRE_F_CSUM) && !(flags & GRE_CSUM))
		return;

	tunnel_hdr_len += iph->ihl << 2;

	/* Save header offsets so they can be restored before returning. */
	orig_mac_header = skb_mac_header(skb) - skb->data;
	orig_nw_header = skb_network_header(skb) - skb->data;
	skb_set_mac_header(skb, tunnel_hdr_len);

	tot_hdr_len = tunnel_hdr_len + ETH_HLEN;

	skb->protocol = eth_hdr(skb)->h_proto;
	if (skb->protocol == htons(ETH_P_8021Q)) {
		tot_hdr_len += VLAN_HLEN;
		skb->protocol = vlan_eth_hdr(skb)->h_vlan_encapsulated_proto;
	}

	skb_set_network_header(skb, tot_hdr_len);
	/* Convert the advertised outer-path MTU into an inner-frame MTU. */
	mtu -= tot_hdr_len;

	if (skb->protocol == htons(ETH_P_IP))
		tot_hdr_len += sizeof(struct iphdr);
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
	else if (skb->protocol == htons(ETH_P_IPV6))
		tot_hdr_len += sizeof(struct ipv6hdr);
#endif
	else
		goto out;

	if (!pskb_may_pull(skb, tot_hdr_len))
		goto out;

	/* Clamp implausibly small advertised MTUs up to the protocol
	 * minimum when the quoted inner packet is at least that large. */
	if (skb->protocol == htons(ETH_P_IP)) {
		if (mtu < IP_MIN_MTU) {
			if (ntohs(ip_hdr(skb)->tot_len) >= IP_MIN_MTU)
				mtu = IP_MIN_MTU;
			else
				goto out;
		}

	}
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
	else if (skb->protocol == htons(ETH_P_IPV6)) {
		if (mtu < IPV6_MIN_MTU) {
			unsigned int packet_length = sizeof(struct ipv6hdr) +
						     ntohs(ipv6_hdr(skb)->payload_len);

			if (packet_length >= IPV6_MIN_MTU
			    || ntohs(ipv6_hdr(skb)->payload_len) == 0)
				mtu = IPV6_MIN_MTU;
			else
				goto out;
		}
	}
#endif

	/* Expose the inner frame, synthesize the PMTUD error, then put
	 * the tunnel header back so the skb is unchanged for the stack. */
	__pskb_pull(skb, tunnel_hdr_len);
	send_frag_needed(vport, mutable, skb, mtu, key);
	skb_push(skb, tunnel_hdr_len);

out:
	skb_set_mac_header(skb, orig_mac_header);
	skb_set_network_header(skb, orig_nw_header);
	skb->protocol = htons(ETH_P_IP);
}
861 | ||
/* Receive handler for the GRE protocol: validates the outer headers,
 * strips the tunnel encapsulation, and hands the inner Ethernet frame
 * to the matching vport.  Consumes the skb in all cases and always
 * returns 0.
 * Called with rcu_read_lock. */
static int gre_rcv(struct sk_buff *skb)
{
	struct vport *vport;
	const struct mutable_config *mutable;
	int hdr_len;
	struct iphdr *iph;
	__be16 flags;
	__be32 key;

	if (!pskb_may_pull(skb, GRE_HEADER_SECTION + ETH_HLEN))
		goto error;

	if (!check_checksum(skb))
		goto error;

	iph = ip_hdr(skb);

	hdr_len = parse_gre_header(iph, &flags, &key);
	if (hdr_len < 0)
		goto error;

	/* daddr/saddr are swapped relative to gre_err(): here we are the
	 * receiver, so our local address is the packet's destination. */
	vport = find_port(iph->daddr, iph->saddr, key, FIND_PORT_ANY, &mutable);
	if (!vport) {
		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
		goto error;
	}

	if (!pskb_pull(skb, hdr_len) || !pskb_may_pull(skb, ETH_HLEN)) {
		vport_record_error(vport, VPORT_E_RX_ERROR);
		goto error;
	}

	skb->pkt_type = PACKET_HOST;
	skb->protocol = eth_type_trans(skb, skb->dev);
	skb_postpull_rcsum(skb, skb_transport_header(skb), hdr_len + ETH_HLEN);

	/* Scrub state that belongs to the outer packet. */
	skb_dst_drop(skb);
	nf_reset(skb);
	secpath_reset(skb);
	skb_reset_network_header(skb);

	ecn_decapsulate(iph->tos, skb);

	/* In key-match mode the received key identifies the flow. */
	if (mutable->port_config.flags & GRE_F_IN_KEY_MATCH)
		OVS_CB(skb)->tun_id = key;
	else
		OVS_CB(skb)->tun_id = 0;

	skb_push(skb, ETH_HLEN);
	compute_ip_summed(skb, false);

	vport_receive(vport, skb);

	return 0;

error:
	kfree_skb(skb);
	return 0;
}
922 | ||
/* Encapsulates one (possibly GSO-segmented) skb in the precomputed outer IP
 * header 'iph' plus a GRE header and transmits it via the local IP stack.
 *
 * Returns the original (pre-encapsulation) length of the skb on success so
 * the caller can accumulate byte counts, or 0 on any failure (the error is
 * recorded against 'vport' and the skb is freed). */
static int build_packet(struct vport *vport, const struct mutable_config *mutable,
			struct iphdr *iph, struct rtable *rt, int max_headroom,
			int mtu, struct sk_buff *skb)
{
	int err;
	struct iphdr *new_iph;
	/* Capture the length before we start pushing outer headers. */
	int orig_len = skb->len;
	__be16 frag_off = iph->frag_off;

	/* May copy/replace the skb; on error the original has been freed. */
	skb = check_headroom(skb, max_headroom);
	if (unlikely(IS_ERR(skb)))
		goto error;

	err = handle_csum_offload(skb);
	if (err)
		goto error_free;

	if (skb->protocol == htons(ETH_P_IP)) {
		struct iphdr *old_iph = ip_hdr(skb);

		/* Inner packet has DF set and doesn't fit: reflect an ICMP
		 * frag-needed back to the sender instead of transmitting. */
		if ((old_iph->frag_off & htons(IP_DF)) &&
		    mtu < ntohs(old_iph->tot_len)) {
			if (send_frag_needed(vport, mutable, skb, mtu, OVS_CB(skb)->tun_id))
				goto error_free;
		}

	}
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
	else if (skb->protocol == htons(ETH_P_IPV6)) {
		unsigned int packet_length = skb->len - ETH_HLEN
			- (eth_hdr(skb)->h_proto == htons(ETH_P_8021Q) ? VLAN_HLEN : 0);

		/* IPv6 requires PMTUD if the packet is above the minimum MTU. */
		if (packet_length > IPV6_MIN_MTU)
			frag_off = htons(IP_DF);

		if (mtu < packet_length) {
			if (send_frag_needed(vport, mutable, skb, mtu, OVS_CB(skb)->tun_id))
				goto error_free;
		}
	}
#endif

	/* Push the outer IP header template in front of the inner frame. */
	skb_reset_transport_header(skb);
	new_iph = (struct iphdr *)skb_push(skb, mutable->tunnel_hlen);
	skb_reset_network_header(skb);

	memcpy(new_iph, iph, sizeof(struct iphdr));
	new_iph->frag_off = frag_off;
	ip_select_ident(new_iph, &rt_dst(rt), NULL);

	create_gre_header(skb, mutable);

	/* Allow our local IP stack to fragment the outer packet even if the
	 * DF bit is set as a last resort. */
	skb->local_df = 1;

	/* Clear inherited IP control-block state before handing to ip_local_out. */
	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
	IPCB(skb)->flags = 0;

	err = ip_local_out(skb);
	if (likely(net_xmit_eval(err) == 0))
		return orig_len;
	else {
		vport_record_error(vport, VPORT_E_TX_ERROR);
		return 0;
	}

error_free:
	kfree_skb(skb);
error:
	vport_record_error(vport, VPORT_E_TX_DROPPED);

	return 0;
}
998 | ||
/* Transmit path: encapsulates 'skb' according to the vport's current
 * (RCU-protected) configuration and sends it through the routed output
 * device.  Builds a template outer IP header (ToS/TTL/DF possibly inherited
 * from the inner packet), performs the route lookup, then segments via GSO
 * and hands each segment to build_packet().
 *
 * Returns the total number of payload bytes queued, or 0 on error. */
static int gre_send(struct vport *vport, struct sk_buff *skb)
{
	struct gre_vport *gre_vport = gre_vport_priv(vport);
	const struct mutable_config *mutable = rcu_dereference(gre_vport->mutable);

	struct iphdr *old_iph;
	int orig_len;
	struct iphdr iph;	/* Template for the outer header. */
	struct rtable *rt;
	int max_headroom;
	int mtu;

	/* Validate the protocol headers before we try to use them. */
	if (skb->protocol == htons(ETH_P_8021Q)) {
		if (unlikely(!pskb_may_pull(skb, VLAN_ETH_HLEN)))
			goto error_free;

		skb->protocol = vlan_eth_hdr(skb)->h_vlan_encapsulated_proto;
		skb_set_network_header(skb, VLAN_ETH_HLEN);
	}

	/* If the inner L3 header isn't fully present, zero skb->protocol so
	 * the inherit/PMTU logic below treats the packet as opaque. */
	if (skb->protocol == htons(ETH_P_IP)) {
		if (unlikely(!pskb_may_pull(skb, skb_network_header(skb)
		    + sizeof(struct iphdr) - skb->data)))
			skb->protocol = 0;
	}
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
	else if (skb->protocol == htons(ETH_P_IPV6)) {
		if (unlikely(!pskb_may_pull(skb, skb_network_header(skb)
		    + sizeof(struct ipv6hdr) - skb->data)))
			skb->protocol = 0;
	}
#endif
	old_iph = ip_hdr(skb);

	/* Outer ToS: fixed from config, or inherited from the inner packet. */
	iph.tos = mutable->port_config.tos;
	if (mutable->port_config.flags & GRE_F_TOS_INHERIT) {
		if (skb->protocol == htons(ETH_P_IP))
			iph.tos = old_iph->tos;
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
		else if (skb->protocol == htons(ETH_P_IPV6))
			iph.tos = ipv6_get_dsfield(ipv6_hdr(skb));
#endif
	}
	iph.tos = ecn_encapsulate(iph.tos, skb);

	/* Route the outer packet; ToS participates in the route key. */
	{
		struct flowi fl = { .nl_u = { .ip4_u =
					      { .daddr = mutable->port_config.daddr,
						.saddr = mutable->port_config.saddr,
						.tos = RT_TOS(iph.tos) } },
				    .proto = IPPROTO_GRE };

		if (ip_route_output_key(&init_net, &rt, &fl))
			goto error_free;
	}

	/* Outer TTL: fixed, inherited, or (if zero) taken from the route. */
	iph.ttl = mutable->port_config.ttl;
	if (mutable->port_config.flags & GRE_F_TTL_INHERIT) {
		if (skb->protocol == htons(ETH_P_IP))
			iph.ttl = old_iph->ttl;
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
		else if (skb->protocol == htons(ETH_P_IPV6))
			iph.ttl = ipv6_hdr(skb)->hop_limit;
#endif
	}
	if (!iph.ttl)
		iph.ttl = dst_metric(&rt_dst(rt), RTAX_HOPLIMIT);

	/* With PMTUD enabled, the usable inner MTU is the path MTU minus all
	 * encapsulation overhead (inner Ethernet, tunnel headers, VLAN tag). */
	iph.frag_off = (mutable->port_config.flags & GRE_F_PMTUD) ? htons(IP_DF) : 0;
	if (iph.frag_off)
		mtu = dst_mtu(&rt_dst(rt))
			- ETH_HLEN
			- mutable->tunnel_hlen
			- (eth_hdr(skb)->h_proto == htons(ETH_P_8021Q) ? VLAN_HLEN : 0);
	else
		mtu = mutable->mtu;

	if (skb->protocol == htons(ETH_P_IP)) {
		/* Propagate the inner DF bit to the outer header. */
		iph.frag_off |= old_iph->frag_off & htons(IP_DF);
		mtu = max(mtu, IP_MIN_MTU);
	}
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
	else if (skb->protocol == htons(ETH_P_IPV6))
		mtu = max(mtu, IPV6_MIN_MTU);
#endif

	iph.version = 4;
	iph.ihl = sizeof(struct iphdr) >> 2;
	iph.protocol = IPPROTO_GRE;
	iph.daddr = rt->rt_dst;
	iph.saddr = rt->rt_src;

	nf_reset(skb);
	secpath_reset(skb);
	skb_dst_drop(skb);
	/* The skb now owns the route; freeing the skb releases it. */
	skb_dst_set(skb, &rt_dst(rt));

	/* If we are doing GSO on a pskb it is better to make sure that the
	 * headroom is correct now. We will only have to copy the portion in
	 * the linear data area and GSO will preserve headroom when it creates
	 * the segments. This is particularly beneficial on Xen where we get
	 * lots of GSO pskbs. Conversely, we delay copying if it is just to
	 * get our own writable clone because GSO may do the copy for us. */
	max_headroom = LL_RESERVED_SPACE(rt_dst(rt).dev) + rt_dst(rt).header_len
			+ mutable->tunnel_hlen;

	if (skb_headroom(skb) < max_headroom) {
		skb = check_headroom(skb, max_headroom);
		if (unlikely(IS_ERR(skb))) {
			vport_record_error(vport, VPORT_E_TX_DROPPED);
			goto error;
		}
	}

	forward_ip_summed(skb);

	if (unlikely(vswitch_skb_checksum_setup(skb)))
		goto error_free;

	/* May return a list of segments chained via skb->next. */
	skb = handle_gso(skb);
	if (unlikely(IS_ERR(skb))) {
		vport_record_error(vport, VPORT_E_TX_DROPPED);
		goto error;
	}

	/* Process GSO segments. Try to do any work for the entire packet that
	 * doesn't involve actually writing to it before this point. */
	orig_len = 0;
	do {
		struct sk_buff *next_skb = skb->next;
		skb->next = NULL;

		orig_len += build_packet(vport, mutable, &iph, rt, max_headroom, mtu, skb);

		skb = next_skb;
	} while (skb);

	return orig_len;

error_free:
	kfree_skb(skb);
	vport_record_error(vport, VPORT_E_TX_ERROR);
error:
	return 0;
}
1145 | ||
/* Hook into the IPv4 stack for IPPROTO_GRE: all GRE packets received by the
 * host are delivered to gre_rcv(); ICMP errors for our tunnels to gre_err(). */
static struct net_protocol gre_protocol_handlers = {
	.handler	=	gre_rcv,
	.err_handler	=	gre_err,
};
1150 | ||
fceb2a5b | 1151 | static int gre_init(void) |
2736b84e JG |
1152 | { |
1153 | int err; | |
1154 | ||
1155 | err = inet_add_protocol(&gre_protocol_handlers, IPPROTO_GRE); | |
1156 | if (err) | |
1157 | printk(KERN_WARNING "openvswitch: cannot register gre protocol handler\n"); | |
1158 | ||
1159 | return err; | |
1160 | } | |
1161 | ||
fceb2a5b | 1162 | static void gre_exit(void) |
2736b84e JG |
1163 | { |
1164 | tbl_destroy(port_table, NULL); | |
1165 | inet_del_protocol(&gre_protocol_handlers, IPPROTO_GRE); | |
1166 | } | |
1167 | ||
fceb2a5b JG |
1168 | static int set_config(const struct vport *cur_vport, |
1169 | struct mutable_config *mutable, const void __user *uconfig) | |
2736b84e JG |
1170 | { |
1171 | const struct vport *old_vport; | |
1172 | const struct mutable_config *old_mutable; | |
1173 | int port_type; | |
1174 | ||
1175 | if (copy_from_user(&mutable->port_config, uconfig, sizeof(struct gre_port_config))) | |
1176 | return -EFAULT; | |
1177 | ||
1178 | if (mutable->port_config.daddr == 0) | |
1179 | return -EINVAL; | |
1180 | ||
1181 | if (mutable->port_config.flags & GRE_F_IN_KEY_MATCH) { | |
1182 | port_type = FIND_PORT_MATCH; | |
1183 | mutable->port_config.in_key = 0; | |
1184 | } else | |
1185 | port_type = FIND_PORT_KEY; | |
1186 | ||
1187 | old_vport = find_port(mutable->port_config.saddr, | |
1188 | mutable->port_config.daddr, | |
1189 | mutable->port_config.in_key, port_type, | |
1190 | &old_mutable); | |
1191 | ||
1192 | if (old_vport && old_vport != cur_vport) | |
1193 | return -EEXIST; | |
1194 | ||
eea2aafb JG |
1195 | if (mutable->port_config.flags & GRE_F_OUT_KEY_ACTION) |
1196 | mutable->port_config.out_key = 0; | |
1197 | ||
2736b84e JG |
1198 | mutable->tunnel_hlen = sizeof(struct iphdr) + GRE_HEADER_SECTION; |
1199 | ||
16e9d4f6 | 1200 | if (mutable->port_config.flags & GRE_F_CSUM) |
2736b84e JG |
1201 | mutable->tunnel_hlen += GRE_HEADER_SECTION; |
1202 | ||
1203 | if (mutable->port_config.out_key || | |
1204 | mutable->port_config.flags & GRE_F_OUT_KEY_ACTION) | |
1205 | mutable->tunnel_hlen += GRE_HEADER_SECTION; | |
1206 | ||
1207 | return 0; | |
1208 | } | |
1209 | ||
/* Creates a new GRE vport named 'name' from the user-supplied 'config'.
 * Returns the new vport on success or an ERR_PTR on failure; on failure all
 * partially-allocated state is released. */
static struct vport *gre_create(const char *name, const void __user *config)
{
	struct vport *vport;
	struct gre_vport *gre_vport;
	int err;

	vport = vport_alloc(sizeof(struct gre_vport), &gre_vport_ops);
	if (IS_ERR(vport)) {
		err = PTR_ERR(vport);
		goto error;
	}

	gre_vport = gre_vport_priv(vport);

	/* NOTE(review): unbounded strcpy -- presumably 'name' has already been
	 * length-checked against gre_vport->name by the caller; confirm, or
	 * use a bounded copy. */
	strcpy(gre_vport->name, name);

	gre_vport->mutable = kmalloc(sizeof(struct mutable_config), GFP_KERNEL);
	if (!gre_vport->mutable) {
		err = -ENOMEM;
		goto error_free_vport;
	}

	/* Defaults: random MAC and standard Ethernet MTU; set_config() fills
	 * in the tunnel-specific fields from userspace. */
	vport_gen_rand_ether_addr(gre_vport->mutable->eth_addr);
	gre_vport->mutable->mtu = ETH_DATA_LEN;

	err = set_config(NULL, gre_vport->mutable, config);
	if (err)
		goto error_free_mutable;

	err = add_port(vport);
	if (err)
		goto error_free_mutable;

	return vport;

error_free_mutable:
	kfree(gre_vport->mutable);
error_free_vport:
	vport_free(vport);
error:
	return ERR_PTR(err);
}
1252 | ||
/* Applies a new user-supplied configuration to an existing GRE vport.
 * A copy of the current config is modified and then published with
 * assign_config_rcu() so concurrent readers always see a consistent config.
 * Returns 0 or a negative errno. */
static int gre_modify(struct vport *vport, const void __user *config)
{
	struct gre_vport *gre_vport = gre_vport_priv(vport);
	struct mutable_config *mutable;
	int err;
	int update_hash = 0;

	/* Work on a private copy; the live config stays visible to readers
	 * until assign_config_rcu() swaps it in. */
	mutable = kmemdup(gre_vport->mutable, sizeof(struct mutable_config), GFP_KERNEL);
	if (!mutable) {
		err = -ENOMEM;
		goto error;
	}

	err = set_config(vport, mutable, config);
	if (err)
		goto error_free;

	/* Only remove the port from the hash table if something that would
	 * affect the lookup has changed. */
	if (gre_vport->mutable->port_config.saddr != mutable->port_config.saddr ||
	    gre_vport->mutable->port_config.daddr != mutable->port_config.daddr ||
	    gre_vport->mutable->port_config.in_key != mutable->port_config.in_key ||
	    (gre_vport->mutable->port_config.flags & GRE_F_IN_KEY_MATCH) !=
	    (mutable->port_config.flags & GRE_F_IN_KEY_MATCH))
		update_hash = 1;


	/* This update is not atomic but the lookup uses the config, which
	 * serves as an inherent double check. */
	if (update_hash) {
		err = del_port(vport);
		if (err)
			goto error_free;
	}

	assign_config_rcu(vport, mutable);

	if (update_hash) {
		/* NOTE(review): if this add_port() fails the port has already
		 * been removed from the hash table and the new config is live;
		 * the error leaves the port unreachable by lookup -- confirm
		 * this is the intended failure mode. */
		err = add_port(vport);
		if (err)
			goto error_free;
	}

	return 0;

error_free:
	kfree(mutable);
error:
	return err;
}
1303 | ||
2848cb49 JG |
1304 | static void free_port(struct rcu_head *rcu) |
1305 | { | |
1306 | struct gre_vport *gre_vport = container_of(rcu, struct gre_vport, rcu); | |
1307 | ||
1308 | kfree(gre_vport->mutable); | |
1309 | vport_free(gre_vport_to_vport(gre_vport)); | |
1310 | } | |
1311 | ||
fceb2a5b | 1312 | static int gre_destroy(struct vport *vport) |
2736b84e JG |
1313 | { |
1314 | struct gre_vport *gre_vport = gre_vport_priv(vport); | |
1315 | int port_type; | |
1316 | const struct mutable_config *old_mutable; | |
1317 | ||
1318 | /* Do a hash table lookup to make sure that the port exists. It should | |
1319 | * exist but might not if a modify failed earlier. */ | |
1320 | if (gre_vport->mutable->port_config.flags & GRE_F_IN_KEY_MATCH) | |
1321 | port_type = FIND_PORT_MATCH; | |
1322 | else | |
1323 | port_type = FIND_PORT_KEY; | |
1324 | ||
1325 | if (vport == find_port(gre_vport->mutable->port_config.saddr, | |
1326 | gre_vport->mutable->port_config.daddr, | |
1327 | gre_vport->mutable->port_config.in_key, port_type, &old_mutable)) | |
1328 | del_port(vport); | |
1329 | ||
2848cb49 | 1330 | call_rcu(&gre_vport->rcu, free_port); |
2736b84e JG |
1331 | |
1332 | return 0; | |
1333 | } | |
1334 | ||
fceb2a5b | 1335 | static int gre_set_mtu(struct vport *vport, int mtu) |
2736b84e JG |
1336 | { |
1337 | struct gre_vport *gre_vport = gre_vport_priv(vport); | |
1338 | struct mutable_config *mutable; | |
2736b84e JG |
1339 | |
1340 | mutable = kmemdup(gre_vport->mutable, sizeof(struct mutable_config), GFP_KERNEL); | |
1341 | if (!mutable) | |
1342 | return -ENOMEM; | |
1343 | ||
1344 | mutable->mtu = mtu; | |
1345 | assign_config_rcu(vport, mutable); | |
1346 | ||
2736b84e JG |
1347 | return 0; |
1348 | } | |
1349 | ||
fceb2a5b | 1350 | static int gre_set_addr(struct vport *vport, const unsigned char *addr) |
2736b84e JG |
1351 | { |
1352 | struct gre_vport *gre_vport = gre_vport_priv(vport); | |
1353 | struct mutable_config *mutable; | |
1354 | ||
1355 | mutable = kmemdup(gre_vport->mutable, sizeof(struct mutable_config), GFP_KERNEL); | |
1356 | if (!mutable) | |
1357 | return -ENOMEM; | |
1358 | ||
1359 | memcpy(mutable->eth_addr, addr, ETH_ALEN); | |
1360 | assign_config_rcu(vport, mutable); | |
1361 | ||
1362 | return 0; | |
1363 | } | |
1364 | ||
1365 | ||
fceb2a5b | 1366 | static const char *gre_get_name(const struct vport *vport) |
2736b84e JG |
1367 | { |
1368 | const struct gre_vport *gre_vport = gre_vport_priv(vport); | |
1369 | return gre_vport->name; | |
1370 | } | |
1371 | ||
fceb2a5b | 1372 | static const unsigned char *gre_get_addr(const struct vport *vport) |
2736b84e JG |
1373 | { |
1374 | const struct gre_vport *gre_vport = gre_vport_priv(vport); | |
1375 | return rcu_dereference(gre_vport->mutable)->eth_addr; | |
1376 | } | |
1377 | ||
fceb2a5b | 1378 | static int gre_get_mtu(const struct vport *vport) |
2736b84e JG |
1379 | { |
1380 | const struct gre_vport *gre_vport = gre_vport_priv(vport); | |
1381 | return rcu_dereference(gre_vport->mutable)->mtu; | |
1382 | } | |
1383 | ||
/* vport operations table for GRE tunnel ports.  VPORT_F_GEN_STATS enables
 * generic stat collection; VPORT_F_TUN_ID exposes OVS_CB(skb)->tun_id set in
 * gre_rcv() to the flow lookup.  Link-state queries fall through to the
 * generic vport implementations. */
struct vport_ops gre_vport_ops = {
	.type		= "gre",
	.flags		= VPORT_F_GEN_STATS | VPORT_F_TUN_ID,
	.init		= gre_init,
	.exit		= gre_exit,
	.create		= gre_create,
	.modify		= gre_modify,
	.destroy	= gre_destroy,
	.set_mtu	= gre_set_mtu,
	.set_addr	= gre_set_addr,
	.get_name	= gre_get_name,
	.get_addr	= gre_get_addr,
	.get_dev_flags	= vport_gen_get_dev_flags,
	.is_running	= vport_gen_is_running,
	.get_operstate	= vport_gen_get_operstate,
	.get_mtu	= gre_get_mtu,
	.send		= gre_send,
};