/*
 * Copyright (c) 2010 Nicira Networks.
 * Distributed under the terms of the GNU GPL version 2.
 *
 * Significant portions of this file may be copied from parts of the Linux
 * kernel, by Linus Torvalds and others.
 */

#include <linux/if_arp.h>
#include <linux/if_ether.h>
#include <linux/ip.h>
#include <linux/if_vlan.h>
#include <linux/in.h>
#include <linux/in_route.h>
#include <linux/jhash.h>
#include <linux/kernel.h>
#include <linux/version.h>
#include <linux/workqueue.h>

#include <net/dsfield.h>
#include <net/dst.h>
#include <net/icmp.h>
#include <net/inet_ecn.h>
#include <net/ip.h>
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
#include <net/ipv6.h>
#endif
#include <net/route.h>
#include <net/xfrm.h>

#include "actions.h"
#include "checksum.h"
#include "datapath.h"
#include "table.h"
#include "tunnel.h"
#include "vport.h"
#include "vport-generic.h"
#include "vport-internal_dev.h"

#ifdef NEED_CACHE_TIMEOUT
/*
 * On kernels where we can't quickly detect changes in the rest of the system
 * we use an expiration time to invalidate the cache.  A shorter expiration
 * reduces the length of time that we may potentially blackhole packets, while
 * a longer time increases performance by reducing the frequency with which the
 * cache needs to be rebuilt.  A variety of factors may cause the cache to be
 * invalidated before the expiration time, but this is the maximum.  The time
 * is expressed in jiffies.
 */
#define MAX_CACHE_EXP HZ
#endif

/*
 * Interval at which to check for and remove caches that are no longer valid.
 * Caches are checked for validity before they are used for packet
 * encapsulation and old caches are removed at that time.  However, if no
 * packets are sent through the tunnel then the cache will never be destroyed.
 * Since it holds references to a number of system objects, the cache will
 * continue to consume system resources by keeping those objects from being
 * destroyed.  The cache cleaner is run periodically to free invalid caches;
 * it does not significantly affect system performance.  A lower interval
 * releases resources faster but itself consumes resources by requiring more
 * frequent checks.  A longer interval may result in messages being printed to
 * the kernel message buffer about unreleased resources.  The interval is
 * expressed in jiffies.
 */
#define CACHE_CLEANER_INTERVAL (5 * HZ)

#define CACHE_DATA_ALIGN 16

static struct tbl __rcu *port_table __read_mostly;

static void cache_cleaner(struct work_struct *work);
static DECLARE_DELAYED_WORK(cache_cleaner_wq, cache_cleaner);

/*
 * These are just used as an optimization: they don't require any kind of
 * synchronization because we could have just as easily read the value before
 * the port change happened.
 */
static unsigned int key_local_remote_ports __read_mostly;
static unsigned int key_remote_ports __read_mostly;
static unsigned int local_remote_ports __read_mostly;
static unsigned int remote_ports __read_mostly;

#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,36)
#define rt_dst(rt) (rt->dst)
#else
#define rt_dst(rt) (rt->u.dst)
#endif

static inline struct vport *tnl_vport_to_vport(const struct tnl_vport *tnl_vport)
{
	return vport_from_priv(tnl_vport);
}

static inline struct tnl_vport *tnl_vport_table_cast(const struct tbl_node *node)
{
	return container_of(node, struct tnl_vport, tbl_node);
}

/* This is analogous to rtnl_dereference for the tunnel cache.  It checks that
 * cache_lock is held, so it is only for update side code.
 */
static inline struct tnl_cache *cache_dereference(struct tnl_vport *tnl_vport)
{
	return rcu_dereference_protected(tnl_vport->cache,
					 lockdep_is_held(&tnl_vport->cache_lock));
}

static inline void schedule_cache_cleaner(void)
{
	schedule_delayed_work(&cache_cleaner_wq, CACHE_CLEANER_INTERVAL);
}

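/* Releases the flow and route references held by 'cache' and frees it. */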
static void free_cache(struct tnl_cache *cache)
{
	if (!cache)
		return;

	flow_put(cache->flow);
	ip_rt_put(cache->rt);
	kfree(cache);
}

static void free_config_rcu(struct rcu_head *rcu)
{
	struct tnl_mutable_config *c = container_of(rcu, struct tnl_mutable_config, rcu);
	kfree(c);
}

static void free_cache_rcu(struct rcu_head *rcu)
{
	struct tnl_cache *c = container_of(rcu, struct tnl_cache, rcu);
	free_cache(c);
}

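/*
 * The following two helpers publish a new pointer with rcu_assign_pointer()
 * and defer freeing the old value until after an RCU grace period, so that
 * concurrent readers never see freed memory.
 */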
static void assign_config_rcu(struct vport *vport,
			      struct tnl_mutable_config *new_config)
{
	struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
	struct tnl_mutable_config *old_config;

	old_config = rtnl_dereference(tnl_vport->mutable);
	rcu_assign_pointer(tnl_vport->mutable, new_config);
	call_rcu(&old_config->rcu, free_config_rcu);
}

static void assign_cache_rcu(struct vport *vport, struct tnl_cache *new_cache)
{
	struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
	struct tnl_cache *old_cache;

	old_cache = cache_dereference(tnl_vport);
	rcu_assign_pointer(tnl_vport->cache, new_cache);

	if (old_cache)
		call_rcu(&old_cache->rcu, free_cache_rcu);
}

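/*
 * Returns the port counter for this port's lookup class: flow-based vs.
 * exact key matching (TNL_F_IN_KEY_MATCH) and with or without a bound local
 * address.  tnl_find_port() consults these counters to skip hash lookups
 * that cannot possibly match any port.
 */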
static unsigned int *find_port_pool(const struct tnl_mutable_config *mutable)
{
	if (mutable->port_config.flags & TNL_F_IN_KEY_MATCH) {
		if (mutable->port_config.saddr)
			return &local_remote_ports;
		else
			return &remote_ports;
	} else {
		if (mutable->port_config.saddr)
			return &key_local_remote_ports;
		else
			return &key_remote_ports;
	}
}

struct port_lookup_key {
	const struct tnl_mutable_config *mutable;
	__be64 key;
	u32 tunnel_type;
	__be32 saddr;
	__be32 daddr;
};

/*
 * Modifies 'target' to store the rcu_dereferenced pointer that was used to do
 * the comparison.
 */
static int port_cmp(const struct tbl_node *node, void *target)
{
	const struct tnl_vport *tnl_vport = tnl_vport_table_cast(node);
	struct port_lookup_key *lookup = target;

	lookup->mutable = rcu_dereference_rtnl(tnl_vport->mutable);

	return (lookup->mutable->tunnel_type == lookup->tunnel_type &&
		lookup->mutable->port_config.daddr == lookup->daddr &&
		lookup->mutable->port_config.in_key == lookup->key &&
		lookup->mutable->port_config.saddr == lookup->saddr);
}

static u32 port_hash(struct port_lookup_key *k)
{
	u32 x = jhash_3words((__force u32)k->saddr, (__force u32)k->daddr,
			     k->tunnel_type, 0);
	return jhash_2words((__force u64)k->key >> 32, (__force u32)k->key, x);
}

static u32 mutable_hash(const struct tnl_mutable_config *mutable)
{
	struct port_lookup_key lookup;

	lookup.saddr = mutable->port_config.saddr;
	lookup.daddr = mutable->port_config.daddr;
	lookup.key = mutable->port_config.in_key;
	lookup.tunnel_type = mutable->tunnel_type;

	return port_hash(&lookup);
}

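/*
 * Called with RTNL lock held: once the last tunnel port is gone, stop the
 * cache cleaner and destroy the port table after an RCU grace period.
 */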
static void check_table_empty(void)
{
	struct tbl *old_table = rtnl_dereference(port_table);

	if (tbl_count(old_table) == 0) {
		cancel_delayed_work_sync(&cache_cleaner_wq);
		rcu_assign_pointer(port_table, NULL);
		tbl_deferred_destroy(old_table, NULL);
	}
}

static int add_port(struct vport *vport)
{
	struct tbl *cur_table = rtnl_dereference(port_table);
	struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
	int err;

	if (!port_table) {
		struct tbl *new_table;

		new_table = tbl_create(TBL_MIN_BUCKETS);
		if (!new_table)
			return -ENOMEM;

		rcu_assign_pointer(port_table, new_table);
		schedule_cache_cleaner();

	} else if (tbl_count(cur_table) > tbl_n_buckets(cur_table)) {
		struct tbl *new_table;

		new_table = tbl_expand(cur_table);
		if (IS_ERR(new_table))
			return PTR_ERR(new_table);

		rcu_assign_pointer(port_table, new_table);
		tbl_deferred_destroy(cur_table, NULL);
	}

	err = tbl_insert(rtnl_dereference(port_table), &tnl_vport->tbl_node,
			 mutable_hash(rtnl_dereference(tnl_vport->mutable)));
	if (err) {
		check_table_empty();
		return err;
	}

	(*find_port_pool(rtnl_dereference(tnl_vport->mutable)))++;

	return 0;
}

static int move_port(struct vport *vport, struct tnl_mutable_config *new_mutable)
{
	int err;
	struct tbl *cur_table = rtnl_dereference(port_table);
	struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
	u32 hash;

	hash = mutable_hash(new_mutable);
	if (hash == tnl_vport->tbl_node.hash)
		goto table_updated;

	/*
	 * Ideally we should make this move atomic to avoid having gaps in
	 * finding tunnels or the possibility of failure.  However, if we do
	 * find a tunnel it will always be consistent.
	 */
	err = tbl_remove(cur_table, &tnl_vport->tbl_node);
	if (err)
		return err;

	err = tbl_insert(cur_table, &tnl_vport->tbl_node, hash);
	if (err) {
		(*find_port_pool(rtnl_dereference(tnl_vport->mutable)))--;
		check_table_empty();
		return err;
	}

table_updated:
	(*find_port_pool(rtnl_dereference(tnl_vport->mutable)))--;
	assign_config_rcu(vport, new_mutable);
	(*find_port_pool(rtnl_dereference(tnl_vport->mutable)))++;

	return 0;
}

static int del_port(struct vport *vport)
{
	struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
	int err;

	err = tbl_remove(rtnl_dereference(port_table), &tnl_vport->tbl_node);
	if (err)
		return err;

	check_table_empty();
	(*find_port_pool(rtnl_dereference(tnl_vport->mutable)))--;

	return 0;
}

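/*
 * Finds the tunnel port that the given addresses, key, and tunnel type map
 * to.  Exact-key matches are tried before flow-based key matches, and within
 * each class a port bound to a specific local address takes precedence over
 * a wildcard one.  On success, '*mutable' is set to the matching config.
 */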
struct vport *tnl_find_port(__be32 saddr, __be32 daddr, __be64 key,
			    int tunnel_type,
			    const struct tnl_mutable_config **mutable)
{
	struct port_lookup_key lookup;
	struct tbl *table = rcu_dereference_rtnl(port_table);
	struct tbl_node *tbl_node;

	if (unlikely(!table))
		return NULL;

	lookup.saddr = saddr;
	lookup.daddr = daddr;

	if (tunnel_type & TNL_T_KEY_EXACT) {
		lookup.key = key;
		lookup.tunnel_type = tunnel_type & ~TNL_T_KEY_MATCH;

		if (key_local_remote_ports) {
			tbl_node = tbl_lookup(table, &lookup, port_hash(&lookup), port_cmp);
			if (tbl_node)
				goto found;
		}

		if (key_remote_ports) {
			lookup.saddr = 0;

			tbl_node = tbl_lookup(table, &lookup, port_hash(&lookup), port_cmp);
			if (tbl_node)
				goto found;

			lookup.saddr = saddr;
		}
	}

	if (tunnel_type & TNL_T_KEY_MATCH) {
		lookup.key = 0;
		lookup.tunnel_type = tunnel_type & ~TNL_T_KEY_EXACT;

		if (local_remote_ports) {
			tbl_node = tbl_lookup(table, &lookup, port_hash(&lookup), port_cmp);
			if (tbl_node)
				goto found;
		}

		if (remote_ports) {
			lookup.saddr = 0;

			tbl_node = tbl_lookup(table, &lookup, port_hash(&lookup), port_cmp);
			if (tbl_node)
				goto found;
		}
	}

	return NULL;

found:
	*mutable = lookup.mutable;
	return tnl_vport_to_vport(tnl_vport_table_cast(tbl_node));
}

static inline void ecn_decapsulate(struct sk_buff *skb)
{
	/* This is accessing the outer IP header of the tunnel, which we've
	 * already validated to be OK.  skb->data is currently set to the start
	 * of the inner Ethernet header, and we've validated ETH_HLEN.
	 */
	if (unlikely(INET_ECN_is_ce(ip_hdr(skb)->tos))) {
		__be16 protocol = skb->protocol;

		skb_set_network_header(skb, ETH_HLEN);

		if (skb->protocol == htons(ETH_P_8021Q)) {
			if (unlikely(!pskb_may_pull(skb, VLAN_ETH_HLEN)))
				return;

			protocol = vlan_eth_hdr(skb)->h_vlan_encapsulated_proto;
			skb_set_network_header(skb, VLAN_ETH_HLEN);
		}

		if (protocol == htons(ETH_P_IP)) {
			if (unlikely(!pskb_may_pull(skb, skb_network_offset(skb)
			    + sizeof(struct iphdr))))
				return;

			IP_ECN_set_ce(ip_hdr(skb));
		}
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
		else if (protocol == htons(ETH_P_IPV6)) {
			if (unlikely(!pskb_may_pull(skb, skb_network_offset(skb)
			    + sizeof(struct ipv6hdr))))
				return;

			IP6_ECN_set_ce(ipv6_hdr(skb));
		}
#endif
	}
}

/* Called with rcu_read_lock. */
void tnl_rcv(struct vport *vport, struct sk_buff *skb)
{
	/* Packets received by this function are in the following state:
	 * - skb->data points to the inner Ethernet header.
	 * - The inner Ethernet header is in the linear data area.
	 * - skb->csum does not include the inner Ethernet header.
	 * - The layer pointers point at the outer headers.
	 */

	struct ethhdr *eh = (struct ethhdr *)skb->data;

	if (likely(ntohs(eh->h_proto) >= 1536))
		skb->protocol = eh->h_proto;
	else
		skb->protocol = htons(ETH_P_802_2);

	skb_dst_drop(skb);
	nf_reset(skb);
	secpath_reset(skb);

	ecn_decapsulate(skb);
	compute_ip_summed(skb, false);

	vport_receive(vport, skb);
}

static bool check_ipv4_address(__be32 addr)
{
	if (ipv4_is_multicast(addr) || ipv4_is_lbcast(addr)
	    || ipv4_is_loopback(addr) || ipv4_is_zeronet(addr))
		return false;

	return true;
}

static bool ipv4_should_icmp(struct sk_buff *skb)
{
	struct iphdr *old_iph = ip_hdr(skb);

	/* Don't respond to L2 broadcast. */
	if (is_multicast_ether_addr(eth_hdr(skb)->h_dest))
		return false;

	/* Don't respond to L3 broadcast or invalid addresses. */
	if (!check_ipv4_address(old_iph->daddr) ||
	    !check_ipv4_address(old_iph->saddr))
		return false;

	/* Only respond to the first fragment. */
	if (old_iph->frag_off & htons(IP_OFFSET))
		return false;

	/* Don't respond to ICMP error messages. */
	if (old_iph->protocol == IPPROTO_ICMP) {
		u8 icmp_type, *icmp_typep;

		icmp_typep = skb_header_pointer(skb, (u8 *)old_iph +
						(old_iph->ihl << 2) +
						offsetof(struct icmphdr, type) -
						skb->data, sizeof(icmp_type),
						&icmp_type);

		if (!icmp_typep)
			return false;

		if (*icmp_typep > NR_ICMP_TYPES
			|| (*icmp_typep <= ICMP_PARAMETERPROB
			&& *icmp_typep != ICMP_ECHOREPLY
			&& *icmp_typep != ICMP_ECHO))
			return false;
	}

	return true;
}

static void ipv4_build_icmp(struct sk_buff *skb, struct sk_buff *nskb,
			    unsigned int mtu, unsigned int payload_length)
{
	struct iphdr *iph, *old_iph = ip_hdr(skb);
	struct icmphdr *icmph;
	u8 *payload;

	iph = (struct iphdr *)skb_put(nskb, sizeof(struct iphdr));
	icmph = (struct icmphdr *)skb_put(nskb, sizeof(struct icmphdr));
	payload = skb_put(nskb, payload_length);

	/* IP */
	iph->version = 4;
	iph->ihl = sizeof(struct iphdr) >> 2;
	iph->tos = (old_iph->tos & IPTOS_TOS_MASK) |
		   IPTOS_PREC_INTERNETCONTROL;
	iph->tot_len = htons(sizeof(struct iphdr)
			     + sizeof(struct icmphdr)
			     + payload_length);
	get_random_bytes(&iph->id, sizeof(iph->id));
	iph->frag_off = 0;
	iph->ttl = IPDEFTTL;
	iph->protocol = IPPROTO_ICMP;
	iph->daddr = old_iph->saddr;
	iph->saddr = old_iph->daddr;

	ip_send_check(iph);

	/* ICMP */
	icmph->type = ICMP_DEST_UNREACH;
	icmph->code = ICMP_FRAG_NEEDED;
	icmph->un.gateway = htonl(mtu);
	icmph->checksum = 0;

	nskb->csum = csum_partial((u8 *)icmph, sizeof(struct icmphdr), 0);
	nskb->csum = skb_copy_and_csum_bits(skb, (u8 *)old_iph - skb->data,
					    payload, payload_length,
					    nskb->csum);
	icmph->checksum = csum_fold(nskb->csum);
}

#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
static bool ipv6_should_icmp(struct sk_buff *skb)
{
	struct ipv6hdr *old_ipv6h = ipv6_hdr(skb);
	int addr_type;
	int payload_off = (u8 *)(old_ipv6h + 1) - skb->data;
	u8 nexthdr = ipv6_hdr(skb)->nexthdr;

	/* Check source address is valid. */
	addr_type = ipv6_addr_type(&old_ipv6h->saddr);
	if (addr_type & IPV6_ADDR_MULTICAST || addr_type == IPV6_ADDR_ANY)
		return false;

	/* Don't reply to unspecified addresses. */
	if (ipv6_addr_type(&old_ipv6h->daddr) == IPV6_ADDR_ANY)
		return false;

	/* Don't respond to ICMP error messages. */
	payload_off = ipv6_skip_exthdr(skb, payload_off, &nexthdr);
	if (payload_off < 0)
		return false;

	if (nexthdr == NEXTHDR_ICMP) {
		u8 icmp_type, *icmp_typep;

		icmp_typep = skb_header_pointer(skb, payload_off +
						offsetof(struct icmp6hdr,
							 icmp6_type),
						sizeof(icmp_type), &icmp_type);

		if (!icmp_typep || !(*icmp_typep & ICMPV6_INFOMSG_MASK))
			return false;
	}

	return true;
}

static void ipv6_build_icmp(struct sk_buff *skb, struct sk_buff *nskb,
			    unsigned int mtu, unsigned int payload_length)
{
	struct ipv6hdr *ipv6h, *old_ipv6h = ipv6_hdr(skb);
	struct icmp6hdr *icmp6h;
	u8 *payload;

	ipv6h = (struct ipv6hdr *)skb_put(nskb, sizeof(struct ipv6hdr));
	icmp6h = (struct icmp6hdr *)skb_put(nskb, sizeof(struct icmp6hdr));
	payload = skb_put(nskb, payload_length);

	/* IPv6 */
	ipv6h->version = 6;
	ipv6h->priority = 0;
	memset(&ipv6h->flow_lbl, 0, sizeof(ipv6h->flow_lbl));
	ipv6h->payload_len = htons(sizeof(struct icmp6hdr)
				   + payload_length);
	ipv6h->nexthdr = NEXTHDR_ICMP;
	ipv6h->hop_limit = IPV6_DEFAULT_HOPLIMIT;
	ipv6_addr_copy(&ipv6h->daddr, &old_ipv6h->saddr);
	ipv6_addr_copy(&ipv6h->saddr, &old_ipv6h->daddr);

	/* ICMPv6 */
	icmp6h->icmp6_type = ICMPV6_PKT_TOOBIG;
	icmp6h->icmp6_code = 0;
	icmp6h->icmp6_cksum = 0;
	icmp6h->icmp6_mtu = htonl(mtu);

	nskb->csum = csum_partial((u8 *)icmp6h, sizeof(struct icmp6hdr), 0);
	nskb->csum = skb_copy_and_csum_bits(skb, (u8 *)old_ipv6h - skb->data,
					    payload, payload_length,
					    nskb->csum);
	icmp6h->icmp6_cksum = csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr,
					      sizeof(struct icmp6hdr)
					      + payload_length,
					      ipv6h->nexthdr, nskb->csum);
}
#endif /* IPv6 */

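/*
 * Builds an ICMP "fragmentation needed" (IPv4) or "packet too big" (IPv6)
 * reply for a packet that exceeds 'mtu' and feeds it back into the datapath
 * as if it had arrived on this tunnel port.  Returns true if the original
 * packet should now be dropped.
 */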
bool tnl_frag_needed(struct vport *vport, const struct tnl_mutable_config *mutable,
		     struct sk_buff *skb, unsigned int mtu, __be64 flow_key)
{
	unsigned int eth_hdr_len = ETH_HLEN;
	unsigned int total_length = 0, header_length = 0, payload_length;
	struct ethhdr *eh, *old_eh = eth_hdr(skb);
	struct sk_buff *nskb;

	/* Sanity check */
	if (skb->protocol == htons(ETH_P_IP)) {
		if (mtu < IP_MIN_MTU)
			return false;

		if (!ipv4_should_icmp(skb))
			return true;
	}
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
	else if (skb->protocol == htons(ETH_P_IPV6)) {
		if (mtu < IPV6_MIN_MTU)
			return false;

		/*
		 * In theory we should do PMTUD on IPv6 multicast messages but
		 * we don't have an address to send from so just fragment.
		 */
		if (ipv6_addr_type(&ipv6_hdr(skb)->daddr) & IPV6_ADDR_MULTICAST)
			return false;

		if (!ipv6_should_icmp(skb))
			return true;
	}
#endif
	else
		return false;

	/* Allocate */
	if (old_eh->h_proto == htons(ETH_P_8021Q))
		eth_hdr_len = VLAN_ETH_HLEN;

	payload_length = skb->len - eth_hdr_len;
	if (skb->protocol == htons(ETH_P_IP)) {
		header_length = sizeof(struct iphdr) + sizeof(struct icmphdr);
		total_length = min_t(unsigned int, header_length +
				     payload_length, 576);
	}
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
	else {
		header_length = sizeof(struct ipv6hdr) +
				sizeof(struct icmp6hdr);
		total_length = min_t(unsigned int, header_length +
				     payload_length, IPV6_MIN_MTU);
	}
#endif

	total_length = min(total_length, mutable->mtu);
	payload_length = total_length - header_length;

	nskb = dev_alloc_skb(NET_IP_ALIGN + eth_hdr_len + header_length +
			     payload_length);
	if (!nskb)
		return false;

	skb_reserve(nskb, NET_IP_ALIGN);

	/* Ethernet / VLAN */
	eh = (struct ethhdr *)skb_put(nskb, eth_hdr_len);
	memcpy(eh->h_dest, old_eh->h_source, ETH_ALEN);
	memcpy(eh->h_source, mutable->eth_addr, ETH_ALEN);
	nskb->protocol = eh->h_proto = old_eh->h_proto;
	if (old_eh->h_proto == htons(ETH_P_8021Q)) {
		struct vlan_ethhdr *vh = (struct vlan_ethhdr *)eh;

		vh->h_vlan_TCI = vlan_eth_hdr(skb)->h_vlan_TCI;
		vh->h_vlan_encapsulated_proto = skb->protocol;
	}
	skb_reset_mac_header(nskb);

	/* Protocol */
	if (skb->protocol == htons(ETH_P_IP))
		ipv4_build_icmp(skb, nskb, mtu, payload_length);
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
	else
		ipv6_build_icmp(skb, nskb, mtu, payload_length);
#endif

	/*
	 * Assume that flow based keys are symmetric with respect to input
	 * and output and use the key that we were going to put on the
	 * outgoing packet for the fake received packet.  If the keys are
	 * not symmetric then PMTUD needs to be disabled since we won't have
	 * any way of synthesizing packets.
	 */
	if ((mutable->port_config.flags & (TNL_F_IN_KEY_MATCH | TNL_F_OUT_KEY_ACTION)) ==
	    (TNL_F_IN_KEY_MATCH | TNL_F_OUT_KEY_ACTION))
		OVS_CB(nskb)->tun_id = flow_key;

	compute_ip_summed(nskb, false);
	vport_receive(vport, nskb);

	return true;
}

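/*
 * Computes the MTU that applies to this packet and whether the DF bit should
 * be set on the encapsulating IP header.  Returns false if the packet is too
 * big and PMTUD applies, in which case an ICMP error has already been
 * generated via tnl_frag_needed(); otherwise stores the frag_off bits for
 * the outer header in '*frag_offp'.
 */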
static bool check_mtu(struct sk_buff *skb,
		      struct vport *vport,
		      const struct tnl_mutable_config *mutable,
		      const struct rtable *rt, __be16 *frag_offp)
{
	int mtu;
	__be16 frag_off;

	frag_off = (mutable->port_config.flags & TNL_F_PMTUD) ? htons(IP_DF) : 0;
	if (frag_off)
		mtu = dst_mtu(&rt_dst(rt))
		      - ETH_HLEN
		      - mutable->tunnel_hlen
		      - (eth_hdr(skb)->h_proto == htons(ETH_P_8021Q) ? VLAN_HLEN : 0);
	else
		mtu = mutable->mtu;

	if (skb->protocol == htons(ETH_P_IP)) {
		struct iphdr *old_iph = ip_hdr(skb);

		frag_off |= old_iph->frag_off & htons(IP_DF);
		mtu = max(mtu, IP_MIN_MTU);

		if ((old_iph->frag_off & htons(IP_DF)) &&
		    mtu < ntohs(old_iph->tot_len)) {
			if (tnl_frag_needed(vport, mutable, skb, mtu, OVS_CB(skb)->tun_id))
				goto drop;
		}
	}
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
	else if (skb->protocol == htons(ETH_P_IPV6)) {
		unsigned int packet_length = skb->len - ETH_HLEN
			- (eth_hdr(skb)->h_proto == htons(ETH_P_8021Q) ? VLAN_HLEN : 0);

		mtu = max(mtu, IPV6_MIN_MTU);

		/* IPv6 requires PMTUD if the packet is above the minimum MTU. */
		if (packet_length > IPV6_MIN_MTU)
			frag_off = htons(IP_DF);

		if (mtu < packet_length) {
			if (tnl_frag_needed(vport, mutable, skb, mtu, OVS_CB(skb)->tun_id))
				goto drop;
		}
	}
#endif

	*frag_offp = frag_off;
	return true;

drop:
	*frag_offp = 0;
	return false;
}

static void create_tunnel_header(const struct vport *vport,
				 const struct tnl_mutable_config *mutable,
				 const struct rtable *rt, void *header)
{
	struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
	struct iphdr *iph = header;

	iph->version = 4;
	iph->ihl = sizeof(struct iphdr) >> 2;
	iph->frag_off = htons(IP_DF);
	iph->protocol = tnl_vport->tnl_ops->ipproto;
	iph->tos = mutable->port_config.tos;
	iph->daddr = rt->rt_dst;
	iph->saddr = rt->rt_src;
	iph->ttl = mutable->port_config.ttl;
	if (!iph->ttl)
		iph->ttl = dst_metric(&rt_dst(rt), RTAX_HOPLIMIT);

	tnl_vport->tnl_ops->build_header(vport, mutable, iph + 1);
}

static inline void *get_cached_header(const struct tnl_cache *cache)
{
	return (void *)cache + ALIGN(sizeof(struct tnl_cache), CACHE_DATA_ALIGN);
}

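/*
 * A cached header may be used only if nothing it depends on has changed:
 * the entry hasn't expired (on kernels that need a timeout), the routing
 * generation and hard header sequence numbers still match, the port config
 * is unchanged, and any flow on an internal device is still alive.
 */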
static inline bool check_cache_valid(const struct tnl_cache *cache,
				     const struct tnl_mutable_config *mutable)
{
	return cache &&
#ifdef NEED_CACHE_TIMEOUT
		time_before(jiffies, cache->expiration) &&
#endif
#ifdef HAVE_RT_GENID
		atomic_read(&init_net.ipv4.rt_genid) == cache->rt->rt_genid &&
#endif
#ifdef HAVE_HH_SEQ
		rt_dst(cache->rt).hh->hh_lock.sequence == cache->hh_seq &&
#endif
		mutable->seq == cache->mutable_seq &&
		(!is_internal_dev(rt_dst(cache->rt).dev) ||
		(cache->flow && !cache->flow->dead));
}

static int cache_cleaner_cb(struct tbl_node *tbl_node, void *aux)
{
	struct tnl_vport *tnl_vport = tnl_vport_table_cast(tbl_node);
	const struct tnl_mutable_config *mutable = rcu_dereference(tnl_vport->mutable);
	const struct tnl_cache *cache = rcu_dereference(tnl_vport->cache);

	if (cache && !check_cache_valid(cache, mutable) &&
	    spin_trylock_bh(&tnl_vport->cache_lock)) {
		assign_cache_rcu(tnl_vport_to_vport(tnl_vport), NULL);
		spin_unlock_bh(&tnl_vport->cache_lock);
	}

	return 0;
}

static void cache_cleaner(struct work_struct *work)
{
	schedule_cache_cleaner();

	rcu_read_lock();
	tbl_foreach(rcu_dereference(port_table), cache_cleaner_cb, NULL);
	rcu_read_unlock();
}

static inline void create_eth_hdr(struct tnl_cache *cache,
				  const struct rtable *rt)
{
	void *cache_data = get_cached_header(cache);
	int hh_len = rt_dst(rt).hh->hh_len;
	int hh_off = HH_DATA_ALIGN(rt_dst(rt).hh->hh_len) - hh_len;

#ifdef HAVE_HH_SEQ
	unsigned hh_seq;

	do {
		hh_seq = read_seqbegin(&rt_dst(rt).hh->hh_lock);
		memcpy(cache_data, (void *)rt_dst(rt).hh->hh_data + hh_off, hh_len);
	} while (read_seqretry(&rt_dst(rt).hh->hh_lock, hh_seq));

	cache->hh_seq = hh_seq;
#else
	read_lock_bh(&rt_dst(rt).hh->hh_lock);
	memcpy(cache_data, (void *)rt_dst(rt).hh->hh_data + hh_off, hh_len);
	read_unlock_bh(&rt_dst(rt).hh->hh_lock);
#endif
}

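/*
 * Builds a combined hard header + tunnel header cache for this route so that
 * the send path can prepend a prebuilt header with a single memcpy().  If
 * the route leads to an internal device, the corresponding datapath flow is
 * looked up and held so that cache validity can track its lifetime.
 */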
static struct tnl_cache *build_cache(struct vport *vport,
				     const struct tnl_mutable_config *mutable,
				     struct rtable *rt)
{
	struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
	struct tnl_cache *cache;
	void *cache_data;
	int cache_len;

	if (!(mutable->port_config.flags & TNL_F_HDR_CACHE))
		return NULL;

	/*
	 * If there is no entry in the ARP cache or if this device does not
	 * support hard header caching just fall back to the IP stack.
	 */
	if (!rt_dst(rt).hh)
		return NULL;

	/*
	 * If lock is contended fall back to directly building the header.
	 * We're not going to help performance by sitting here spinning.
	 */
	if (!spin_trylock_bh(&tnl_vport->cache_lock))
		return NULL;

	cache = cache_dereference(tnl_vport);
	if (check_cache_valid(cache, mutable))
		goto unlock;
	else
		cache = NULL;

	cache_len = rt_dst(rt).hh->hh_len + mutable->tunnel_hlen;

	cache = kzalloc(ALIGN(sizeof(struct tnl_cache), CACHE_DATA_ALIGN) +
			cache_len, GFP_ATOMIC);
	if (!cache)
		goto unlock;

	cache->len = cache_len;

	create_eth_hdr(cache, rt);
	cache_data = get_cached_header(cache) + rt_dst(rt).hh->hh_len;

	create_tunnel_header(vport, mutable, rt, cache_data);

	cache->mutable_seq = mutable->seq;
	cache->rt = rt;
#ifdef NEED_CACHE_TIMEOUT
	cache->expiration = jiffies + tnl_vport->cache_exp_interval;
#endif

	if (is_internal_dev(rt_dst(rt).dev)) {
		struct odp_flow_key flow_key;
		struct tbl_node *flow_node;
		struct vport *dst_vport;
		struct sk_buff *skb;
		bool is_frag;
		int err;

		dst_vport = internal_dev_get_vport(rt_dst(rt).dev);
		if (!dst_vport)
			goto done;

		skb = alloc_skb(cache->len, GFP_ATOMIC);
		if (!skb)
			goto done;

		__skb_put(skb, cache->len);
		memcpy(skb->data, get_cached_header(cache), cache->len);

		err = flow_extract(skb, dst_vport->port_no, &flow_key, &is_frag);

		kfree_skb(skb);
		if (err || is_frag)
			goto done;

		flow_node = tbl_lookup(rcu_dereference(dst_vport->dp->table),
				       &flow_key, flow_hash(&flow_key),
				       flow_cmp);
		if (flow_node) {
			struct sw_flow *flow = flow_cast(flow_node);

			cache->flow = flow;
			flow_hold(flow);
		}
	}

done:
	assign_cache_rcu(vport, cache);

unlock:
	spin_unlock_bh(&tnl_vport->cache_lock);

	return cache;
}

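/*
 * Returns the route for this tunnel, preferring the route embedded in a
 * valid header cache.  When the cache misses, a fresh route lookup is done
 * and, if the ToS matches the configured value, used to rebuild the cache.
 */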
static struct rtable *find_route(struct vport *vport,
				 const struct tnl_mutable_config *mutable,
				 u8 tos, struct tnl_cache **cache)
{
	struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
	struct tnl_cache *cur_cache = rcu_dereference(tnl_vport->cache);

	*cache = NULL;
	tos = RT_TOS(tos);

	if (likely(tos == mutable->port_config.tos &&
	    check_cache_valid(cur_cache, mutable))) {
		*cache = cur_cache;
		return cur_cache->rt;
	} else {
		struct rtable *rt;
		struct flowi fl = { .nl_u = { .ip4_u =
					      { .daddr = mutable->port_config.daddr,
						.saddr = mutable->port_config.saddr,
						.tos = tos } },
				    .proto = tnl_vport->tnl_ops->ipproto };

		if (unlikely(ip_route_output_key(&init_net, &rt, &fl)))
			return NULL;

		if (likely(tos == mutable->port_config.tos))
			*cache = build_cache(vport, mutable, rt);

		return rt;
	}
}

static struct sk_buff *check_headroom(struct sk_buff *skb, int headroom)
{
	if (skb_headroom(skb) < headroom || skb_header_cloned(skb)) {
		struct sk_buff *nskb = skb_realloc_headroom(skb, headroom + 16);
		if (unlikely(!nskb)) {
			kfree_skb(skb);
			return ERR_PTR(-ENOMEM);
		}

		set_skb_csum_bits(skb, nskb);

		if (skb->sk)
			skb_set_owner_w(nskb, skb->sk);

		kfree_skb(skb);
		return nskb;
	}

	return skb;
}

static inline bool need_linearize(const struct sk_buff *skb)
{
	int i;

	if (unlikely(skb_shinfo(skb)->frag_list))
		return true;

	/*
	 * Generally speaking we should linearize if there are paged frags.
	 * However, if all of the refcounts are 1 we know nobody else can
	 * change them from underneath us and we can skip the linearization.
	 */
	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
		if (unlikely(page_count(skb_shinfo(skb)->frags[i].page) > 1))
			return true;

	return false;
}

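/*
 * Prepares the packet for encapsulation: ensures enough headroom for the
 * link, route, and tunnel headers and performs GSO segmentation or checksum
 * completion as required.  Consumes 'skb' and returns the resulting skb
 * (possibly a segment list) or an ERR_PTR() on failure.
 */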
static struct sk_buff *handle_offloads(struct sk_buff *skb,
				       const struct tnl_mutable_config *mutable,
				       const struct rtable *rt)
{
	int min_headroom;
	int err;

	forward_ip_summed(skb);

	err = vswitch_skb_checksum_setup(skb);
	if (unlikely(err))
		goto error_free;

	min_headroom = LL_RESERVED_SPACE(rt_dst(rt).dev) + rt_dst(rt).header_len
		       + mutable->tunnel_hlen;

	if (skb_is_gso(skb)) {
		struct sk_buff *nskb;

		/*
		 * If we are doing GSO on a pskb it is better to make sure that
		 * the headroom is correct now.  We will only have to copy the
		 * portion in the linear data area and GSO will preserve
		 * headroom when it creates the segments.  This is particularly
		 * beneficial on Xen where we get a lot of GSO pskbs.
		 * Conversely, we avoid copying if it is just to get our own
		 * writable clone because GSO will do the copy for us.
		 */
		if (skb_headroom(skb) < min_headroom) {
			skb = check_headroom(skb, min_headroom);
			if (IS_ERR(skb)) {
				err = PTR_ERR(skb);
				goto error;
			}
		}

		nskb = skb_gso_segment(skb, 0);
		kfree_skb(skb);
		if (IS_ERR(nskb)) {
			err = PTR_ERR(nskb);
			goto error;
		}

		skb = nskb;
	} else {
		skb = check_headroom(skb, min_headroom);
		if (IS_ERR(skb)) {
			err = PTR_ERR(skb);
			goto error;
		}

		if (skb->ip_summed == CHECKSUM_PARTIAL) {
			/*
			 * Pages aren't locked and could change at any time.
			 * If this happens after we compute the checksum, the
			 * checksum will be wrong.  We linearize now to avoid
			 * this problem.
			 */
			if (unlikely(need_linearize(skb))) {
				err = __skb_linearize(skb);
				if (unlikely(err))
					goto error_free;
			}

			err = skb_checksum_help(skb);
			if (unlikely(err))
				goto error_free;
		} else if (skb->ip_summed == CHECKSUM_COMPLETE)
			skb->ip_summed = CHECKSUM_NONE;
	}

	return skb;

error_free:
	kfree_skb(skb);
error:
	return ERR_PTR(err);
}

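/*
 * Hands a list of already-encapsulated fragments to the IP stack one at a
 * time, returning the number of payload bytes that were actually sent.
 */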
static int send_frags(struct sk_buff *skb,
		      const struct tnl_mutable_config *mutable)
{
	int sent_len;
	int err;

	sent_len = 0;
	while (skb) {
		struct sk_buff *next = skb->next;
		int frag_len = skb->len - mutable->tunnel_hlen;

		skb->next = NULL;
		memset(IPCB(skb), 0, sizeof(*IPCB(skb)));

		err = ip_local_out(skb);
		if (likely(net_xmit_eval(err) == 0))
			sent_len += frag_len;
		else {
			skb = next;
			goto free_frags;
		}

		skb = next;
	}

	return sent_len;

free_frags:
	/*
	 * There's no point in continuing to send fragments once one has been
	 * dropped so just free the rest.  This may help improve the congestion
	 * that caused the first packet to be dropped.
	 */
	tnl_free_linked_skbs(skb);
	return sent_len;
}

int tnl_send(struct vport *vport, struct sk_buff *skb)
{
	struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
	const struct tnl_mutable_config *mutable = rcu_dereference(tnl_vport->mutable);

	enum vport_err_type err = VPORT_E_TX_ERROR;
	struct rtable *rt;
	struct dst_entry *unattached_dst = NULL;
	struct tnl_cache *cache;
	int sent_len = 0;
	__be16 frag_off;
	u8 ttl;
	u8 inner_tos;
	u8 tos;

	/* Validate the protocol headers before we try to use them. */
	if (skb->protocol == htons(ETH_P_8021Q)) {
		if (unlikely(!pskb_may_pull(skb, VLAN_ETH_HLEN)))
			goto error_free;

		skb->protocol = vlan_eth_hdr(skb)->h_vlan_encapsulated_proto;
		skb_set_network_header(skb, VLAN_ETH_HLEN);
	}

	if (skb->protocol == htons(ETH_P_IP)) {
		if (unlikely(!pskb_may_pull(skb, skb_network_offset(skb)
		    + sizeof(struct iphdr))))
			skb->protocol = 0;
	}
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
	else if (skb->protocol == htons(ETH_P_IPV6)) {
		if (unlikely(!pskb_may_pull(skb, skb_network_offset(skb)
		    + sizeof(struct ipv6hdr))))
			skb->protocol = 0;
	}
#endif

	/* ToS */
	if (skb->protocol == htons(ETH_P_IP))
		inner_tos = ip_hdr(skb)->tos;
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
	else if (skb->protocol == htons(ETH_P_IPV6))
		inner_tos = ipv6_get_dsfield(ipv6_hdr(skb));
#endif
	else
		inner_tos = 0;

	if (mutable->port_config.flags & TNL_F_TOS_INHERIT)
		tos = inner_tos;
	else
		tos = mutable->port_config.tos;

	tos = INET_ECN_encapsulate(tos, inner_tos);

	/* Route lookup */
	rt = find_route(vport, mutable, tos, &cache);
	if (unlikely(!rt))
		goto error_free;
	if (unlikely(!cache))
		unattached_dst = &rt_dst(rt);

	/* Reset SKB */
	nf_reset(skb);
	secpath_reset(skb);
	skb_dst_drop(skb);

	/* Offloading */
	skb = handle_offloads(skb, mutable, rt);
	if (IS_ERR(skb))
		goto error;

	/* MTU */
	if (unlikely(!check_mtu(skb, vport, mutable, rt, &frag_off))) {
		err = VPORT_E_TX_DROPPED;
		goto error_free;
	}

	/*
	 * If we are over the MTU, allow the IP stack to handle fragmentation.
	 * Fragmentation is a slow path anyway.
	 */
	if (unlikely(skb->len + mutable->tunnel_hlen > dst_mtu(&rt_dst(rt)) &&
	    cache)) {
		unattached_dst = &rt_dst(rt);
		dst_hold(unattached_dst);
		cache = NULL;
	}

	/* TTL */
	ttl = mutable->port_config.ttl;
	if (!ttl)
		ttl = dst_metric(&rt_dst(rt), RTAX_HOPLIMIT);

	if (mutable->port_config.flags & TNL_F_TTL_INHERIT) {
		if (skb->protocol == htons(ETH_P_IP))
			ttl = ip_hdr(skb)->ttl;
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
		else if (skb->protocol == htons(ETH_P_IPV6))
			ttl = ipv6_hdr(skb)->hop_limit;
#endif
	}

	while (skb) {
		struct iphdr *iph;
		struct sk_buff *next_skb = skb->next;
		skb->next = NULL;

		if (likely(cache)) {
			skb_push(skb, cache->len);
			memcpy(skb->data, get_cached_header(cache), cache->len);
			skb_reset_mac_header(skb);
			skb_set_network_header(skb, rt_dst(rt).hh->hh_len);

		} else {
			skb_push(skb, mutable->tunnel_hlen);
			create_tunnel_header(vport, mutable, rt, skb->data);
			skb_reset_network_header(skb);

			if (next_skb)
				skb_dst_set(skb, dst_clone(unattached_dst));
			else {
				skb_dst_set(skb, unattached_dst);
				unattached_dst = NULL;
			}
		}
		skb_set_transport_header(skb, skb_network_offset(skb) + sizeof(struct iphdr));

		iph = ip_hdr(skb);
		iph->tos = tos;
		iph->ttl = ttl;
		iph->frag_off = frag_off;
		ip_select_ident(iph, &rt_dst(rt), NULL);

		skb = tnl_vport->tnl_ops->update_header(vport, mutable, &rt_dst(rt), skb);
		if (unlikely(!skb))
			goto next;

		if (likely(cache)) {
			int orig_len = skb->len - cache->len;
			struct vport *cache_vport = internal_dev_get_vport(rt_dst(rt).dev);

			skb->protocol = htons(ETH_P_IP);
			iph = ip_hdr(skb);
			iph->tot_len = htons(skb->len - skb_network_offset(skb));
			ip_send_check(iph);

			if (cache_vport) {
				OVS_CB(skb)->flow = cache->flow;
				compute_ip_summed(skb, true);
				vport_receive(cache_vport, skb);
				sent_len += orig_len;
			} else {
				int xmit_err;

				skb->dev = rt_dst(rt).dev;
				xmit_err = dev_queue_xmit(skb);

				if (likely(net_xmit_eval(xmit_err) == 0))
					sent_len += orig_len;
			}
		} else
			sent_len += send_frags(skb, mutable);

next:
		skb = next_skb;
	}

	if (unlikely(sent_len == 0))
		vport_record_error(vport, VPORT_E_TX_DROPPED);

	goto out;

error_free:
	tnl_free_linked_skbs(skb);
error:
	dst_release(unattached_dst);
	vport_record_error(vport, err);
out:
	return sent_len;
}

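/*
 * Parses and validates a new port configuration into 'mutable', rejecting
 * it if it is invalid or collides with an existing tunnel port other than
 * 'cur_vport'.
 */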
static int set_config(const void *config, const struct tnl_ops *tnl_ops,
		      const struct vport *cur_vport,
		      struct tnl_mutable_config *mutable)
{
	const struct vport *old_vport;
	const struct tnl_mutable_config *old_mutable;

	mutable->port_config = *(struct tnl_port_config *)config;

	if (mutable->port_config.daddr == 0)
		return -EINVAL;

	if (mutable->port_config.tos != RT_TOS(mutable->port_config.tos))
		return -EINVAL;

	mutable->tunnel_hlen = tnl_ops->hdr_len(&mutable->port_config);
	if (mutable->tunnel_hlen < 0)
		return mutable->tunnel_hlen;

	mutable->tunnel_hlen += sizeof(struct iphdr);

	mutable->tunnel_type = tnl_ops->tunnel_type;
	if (mutable->port_config.flags & TNL_F_IN_KEY_MATCH) {
		mutable->tunnel_type |= TNL_T_KEY_MATCH;
		mutable->port_config.in_key = 0;
	} else
		mutable->tunnel_type |= TNL_T_KEY_EXACT;

	old_vport = tnl_find_port(mutable->port_config.saddr,
				  mutable->port_config.daddr,
				  mutable->port_config.in_key,
				  mutable->tunnel_type,
				  &old_mutable);

	if (old_vport && old_vport != cur_vport)
		return -EEXIST;

	if (mutable->port_config.flags & TNL_F_OUT_KEY_ACTION)
		mutable->port_config.out_key = 0;

	return 0;
}

struct vport *tnl_create(const struct vport_parms *parms,
			 const struct vport_ops *vport_ops,
			 const struct tnl_ops *tnl_ops)
{
	struct vport *vport;
	struct tnl_vport *tnl_vport;
	int initial_frag_id;
	int err;

	vport = vport_alloc(sizeof(struct tnl_vport), vport_ops, parms);
	if (IS_ERR(vport)) {
		err = PTR_ERR(vport);
		goto error;
	}

	tnl_vport = tnl_vport_priv(vport);

	strcpy(tnl_vport->name, parms->name);
	tnl_vport->tnl_ops = tnl_ops;

	tnl_vport->mutable = kzalloc(sizeof(struct tnl_mutable_config), GFP_KERNEL);
	if (!tnl_vport->mutable) {
		err = -ENOMEM;
		goto error_free_vport;
	}

	vport_gen_rand_ether_addr(tnl_vport->mutable->eth_addr);
	tnl_vport->mutable->mtu = ETH_DATA_LEN;

	get_random_bytes(&initial_frag_id, sizeof(int));
	atomic_set(&tnl_vport->frag_id, initial_frag_id);

	err = set_config(parms->config, tnl_ops, NULL, tnl_vport->mutable);
	if (err)
		goto error_free_mutable;

	spin_lock_init(&tnl_vport->cache_lock);

#ifdef NEED_CACHE_TIMEOUT
	tnl_vport->cache_exp_interval = MAX_CACHE_EXP -
					(net_random() % (MAX_CACHE_EXP / 2));
#endif

	err = add_port(vport);
	if (err)
		goto error_free_mutable;

	return vport;

error_free_mutable:
	kfree(tnl_vport->mutable);
error_free_vport:
	vport_free(vport);
error:
	return ERR_PTR(err);
}

int tnl_modify(struct vport *vport, struct odp_port *port)
{
	struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
	struct tnl_mutable_config *mutable;
	int err;

	mutable = kmemdup(tnl_vport->mutable, sizeof(struct tnl_mutable_config), GFP_KERNEL);
	if (!mutable) {
		err = -ENOMEM;
		goto error;
	}

	err = set_config(port->config, tnl_vport->tnl_ops, vport, mutable);
	if (err)
		goto error_free;

	mutable->seq++;

	err = move_port(vport, mutable);
	if (err)
		goto error_free;

	return 0;

error_free:
	kfree(mutable);
error:
	return err;
}

static void free_port_rcu(struct rcu_head *rcu)
{
	struct tnl_vport *tnl_vport = container_of(rcu, struct tnl_vport, rcu);

	spin_lock_bh(&tnl_vport->cache_lock);
	free_cache(tnl_vport->cache);
	spin_unlock_bh(&tnl_vport->cache_lock);

	kfree(tnl_vport->mutable);
	vport_free(tnl_vport_to_vport(tnl_vport));
}

int tnl_destroy(struct vport *vport)
{
	struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
	const struct tnl_mutable_config *old_mutable;

	if (vport == tnl_find_port(tnl_vport->mutable->port_config.saddr,
				   tnl_vport->mutable->port_config.daddr,
				   tnl_vport->mutable->port_config.in_key,
				   tnl_vport->mutable->tunnel_type,
				   &old_mutable))
		del_port(vport);

	call_rcu(&tnl_vport->rcu, free_port_rcu);

	return 0;
}

int tnl_set_mtu(struct vport *vport, int mtu)
{
	struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
	struct tnl_mutable_config *mutable;

	mutable = kmemdup(tnl_vport->mutable, sizeof(struct tnl_mutable_config), GFP_KERNEL);
	if (!mutable)
		return -ENOMEM;

	mutable->mtu = mtu;
	assign_config_rcu(vport, mutable);

	return 0;
}

int tnl_set_addr(struct vport *vport, const unsigned char *addr)
{
	struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
	struct tnl_mutable_config *mutable;

	mutable = kmemdup(tnl_vport->mutable, sizeof(struct tnl_mutable_config), GFP_KERNEL);
	if (!mutable)
		return -ENOMEM;

	memcpy(mutable->eth_addr, addr, ETH_ALEN);
	assign_config_rcu(vport, mutable);

	return 0;
}

const char *tnl_get_name(const struct vport *vport)
{
	const struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
	return tnl_vport->name;
}

const unsigned char *tnl_get_addr(const struct vport *vport)
{
	const struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
	return rcu_dereference_rtnl(tnl_vport->mutable)->eth_addr;
}

void tnl_get_config(const struct vport *vport, void *config)
{
	const struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
	struct tnl_port_config *port_config;

	port_config = &rcu_dereference_rtnl(tnl_vport->mutable)->port_config;
	memcpy(config, port_config, sizeof(*port_config));
}

int tnl_get_mtu(const struct vport *vport)
{
	const struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
	return rcu_dereference_rtnl(tnl_vport->mutable)->mtu;
}

void tnl_free_linked_skbs(struct sk_buff *skb)
{
	if (unlikely(!skb))
		return;

	while (skb) {
		struct sk_buff *next = skb->next;
		kfree_skb(skb);
		skb = next;
	}
}