]>
Commit | Line | Data |
---|---|---|
a6ae068b LJ |
1 | /* |
2 | * Copyright (c) 2011 Nicira, Inc. | |
3 | * Copyright (c) 2013 Cisco Systems, Inc. | |
4 | * | |
5 | * This program is free software; you can redistribute it and/or | |
6 | * modify it under the terms of version 2 of the GNU General Public | |
7 | * License as published by the Free Software Foundation. | |
8 | * | |
9 | * This program is distributed in the hope that it will be useful, but | |
10 | * WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
12 | * General Public License for more details. | |
13 | * | |
14 | * You should have received a copy of the GNU General Public License | |
15 | * along with this program; if not, write to the Free Software | |
16 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA | |
17 | * 02110-1301, USA | |
18 | */ | |
19 | ||
20 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | |
21 | ||
22 | #include <linux/version.h> | |
23 | #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,26) | |
24 | ||
25 | #include <linux/in.h> | |
26 | #include <linux/ip.h> | |
a6ae068b | 27 | #include <linux/net.h> |
85c9de19 | 28 | #include <linux/rculist.h> |
a6ae068b LJ |
29 | #include <linux/udp.h> |
30 | ||
31 | #include <net/icmp.h> | |
32 | #include <net/ip.h> | |
11aa8dff | 33 | #include <net/route.h> |
a6ae068b | 34 | #include <net/udp.h> |
11aa8dff | 35 | #include <net/xfrm.h> |
a6ae068b LJ |
36 | |
37 | #include "datapath.h" | |
a6ae068b LJ |
38 | #include "vport.h" |
39 | ||
a6ae068b LJ |
40 | /* |
41 | * LISP encapsulation header: | |
42 | * | |
43 | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |
44 | * |N|L|E|V|I|flags| Nonce/Map-Version | | |
45 | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |
46 | * | Instance ID/Locator Status Bits | | |
47 | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |
48 | * | |
49 | */ | |
50 | ||
51 | /** | |
52 | * struct lisphdr - LISP header | |
53 | * @nonce_present: Flag indicating the presence of a 24 bit nonce value. | |
54 | * @locator_status_bits_present: Flag indicating the presence of Locator Status | |
55 | * Bits (LSB). | |
56 | * @solicit_echo_nonce: Flag indicating the use of the echo noncing mechanism. | |
57 | * @map_version_present: Flag indicating the use of mapping versioning. | |
58 | * @instance_id_present: Flag indicating the presence of a 24 bit Instance ID. | |
59 | * @reserved_flags: 3 bits reserved for future flags. | |
60 | * @nonce: 24 bit nonce value. | |
61 | * @map_version: 24 bit mapping version. | |
62 | * @locator_status_bits: Locator Status Bits: 32 bits when instance_id_present | |
63 | * is not set, 8 bits when it is. | |
64 | * @instance_id: 24 bit Instance ID | |
65 | */ | |
66 | struct lisphdr { | |
67 | #ifdef __LITTLE_ENDIAN_BITFIELD | |
68 | __u8 reserved_flags:3; | |
69 | __u8 instance_id_present:1; | |
70 | __u8 map_version_present:1; | |
71 | __u8 solicit_echo_nonce:1; | |
72 | __u8 locator_status_bits_present:1; | |
73 | __u8 nonce_present:1; | |
74 | #else | |
75 | __u8 nonce_present:1; | |
76 | __u8 locator_status_bits_present:1; | |
77 | __u8 solicit_echo_nonce:1; | |
78 | __u8 map_version_present:1; | |
79 | __u8 instance_id_present:1; | |
80 | __u8 reserved_flags:3; | |
81 | #endif | |
82 | union { | |
83 | __u8 nonce[3]; | |
84 | __u8 map_version[3]; | |
85 | } u1; | |
86 | union { | |
87 | __be32 locator_status_bits; | |
88 | struct { | |
89 | __u8 instance_id[3]; | |
90 | __u8 locator_status_bits; | |
91 | } word2; | |
92 | } u2; | |
93 | }; | |
94 | ||
95 | #define LISP_HLEN (sizeof(struct udphdr) + sizeof(struct lisphdr)) | |
96 | ||
a6ae068b LJ |
97 | /** |
98 | * struct lisp_port - Keeps track of open UDP ports | |
c405d282 PS |
99 | * @dst_port: lisp UDP port no. |
100 | * @list: list element in @lisp_ports. | |
101 | * @lisp_rcv_socket: The socket created for this port number. | |
102 | * @name: vport name. | |
a6ae068b LJ |
103 | */ |
104 | struct lisp_port { | |
c405d282 | 105 | __be16 dst_port; |
a6ae068b | 106 | struct list_head list; |
a6ae068b | 107 | struct socket *lisp_rcv_socket; |
c405d282 | 108 | char name[IFNAMSIZ]; |
a6ae068b LJ |
109 | }; |
110 | ||
111 | static LIST_HEAD(lisp_ports); | |
112 | ||
c405d282 PS |
/* Return the LISP private data that vport_priv() yields for @vport. */
static inline struct lisp_port *lisp_vport(const struct vport *vport)
{
	struct lisp_port *priv = vport_priv(vport);

	return priv;
}
117 | ||
85c9de19 | 118 | static struct lisp_port *lisp_find_port(struct net *net, __be16 port) |
a6ae068b LJ |
119 | { |
120 | struct lisp_port *lisp_port; | |
121 | ||
85c9de19 | 122 | list_for_each_entry_rcu(lisp_port, &lisp_ports, list) { |
c405d282 | 123 | if (lisp_port->dst_port == port && |
a6ae068b LJ |
124 | net_eq(sock_net(lisp_port->lisp_rcv_socket->sk), net)) |
125 | return lisp_port; | |
126 | } | |
127 | ||
128 | return NULL; | |
129 | } | |
130 | ||
131 | static inline struct lisphdr *lisp_hdr(const struct sk_buff *skb) | |
132 | { | |
133 | return (struct lisphdr *)(udp_hdr(skb) + 1); | |
134 | } | |
135 | ||
a6ae068b LJ |
136 | /* Convert 64 bit tunnel ID to 24 bit Instance ID. */ |
137 | static void tunnel_id_to_instance_id(__be64 tun_id, __u8 *iid) | |
138 | { | |
139 | ||
140 | #ifdef __BIG_ENDIAN | |
141 | iid[0] = (__force __u8)(tun_id >> 16); | |
142 | iid[1] = (__force __u8)(tun_id >> 8); | |
143 | iid[2] = (__force __u8)tun_id; | |
144 | #else | |
145 | iid[0] = (__force __u8)((__force u64)tun_id >> 40); | |
146 | iid[1] = (__force __u8)((__force u64)tun_id >> 48); | |
147 | iid[2] = (__force __u8)((__force u64)tun_id >> 56); | |
148 | #endif | |
149 | } | |
150 | ||
151 | /* Convert 24 bit Instance ID to 64 bit tunnel ID. */ | |
152 | static __be64 instance_id_to_tunnel_id(__u8 *iid) | |
153 | { | |
154 | #ifdef __BIG_ENDIAN | |
155 | return (iid[0] << 16) | (iid[1] << 8) | iid[2]; | |
156 | #else | |
157 | return (__force __be64)(((__force u64)iid[0] << 40) | | |
158 | ((__force u64)iid[1] << 48) | | |
159 | ((__force u64)iid[2] << 56)); | |
160 | #endif | |
161 | } | |
162 | ||
11aa8dff PS |
163 | /* Compute source UDP port for outgoing packet. |
164 | * Currently we use the flow hash. | |
165 | */ | |
166 | static u16 ovs_tnl_get_src_port(struct sk_buff *skb) | |
167 | { | |
168 | int low; | |
169 | int high; | |
170 | unsigned int range; | |
171 | struct sw_flow_key *pkt_key = OVS_CB(skb)->pkt_key; | |
172 | u32 hash = jhash2((const u32 *)pkt_key, | |
173 | sizeof(*pkt_key) / sizeof(u32), 0); | |
174 | ||
175 | inet_get_local_port_range(&low, &high); | |
176 | range = (high - low) + 1; | |
177 | return (((u64) hash * range) >> 32) + low; | |
178 | } | |
179 | ||
85c9de19 PS |
180 | static void lisp_build_header(const struct vport *vport, |
181 | struct sk_buff *skb, | |
182 | int tunnel_hlen) | |
a6ae068b | 183 | { |
c405d282 | 184 | struct lisp_port *lisp_port = lisp_vport(vport); |
a6ae068b LJ |
185 | struct udphdr *udph = udp_hdr(skb); |
186 | struct lisphdr *lisph = (struct lisphdr *)(udph + 1); | |
187 | const struct ovs_key_ipv4_tunnel *tun_key = OVS_CB(skb)->tun_key; | |
a6ae068b | 188 | |
c405d282 | 189 | udph->dest = lisp_port->dst_port; |
a6ae068b LJ |
190 | udph->source = htons(ovs_tnl_get_src_port(skb)); |
191 | udph->check = 0; | |
192 | udph->len = htons(skb->len - skb_transport_offset(skb)); | |
193 | ||
194 | lisph->nonce_present = 0; /* We don't support echo nonce algorithm */ | |
195 | lisph->locator_status_bits_present = 1; /* Set LSB */ | |
196 | lisph->solicit_echo_nonce = 0; /* No echo noncing */ | |
197 | lisph->map_version_present = 0; /* No mapping versioning, nonce instead */ | |
198 | lisph->instance_id_present = 1; /* Store the tun_id as Instance ID */ | |
199 | lisph->reserved_flags = 0; /* Reserved flags, set to 0 */ | |
200 | ||
201 | lisph->u1.nonce[0] = 0; | |
202 | lisph->u1.nonce[1] = 0; | |
203 | lisph->u1.nonce[2] = 0; | |
204 | ||
85c9de19 | 205 | tunnel_id_to_instance_id(tun_key->tun_id, &lisph->u2.word2.instance_id[0]); |
a6ae068b | 206 | lisph->u2.word2.locator_status_bits = 1; |
a6ae068b LJ |
207 | } |
208 | ||
11aa8dff PS |
209 | /** |
210 | * ovs_tnl_rcv - ingress point for generic tunnel code | |
211 | * | |
212 | * @vport: port this packet was received on | |
213 | * @skb: received packet | |
a515e4a8 | 214 | * @tun_key: tunnel that carried packet |
11aa8dff PS |
215 | * |
216 | * Must be called with rcu_read_lock. | |
217 | * | |
218 | * Packets received by this function are in the following state: | |
219 | * - skb->data points to the inner Ethernet header. | |
220 | * - The inner Ethernet header is in the linear data area. | |
11aa8dff PS |
221 | * - The layer pointers are undefined. |
222 | */ | |
223 | static void ovs_tnl_rcv(struct vport *vport, struct sk_buff *skb, | |
224 | struct ovs_key_ipv4_tunnel *tun_key) | |
225 | { | |
226 | struct ethhdr *eh; | |
227 | ||
228 | skb_reset_mac_header(skb); | |
229 | eh = eth_hdr(skb); | |
230 | ||
231 | if (likely(ntohs(eh->h_proto) >= ETH_P_802_3_MIN)) | |
232 | skb->protocol = eh->h_proto; | |
233 | else | |
234 | skb->protocol = htons(ETH_P_802_2); | |
235 | ||
236 | skb_dst_drop(skb); | |
237 | nf_reset(skb); | |
238 | skb_clear_rxhash(skb); | |
239 | secpath_reset(skb); | |
240 | vlan_set_tci(skb, 0); | |
241 | ||
242 | if (unlikely(compute_ip_summed(skb, false))) { | |
243 | kfree_skb(skb); | |
244 | return; | |
245 | } | |
246 | ||
247 | ovs_vport_receive(vport, skb, tun_key); | |
248 | } | |
249 | ||
a6ae068b LJ |
250 | /* Called with rcu_read_lock and BH disabled. */ |
251 | static int lisp_rcv(struct sock *sk, struct sk_buff *skb) | |
252 | { | |
85c9de19 | 253 | struct lisp_port *lisp_port; |
a6ae068b | 254 | struct lisphdr *lisph; |
a6ae068b LJ |
255 | struct iphdr *iph, *inner_iph; |
256 | struct ovs_key_ipv4_tunnel tun_key; | |
257 | __be64 key; | |
a6ae068b LJ |
258 | struct ethhdr *ethh; |
259 | __be16 protocol; | |
260 | ||
85c9de19 PS |
261 | lisp_port = lisp_find_port(dev_net(skb->dev), udp_hdr(skb)->dest); |
262 | if (unlikely(!lisp_port)) | |
263 | goto error; | |
264 | ||
a6ae068b LJ |
265 | if (unlikely(!pskb_may_pull(skb, LISP_HLEN))) |
266 | goto error; | |
267 | ||
268 | lisph = lisp_hdr(skb); | |
269 | ||
270 | skb_pull_rcsum(skb, LISP_HLEN); | |
271 | ||
272 | if (lisph->instance_id_present != 1) | |
273 | key = 0; | |
274 | else | |
275 | key = instance_id_to_tunnel_id(&lisph->u2.word2.instance_id[0]); | |
276 | ||
a6ae068b | 277 | /* Save outer tunnel values */ |
85c9de19 | 278 | iph = ip_hdr(skb); |
adda018c | 279 | ovs_flow_tun_key_init(&tun_key, iph, key, TUNNEL_KEY); |
a6ae068b LJ |
280 | |
281 | /* Drop non-IP inner packets */ | |
282 | inner_iph = (struct iphdr *)(lisph + 1); | |
283 | switch (inner_iph->version) { | |
284 | case 4: | |
285 | protocol = htons(ETH_P_IP); | |
286 | break; | |
287 | case 6: | |
288 | protocol = htons(ETH_P_IPV6); | |
289 | break; | |
290 | default: | |
291 | goto error; | |
292 | } | |
293 | ||
294 | /* Add Ethernet header */ | |
295 | ethh = (struct ethhdr *)skb_push(skb, ETH_HLEN); | |
296 | memset(ethh, 0, ETH_HLEN); | |
297 | ethh->h_dest[0] = 0x02; | |
298 | ethh->h_source[0] = 0x02; | |
299 | ethh->h_proto = protocol; | |
300 | ||
3cfede14 PS |
301 | ovs_skb_postpush_rcsum(skb, skb->data, ETH_HLEN); |
302 | ||
db0c3134 | 303 | ovs_tnl_rcv(vport_from_priv(lisp_port), skb, &tun_key); |
a6ae068b LJ |
304 | goto out; |
305 | ||
306 | error: | |
307 | kfree_skb(skb); | |
308 | out: | |
309 | return 0; | |
310 | } | |
311 | ||
312 | /* Arbitrary value. Irrelevant as long as it's not 0 since we set the handler. */ | |
313 | #define UDP_ENCAP_LISP 1 | |
314 | static int lisp_socket_init(struct lisp_port *lisp_port, struct net *net) | |
315 | { | |
a6ae068b | 316 | struct sockaddr_in sin; |
c405d282 | 317 | int err; |
a6ae068b LJ |
318 | |
319 | err = sock_create_kern(AF_INET, SOCK_DGRAM, 0, | |
320 | &lisp_port->lisp_rcv_socket); | |
321 | if (err) | |
322 | goto error; | |
323 | ||
324 | /* release net ref. */ | |
325 | sk_change_net(lisp_port->lisp_rcv_socket->sk, net); | |
326 | ||
327 | sin.sin_family = AF_INET; | |
328 | sin.sin_addr.s_addr = htonl(INADDR_ANY); | |
c405d282 | 329 | sin.sin_port = lisp_port->dst_port; |
a6ae068b LJ |
330 | |
331 | err = kernel_bind(lisp_port->lisp_rcv_socket, (struct sockaddr *)&sin, | |
332 | sizeof(struct sockaddr_in)); | |
333 | if (err) | |
334 | goto error_sock; | |
335 | ||
336 | udp_sk(lisp_port->lisp_rcv_socket->sk)->encap_type = UDP_ENCAP_LISP; | |
337 | udp_sk(lisp_port->lisp_rcv_socket->sk)->encap_rcv = lisp_rcv; | |
338 | ||
339 | udp_encap_enable(); | |
340 | ||
341 | return 0; | |
342 | ||
343 | error_sock: | |
344 | sk_release_kernel(lisp_port->lisp_rcv_socket->sk); | |
345 | error: | |
346 | pr_warn("cannot register lisp protocol handler: %d\n", err); | |
347 | return err; | |
348 | } | |
349 | ||
c405d282 | 350 | static int lisp_get_options(const struct vport *vport, struct sk_buff *skb) |
a6ae068b | 351 | { |
c405d282 | 352 | struct lisp_port *lisp_port = lisp_vport(vport); |
a6ae068b | 353 | |
c405d282 PS |
354 | if (nla_put_u16(skb, OVS_TUNNEL_ATTR_DST_PORT, ntohs(lisp_port->dst_port))) |
355 | return -EMSGSIZE; | |
356 | return 0; | |
a6ae068b LJ |
357 | } |
358 | ||
c405d282 | 359 | static void lisp_tnl_destroy(struct vport *vport) |
a6ae068b | 360 | { |
c405d282 PS |
361 | struct lisp_port *lisp_port = lisp_vport(vport); |
362 | ||
85c9de19 PS |
363 | list_del_rcu(&lisp_port->list); |
364 | /* Release socket */ | |
365 | sk_release_kernel(lisp_port->lisp_rcv_socket->sk); | |
c405d282 PS |
366 | |
367 | ovs_vport_deferred_free(vport); | |
85c9de19 PS |
368 | } |
369 | ||
c405d282 | 370 | static struct vport *lisp_tnl_create(const struct vport_parms *parms) |
85c9de19 | 371 | { |
c405d282 PS |
372 | struct net *net = ovs_dp_get_net(parms->dp); |
373 | struct nlattr *options = parms->options; | |
85c9de19 | 374 | struct lisp_port *lisp_port; |
c405d282 | 375 | struct vport *vport; |
a6ae068b LJ |
376 | struct nlattr *a; |
377 | int err; | |
378 | u16 dst_port; | |
a6ae068b LJ |
379 | |
380 | if (!options) { | |
381 | err = -EINVAL; | |
c405d282 | 382 | goto error; |
a6ae068b LJ |
383 | } |
384 | ||
385 | a = nla_find_nested(options, OVS_TUNNEL_ATTR_DST_PORT); | |
386 | if (a && nla_len(a) == sizeof(u16)) { | |
387 | dst_port = nla_get_u16(a); | |
388 | } else { | |
389 | /* Require destination port from userspace. */ | |
390 | err = -EINVAL; | |
c405d282 | 391 | goto error; |
a6ae068b LJ |
392 | } |
393 | ||
394 | /* Verify if we already have a socket created for this port */ | |
c405d282 | 395 | if (lisp_find_port(net, htons(dst_port))) { |
85c9de19 | 396 | err = -EEXIST; |
c405d282 | 397 | goto error; |
a6ae068b LJ |
398 | } |
399 | ||
c405d282 PS |
400 | vport = ovs_vport_alloc(sizeof(struct lisp_port), |
401 | &ovs_lisp_vport_ops, parms); | |
402 | if (IS_ERR(vport)) | |
403 | return vport; | |
a6ae068b | 404 | |
c405d282 PS |
405 | lisp_port = lisp_vport(vport); |
406 | lisp_port->dst_port = htons(dst_port); | |
407 | strncpy(lisp_port->name, parms->name, IFNAMSIZ); | |
a6ae068b LJ |
408 | |
409 | err = lisp_socket_init(lisp_port, net); | |
410 | if (err) | |
c405d282 | 411 | goto error_free; |
a6ae068b | 412 | |
c405d282 PS |
413 | list_add_tail_rcu(&lisp_port->list, &lisp_ports); |
414 | return vport; | |
a6ae068b | 415 | |
c405d282 PS |
416 | error_free: |
417 | ovs_vport_free(vport); | |
a6ae068b | 418 | error: |
c405d282 | 419 | return ERR_PTR(err); |
a6ae068b LJ |
420 | } |
421 | ||
11aa8dff PS |
422 | static bool need_linearize(const struct sk_buff *skb) |
423 | { | |
424 | int i; | |
425 | ||
426 | if (unlikely(skb_shinfo(skb)->frag_list)) | |
427 | return true; | |
428 | ||
429 | /* | |
430 | * Generally speaking we should linearize if there are paged frags. | |
431 | * However, if all of the refcounts are 1 we know nobody else can | |
432 | * change them from underneath us and we can skip the linearization. | |
433 | */ | |
434 | for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) | |
435 | if (unlikely(page_count(skb_frag_page(&skb_shinfo(skb)->frags[i])) > 1)) | |
436 | return true; | |
437 | ||
438 | return false; | |
439 | } | |
440 | ||
441 | static struct sk_buff *handle_offloads(struct sk_buff *skb) | |
442 | { | |
443 | int err; | |
444 | ||
445 | forward_ip_summed(skb, true); | |
446 | ||
447 | ||
448 | if (skb_is_gso(skb)) { | |
449 | struct sk_buff *nskb; | |
450 | char cb[sizeof(skb->cb)]; | |
451 | ||
452 | memcpy(cb, skb->cb, sizeof(cb)); | |
453 | ||
454 | nskb = __skb_gso_segment(skb, 0, false); | |
455 | if (IS_ERR(nskb)) { | |
456 | err = PTR_ERR(nskb); | |
457 | goto error; | |
458 | } | |
459 | ||
460 | consume_skb(skb); | |
461 | skb = nskb; | |
462 | while (nskb) { | |
463 | memcpy(nskb->cb, cb, sizeof(cb)); | |
464 | nskb = nskb->next; | |
465 | } | |
466 | } else if (get_ip_summed(skb) == OVS_CSUM_PARTIAL) { | |
467 | /* Pages aren't locked and could change at any time. | |
468 | * If this happens after we compute the checksum, the | |
469 | * checksum will be wrong. We linearize now to avoid | |
470 | * this problem. | |
471 | */ | |
472 | if (unlikely(need_linearize(skb))) { | |
473 | err = __skb_linearize(skb); | |
474 | if (unlikely(err)) | |
475 | goto error; | |
476 | } | |
477 | ||
478 | err = skb_checksum_help(skb); | |
479 | if (unlikely(err)) | |
480 | goto error; | |
481 | } | |
482 | ||
483 | set_ip_summed(skb, OVS_CSUM_NONE); | |
484 | ||
485 | return skb; | |
486 | ||
487 | error: | |
488 | return ERR_PTR(err); | |
489 | } | |
490 | ||
491 | static int ovs_tnl_send(struct vport *vport, struct sk_buff *skb, | |
492 | u8 ipproto, int tunnel_hlen, | |
493 | void (*build_header)(const struct vport *, | |
494 | struct sk_buff *, | |
495 | int tunnel_hlen)) | |
496 | { | |
497 | int min_headroom; | |
498 | struct rtable *rt; | |
499 | __be32 saddr; | |
500 | int sent_len = 0; | |
501 | int err; | |
502 | struct sk_buff *nskb; | |
503 | ||
504 | /* Route lookup */ | |
505 | saddr = OVS_CB(skb)->tun_key->ipv4_src; | |
506 | rt = find_route(ovs_dp_get_net(vport->dp), | |
507 | &saddr, | |
508 | OVS_CB(skb)->tun_key->ipv4_dst, | |
509 | ipproto, | |
510 | OVS_CB(skb)->tun_key->ipv4_tos, | |
511 | skb_get_mark(skb)); | |
512 | if (IS_ERR(rt)) { | |
513 | err = PTR_ERR(rt); | |
514 | goto error; | |
515 | } | |
516 | ||
517 | tunnel_hlen += sizeof(struct iphdr); | |
518 | ||
519 | min_headroom = LL_RESERVED_SPACE(rt_dst(rt).dev) + rt_dst(rt).header_len | |
520 | + tunnel_hlen | |
521 | + (vlan_tx_tag_present(skb) ? VLAN_HLEN : 0); | |
522 | ||
523 | if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) { | |
524 | int head_delta = SKB_DATA_ALIGN(min_headroom - | |
525 | skb_headroom(skb) + | |
526 | 16); | |
527 | ||
528 | err = pskb_expand_head(skb, max_t(int, head_delta, 0), | |
529 | 0, GFP_ATOMIC); | |
530 | if (unlikely(err)) | |
531 | goto err_free_rt; | |
532 | } | |
533 | ||
534 | /* Offloading */ | |
535 | nskb = handle_offloads(skb); | |
536 | if (IS_ERR(nskb)) { | |
537 | err = PTR_ERR(nskb); | |
538 | goto err_free_rt; | |
539 | } | |
540 | skb = nskb; | |
541 | ||
542 | /* Reset SKB */ | |
543 | nf_reset(skb); | |
544 | secpath_reset(skb); | |
545 | skb_dst_drop(skb); | |
546 | skb_clear_rxhash(skb); | |
547 | ||
548 | while (skb) { | |
549 | struct sk_buff *next_skb = skb->next; | |
550 | struct iphdr *iph; | |
551 | int frag_len; | |
552 | ||
553 | skb->next = NULL; | |
554 | ||
555 | if (unlikely(vlan_deaccel_tag(skb))) | |
556 | goto next; | |
557 | ||
558 | frag_len = skb->len; | |
559 | skb_push(skb, tunnel_hlen); | |
560 | skb_reset_network_header(skb); | |
561 | skb_set_transport_header(skb, sizeof(struct iphdr)); | |
562 | ||
563 | if (next_skb) | |
564 | skb_dst_set(skb, dst_clone(&rt_dst(rt))); | |
565 | else | |
566 | skb_dst_set(skb, &rt_dst(rt)); | |
567 | ||
568 | /* Push Tunnel header. */ | |
569 | build_header(vport, skb, tunnel_hlen); | |
570 | ||
571 | /* Push IP header. */ | |
572 | iph = ip_hdr(skb); | |
573 | iph->version = 4; | |
574 | iph->ihl = sizeof(struct iphdr) >> 2; | |
575 | iph->protocol = ipproto; | |
576 | iph->daddr = OVS_CB(skb)->tun_key->ipv4_dst; | |
577 | iph->saddr = saddr; | |
578 | iph->tos = OVS_CB(skb)->tun_key->ipv4_tos; | |
579 | iph->ttl = OVS_CB(skb)->tun_key->ipv4_ttl; | |
580 | iph->frag_off = OVS_CB(skb)->tun_key->tun_flags & | |
581 | TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0; | |
582 | /* | |
583 | * Allow our local IP stack to fragment the outer packet even | |
584 | * if the DF bit is set as a last resort. We also need to | |
585 | * force selection of an IP ID here with __ip_select_ident(), | |
586 | * as ip_select_ident() assumes a proper ID is not needed when | |
587 | * when the DF bit is set. | |
588 | */ | |
589 | skb->local_df = 1; | |
590 | __ip_select_ident(iph, skb_dst(skb), 0); | |
591 | ||
592 | memset(IPCB(skb), 0, sizeof(*IPCB(skb))); | |
593 | ||
594 | err = ip_local_out(skb); | |
595 | if (unlikely(net_xmit_eval(err))) | |
596 | goto next; | |
597 | ||
598 | sent_len += frag_len; | |
599 | ||
600 | next: | |
601 | skb = next_skb; | |
602 | } | |
603 | ||
604 | return sent_len; | |
605 | ||
606 | err_free_rt: | |
607 | ip_rt_put(rt); | |
608 | error: | |
609 | return err; | |
610 | } | |
611 | ||
c405d282 | 612 | static int lisp_tnl_send(struct vport *vport, struct sk_buff *skb) |
a6ae068b | 613 | { |
c405d282 PS |
614 | int tnl_len; |
615 | int network_offset = skb_network_offset(skb); | |
a6ae068b | 616 | |
be7cd27e PS |
617 | if (unlikely(!OVS_CB(skb)->tun_key)) |
618 | return -EINVAL; | |
a6ae068b | 619 | |
c405d282 PS |
620 | /* We only encapsulate IPv4 and IPv6 packets */ |
621 | switch (skb->protocol) { | |
622 | case htons(ETH_P_IP): | |
623 | case htons(ETH_P_IPV6): | |
624 | /* Pop off "inner" Ethernet header */ | |
625 | skb_pull(skb, network_offset); | |
626 | tnl_len = ovs_tnl_send(vport, skb, IPPROTO_UDP, | |
627 | LISP_HLEN, lisp_build_header); | |
628 | return tnl_len > 0 ? tnl_len + network_offset : tnl_len; | |
629 | default: | |
fe7744e8 | 630 | kfree_skb(skb); |
be7cd27e | 631 | return 0; |
c405d282 | 632 | } |
a6ae068b LJ |
633 | } |
634 | ||
c405d282 | 635 | static const char *lisp_get_name(const struct vport *vport) |
a6ae068b | 636 | { |
c405d282 PS |
637 | struct lisp_port *lisp_port = lisp_vport(vport); |
638 | return lisp_port->name; | |
a6ae068b LJ |
639 | } |
640 | ||
641 | const struct vport_ops ovs_lisp_vport_ops = { | |
642 | .type = OVS_VPORT_TYPE_LISP, | |
a6ae068b LJ |
643 | .create = lisp_tnl_create, |
644 | .destroy = lisp_tnl_destroy, | |
c405d282 | 645 | .get_name = lisp_get_name, |
85c9de19 | 646 | .get_options = lisp_get_options, |
a6ae068b LJ |
647 | .send = lisp_tnl_send, |
648 | }; | |
649 | #else | |
650 | #warning LISP tunneling will not be available on kernels before 2.6.26 | |
651 | #endif /* Linux kernel < 2.6.26 */ |