]>
Commit | Line | Data |
---|---|---|
a6ae068b LJ |
1 | /* |
2 | * Copyright (c) 2011 Nicira, Inc. | |
3 | * Copyright (c) 2013 Cisco Systems, Inc. | |
4 | * | |
5 | * This program is free software; you can redistribute it and/or | |
6 | * modify it under the terms of version 2 of the GNU General Public | |
7 | * License as published by the Free Software Foundation. | |
8 | * | |
9 | * This program is distributed in the hope that it will be useful, but | |
10 | * WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
12 | * General Public License for more details. | |
13 | * | |
14 | * You should have received a copy of the GNU General Public License | |
15 | * along with this program; if not, write to the Free Software | |
16 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA | |
17 | * 02110-1301, USA | |
18 | */ | |
19 | ||
20 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | |
21 | ||
22 | #include <linux/version.h> | |
23 | #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,26) | |
24 | ||
25 | #include <linux/in.h> | |
26 | #include <linux/ip.h> | |
a6ae068b | 27 | #include <linux/net.h> |
85c9de19 | 28 | #include <linux/rculist.h> |
a6ae068b LJ |
29 | #include <linux/udp.h> |
30 | ||
31 | #include <net/icmp.h> | |
32 | #include <net/ip.h> | |
11aa8dff | 33 | #include <net/route.h> |
a6ae068b | 34 | #include <net/udp.h> |
11aa8dff | 35 | #include <net/xfrm.h> |
a6ae068b LJ |
36 | |
37 | #include "datapath.h" | |
a6ae068b LJ |
38 | #include "vport.h" |
39 | ||
a6ae068b LJ |
40 | /* |
41 | * LISP encapsulation header: | |
42 | * | |
43 | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |
44 | * |N|L|E|V|I|flags| Nonce/Map-Version | | |
45 | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |
46 | * | Instance ID/Locator Status Bits | | |
47 | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |
48 | * | |
49 | */ | |
50 | ||
51 | /** | |
52 | * struct lisphdr - LISP header | |
53 | * @nonce_present: Flag indicating the presence of a 24 bit nonce value. | |
54 | * @locator_status_bits_present: Flag indicating the presence of Locator Status | |
55 | * Bits (LSB). | |
56 | * @solicit_echo_nonce: Flag indicating the use of the echo noncing mechanism. | |
57 | * @map_version_present: Flag indicating the use of mapping versioning. | |
58 | * @instance_id_present: Flag indicating the presence of a 24 bit Instance ID. | |
59 | * @reserved_flags: 3 bits reserved for future flags. | |
60 | * @nonce: 24 bit nonce value. | |
61 | * @map_version: 24 bit mapping version. | |
62 | * @locator_status_bits: Locator Status Bits: 32 bits when instance_id_present | |
63 | * is not set, 8 bits when it is. | |
64 | * @instance_id: 24 bit Instance ID | |
65 | */ | |
66 | struct lisphdr { | |
67 | #ifdef __LITTLE_ENDIAN_BITFIELD | |
68 | __u8 reserved_flags:3; | |
69 | __u8 instance_id_present:1; | |
70 | __u8 map_version_present:1; | |
71 | __u8 solicit_echo_nonce:1; | |
72 | __u8 locator_status_bits_present:1; | |
73 | __u8 nonce_present:1; | |
74 | #else | |
75 | __u8 nonce_present:1; | |
76 | __u8 locator_status_bits_present:1; | |
77 | __u8 solicit_echo_nonce:1; | |
78 | __u8 map_version_present:1; | |
79 | __u8 instance_id_present:1; | |
80 | __u8 reserved_flags:3; | |
81 | #endif | |
82 | union { | |
83 | __u8 nonce[3]; | |
84 | __u8 map_version[3]; | |
85 | } u1; | |
86 | union { | |
87 | __be32 locator_status_bits; | |
88 | struct { | |
89 | __u8 instance_id[3]; | |
90 | __u8 locator_status_bits; | |
91 | } word2; | |
92 | } u2; | |
93 | }; | |
94 | ||
95 | #define LISP_HLEN (sizeof(struct udphdr) + sizeof(struct lisphdr)) | |
96 | ||
a6ae068b LJ |
97 | /** |
98 | * struct lisp_port - Keeps track of open UDP ports | |
c405d282 PS |
99 | * @dst_port: lisp UDP port no. |
100 | * @list: list element in @lisp_ports. | |
101 | * @lisp_rcv_socket: The socket created for this port number. | |
102 | * @name: vport name. | |
a6ae068b LJ |
103 | */ |
104 | struct lisp_port { | |
c405d282 | 105 | __be16 dst_port; |
a6ae068b | 106 | struct list_head list; |
a6ae068b | 107 | struct socket *lisp_rcv_socket; |
c405d282 | 108 | char name[IFNAMSIZ]; |
a6ae068b LJ |
109 | }; |
110 | ||
111 | static LIST_HEAD(lisp_ports); | |
112 | ||
c405d282 PS |
/* Return the LISP-specific private area attached to @vport. */
static inline struct lisp_port *lisp_vport(const struct vport *vport)
{
	struct lisp_port *priv = vport_priv(vport);

	return priv;
}
117 | ||
85c9de19 | 118 | static struct lisp_port *lisp_find_port(struct net *net, __be16 port) |
a6ae068b LJ |
119 | { |
120 | struct lisp_port *lisp_port; | |
121 | ||
85c9de19 | 122 | list_for_each_entry_rcu(lisp_port, &lisp_ports, list) { |
c405d282 | 123 | if (lisp_port->dst_port == port && |
a6ae068b LJ |
124 | net_eq(sock_net(lisp_port->lisp_rcv_socket->sk), net)) |
125 | return lisp_port; | |
126 | } | |
127 | ||
128 | return NULL; | |
129 | } | |
130 | ||
131 | static inline struct lisphdr *lisp_hdr(const struct sk_buff *skb) | |
132 | { | |
133 | return (struct lisphdr *)(udp_hdr(skb) + 1); | |
134 | } | |
135 | ||
a6ae068b LJ |
136 | /* Convert 64 bit tunnel ID to 24 bit Instance ID. */ |
137 | static void tunnel_id_to_instance_id(__be64 tun_id, __u8 *iid) | |
138 | { | |
139 | ||
140 | #ifdef __BIG_ENDIAN | |
141 | iid[0] = (__force __u8)(tun_id >> 16); | |
142 | iid[1] = (__force __u8)(tun_id >> 8); | |
143 | iid[2] = (__force __u8)tun_id; | |
144 | #else | |
145 | iid[0] = (__force __u8)((__force u64)tun_id >> 40); | |
146 | iid[1] = (__force __u8)((__force u64)tun_id >> 48); | |
147 | iid[2] = (__force __u8)((__force u64)tun_id >> 56); | |
148 | #endif | |
149 | } | |
150 | ||
151 | /* Convert 24 bit Instance ID to 64 bit tunnel ID. */ | |
152 | static __be64 instance_id_to_tunnel_id(__u8 *iid) | |
153 | { | |
154 | #ifdef __BIG_ENDIAN | |
155 | return (iid[0] << 16) | (iid[1] << 8) | iid[2]; | |
156 | #else | |
157 | return (__force __be64)(((__force u64)iid[0] << 40) | | |
158 | ((__force u64)iid[1] << 48) | | |
159 | ((__force u64)iid[2] << 56)); | |
160 | #endif | |
161 | } | |
162 | ||
11aa8dff PS |
163 | /* Compute source UDP port for outgoing packet. |
164 | * Currently we use the flow hash. | |
165 | */ | |
166 | static u16 ovs_tnl_get_src_port(struct sk_buff *skb) | |
167 | { | |
168 | int low; | |
169 | int high; | |
170 | unsigned int range; | |
171 | struct sw_flow_key *pkt_key = OVS_CB(skb)->pkt_key; | |
172 | u32 hash = jhash2((const u32 *)pkt_key, | |
173 | sizeof(*pkt_key) / sizeof(u32), 0); | |
174 | ||
175 | inet_get_local_port_range(&low, &high); | |
176 | range = (high - low) + 1; | |
177 | return (((u64) hash * range) >> 32) + low; | |
178 | } | |
179 | ||
85c9de19 PS |
180 | static void lisp_build_header(const struct vport *vport, |
181 | struct sk_buff *skb, | |
182 | int tunnel_hlen) | |
a6ae068b | 183 | { |
c405d282 | 184 | struct lisp_port *lisp_port = lisp_vport(vport); |
a6ae068b LJ |
185 | struct udphdr *udph = udp_hdr(skb); |
186 | struct lisphdr *lisph = (struct lisphdr *)(udph + 1); | |
187 | const struct ovs_key_ipv4_tunnel *tun_key = OVS_CB(skb)->tun_key; | |
a6ae068b | 188 | |
c405d282 | 189 | udph->dest = lisp_port->dst_port; |
a6ae068b LJ |
190 | udph->source = htons(ovs_tnl_get_src_port(skb)); |
191 | udph->check = 0; | |
192 | udph->len = htons(skb->len - skb_transport_offset(skb)); | |
193 | ||
194 | lisph->nonce_present = 0; /* We don't support echo nonce algorithm */ | |
195 | lisph->locator_status_bits_present = 1; /* Set LSB */ | |
196 | lisph->solicit_echo_nonce = 0; /* No echo noncing */ | |
197 | lisph->map_version_present = 0; /* No mapping versioning, nonce instead */ | |
198 | lisph->instance_id_present = 1; /* Store the tun_id as Instance ID */ | |
199 | lisph->reserved_flags = 0; /* Reserved flags, set to 0 */ | |
200 | ||
201 | lisph->u1.nonce[0] = 0; | |
202 | lisph->u1.nonce[1] = 0; | |
203 | lisph->u1.nonce[2] = 0; | |
204 | ||
85c9de19 | 205 | tunnel_id_to_instance_id(tun_key->tun_id, &lisph->u2.word2.instance_id[0]); |
a6ae068b | 206 | lisph->u2.word2.locator_status_bits = 1; |
a6ae068b LJ |
207 | } |
208 | ||
11aa8dff PS |
209 | /** |
210 | * ovs_tnl_rcv - ingress point for generic tunnel code | |
211 | * | |
212 | * @vport: port this packet was received on | |
213 | * @skb: received packet | |
214 | * @tos: ToS from encapsulating IP packet, used to copy ECN bits | |
215 | * | |
216 | * Must be called with rcu_read_lock. | |
217 | * | |
218 | * Packets received by this function are in the following state: | |
219 | * - skb->data points to the inner Ethernet header. | |
220 | * - The inner Ethernet header is in the linear data area. | |
221 | * - skb->csum does not include the inner Ethernet header. | |
222 | * - The layer pointers are undefined. | |
223 | */ | |
224 | static void ovs_tnl_rcv(struct vport *vport, struct sk_buff *skb, | |
225 | struct ovs_key_ipv4_tunnel *tun_key) | |
226 | { | |
227 | struct ethhdr *eh; | |
228 | ||
229 | skb_reset_mac_header(skb); | |
230 | eh = eth_hdr(skb); | |
231 | ||
232 | if (likely(ntohs(eh->h_proto) >= ETH_P_802_3_MIN)) | |
233 | skb->protocol = eh->h_proto; | |
234 | else | |
235 | skb->protocol = htons(ETH_P_802_2); | |
236 | ||
237 | skb_dst_drop(skb); | |
238 | nf_reset(skb); | |
239 | skb_clear_rxhash(skb); | |
240 | secpath_reset(skb); | |
241 | vlan_set_tci(skb, 0); | |
242 | ||
243 | if (unlikely(compute_ip_summed(skb, false))) { | |
244 | kfree_skb(skb); | |
245 | return; | |
246 | } | |
247 | ||
248 | ovs_vport_receive(vport, skb, tun_key); | |
249 | } | |
250 | ||
a6ae068b LJ |
251 | /* Called with rcu_read_lock and BH disabled. */ |
252 | static int lisp_rcv(struct sock *sk, struct sk_buff *skb) | |
253 | { | |
85c9de19 | 254 | struct lisp_port *lisp_port; |
a6ae068b | 255 | struct lisphdr *lisph; |
a6ae068b LJ |
256 | struct iphdr *iph, *inner_iph; |
257 | struct ovs_key_ipv4_tunnel tun_key; | |
258 | __be64 key; | |
a6ae068b LJ |
259 | struct ethhdr *ethh; |
260 | __be16 protocol; | |
261 | ||
85c9de19 PS |
262 | lisp_port = lisp_find_port(dev_net(skb->dev), udp_hdr(skb)->dest); |
263 | if (unlikely(!lisp_port)) | |
264 | goto error; | |
265 | ||
a6ae068b LJ |
266 | if (unlikely(!pskb_may_pull(skb, LISP_HLEN))) |
267 | goto error; | |
268 | ||
269 | lisph = lisp_hdr(skb); | |
270 | ||
271 | skb_pull_rcsum(skb, LISP_HLEN); | |
272 | ||
273 | if (lisph->instance_id_present != 1) | |
274 | key = 0; | |
275 | else | |
276 | key = instance_id_to_tunnel_id(&lisph->u2.word2.instance_id[0]); | |
277 | ||
a6ae068b | 278 | /* Save outer tunnel values */ |
85c9de19 | 279 | iph = ip_hdr(skb); |
adda018c | 280 | ovs_flow_tun_key_init(&tun_key, iph, key, TUNNEL_KEY); |
a6ae068b LJ |
281 | |
282 | /* Drop non-IP inner packets */ | |
283 | inner_iph = (struct iphdr *)(lisph + 1); | |
284 | switch (inner_iph->version) { | |
285 | case 4: | |
286 | protocol = htons(ETH_P_IP); | |
287 | break; | |
288 | case 6: | |
289 | protocol = htons(ETH_P_IPV6); | |
290 | break; | |
291 | default: | |
292 | goto error; | |
293 | } | |
294 | ||
295 | /* Add Ethernet header */ | |
296 | ethh = (struct ethhdr *)skb_push(skb, ETH_HLEN); | |
297 | memset(ethh, 0, ETH_HLEN); | |
298 | ethh->h_dest[0] = 0x02; | |
299 | ethh->h_source[0] = 0x02; | |
300 | ethh->h_proto = protocol; | |
301 | ||
3cfede14 PS |
302 | ovs_skb_postpush_rcsum(skb, skb->data, ETH_HLEN); |
303 | ||
db0c3134 | 304 | ovs_tnl_rcv(vport_from_priv(lisp_port), skb, &tun_key); |
a6ae068b LJ |
305 | goto out; |
306 | ||
307 | error: | |
308 | kfree_skb(skb); | |
309 | out: | |
310 | return 0; | |
311 | } | |
312 | ||
313 | /* Arbitrary value. Irrelevant as long as it's not 0 since we set the handler. */ | |
314 | #define UDP_ENCAP_LISP 1 | |
315 | static int lisp_socket_init(struct lisp_port *lisp_port, struct net *net) | |
316 | { | |
a6ae068b | 317 | struct sockaddr_in sin; |
c405d282 | 318 | int err; |
a6ae068b LJ |
319 | |
320 | err = sock_create_kern(AF_INET, SOCK_DGRAM, 0, | |
321 | &lisp_port->lisp_rcv_socket); | |
322 | if (err) | |
323 | goto error; | |
324 | ||
325 | /* release net ref. */ | |
326 | sk_change_net(lisp_port->lisp_rcv_socket->sk, net); | |
327 | ||
328 | sin.sin_family = AF_INET; | |
329 | sin.sin_addr.s_addr = htonl(INADDR_ANY); | |
c405d282 | 330 | sin.sin_port = lisp_port->dst_port; |
a6ae068b LJ |
331 | |
332 | err = kernel_bind(lisp_port->lisp_rcv_socket, (struct sockaddr *)&sin, | |
333 | sizeof(struct sockaddr_in)); | |
334 | if (err) | |
335 | goto error_sock; | |
336 | ||
337 | udp_sk(lisp_port->lisp_rcv_socket->sk)->encap_type = UDP_ENCAP_LISP; | |
338 | udp_sk(lisp_port->lisp_rcv_socket->sk)->encap_rcv = lisp_rcv; | |
339 | ||
340 | udp_encap_enable(); | |
341 | ||
342 | return 0; | |
343 | ||
344 | error_sock: | |
345 | sk_release_kernel(lisp_port->lisp_rcv_socket->sk); | |
346 | error: | |
347 | pr_warn("cannot register lisp protocol handler: %d\n", err); | |
348 | return err; | |
349 | } | |
350 | ||
c405d282 | 351 | static int lisp_get_options(const struct vport *vport, struct sk_buff *skb) |
a6ae068b | 352 | { |
c405d282 | 353 | struct lisp_port *lisp_port = lisp_vport(vport); |
a6ae068b | 354 | |
c405d282 PS |
355 | if (nla_put_u16(skb, OVS_TUNNEL_ATTR_DST_PORT, ntohs(lisp_port->dst_port))) |
356 | return -EMSGSIZE; | |
357 | return 0; | |
a6ae068b LJ |
358 | } |
359 | ||
c405d282 | 360 | static void lisp_tnl_destroy(struct vport *vport) |
a6ae068b | 361 | { |
c405d282 PS |
362 | struct lisp_port *lisp_port = lisp_vport(vport); |
363 | ||
85c9de19 PS |
364 | list_del_rcu(&lisp_port->list); |
365 | /* Release socket */ | |
366 | sk_release_kernel(lisp_port->lisp_rcv_socket->sk); | |
c405d282 PS |
367 | |
368 | ovs_vport_deferred_free(vport); | |
85c9de19 PS |
369 | } |
370 | ||
c405d282 | 371 | static struct vport *lisp_tnl_create(const struct vport_parms *parms) |
85c9de19 | 372 | { |
c405d282 PS |
373 | struct net *net = ovs_dp_get_net(parms->dp); |
374 | struct nlattr *options = parms->options; | |
85c9de19 | 375 | struct lisp_port *lisp_port; |
c405d282 | 376 | struct vport *vport; |
a6ae068b LJ |
377 | struct nlattr *a; |
378 | int err; | |
379 | u16 dst_port; | |
a6ae068b LJ |
380 | |
381 | if (!options) { | |
382 | err = -EINVAL; | |
c405d282 | 383 | goto error; |
a6ae068b LJ |
384 | } |
385 | ||
386 | a = nla_find_nested(options, OVS_TUNNEL_ATTR_DST_PORT); | |
387 | if (a && nla_len(a) == sizeof(u16)) { | |
388 | dst_port = nla_get_u16(a); | |
389 | } else { | |
390 | /* Require destination port from userspace. */ | |
391 | err = -EINVAL; | |
c405d282 | 392 | goto error; |
a6ae068b LJ |
393 | } |
394 | ||
395 | /* Verify if we already have a socket created for this port */ | |
c405d282 | 396 | if (lisp_find_port(net, htons(dst_port))) { |
85c9de19 | 397 | err = -EEXIST; |
c405d282 | 398 | goto error; |
a6ae068b LJ |
399 | } |
400 | ||
c405d282 PS |
401 | vport = ovs_vport_alloc(sizeof(struct lisp_port), |
402 | &ovs_lisp_vport_ops, parms); | |
403 | if (IS_ERR(vport)) | |
404 | return vport; | |
a6ae068b | 405 | |
c405d282 PS |
406 | lisp_port = lisp_vport(vport); |
407 | lisp_port->dst_port = htons(dst_port); | |
408 | strncpy(lisp_port->name, parms->name, IFNAMSIZ); | |
a6ae068b LJ |
409 | |
410 | err = lisp_socket_init(lisp_port, net); | |
411 | if (err) | |
c405d282 | 412 | goto error_free; |
a6ae068b | 413 | |
c405d282 PS |
414 | list_add_tail_rcu(&lisp_port->list, &lisp_ports); |
415 | return vport; | |
a6ae068b | 416 | |
c405d282 PS |
417 | error_free: |
418 | ovs_vport_free(vport); | |
a6ae068b | 419 | error: |
c405d282 | 420 | return ERR_PTR(err); |
a6ae068b LJ |
421 | } |
422 | ||
11aa8dff PS |
423 | static bool need_linearize(const struct sk_buff *skb) |
424 | { | |
425 | int i; | |
426 | ||
427 | if (unlikely(skb_shinfo(skb)->frag_list)) | |
428 | return true; | |
429 | ||
430 | /* | |
431 | * Generally speaking we should linearize if there are paged frags. | |
432 | * However, if all of the refcounts are 1 we know nobody else can | |
433 | * change them from underneath us and we can skip the linearization. | |
434 | */ | |
435 | for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) | |
436 | if (unlikely(page_count(skb_frag_page(&skb_shinfo(skb)->frags[i])) > 1)) | |
437 | return true; | |
438 | ||
439 | return false; | |
440 | } | |
441 | ||
442 | static struct sk_buff *handle_offloads(struct sk_buff *skb) | |
443 | { | |
444 | int err; | |
445 | ||
446 | forward_ip_summed(skb, true); | |
447 | ||
448 | ||
449 | if (skb_is_gso(skb)) { | |
450 | struct sk_buff *nskb; | |
451 | char cb[sizeof(skb->cb)]; | |
452 | ||
453 | memcpy(cb, skb->cb, sizeof(cb)); | |
454 | ||
455 | nskb = __skb_gso_segment(skb, 0, false); | |
456 | if (IS_ERR(nskb)) { | |
457 | err = PTR_ERR(nskb); | |
458 | goto error; | |
459 | } | |
460 | ||
461 | consume_skb(skb); | |
462 | skb = nskb; | |
463 | while (nskb) { | |
464 | memcpy(nskb->cb, cb, sizeof(cb)); | |
465 | nskb = nskb->next; | |
466 | } | |
467 | } else if (get_ip_summed(skb) == OVS_CSUM_PARTIAL) { | |
468 | /* Pages aren't locked and could change at any time. | |
469 | * If this happens after we compute the checksum, the | |
470 | * checksum will be wrong. We linearize now to avoid | |
471 | * this problem. | |
472 | */ | |
473 | if (unlikely(need_linearize(skb))) { | |
474 | err = __skb_linearize(skb); | |
475 | if (unlikely(err)) | |
476 | goto error; | |
477 | } | |
478 | ||
479 | err = skb_checksum_help(skb); | |
480 | if (unlikely(err)) | |
481 | goto error; | |
482 | } | |
483 | ||
484 | set_ip_summed(skb, OVS_CSUM_NONE); | |
485 | ||
486 | return skb; | |
487 | ||
488 | error: | |
489 | return ERR_PTR(err); | |
490 | } | |
491 | ||
492 | static int ovs_tnl_send(struct vport *vport, struct sk_buff *skb, | |
493 | u8 ipproto, int tunnel_hlen, | |
494 | void (*build_header)(const struct vport *, | |
495 | struct sk_buff *, | |
496 | int tunnel_hlen)) | |
497 | { | |
498 | int min_headroom; | |
499 | struct rtable *rt; | |
500 | __be32 saddr; | |
501 | int sent_len = 0; | |
502 | int err; | |
503 | struct sk_buff *nskb; | |
504 | ||
505 | /* Route lookup */ | |
506 | saddr = OVS_CB(skb)->tun_key->ipv4_src; | |
507 | rt = find_route(ovs_dp_get_net(vport->dp), | |
508 | &saddr, | |
509 | OVS_CB(skb)->tun_key->ipv4_dst, | |
510 | ipproto, | |
511 | OVS_CB(skb)->tun_key->ipv4_tos, | |
512 | skb_get_mark(skb)); | |
513 | if (IS_ERR(rt)) { | |
514 | err = PTR_ERR(rt); | |
515 | goto error; | |
516 | } | |
517 | ||
518 | tunnel_hlen += sizeof(struct iphdr); | |
519 | ||
520 | min_headroom = LL_RESERVED_SPACE(rt_dst(rt).dev) + rt_dst(rt).header_len | |
521 | + tunnel_hlen | |
522 | + (vlan_tx_tag_present(skb) ? VLAN_HLEN : 0); | |
523 | ||
524 | if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) { | |
525 | int head_delta = SKB_DATA_ALIGN(min_headroom - | |
526 | skb_headroom(skb) + | |
527 | 16); | |
528 | ||
529 | err = pskb_expand_head(skb, max_t(int, head_delta, 0), | |
530 | 0, GFP_ATOMIC); | |
531 | if (unlikely(err)) | |
532 | goto err_free_rt; | |
533 | } | |
534 | ||
535 | /* Offloading */ | |
536 | nskb = handle_offloads(skb); | |
537 | if (IS_ERR(nskb)) { | |
538 | err = PTR_ERR(nskb); | |
539 | goto err_free_rt; | |
540 | } | |
541 | skb = nskb; | |
542 | ||
543 | /* Reset SKB */ | |
544 | nf_reset(skb); | |
545 | secpath_reset(skb); | |
546 | skb_dst_drop(skb); | |
547 | skb_clear_rxhash(skb); | |
548 | ||
549 | while (skb) { | |
550 | struct sk_buff *next_skb = skb->next; | |
551 | struct iphdr *iph; | |
552 | int frag_len; | |
553 | ||
554 | skb->next = NULL; | |
555 | ||
556 | if (unlikely(vlan_deaccel_tag(skb))) | |
557 | goto next; | |
558 | ||
559 | frag_len = skb->len; | |
560 | skb_push(skb, tunnel_hlen); | |
561 | skb_reset_network_header(skb); | |
562 | skb_set_transport_header(skb, sizeof(struct iphdr)); | |
563 | ||
564 | if (next_skb) | |
565 | skb_dst_set(skb, dst_clone(&rt_dst(rt))); | |
566 | else | |
567 | skb_dst_set(skb, &rt_dst(rt)); | |
568 | ||
569 | /* Push Tunnel header. */ | |
570 | build_header(vport, skb, tunnel_hlen); | |
571 | ||
572 | /* Push IP header. */ | |
573 | iph = ip_hdr(skb); | |
574 | iph->version = 4; | |
575 | iph->ihl = sizeof(struct iphdr) >> 2; | |
576 | iph->protocol = ipproto; | |
577 | iph->daddr = OVS_CB(skb)->tun_key->ipv4_dst; | |
578 | iph->saddr = saddr; | |
579 | iph->tos = OVS_CB(skb)->tun_key->ipv4_tos; | |
580 | iph->ttl = OVS_CB(skb)->tun_key->ipv4_ttl; | |
581 | iph->frag_off = OVS_CB(skb)->tun_key->tun_flags & | |
582 | TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0; | |
583 | /* | |
584 | * Allow our local IP stack to fragment the outer packet even | |
585 | * if the DF bit is set as a last resort. We also need to | |
586 | * force selection of an IP ID here with __ip_select_ident(), | |
587 | * as ip_select_ident() assumes a proper ID is not needed when | |
588 | * when the DF bit is set. | |
589 | */ | |
590 | skb->local_df = 1; | |
591 | __ip_select_ident(iph, skb_dst(skb), 0); | |
592 | ||
593 | memset(IPCB(skb), 0, sizeof(*IPCB(skb))); | |
594 | ||
595 | err = ip_local_out(skb); | |
596 | if (unlikely(net_xmit_eval(err))) | |
597 | goto next; | |
598 | ||
599 | sent_len += frag_len; | |
600 | ||
601 | next: | |
602 | skb = next_skb; | |
603 | } | |
604 | ||
605 | return sent_len; | |
606 | ||
607 | err_free_rt: | |
608 | ip_rt_put(rt); | |
609 | error: | |
610 | return err; | |
611 | } | |
612 | ||
c405d282 | 613 | static int lisp_tnl_send(struct vport *vport, struct sk_buff *skb) |
a6ae068b | 614 | { |
c405d282 PS |
615 | int tnl_len; |
616 | int network_offset = skb_network_offset(skb); | |
a6ae068b | 617 | |
be7cd27e PS |
618 | if (unlikely(!OVS_CB(skb)->tun_key)) |
619 | return -EINVAL; | |
a6ae068b | 620 | |
c405d282 PS |
621 | /* We only encapsulate IPv4 and IPv6 packets */ |
622 | switch (skb->protocol) { | |
623 | case htons(ETH_P_IP): | |
624 | case htons(ETH_P_IPV6): | |
625 | /* Pop off "inner" Ethernet header */ | |
626 | skb_pull(skb, network_offset); | |
627 | tnl_len = ovs_tnl_send(vport, skb, IPPROTO_UDP, | |
628 | LISP_HLEN, lisp_build_header); | |
629 | return tnl_len > 0 ? tnl_len + network_offset : tnl_len; | |
630 | default: | |
fe7744e8 | 631 | kfree_skb(skb); |
be7cd27e | 632 | return 0; |
c405d282 | 633 | } |
a6ae068b LJ |
634 | } |
635 | ||
c405d282 | 636 | static const char *lisp_get_name(const struct vport *vport) |
a6ae068b | 637 | { |
c405d282 PS |
638 | struct lisp_port *lisp_port = lisp_vport(vport); |
639 | return lisp_port->name; | |
a6ae068b LJ |
640 | } |
641 | ||
642 | const struct vport_ops ovs_lisp_vport_ops = { | |
643 | .type = OVS_VPORT_TYPE_LISP, | |
a6ae068b LJ |
644 | .create = lisp_tnl_create, |
645 | .destroy = lisp_tnl_destroy, | |
c405d282 | 646 | .get_name = lisp_get_name, |
85c9de19 | 647 | .get_options = lisp_get_options, |
a6ae068b LJ |
648 | .send = lisp_tnl_send, |
649 | }; | |
650 | #else | |
651 | #warning LISP tunneling will not be available on kernels before 2.6.26 | |
652 | #endif /* Linux kernel < 2.6.26 */ |