]>
Commit | Line | Data |
---|---|---|
0b5e8b8e AZ |
1 | /* |
2 | * Geneve: Generic Network Virtualization Encapsulation | |
3 | * | |
4 | * Copyright (c) 2014 Nicira, Inc. | |
5 | * | |
6 | * This program is free software; you can redistribute it and/or | |
7 | * modify it under the terms of the GNU General Public License | |
8 | * as published by the Free Software Foundation; either version | |
9 | * 2 of the License, or (at your option) any later version. | |
10 | */ | |
11 | ||
12 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | |
13 | ||
14 | #include <linux/kernel.h> | |
15 | #include <linux/types.h> | |
16 | #include <linux/module.h> | |
17 | #include <linux/errno.h> | |
18 | #include <linux/slab.h> | |
19 | #include <linux/skbuff.h> | |
829a3ada | 20 | #include <linux/list.h> |
0b5e8b8e AZ |
21 | #include <linux/netdevice.h> |
22 | #include <linux/in.h> | |
23 | #include <linux/ip.h> | |
24 | #include <linux/udp.h> | |
25 | #include <linux/igmp.h> | |
26 | #include <linux/etherdevice.h> | |
27 | #include <linux/if_ether.h> | |
28 | #include <linux/if_vlan.h> | |
0b5e8b8e | 29 | #include <linux/ethtool.h> |
829a3ada | 30 | #include <linux/mutex.h> |
0b5e8b8e AZ |
31 | #include <net/arp.h> |
32 | #include <net/ndisc.h> | |
33 | #include <net/ip.h> | |
34 | #include <net/ip_tunnels.h> | |
35 | #include <net/icmp.h> | |
36 | #include <net/udp.h> | |
37 | #include <net/rtnetlink.h> | |
38 | #include <net/route.h> | |
39 | #include <net/dsfield.h> | |
40 | #include <net/inet_ecn.h> | |
41 | #include <net/net_namespace.h> | |
42 | #include <net/netns/generic.h> | |
43 | #include <net/geneve.h> | |
44 | #include <net/protocol.h> | |
45 | #include <net/udp_tunnel.h> | |
46 | #if IS_ENABLED(CONFIG_IPV6) | |
47 | #include <net/ipv6.h> | |
48 | #include <net/addrconf.h> | |
49 | #include <net/ip6_tunnel.h> | |
50 | #include <net/ip6_checksum.h> | |
51 | #endif | |
52 | ||
829a3ada JG |
/* Protects sock_list and refcounts. */
static DEFINE_MUTEX(geneve_mutex);

/* per-network namespace private data for this module */
struct geneve_net {
	/* Geneve sockets of this namespace, linked through geneve_sock.list */
	struct list_head sock_list;
};

/* Key passed to net_generic() to fetch this module's struct geneve_net;
 * registered through geneve_net_ops.id.
 */
static int geneve_net_id;
62 | ||
0b5e8b8e AZ |
63 | static inline struct genevehdr *geneve_hdr(const struct sk_buff *skb) |
64 | { | |
65 | return (struct genevehdr *)(udp_hdr(skb) + 1); | |
66 | } | |
67 | ||
46b1e4f9 JG |
68 | static struct geneve_sock *geneve_find_sock(struct net *net, |
69 | sa_family_t family, __be16 port) | |
0b5e8b8e | 70 | { |
df5dba8e | 71 | struct geneve_net *gn = net_generic(net, geneve_net_id); |
0b5e8b8e AZ |
72 | struct geneve_sock *gs; |
73 | ||
df5dba8e | 74 | list_for_each_entry(gs, &gn->sock_list, list) { |
46b1e4f9 JG |
75 | if (inet_sk(gs->sock->sk)->inet_sport == port && |
76 | inet_sk(gs->sock->sk)->sk.sk_family == family) | |
0b5e8b8e AZ |
77 | return gs; |
78 | } | |
79 | ||
80 | return NULL; | |
81 | } | |
82 | ||
83 | static void geneve_build_header(struct genevehdr *geneveh, | |
84 | __be16 tun_flags, u8 vni[3], | |
85 | u8 options_len, u8 *options) | |
86 | { | |
87 | geneveh->ver = GENEVE_VER; | |
88 | geneveh->opt_len = options_len / 4; | |
89 | geneveh->oam = !!(tun_flags & TUNNEL_OAM); | |
90 | geneveh->critical = !!(tun_flags & TUNNEL_CRIT_OPT); | |
91 | geneveh->rsvd1 = 0; | |
92 | memcpy(geneveh->vni, vni, 3); | |
93 | geneveh->proto_type = htons(ETH_P_TEB); | |
94 | geneveh->rsvd2 = 0; | |
95 | ||
96 | memcpy(geneveh->options, options, options_len); | |
97 | } | |
98 | ||
f4e715c3 | 99 | /* Transmit a fully formatted Geneve frame. |
0b5e8b8e AZ |
100 | * |
101 | * When calling this function. The skb->data should point | |
102 | * to the geneve header which is fully formed. | |
103 | * | |
104 | * This function will add other UDP tunnel headers. | |
105 | */ | |
106 | int geneve_xmit_skb(struct geneve_sock *gs, struct rtable *rt, | |
107 | struct sk_buff *skb, __be32 src, __be32 dst, __u8 tos, | |
108 | __u8 ttl, __be16 df, __be16 src_port, __be16 dst_port, | |
109 | __be16 tun_flags, u8 vni[3], u8 opt_len, u8 *opt, | |
b8693877 | 110 | bool csum, bool xnet) |
0b5e8b8e AZ |
111 | { |
112 | struct genevehdr *gnvh; | |
113 | int min_headroom; | |
114 | int err; | |
115 | ||
0b5e8b8e AZ |
116 | min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len |
117 | + GENEVE_BASE_HLEN + opt_len + sizeof(struct iphdr) | |
df8a39de | 118 | + (skb_vlan_tag_present(skb) ? VLAN_HLEN : 0); |
0b5e8b8e AZ |
119 | |
120 | err = skb_cow_head(skb, min_headroom); | |
997e068e PS |
121 | if (unlikely(err)) { |
122 | kfree_skb(skb); | |
0b5e8b8e | 123 | return err; |
997e068e | 124 | } |
0b5e8b8e | 125 | |
5968250c JP |
126 | skb = vlan_hwaccel_push_inside(skb); |
127 | if (unlikely(!skb)) | |
128 | return -ENOMEM; | |
0b5e8b8e | 129 | |
b736a623 JG |
130 | skb = udp_tunnel_handle_offloads(skb, csum); |
131 | if (IS_ERR(skb)) | |
132 | return PTR_ERR(skb); | |
133 | ||
0b5e8b8e AZ |
134 | gnvh = (struct genevehdr *)__skb_push(skb, sizeof(*gnvh) + opt_len); |
135 | geneve_build_header(gnvh, tun_flags, vni, opt_len, opt); | |
136 | ||
45cac46e JG |
137 | skb_set_inner_protocol(skb, htons(ETH_P_TEB)); |
138 | ||
79b16aad | 139 | return udp_tunnel_xmit_skb(rt, gs->sock->sk, skb, src, dst, |
d998f8ef | 140 | tos, ttl, df, src_port, dst_port, xnet, |
b8693877 | 141 | !csum); |
0b5e8b8e AZ |
142 | } |
143 | EXPORT_SYMBOL_GPL(geneve_xmit_skb); | |
144 | ||
a4c9ea5e JS |
145 | static int geneve_hlen(struct genevehdr *gh) |
146 | { | |
147 | return sizeof(*gh) + gh->opt_len * 4; | |
148 | } | |
149 | ||
150 | static struct sk_buff **geneve_gro_receive(struct sk_buff **head, | |
a2b12f3c TH |
151 | struct sk_buff *skb, |
152 | struct udp_offload *uoff) | |
a4c9ea5e JS |
153 | { |
154 | struct sk_buff *p, **pp = NULL; | |
155 | struct genevehdr *gh, *gh2; | |
156 | unsigned int hlen, gh_len, off_gnv; | |
157 | const struct packet_offload *ptype; | |
158 | __be16 type; | |
159 | int flush = 1; | |
160 | ||
161 | off_gnv = skb_gro_offset(skb); | |
162 | hlen = off_gnv + sizeof(*gh); | |
163 | gh = skb_gro_header_fast(skb, off_gnv); | |
164 | if (skb_gro_header_hard(skb, hlen)) { | |
165 | gh = skb_gro_header_slow(skb, hlen, off_gnv); | |
166 | if (unlikely(!gh)) | |
167 | goto out; | |
168 | } | |
169 | ||
170 | if (gh->ver != GENEVE_VER || gh->oam) | |
171 | goto out; | |
172 | gh_len = geneve_hlen(gh); | |
173 | ||
174 | hlen = off_gnv + gh_len; | |
175 | if (skb_gro_header_hard(skb, hlen)) { | |
176 | gh = skb_gro_header_slow(skb, hlen, off_gnv); | |
177 | if (unlikely(!gh)) | |
178 | goto out; | |
179 | } | |
180 | ||
181 | flush = 0; | |
182 | ||
183 | for (p = *head; p; p = p->next) { | |
184 | if (!NAPI_GRO_CB(p)->same_flow) | |
185 | continue; | |
186 | ||
187 | gh2 = (struct genevehdr *)(p->data + off_gnv); | |
188 | if (gh->opt_len != gh2->opt_len || | |
189 | memcmp(gh, gh2, gh_len)) { | |
190 | NAPI_GRO_CB(p)->same_flow = 0; | |
191 | continue; | |
192 | } | |
193 | } | |
194 | ||
195 | type = gh->proto_type; | |
196 | ||
197 | rcu_read_lock(); | |
198 | ptype = gro_find_receive_by_type(type); | |
51456b29 | 199 | if (!ptype) { |
a4c9ea5e JS |
200 | flush = 1; |
201 | goto out_unlock; | |
202 | } | |
203 | ||
204 | skb_gro_pull(skb, gh_len); | |
205 | skb_gro_postpull_rcsum(skb, gh, gh_len); | |
206 | pp = ptype->callbacks.gro_receive(head, skb); | |
207 | ||
208 | out_unlock: | |
209 | rcu_read_unlock(); | |
210 | out: | |
211 | NAPI_GRO_CB(skb)->flush |= flush; | |
212 | ||
213 | return pp; | |
214 | } | |
215 | ||
a2b12f3c TH |
216 | static int geneve_gro_complete(struct sk_buff *skb, int nhoff, |
217 | struct udp_offload *uoff) | |
a4c9ea5e JS |
218 | { |
219 | struct genevehdr *gh; | |
220 | struct packet_offload *ptype; | |
221 | __be16 type; | |
222 | int gh_len; | |
223 | int err = -ENOSYS; | |
224 | ||
225 | udp_tunnel_gro_complete(skb, nhoff); | |
226 | ||
227 | gh = (struct genevehdr *)(skb->data + nhoff); | |
228 | gh_len = geneve_hlen(gh); | |
229 | type = gh->proto_type; | |
230 | ||
231 | rcu_read_lock(); | |
232 | ptype = gro_find_complete_by_type(type); | |
00db4124 | 233 | if (ptype) |
a4c9ea5e JS |
234 | err = ptype->callbacks.gro_complete(skb, nhoff + gh_len); |
235 | ||
236 | rcu_read_unlock(); | |
237 | return err; | |
238 | } | |
239 | ||
0b5e8b8e AZ |
240 | static void geneve_notify_add_rx_port(struct geneve_sock *gs) |
241 | { | |
242 | struct sock *sk = gs->sock->sk; | |
243 | sa_family_t sa_family = sk->sk_family; | |
244 | int err; | |
245 | ||
246 | if (sa_family == AF_INET) { | |
247 | err = udp_add_offload(&gs->udp_offloads); | |
248 | if (err) | |
249 | pr_warn("geneve: udp_add_offload failed with status %d\n", | |
250 | err); | |
251 | } | |
252 | } | |
253 | ||
7ed767f7 JG |
254 | static void geneve_notify_del_rx_port(struct geneve_sock *gs) |
255 | { | |
256 | struct sock *sk = gs->sock->sk; | |
257 | sa_family_t sa_family = sk->sk_family; | |
258 | ||
259 | if (sa_family == AF_INET) | |
260 | udp_del_offload(&gs->udp_offloads); | |
261 | } | |
262 | ||
0b5e8b8e AZ |
263 | /* Callback from net/ipv4/udp.c to receive packets */ |
264 | static int geneve_udp_encap_recv(struct sock *sk, struct sk_buff *skb) | |
265 | { | |
266 | struct genevehdr *geneveh; | |
267 | struct geneve_sock *gs; | |
268 | int opts_len; | |
269 | ||
270 | /* Need Geneve and inner Ethernet header to be present */ | |
271 | if (unlikely(!pskb_may_pull(skb, GENEVE_BASE_HLEN))) | |
272 | goto error; | |
273 | ||
274 | /* Return packets with reserved bits set */ | |
275 | geneveh = geneve_hdr(skb); | |
276 | ||
277 | if (unlikely(geneveh->ver != GENEVE_VER)) | |
278 | goto error; | |
279 | ||
280 | if (unlikely(geneveh->proto_type != htons(ETH_P_TEB))) | |
281 | goto error; | |
282 | ||
283 | opts_len = geneveh->opt_len * 4; | |
284 | if (iptunnel_pull_header(skb, GENEVE_BASE_HLEN + opts_len, | |
285 | htons(ETH_P_TEB))) | |
286 | goto drop; | |
287 | ||
288 | gs = rcu_dereference_sk_user_data(sk); | |
289 | if (!gs) | |
290 | goto drop; | |
291 | ||
292 | gs->rcv(gs, skb); | |
293 | return 0; | |
294 | ||
295 | drop: | |
296 | /* Consume bad packet */ | |
297 | kfree_skb(skb); | |
298 | return 0; | |
299 | ||
300 | error: | |
301 | /* Let the UDP layer deal with the skb */ | |
302 | return 1; | |
303 | } | |
304 | ||
0b5e8b8e AZ |
305 | static struct socket *geneve_create_sock(struct net *net, bool ipv6, |
306 | __be16 port) | |
307 | { | |
308 | struct socket *sock; | |
309 | struct udp_port_cfg udp_conf; | |
310 | int err; | |
311 | ||
312 | memset(&udp_conf, 0, sizeof(udp_conf)); | |
313 | ||
314 | if (ipv6) { | |
315 | udp_conf.family = AF_INET6; | |
316 | } else { | |
317 | udp_conf.family = AF_INET; | |
42350dca | 318 | udp_conf.local_ip.s_addr = htonl(INADDR_ANY); |
0b5e8b8e AZ |
319 | } |
320 | ||
321 | udp_conf.local_udp_port = port; | |
322 | ||
323 | /* Open UDP socket */ | |
324 | err = udp_sock_create(net, &udp_conf, &sock); | |
325 | if (err < 0) | |
326 | return ERR_PTR(err); | |
327 | ||
328 | return sock; | |
329 | } | |
330 | ||
331 | /* Create new listen socket if needed */ | |
332 | static struct geneve_sock *geneve_socket_create(struct net *net, __be16 port, | |
333 | geneve_rcv_t *rcv, void *data, | |
334 | bool ipv6) | |
335 | { | |
df5dba8e | 336 | struct geneve_net *gn = net_generic(net, geneve_net_id); |
0b5e8b8e AZ |
337 | struct geneve_sock *gs; |
338 | struct socket *sock; | |
339 | struct udp_tunnel_sock_cfg tunnel_cfg; | |
340 | ||
341 | gs = kzalloc(sizeof(*gs), GFP_KERNEL); | |
342 | if (!gs) | |
343 | return ERR_PTR(-ENOMEM); | |
344 | ||
0b5e8b8e AZ |
345 | sock = geneve_create_sock(net, ipv6, port); |
346 | if (IS_ERR(sock)) { | |
347 | kfree(gs); | |
348 | return ERR_CAST(sock); | |
349 | } | |
350 | ||
351 | gs->sock = sock; | |
829a3ada | 352 | gs->refcnt = 1; |
0b5e8b8e AZ |
353 | gs->rcv = rcv; |
354 | gs->rcv_data = data; | |
355 | ||
356 | /* Initialize the geneve udp offloads structure */ | |
357 | gs->udp_offloads.port = port; | |
a4c9ea5e JS |
358 | gs->udp_offloads.callbacks.gro_receive = geneve_gro_receive; |
359 | gs->udp_offloads.callbacks.gro_complete = geneve_gro_complete; | |
0b5e8b8e | 360 | geneve_notify_add_rx_port(gs); |
0b5e8b8e AZ |
361 | |
362 | /* Mark socket as an encapsulation socket */ | |
363 | tunnel_cfg.sk_user_data = gs; | |
364 | tunnel_cfg.encap_type = 1; | |
365 | tunnel_cfg.encap_rcv = geneve_udp_encap_recv; | |
366 | tunnel_cfg.encap_destroy = NULL; | |
367 | setup_udp_tunnel_sock(net, sock, &tunnel_cfg); | |
368 | ||
df5dba8e | 369 | list_add(&gs->list, &gn->sock_list); |
829a3ada | 370 | |
0b5e8b8e AZ |
371 | return gs; |
372 | } | |
373 | ||
374 | struct geneve_sock *geneve_sock_add(struct net *net, __be16 port, | |
375 | geneve_rcv_t *rcv, void *data, | |
376 | bool no_share, bool ipv6) | |
377 | { | |
378 | struct geneve_sock *gs; | |
379 | ||
829a3ada | 380 | mutex_lock(&geneve_mutex); |
0b5e8b8e | 381 | |
46b1e4f9 | 382 | gs = geneve_find_sock(net, ipv6 ? AF_INET6 : AF_INET, port); |
829a3ada JG |
383 | if (gs) { |
384 | if (!no_share && gs->rcv == rcv) | |
385 | gs->refcnt++; | |
386 | else | |
0b5e8b8e | 387 | gs = ERR_PTR(-EBUSY); |
829a3ada JG |
388 | } else { |
389 | gs = geneve_socket_create(net, port, rcv, data, ipv6); | |
390 | } | |
12069401 | 391 | |
829a3ada | 392 | mutex_unlock(&geneve_mutex); |
0b5e8b8e AZ |
393 | |
394 | return gs; | |
395 | } | |
396 | EXPORT_SYMBOL_GPL(geneve_sock_add); | |
397 | ||
398 | void geneve_sock_release(struct geneve_sock *gs) | |
399 | { | |
829a3ada | 400 | mutex_lock(&geneve_mutex); |
7ed767f7 | 401 | |
829a3ada JG |
402 | if (--gs->refcnt) |
403 | goto unlock; | |
0b5e8b8e | 404 | |
df5dba8e | 405 | list_del(&gs->list); |
7ed767f7 | 406 | geneve_notify_del_rx_port(gs); |
61f3cade JG |
407 | udp_tunnel_sock_release(gs->sock); |
408 | kfree_rcu(gs, rcu); | |
829a3ada JG |
409 | |
410 | unlock: | |
411 | mutex_unlock(&geneve_mutex); | |
0b5e8b8e AZ |
412 | } |
413 | EXPORT_SYMBOL_GPL(geneve_sock_release); | |
414 | ||
415 | static __net_init int geneve_init_net(struct net *net) | |
416 | { | |
417 | struct geneve_net *gn = net_generic(net, geneve_net_id); | |
0b5e8b8e | 418 | |
df5dba8e | 419 | INIT_LIST_HEAD(&gn->sock_list); |
0b5e8b8e AZ |
420 | |
421 | return 0; | |
422 | } | |
423 | ||
424 | static struct pernet_operations geneve_net_ops = { | |
425 | .init = geneve_init_net, | |
0b5e8b8e AZ |
426 | .id = &geneve_net_id, |
427 | .size = sizeof(struct geneve_net), | |
428 | }; | |
429 | ||
430 | static int __init geneve_init_module(void) | |
431 | { | |
432 | int rc; | |
433 | ||
0b5e8b8e AZ |
434 | rc = register_pernet_subsys(&geneve_net_ops); |
435 | if (rc) | |
436 | return rc; | |
437 | ||
438 | pr_info("Geneve driver\n"); | |
439 | ||
440 | return 0; | |
441 | } | |
829a3ada | 442 | module_init(geneve_init_module); |
0b5e8b8e AZ |
443 | |
/* Module unload: unregister the per-namespace operations that
 * geneve_init_module() registered.
 */
static void __exit geneve_cleanup_module(void)
{
	unregister_pernet_subsys(&geneve_net_ops);
}
module_exit(geneve_cleanup_module);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jesse Gross <jesse@nicira.com>");
MODULE_DESCRIPTION("Driver for GENEVE encapsulated traffic");
MODULE_ALIAS_RTNL_LINK("geneve");