]>
Commit | Line | Data |
---|---|---|
1b7ee51f PS |
1 | /* |
2 | * Copyright (c) 2007-2013 Nicira, Inc. | |
3 | * | |
4 | * This program is free software; you can redistribute it and/or | |
5 | * modify it under the terms of version 2 of the GNU General Public | |
6 | * License as published by the Free Software Foundation. | |
7 | * | |
8 | * This program is distributed in the hope that it will be useful, but | |
9 | * WITHOUT ANY WARRANTY; without even the implied warranty of | |
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
11 | * General Public License for more details. | |
12 | * | |
13 | * You should have received a copy of the GNU General Public License | |
14 | * along with this program; if not, write to the Free Software | |
15 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA | |
16 | * 02110-1301, USA | |
17 | * | |
18 | * This code is derived from kernel vxlan module. | |
19 | */ | |
20 | ||
29c71cfa | 21 | #include <linux/version.h> |
29c71cfa | 22 | |
1b7ee51f PS |
23 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt |
24 | ||
25 | #include <linux/kernel.h> | |
26 | #include <linux/types.h> | |
27 | #include <linux/module.h> | |
28 | #include <linux/errno.h> | |
29 | #include <linux/slab.h> | |
30 | #include <linux/skbuff.h> | |
31 | #include <linux/rculist.h> | |
32 | #include <linux/netdevice.h> | |
33 | #include <linux/in.h> | |
34 | #include <linux/ip.h> | |
35 | #include <linux/udp.h> | |
36 | #include <linux/igmp.h> | |
37 | #include <linux/etherdevice.h> | |
38 | #include <linux/if_ether.h> | |
39 | #include <linux/if_vlan.h> | |
40 | #include <linux/hash.h> | |
41 | #include <linux/ethtool.h> | |
42 | #include <net/arp.h> | |
43 | #include <net/ndisc.h> | |
44 | #include <net/ip.h> | |
705e9260 | 45 | #include <net/gre.h> |
1b7ee51f PS |
46 | #include <net/ip_tunnels.h> |
47 | #include <net/icmp.h> | |
48 | #include <net/udp.h> | |
131971ce | 49 | #include <net/udp_tunnel.h> |
1b7ee51f PS |
50 | #include <net/rtnetlink.h> |
51 | #include <net/route.h> | |
52 | #include <net/dsfield.h> | |
53 | #include <net/inet_ecn.h> | |
54 | #include <net/net_namespace.h> | |
55 | #include <net/netns/generic.h> | |
56 | #include <net/vxlan.h> | |
57 | ||
1b7ee51f | 58 | #include "compat.h" |
b2f02418 | 59 | #include "datapath.h" |
1b7ee51f PS |
60 | #include "gso.h" |
61 | #include "vlan.h" | |
62 | ||
b22a8d87 JG |
63 | #ifndef USE_UPSTREAM_VXLAN |
64 | ||
1b7ee51f PS |
65 | /* VXLAN protocol header */ |
66 | struct vxlanhdr { | |
67 | __be32 vx_flags; | |
68 | __be32 vx_vni; | |
69 | }; | |
70 | ||
1b7ee51f PS |
71 | /* Callback from net/ipv4/udp.c to receive packets */ |
72 | static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb) | |
73 | { | |
1b7ee51f PS |
74 | struct vxlan_sock *vs; |
75 | struct vxlanhdr *vxh; | |
ababf424 | 76 | u32 flags, vni; |
3174a818 | 77 | struct vxlan_metadata md = {0}; |
1b7ee51f PS |
78 | |
79 | /* Need Vxlan and inner Ethernet header to be present */ | |
80 | if (!pskb_may_pull(skb, VXLAN_HLEN)) | |
81 | goto error; | |
82 | ||
1b7ee51f | 83 | vxh = (struct vxlanhdr *)(udp_hdr(skb) + 1); |
ababf424 TG |
84 | flags = ntohl(vxh->vx_flags); |
85 | vni = ntohl(vxh->vx_vni); | |
86 | ||
87 | if (flags & VXLAN_HF_VNI) { | |
88 | flags &= ~VXLAN_HF_VNI; | |
89 | } else { | |
90 | /* VNI flag always required to be set */ | |
91 | goto bad_flags; | |
1b7ee51f PS |
92 | } |
93 | ||
94 | if (iptunnel_pull_header(skb, VXLAN_HLEN, htons(ETH_P_TEB))) | |
95 | goto drop; | |
96 | ||
b2f02418 | 97 | vs = rcu_dereference_sk_user_data(sk); |
1b7ee51f PS |
98 | if (!vs) |
99 | goto drop; | |
100 | ||
3174a818 TG |
101 | /* For backwards compatibility, only allow reserved fields to be |
102 | * used by VXLAN extensions if explicitly requested. | |
103 | */ | |
104 | if ((flags & VXLAN_HF_GBP) && (vs->flags & VXLAN_F_GBP)) { | |
105 | struct vxlanhdr_gbp *gbp; | |
106 | ||
107 | gbp = (struct vxlanhdr_gbp *)vxh; | |
108 | md.gbp = ntohs(gbp->policy_id); | |
109 | ||
110 | if (gbp->dont_learn) | |
111 | md.gbp |= VXLAN_GBP_DONT_LEARN; | |
112 | ||
113 | if (gbp->policy_applied) | |
114 | md.gbp |= VXLAN_GBP_POLICY_APPLIED; | |
115 | ||
116 | flags &= ~VXLAN_GBP_USED_BITS; | |
117 | } | |
118 | ||
ababf424 TG |
119 | if (flags || (vni & 0xff)) { |
120 | /* If there are any unprocessed flags remaining treat | |
121 | * this as a malformed packet. This behavior diverges from | |
122 | * VXLAN RFC (RFC7348) which stipulates that bits in reserved | |
123 | * in reserved fields are to be ignored. The approach here | |
124 | * maintains compatbility with previous stack code, and also | |
125 | * is more robust and provides a little more security in | |
126 | * adding extensions to VXLAN. | |
127 | */ | |
128 | ||
129 | goto bad_flags; | |
130 | } | |
131 | ||
3174a818 TG |
132 | md.vni = vxh->vx_vni; |
133 | vs->rcv(vs, skb, &md); | |
a109c9fb | 134 | return 0; |
1b7ee51f PS |
135 | |
136 | drop: | |
137 | /* Consume bad packet */ | |
138 | kfree_skb(skb); | |
139 | return 0; | |
ababf424 TG |
140 | bad_flags: |
141 | pr_debug("invalid vxlan flags=%#x vni=%#x\n", | |
142 | ntohl(vxh->vx_flags), ntohl(vxh->vx_vni)); | |
1b7ee51f PS |
143 | |
144 | error: | |
145 | /* Return non vxlan pkt */ | |
146 | return 1; | |
147 | } | |
148 | ||
149 | static void vxlan_sock_put(struct sk_buff *skb) | |
150 | { | |
151 | sock_put(skb->sk); | |
152 | } | |
153 | ||
154 | /* On transmit, associate with the tunnel socket */ | |
155 | static void vxlan_set_owner(struct sock *sk, struct sk_buff *skb) | |
156 | { | |
157 | skb_orphan(skb); | |
158 | sock_hold(sk); | |
159 | skb->sk = sk; | |
160 | skb->destructor = vxlan_sock_put; | |
161 | } | |
162 | ||
44e1f2a8 | 163 | static void vxlan_build_gbp_hdr(struct vxlanhdr *vxh, u32 vxflags, |
3174a818 TG |
164 | struct vxlan_metadata *md) |
165 | { | |
166 | struct vxlanhdr_gbp *gbp; | |
167 | ||
f1063b75 TG |
168 | if (!md->gbp) |
169 | return; | |
170 | ||
3174a818 TG |
171 | gbp = (struct vxlanhdr_gbp *)vxh; |
172 | vxh->vx_flags |= htonl(VXLAN_HF_GBP); | |
173 | ||
174 | if (md->gbp & VXLAN_GBP_DONT_LEARN) | |
175 | gbp->dont_learn = 1; | |
176 | ||
177 | if (md->gbp & VXLAN_GBP_POLICY_APPLIED) | |
178 | gbp->policy_applied = 1; | |
179 | ||
180 | gbp->policy_id = htons(md->gbp & VXLAN_GBP_ID_MASK); | |
181 | } | |
182 | ||
c0cddcec | 183 | int rpl_vxlan_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *skb, |
bedf02f4 AW |
184 | __be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df, |
185 | __be16 src_port, __be16 dst_port, | |
186 | struct vxlan_metadata *md, bool xnet, u32 vxflags) | |
1b7ee51f PS |
187 | { |
188 | struct vxlanhdr *vxh; | |
1b7ee51f PS |
189 | int min_headroom; |
190 | int err; | |
2d79a600 | 191 | bool udp_sum = !!(vxflags & VXLAN_F_UDP_CSUM); |
1b7ee51f | 192 | |
1b7ee51f PS |
193 | min_headroom = LL_RESERVED_SPACE(rt_dst(rt).dev) + rt_dst(rt).header_len |
194 | + VXLAN_HLEN + sizeof(struct iphdr) | |
efd8a18e | 195 | + (skb_vlan_tag_present(skb) ? VLAN_HLEN : 0); |
1b7ee51f PS |
196 | |
197 | /* Need space for new headers (invalidates iph ptr) */ | |
198 | err = skb_cow_head(skb, min_headroom); | |
93258bd7 PS |
199 | if (unlikely(err)) { |
200 | kfree_skb(skb); | |
1b7ee51f | 201 | return err; |
93258bd7 | 202 | } |
1b7ee51f | 203 | |
90d8188c JG |
204 | skb = vlan_hwaccel_push_inside(skb); |
205 | if (WARN_ON(!skb)) | |
206 | return -ENOMEM; | |
1b7ee51f | 207 | |
b13c9440 JG |
208 | skb = udp_tunnel_handle_offloads(skb, udp_sum, true); |
209 | if (IS_ERR(skb)) | |
210 | return PTR_ERR(skb); | |
211 | ||
1b7ee51f | 212 | vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh)); |
ababf424 | 213 | vxh->vx_flags = htonl(VXLAN_HF_VNI); |
3174a818 TG |
214 | vxh->vx_vni = md->vni; |
215 | ||
44e1f2a8 TG |
216 | if (vxflags & VXLAN_F_GBP) |
217 | vxlan_build_gbp_hdr(vxh, vxflags, md); | |
1b7ee51f | 218 | |
c0cddcec | 219 | vxlan_set_owner(sk, skb); |
1b7ee51f | 220 | |
e4bafe59 JG |
221 | ovs_skb_set_inner_protocol(skb, htons(ETH_P_TEB)); |
222 | ||
c0cddcec | 223 | return udp_tunnel_xmit_skb(rt, sk, skb, src, dst, tos, |
7bdcee3e | 224 | ttl, df, src_port, dst_port, xnet, |
2d79a600 | 225 | !udp_sum); |
1b7ee51f | 226 | } |
bedf02f4 | 227 | EXPORT_SYMBOL_GPL(rpl_vxlan_xmit_skb); |
1b7ee51f | 228 | |
a109c9fb PS |
229 | static void rcu_free_vs(struct rcu_head *rcu) |
230 | { | |
231 | struct vxlan_sock *vs = container_of(rcu, struct vxlan_sock, rcu); | |
232 | ||
233 | kfree(vs); | |
234 | } | |
235 | ||
236 | static void vxlan_del_work(struct work_struct *work) | |
237 | { | |
238 | struct vxlan_sock *vs = container_of(work, struct vxlan_sock, del_work); | |
239 | ||
b22a8d87 | 240 | udp_tunnel_sock_release(vs->sock); |
a109c9fb | 241 | call_rcu(&vs->rcu, rcu_free_vs); |
a109c9fb PS |
242 | } |
243 | ||
131971ce JG |
244 | static struct socket *vxlan_create_sock(struct net *net, bool ipv6, |
245 | __be16 port, u32 flags) | |
246 | { | |
247 | struct socket *sock; | |
248 | struct udp_port_cfg udp_conf; | |
249 | int err; | |
250 | ||
251 | memset(&udp_conf, 0, sizeof(udp_conf)); | |
252 | ||
253 | if (ipv6) { | |
254 | udp_conf.family = AF_INET6; | |
255 | /* The checksum flag is silently ignored but it | |
256 | * doesn't make sense here anyways because OVS enables | |
257 | * checksums on a finer granularity than per-socket. | |
258 | */ | |
259 | } else { | |
260 | udp_conf.family = AF_INET; | |
261 | udp_conf.local_ip.s_addr = htonl(INADDR_ANY); | |
262 | } | |
263 | ||
264 | udp_conf.local_udp_port = port; | |
265 | ||
266 | /* Open UDP socket */ | |
267 | err = udp_sock_create(net, &udp_conf, &sock); | |
268 | if (err < 0) | |
269 | return ERR_PTR(err); | |
270 | ||
131971ce JG |
271 | return sock; |
272 | } | |
273 | ||
a109c9fb | 274 | static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port, |
3174a818 | 275 | vxlan_rcv_t *rcv, void *data, u32 flags) |
1b7ee51f PS |
276 | { |
277 | struct vxlan_sock *vs; | |
131971ce | 278 | struct socket *sock; |
b22a8d87 | 279 | struct udp_tunnel_sock_cfg tunnel_cfg; |
1b7ee51f PS |
280 | |
281 | vs = kmalloc(sizeof(*vs), GFP_KERNEL); | |
a109c9fb PS |
282 | if (!vs) { |
283 | pr_debug("memory alocation failure\n"); | |
1b7ee51f | 284 | return ERR_PTR(-ENOMEM); |
a109c9fb PS |
285 | } |
286 | ||
287 | INIT_WORK(&vs->del_work, vxlan_del_work); | |
1b7ee51f | 288 | |
131971ce JG |
289 | sock = vxlan_create_sock(net, false, port, flags); |
290 | if (IS_ERR(sock)) { | |
1b7ee51f | 291 | kfree(vs); |
131971ce | 292 | return ERR_CAST(sock); |
1b7ee51f PS |
293 | } |
294 | ||
131971ce | 295 | vs->sock = sock; |
a109c9fb PS |
296 | vs->rcv = rcv; |
297 | vs->data = data; | |
44e1f2a8 | 298 | vs->flags = (flags & VXLAN_F_RCV_FLAGS); |
1b7ee51f | 299 | |
b22a8d87 JG |
300 | tunnel_cfg.sk_user_data = vs; |
301 | tunnel_cfg.encap_type = 1; | |
302 | tunnel_cfg.encap_rcv = vxlan_udp_encap_recv; | |
303 | tunnel_cfg.encap_destroy = NULL; | |
304 | ||
305 | setup_udp_tunnel_sock(net, sock, &tunnel_cfg); | |
1b7ee51f | 306 | |
1b7ee51f PS |
307 | return vs; |
308 | } | |
309 | ||
bedf02f4 AW |
310 | struct vxlan_sock *rpl_vxlan_sock_add(struct net *net, __be16 port, |
311 | vxlan_rcv_t *rcv, void *data, | |
312 | bool no_share, u32 flags) | |
1b7ee51f | 313 | { |
3174a818 | 314 | return vxlan_socket_create(net, port, rcv, data, flags); |
1b7ee51f | 315 | } |
bedf02f4 | 316 | EXPORT_SYMBOL_GPL(rpl_vxlan_sock_add); |
1b7ee51f | 317 | |
bedf02f4 | 318 | void rpl_vxlan_sock_release(struct vxlan_sock *vs) |
1b7ee51f | 319 | { |
b2f02418 | 320 | ASSERT_OVSL(); |
1b7ee51f | 321 | |
533e96e7 | 322 | queue_work(system_wq, &vs->del_work); |
1b7ee51f | 323 | } |
bedf02f4 | 324 | EXPORT_SYMBOL_GPL(rpl_vxlan_sock_release); |
29c71cfa | 325 | |
4ceb6c87 | 326 | #endif /* !USE_UPSTREAM_VXLAN */ |