]>
Commit | Line | Data |
---|---|---|
b2441318 | 1 | /* SPDX-License-Identifier: GPL-2.0 */ |
012a5729 PS |
2 | #ifndef __NET_VXLAN_H |
3 | #define __NET_VXLAN_H 1 | |
4 | ||
5f35227e | 5 | #include <linux/if_vlan.h> |
86a98057 | 6 | #include <net/udp_tunnel.h> |
ee122c79 | 7 | #include <net/dst_metadata.h> |
7c46a640 | 8 | #include <net/udp_tunnel.h> |
012a5729 | 9 | |
828788ac JB |
10 | /* VXLAN protocol (RFC 7348) header: |
11 | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |
12 | * |R|R|R|R|I|R|R|R| Reserved | | |
13 | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |
14 | * | VXLAN Network Identifier (VNI) | Reserved | | |
15 | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |
16 | * | |
17 | * I = VXLAN Network Identifier (VNI) present. | |
18 | */ | |
struct vxlanhdr {
	__be32 vx_flags;	/* first header word: flag bits (VXLAN_HF_*) and reserved bits */
	__be32 vx_vni;		/* second header word: 24-bit VNI in the upper bits, low byte reserved */
};
23 | ||
/* VXLAN header flags.
 *
 * VXLAN_HF_VNI is the "I" (VNI present) bit: bit 27 of the first header
 * word, expressed in network byte order like vx_flags itself.
 */
#define VXLAN_HF_VNI	cpu_to_be32(BIT(27))
828788ac JB |
26 | |
/* Number of distinct VNI values; the VNI field is 24 bits wide. */
#define VXLAN_N_VID	(1u << 24)
/* Host-order mask covering a 24-bit VNI. */
#define VXLAN_VID_MASK	(VXLAN_N_VID - 1)
/* Network-order mask selecting the VNI (upper 24 bits) of the vx_vni word. */
#define VXLAN_VNI_MASK	cpu_to_be32(VXLAN_VID_MASK << 8)
/* Combined size of the UDP header and the base VXLAN header. */
#define VXLAN_HLEN (sizeof(struct udphdr) + sizeof(struct vxlanhdr))
31 | ||
/* Hash table sizing: VNI_HASH_SIZE is the bucket count of
 * vxlan_sock::vni_list, FDB_HASH_SIZE that of vxlan_dev::fdb_head.
 */
#define VNI_HASH_BITS	10
#define VNI_HASH_SIZE	(1<<VNI_HASH_BITS)
#define FDB_HASH_BITS	8
#define FDB_HASH_SIZE	(1<<FDB_HASH_BITS)
36 | ||
37 | /* Remote checksum offload for VXLAN (VXLAN_F_REMCSUM_[RT]X): | |
38 | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |
39 | * |R|R|R|R|I|R|R|R|R|R|C| Reserved | | |
40 | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |
41 | * | VXLAN Network Identifier (VNI) |O| Csum start | | |
42 | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |
43 | * | |
44 | * C = Remote checksum offload bit. When set indicates that the | |
45 | * remote checksum offload data is present. | |
46 | * | |
47 | * O = Offset bit. Indicates the checksum offset relative to | |
48 | * checksum start. | |
49 | * | |
50 | * Csum start = Checksum start divided by two. | |
51 | * | |
52 | * http://tools.ietf.org/html/draft-herbert-vxlan-rco | |
53 | */ | |
54 | ||
/* VXLAN-RCO header flags.
 *
 * VXLAN_HF_RCO is the "C" (remote checksum offload) bit: bit 21 of the
 * first header word, expressed in network byte order.
 */
#define VXLAN_HF_RCO	cpu_to_be32(BIT(21))
828788ac JB |
57 | |
/* Remote checksum offload header option, carried in the low byte of the
 * vx_vni word.
 */
#define VXLAN_RCO_MASK	cpu_to_be32(0x7f) /* Last byte of vni field */
#define VXLAN_RCO_UDP	cpu_to_be32(0x80) /* Indicate UDP RCO (TCP when not set) */
#define VXLAN_RCO_SHIFT	1		   /* Left shift of start */
/* Low bits of a start value that "start >> VXLAN_RCO_SHIFT" discards. */
#define VXLAN_RCO_SHIFT_MASK	((1 << VXLAN_RCO_SHIFT) - 1)
/* Largest checksum start representable by the 7-bit field after shifting. */
#define VXLAN_MAX_REMCSUM_START	(0x7f << VXLAN_RCO_SHIFT)
828788ac | 64 | |
3511494c | 65 | /* |
828788ac | 66 | * VXLAN Group Based Policy Extension (VXLAN_F_GBP): |
3511494c | 67 | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |
828788ac | 68 | * |G|R|R|R|I|R|R|R|R|D|R|R|A|R|R|R| Group Policy ID | |
3511494c TG |
69 | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |
70 | * | VXLAN Network Identifier (VNI) | Reserved | | |
71 | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |
72 | * | |
828788ac JB |
73 | * G = Group Policy ID present. |
74 | * | |
3511494c TG |
75 | * D = Don't Learn bit. When set, this bit indicates that the egress |
76 | * VTEP MUST NOT learn the source address of the encapsulated frame. | |
77 | * | |
78 | * A = Indicates that the group policy has already been applied to | |
79 | * this packet. Policies MUST NOT be applied by devices when the | |
80 | * A bit is set. | |
81 | * | |
828788ac | 82 | * https://tools.ietf.org/html/draft-smith-vxlan-group-policy |
3511494c TG |
83 | */ |
struct vxlanhdr_gbp {
	u8	vx_flags;	/* first flag byte of the header word */
	/*
	 * Second flag byte, carrying the D (dont_learn) and A
	 * (policy_applied) bits.  The members are listed in opposite order
	 * for the two bitfield layouts so both map onto the same wire bits.
	 */
#ifdef __LITTLE_ENDIAN_BITFIELD
	u8	reserved_flags1:3,
		policy_applied:1,
		reserved_flags2:2,
		dont_learn:1,
		reserved_flags3:1;
#elif defined(__BIG_ENDIAN_BITFIELD)
	u8	reserved_flags1:1,
		dont_learn:1,
		reserved_flags2:2,
		policy_applied:1,
		reserved_flags3:3;
#else
#error	"Please fix <asm/byteorder.h>"
#endif
	__be16	policy_id;	/* 16-bit Group Policy ID, network byte order */
	__be32	vx_vni;		/* second header word, network byte order */
};
104 | ||
828788ac | 105 | /* VXLAN-GBP header flags. */ |
54bfd872 | 106 | #define VXLAN_HF_GBP cpu_to_be32(BIT(31)) |
828788ac | 107 | |
54bfd872 | 108 | #define VXLAN_GBP_USED_BITS (VXLAN_HF_GBP | cpu_to_be32(0xFFFFFF)) |
3511494c TG |
109 | |
110 | /* skb->mark mapping | |
111 | * | |
112 | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |
113 | * |R|R|R|R|R|R|R|R|R|D|R|R|A|R|R|R| Group Policy ID | | |
114 | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |
115 | */ | |
/* D (Don't Learn) bit of the 32-bit mark value. */
#define VXLAN_GBP_DONT_LEARN		(BIT(6) << 16)
/* A (policy already Applied) bit of the 32-bit mark value. */
#define VXLAN_GBP_POLICY_APPLIED	(BIT(3) << 16)
/* Low 16 bits of the mark value: the Group Policy ID. */
#define VXLAN_GBP_ID_MASK		(0xFFFF)
119 | ||
e1e5314d JB |
120 | /* |
121 | * VXLAN Generic Protocol Extension (VXLAN_F_GPE): | |
122 | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |
123 | * |R|R|Ver|I|P|R|O| Reserved |Next Protocol | | |
124 | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |
125 | * | VXLAN Network Identifier (VNI) | Reserved | | |
126 | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |
127 | * | |
128 | * Ver = Version. Indicates VXLAN GPE protocol version. | |
129 | * | |
130 | * P = Next Protocol Bit. The P bit is set to indicate that the | |
131 | * Next Protocol field is present. | |
132 | * | |
133 | * O = OAM Flag Bit. The O bit is set to indicate that the packet | |
134 | * is an OAM packet. | |
135 | * | |
136 | * Next Protocol = This 8 bit field indicates the protocol header | |
137 | * immediately following the VXLAN GPE header. | |
138 | * | |
139 | * https://tools.ietf.org/html/draft-ietf-nvo3-vxlan-gpe-01 | |
140 | */ | |
141 | ||
142 | struct vxlanhdr_gpe { | |
143 | #if defined(__LITTLE_ENDIAN_BITFIELD) | |
144 | u8 oam_flag:1, | |
145 | reserved_flags1:1, | |
146 | np_applied:1, | |
147 | instance_applied:1, | |
148 | version:2, | |
f1c8d372 | 149 | reserved_flags2:2; |
e1e5314d JB |
150 | #elif defined(__BIG_ENDIAN_BITFIELD) |
151 | u8 reserved_flags2:2, | |
152 | version:2, | |
153 | instance_applied:1, | |
154 | np_applied:1, | |
155 | reserved_flags1:1, | |
156 | oam_flag:1; | |
157 | #endif | |
158 | u8 reserved_flags3; | |
159 | u8 reserved_flags4; | |
160 | u8 next_protocol; | |
161 | __be32 vx_vni; | |
162 | }; | |
163 | ||
/* VXLAN-GPE header flags (bits of the first header word, network byte
 * order): VXLAN_HF_VER covers the two Ver bits, VXLAN_HF_NP is the P bit
 * and VXLAN_HF_OAM is the O bit.
 */
#define VXLAN_HF_VER	cpu_to_be32(BIT(29) | BIT(28))
#define VXLAN_HF_NP	cpu_to_be32(BIT(26))
#define VXLAN_HF_OAM	cpu_to_be32(BIT(24))

/* The GPE flags above plus the low byte holding Next Protocol. */
#define VXLAN_GPE_USED_BITS (VXLAN_HF_VER | VXLAN_HF_NP | VXLAN_HF_OAM | \
			     cpu_to_be32(0xff))
171 | ||
/* VXLAN metadata: holds a single 32-bit GBP value. */
struct vxlan_metadata {
	u32		gbp;	/* NOTE(review): presumably uses the VXLAN_GBP_* mark layout - confirm */
};
175 | ||
012a5729 PS |
/* per UDP socket information */
struct vxlan_sock {
	struct hlist_node hlist;	/* hash-list linkage; the list head is not visible in this header */
	struct socket	  *sock;	/* underlying socket (read by vxlan_get_sk_family()) */
	struct hlist_head vni_list[VNI_HASH_SIZE];	/* VNI hash buckets */
	refcount_t	  refcnt;	/* reference count */
	u32		  flags;	/* NOTE(review): presumably VXLAN_F_* bits - confirm */
};
184 | ||
0dfbdf41 TG |
/* One socket address, viewable as IPv4 (sin), IPv6 (sin6) or through the
 * generic struct sockaddr (sa).
 */
union vxlan_addr {
	struct sockaddr_in sin;
	struct sockaddr_in6 sin6;
	struct sockaddr sa;
};
190 | ||
/* Description of a remote destination; struct vxlan_dev embeds one as its
 * default destination (default_dst).
 */
struct vxlan_rdst {
	union vxlan_addr	 remote_ip;
	__be16			 remote_port;	/* network byte order */
	__be32			 remote_vni;	/* network byte order */
	u32			 remote_ifindex;
	struct list_head	 list;		/* list linkage; owner not visible in this header */
	struct rcu_head		 rcu;
	struct dst_cache	 dst_cache;
};
200 | ||
/* Device configuration: passed to vxlan_dev_create() and stored in
 * struct vxlan_dev as cfg.
 */
struct vxlan_config {
	union vxlan_addr	remote_ip;
	union vxlan_addr	saddr;
	__be32			vni;		/* network byte order */
	int			remote_ifindex;
	int			mtu;
	__be16			dst_port;	/* network byte order */
	u16			port_min;	/* host byte order, unlike dst_port */
	u16			port_max;	/* host byte order, unlike dst_port */
	u8			tos;
	u8			ttl;
	__be32			label;		/* network byte order */
	u32			flags;		/* NOTE(review): presumably VXLAN_F_* bits - confirm */
	unsigned long		age_interval;
	unsigned int		addrmax;
	bool			no_share;
};
218 | ||
69e76661 JB |
/* Hash-list node paired with a pointer to a vxlan_dev; struct vxlan_dev
 * embeds these as hlist4 and (with CONFIG_IPV6) hlist6.
 */
struct vxlan_dev_node {
	struct hlist_node hlist;
	struct vxlan_dev *vxlan;
};
223 | ||
0dfbdf41 TG |
/* Pseudo network device.
 *
 * The IPv6 members (hlist6, vn6_sock) exist only when CONFIG_IPV6 is
 * enabled.
 */
struct vxlan_dev {
	struct vxlan_dev_node hlist4;	/* vni hash table for IPv4 socket */
#if IS_ENABLED(CONFIG_IPV6)
	struct vxlan_dev_node hlist6;	/* vni hash table for IPv6 socket */
#endif
	struct list_head  next;		/* vxlan's per namespace list */
	struct vxlan_sock __rcu *vn4_sock;	/* listening socket for IPv4 */
#if IS_ENABLED(CONFIG_IPV6)
	struct vxlan_sock __rcu *vn6_sock;	/* listening socket for IPv6 */
#endif
	struct net_device *dev;
	struct net	  *net;		/* netns for packet i/o */
	struct vxlan_rdst default_dst;	/* default destination */

	struct ip_tunnel_fan fan;

	struct timer_list age_timer;
	spinlock_t	  hash_lock;
	unsigned int	  addrcnt;
	struct gro_cells  gro_cells;

	struct vxlan_config	cfg;	/* device configuration */

	struct hlist_head fdb_head[FDB_HASH_SIZE];	/* FDB hash buckets */
};
250 | ||
359a0ea9 TH |
/* VXLAN_F_* feature flags.  Every value is a distinct single bit, so the
 * flags can be OR-ed together into one mask (as VXLAN_F_RCV_FLAGS and
 * VXLAN_F_ALLOWED_GPE do).
 */
#define VXLAN_F_LEARN			0x01
#define VXLAN_F_PROXY			0x02
#define VXLAN_F_RSC			0x04
#define VXLAN_F_L2MISS			0x08
#define VXLAN_F_L3MISS			0x10
#define VXLAN_F_IPV6			0x20
#define VXLAN_F_UDP_ZERO_CSUM_TX	0x40
#define VXLAN_F_UDP_ZERO_CSUM6_TX	0x80
#define VXLAN_F_UDP_ZERO_CSUM6_RX	0x100
#define VXLAN_F_REMCSUM_TX		0x200
#define VXLAN_F_REMCSUM_RX		0x400
#define VXLAN_F_GBP			0x800
#define VXLAN_F_REMCSUM_NOPARTIAL	0x1000
#define VXLAN_F_COLLECT_METADATA	0x2000
#define VXLAN_F_GPE			0x4000
#define VXLAN_F_IPV6_LINKLOCAL		0x8000
359a0ea9 | 267 | |
/* Flags that are used in the receive path.  These flags must match in
 * order for a socket to be shareable.
 */
#define VXLAN_F_RCV_FLAGS		(VXLAN_F_GBP |			\
					 VXLAN_F_GPE |			\
					 VXLAN_F_UDP_ZERO_CSUM6_RX |	\
					 VXLAN_F_REMCSUM_RX |		\
					 VXLAN_F_REMCSUM_NOPARTIAL |	\
					 VXLAN_F_COLLECT_METADATA)
ac5132d1 | 277 | |
e1e5314d JB |
/* Flags that can be set together with VXLAN_F_GPE.  Any VXLAN_F_* bit
 * missing from this mask is therefore not allowed alongside GPE.
 */
#define VXLAN_F_ALLOWED_GPE		(VXLAN_F_GPE |			\
					 VXLAN_F_IPV6 |			\
					 VXLAN_F_IPV6_LINKLOCAL |	\
					 VXLAN_F_UDP_ZERO_CSUM_TX |	\
					 VXLAN_F_UDP_ZERO_CSUM6_TX |	\
					 VXLAN_F_UDP_ZERO_CSUM6_RX |	\
					 VXLAN_F_COLLECT_METADATA)
286 | ||
0dfbdf41 TG |
/* Declaration only; the definition lives outside this header.
 * Judging by the name and signature this creates a VXLAN net_device called
 * @name in namespace @net from the settings in @conf.
 * NOTE(review): confirm the semantics and the error-return convention
 * (NULL vs. ERR_PTR) against the definition.
 */
struct net_device *vxlan_dev_create(struct net *net, const char *name,
				    u8 name_assign_type, struct vxlan_config *conf);
289 | ||
5f35227e JG |
290 | static inline netdev_features_t vxlan_features_check(struct sk_buff *skb, |
291 | netdev_features_t features) | |
11bf7828 | 292 | { |
5f35227e JG |
293 | u8 l4_hdr = 0; |
294 | ||
295 | if (!skb->encapsulation) | |
296 | return features; | |
297 | ||
298 | switch (vlan_get_protocol(skb)) { | |
299 | case htons(ETH_P_IP): | |
300 | l4_hdr = ip_hdr(skb)->protocol; | |
301 | break; | |
302 | case htons(ETH_P_IPV6): | |
303 | l4_hdr = ipv6_hdr(skb)->nexthdr; | |
304 | break; | |
305 | default: | |
306 | return features;; | |
307 | } | |
308 | ||
309 | if ((l4_hdr == IPPROTO_UDP) && | |
11bf7828 JS |
310 | (skb->inner_protocol_type != ENCAP_TYPE_ETHER || |
311 | skb->inner_protocol != htons(ETH_P_TEB) || | |
312 | (skb_inner_mac_header(skb) - skb_transport_header(skb) != | |
af67eb9e AD |
313 | sizeof(struct udphdr) + sizeof(struct vxlanhdr)) || |
314 | (skb->ip_summed != CHECKSUM_NONE && | |
315 | !can_checksum_protocol(features, inner_eth_hdr(skb)->h_proto)))) | |
a188222b | 316 | return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK); |
11bf7828 | 317 | |
5f35227e | 318 | return features; |
11bf7828 | 319 | } |
23e62de3 | 320 | |
e6cd988c JG |
/* IP header + UDP + VXLAN + Ethernet header (sizes in bytes, in that order) */
#define VXLAN_HEADROOM (20 + 8 + 8 + 14)
/* IPv6 header + UDP + VXLAN + Ethernet header (sizes in bytes, in that order) */
#define VXLAN6_HEADROOM (40 + 8 + 8 + 14)
325 | ||
d4ac05ff JB |
326 | static inline struct vxlanhdr *vxlan_hdr(struct sk_buff *skb) |
327 | { | |
328 | return (struct vxlanhdr *)(udp_hdr(skb) + 1); | |
329 | } | |
330 | ||
54bfd872 JB |
331 | static inline __be32 vxlan_vni(__be32 vni_field) |
332 | { | |
333 | #if defined(__BIG_ENDIAN) | |
5692d7ea | 334 | return (__force __be32)((__force u32)vni_field >> 8); |
54bfd872 | 335 | #else |
5692d7ea | 336 | return (__force __be32)((__force u32)(vni_field & VXLAN_VNI_MASK) << 8); |
54bfd872 JB |
337 | #endif |
338 | } | |
339 | ||
340 | static inline __be32 vxlan_vni_field(__be32 vni) | |
341 | { | |
342 | #if defined(__BIG_ENDIAN) | |
5692d7ea | 343 | return (__force __be32)((__force u32)vni << 8); |
54bfd872 | 344 | #else |
5692d7ea | 345 | return (__force __be32)((__force u32)vni >> 8); |
54bfd872 JB |
346 | #endif |
347 | } | |
348 | ||
54bfd872 JB |
349 | static inline size_t vxlan_rco_start(__be32 vni_field) |
350 | { | |
351 | return be32_to_cpu(vni_field & VXLAN_RCO_MASK) << VXLAN_RCO_SHIFT; | |
352 | } | |
353 | ||
354 | static inline size_t vxlan_rco_offset(__be32 vni_field) | |
355 | { | |
356 | return (vni_field & VXLAN_RCO_UDP) ? | |
357 | offsetof(struct udphdr, check) : | |
358 | offsetof(struct tcphdr, check); | |
359 | } | |
360 | ||
361 | static inline __be32 vxlan_compute_rco(unsigned int start, unsigned int offset) | |
362 | { | |
363 | __be32 vni_field = cpu_to_be32(start >> VXLAN_RCO_SHIFT); | |
364 | ||
365 | if (offset == offsetof(struct udphdr, check)) | |
366 | vni_field |= VXLAN_RCO_UDP; | |
367 | return vni_field; | |
368 | } | |
369 | ||
705cc62f JB |
370 | static inline unsigned short vxlan_get_sk_family(struct vxlan_sock *vs) |
371 | { | |
372 | return vs->sock->sk->sk_family; | |
373 | } | |
48e92c44 JB |
374 | |
375 | #endif |