]>
Commit | Line | Data |
---|---|---|
012a5729 PS |
1 | #ifndef __NET_VXLAN_H |
2 | #define __NET_VXLAN_H 1 | |
3 | ||
5f35227e JG |
4 | #include <linux/ip.h> |
5 | #include <linux/ipv6.h> | |
6 | #include <linux/if_vlan.h> | |
012a5729 PS |
7 | #include <linux/skbuff.h> |
8 | #include <linux/netdevice.h> | |
9 | #include <linux/udp.h> | |
ee122c79 | 10 | #include <net/dst_metadata.h> |
012a5729 PS |
11 | |
/* VNI hash table geometry: bit width and the bucket count derived from it. */
#define VNI_HASH_BITS	10
#define VNI_HASH_SIZE	(1 << VNI_HASH_BITS)
14 | ||
3511494c TG |
15 | /* |
16 | * VXLAN Group Based Policy Extension: | |
17 | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |
18 | * |1|-|-|-|1|-|-|-|R|D|R|R|A|R|R|R| Group Policy ID | | |
19 | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |
20 | * | VXLAN Network Identifier (VNI) | Reserved | | |
21 | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |
22 | * | |
23 | * D = Don't Learn bit. When set, this bit indicates that the egress | |
24 | * VTEP MUST NOT learn the source address of the encapsulated frame. | |
25 | * | |
26 | * A = Indicates that the group policy has already been applied to | |
27 | * this packet. Policies MUST NOT be applied by devices when the | |
28 | * A bit is set. | |
29 | * | |
30 | * [0] https://tools.ietf.org/html/draft-smith-vxlan-group-policy | |
31 | */ | |
32 | struct vxlanhdr_gbp { | |
33 | __u8 vx_flags; | |
34 | #ifdef __LITTLE_ENDIAN_BITFIELD | |
35 | __u8 reserved_flags1:3, | |
36 | policy_applied:1, | |
37 | reserved_flags2:2, | |
38 | dont_learn:1, | |
39 | reserved_flags3:1; | |
40 | #elif defined(__BIG_ENDIAN_BITFIELD) | |
41 | __u8 reserved_flags1:1, | |
42 | dont_learn:1, | |
43 | reserved_flags2:2, | |
44 | policy_applied:1, | |
45 | reserved_flags3:3; | |
46 | #else | |
47 | #error "Please fix <asm/byteorder.h>" | |
48 | #endif | |
49 | __be16 policy_id; | |
50 | __be32 vx_vni; | |
51 | }; | |
52 | ||
/* GBP header flag plus the low 24 bits of the first header word. */
#define VXLAN_GBP_USED_BITS		(VXLAN_HF_GBP | 0xFFFFFF)

/*
 * skb->mark mapping
 *
 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 * |R|R|R|R|R|R|R|R|R|D|R|R|A|R|R|R|        Group Policy ID        |
 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 *
 * D and A sit in the second byte of the word (hence the shift by 16);
 * the Group Policy ID occupies the low 16 bits.
 */
#define VXLAN_GBP_DONT_LEARN		(BIT(6) << 16)
#define VXLAN_GBP_POLICY_APPLIED	(BIT(3) << 16)
#define VXLAN_GBP_ID_MASK		(0xFFFF)
64 | ||
65 | /* VXLAN protocol header: | |
66 | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |
67 | * |G|R|R|R|I|R|R|C| Reserved | | |
68 | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |
69 | * | VXLAN Network Identifier (VNI) | Reserved | | |
70 | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |
71 | * | |
72 | * G = 1 Group Policy (VXLAN-GBP) | |
73 | * I = 1 VXLAN Network Identifier (VNI) present | |
74 | * C = 1 Remote checksum offload (RCO) | |
75 | */ | |
11bf7828 JS |
76 | struct vxlanhdr { |
77 | __be32 vx_flags; | |
78 | __be32 vx_vni; | |
79 | }; | |
80 | ||
/*
 * VXLAN header flags.
 *
 * Bit positions correspond to the C, I and G bits of the first header
 * word as drawn in the VXLAN protocol header diagram (G is the most
 * significant bit).
 */
#define VXLAN_HF_RCO	BIT(24)		/* C: remote checksum offload */
#define VXLAN_HF_VNI	BIT(27)		/* I: VNI present */
#define VXLAN_HF_GBP	BIT(31)		/* G: group policy extension */
dfd8645e TH |
85 | |
/* Remote checksum offload header option */
#define VXLAN_RCO_MASK		0x7f	/* Last byte of vni field */
#define VXLAN_RCO_UDP		0x80	/* Indicate UDP RCO (TCP when not set) */
#define VXLAN_RCO_SHIFT		1	/* Left shift of start */

/* Low bits of a start value that are lost by the shift above. */
#define VXLAN_RCO_SHIFT_MASK	((1 << VXLAN_RCO_SHIFT) - 1)

/* Largest start value that fits in VXLAN_RCO_MASK once shifted back. */
#define VXLAN_MAX_REMCSUM_START	(VXLAN_RCO_MASK << VXLAN_RCO_SHIFT)
3bf39475 TH |
92 | |
/* Size of the 24-bit VNI space and the mask selecting a host-order VNI. */
#define VXLAN_N_VID	(1u << 24)
#define VXLAN_VID_MASK	(VXLAN_N_VID - 1)

/* VID mask moved up by 8 bits, past the trailing reserved byte of the word. */
#define VXLAN_VNI_MASK	(VXLAN_VID_MASK << 8)

/* Bytes of UDP header plus VXLAN header. */
#define VXLAN_HLEN	(sizeof(struct udphdr) + sizeof(struct vxlanhdr))
97 | ||
/*
 * Forwarding database hash table geometry: bit width and the bucket
 * count derived from it.
 *
 * VNI_HASH_BITS and VNI_HASH_SIZE are defined once, near the top of this
 * header; the redundant second definition that used to sit here has been
 * dropped.
 */
#define FDB_HASH_BITS	8
#define FDB_HASH_SIZE	(1 << FDB_HASH_BITS)
102 | ||
3511494c TG |
103 | struct vxlan_metadata { |
104 | __be32 vni; | |
105 | u32 gbp; | |
ee122c79 TG |
106 | |
107 | /* Temporary until vxlan_rcv() API is gone */ | |
108 | struct metadata_dst *tun_dst; | |
3511494c TG |
109 | }; |
110 | ||
struct vxlan_sock;

/*
 * Receive callback type: invoked with the VXLAN socket, the packet and
 * the metadata describing it. Stored as a pointer in struct vxlan_sock.
 */
typedef void (vxlan_rcv_t)(struct vxlan_sock *vh,
			   struct sk_buff *skb,
			   struct vxlan_metadata *md);
012a5729 PS |
114 | |
115 | /* per UDP socket information */ | |
116 | struct vxlan_sock { | |
117 | struct hlist_node hlist; | |
118 | vxlan_rcv_t *rcv; | |
119 | void *data; | |
120 | struct work_struct del_work; | |
121 | struct socket *sock; | |
122 | struct rcu_head rcu; | |
123 | struct hlist_head vni_list[VNI_HASH_SIZE]; | |
124 | atomic_t refcnt; | |
dc01e7d3 | 125 | struct udp_offload udp_offloads; |
dfd8645e | 126 | u32 flags; |
012a5729 PS |
127 | }; |
128 | ||
/*
 * Address storage that can be viewed as an IPv4 socket address, an IPv6
 * socket address, or a generic struct sockaddr.
 */
union vxlan_addr {
	struct sockaddr_in	sin;	/* IPv4 view */
	struct sockaddr_in6	sin6;	/* IPv6 view */
	struct sockaddr		sa;	/* family-agnostic view */
};
134 | ||
135 | struct vxlan_rdst { | |
136 | union vxlan_addr remote_ip; | |
137 | __be16 remote_port; | |
138 | u32 remote_vni; | |
139 | u32 remote_ifindex; | |
140 | struct list_head list; | |
141 | struct rcu_head rcu; | |
142 | }; | |
143 | ||
144 | struct vxlan_config { | |
145 | union vxlan_addr remote_ip; | |
146 | union vxlan_addr saddr; | |
147 | u32 vni; | |
148 | int remote_ifindex; | |
149 | int mtu; | |
150 | __be16 dst_port; | |
151 | __u16 port_min; | |
152 | __u16 port_max; | |
153 | __u8 tos; | |
154 | __u8 ttl; | |
155 | u32 flags; | |
156 | unsigned long age_interval; | |
157 | unsigned int addrmax; | |
158 | bool no_share; | |
159 | }; | |
160 | ||
161 | /* Pseudo network device */ | |
162 | struct vxlan_dev { | |
163 | struct hlist_node hlist; /* vni hash table */ | |
164 | struct list_head next; /* vxlan's per namespace list */ | |
165 | struct vxlan_sock *vn_sock; /* listening socket */ | |
166 | struct net_device *dev; | |
167 | struct net *net; /* netns for packet i/o */ | |
168 | struct vxlan_rdst default_dst; /* default destination */ | |
169 | u32 flags; /* VXLAN_F_* in vxlan.h */ | |
170 | ||
171 | struct timer_list age_timer; | |
172 | spinlock_t hash_lock; | |
173 | unsigned int addrcnt; | |
174 | ||
175 | struct vxlan_config cfg; | |
176 | ||
177 | struct hlist_head fdb_head[FDB_HASH_SIZE]; | |
178 | }; | |
179 | ||
/* VXLAN_F_* feature flags; each value is a distinct single bit. */
#define VXLAN_F_LEARN			0x0001
#define VXLAN_F_PROXY			0x0002
#define VXLAN_F_RSC			0x0004
#define VXLAN_F_L2MISS			0x0008
#define VXLAN_F_L3MISS			0x0010
#define VXLAN_F_IPV6			0x0020
#define VXLAN_F_UDP_CSUM		0x0040
#define VXLAN_F_UDP_ZERO_CSUM6_TX	0x0080
#define VXLAN_F_UDP_ZERO_CSUM6_RX	0x0100
#define VXLAN_F_REMCSUM_TX		0x0200
#define VXLAN_F_REMCSUM_RX		0x0400
#define VXLAN_F_GBP			0x0800
#define VXLAN_F_REMCSUM_NOPARTIAL	0x1000
#define VXLAN_F_COLLECT_METADATA	0x2000
#define VXLAN_F_FLOW_BASED		0x4000

/*
 * Flags that are used in the receive path. These flags must match in
 * order for a socket to be shareable.
 */
#define VXLAN_F_RCV_FLAGS	(VXLAN_F_UDP_ZERO_CSUM6_RX |	\
				 VXLAN_F_REMCSUM_RX |		\
				 VXLAN_F_GBP |			\
				 VXLAN_F_REMCSUM_NOPARTIAL |	\
				 VXLAN_F_COLLECT_METADATA |	\
				 VXLAN_F_FLOW_BASED)
ac5132d1 | 205 | |
012a5729 PS |
206 | struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port, |
207 | vxlan_rcv_t *rcv, void *data, | |
359a0ea9 | 208 | bool no_share, u32 flags); |
012a5729 | 209 | |
0dfbdf41 TG |
210 | struct net_device *vxlan_dev_create(struct net *net, const char *name, |
211 | u8 name_assign_type, struct vxlan_config *conf); | |
212 | ||
012a5729 | 213 | void vxlan_sock_release(struct vxlan_sock *vs); |
49560532 | 214 | |
79b16aad | 215 | int vxlan_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *skb, |
49560532 | 216 | __be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df, |
3511494c | 217 | __be16 src_port, __be16 dst_port, struct vxlan_metadata *md, |
af33c1ad | 218 | bool xnet, u32 vxflags); |
49560532 | 219 | |
5f35227e JG |
220 | static inline netdev_features_t vxlan_features_check(struct sk_buff *skb, |
221 | netdev_features_t features) | |
11bf7828 | 222 | { |
5f35227e JG |
223 | u8 l4_hdr = 0; |
224 | ||
225 | if (!skb->encapsulation) | |
226 | return features; | |
227 | ||
228 | switch (vlan_get_protocol(skb)) { | |
229 | case htons(ETH_P_IP): | |
230 | l4_hdr = ip_hdr(skb)->protocol; | |
231 | break; | |
232 | case htons(ETH_P_IPV6): | |
233 | l4_hdr = ipv6_hdr(skb)->nexthdr; | |
234 | break; | |
235 | default: | |
236 | return features;; | |
237 | } | |
238 | ||
239 | if ((l4_hdr == IPPROTO_UDP) && | |
11bf7828 JS |
240 | (skb->inner_protocol_type != ENCAP_TYPE_ETHER || |
241 | skb->inner_protocol != htons(ETH_P_TEB) || | |
242 | (skb_inner_mac_header(skb) - skb_transport_header(skb) != | |
243 | sizeof(struct udphdr) + sizeof(struct vxlanhdr)))) | |
5f35227e | 244 | return features & ~(NETIF_F_ALL_CSUM | NETIF_F_GSO_MASK); |
11bf7828 | 245 | |
5f35227e | 246 | return features; |
11bf7828 | 247 | } |
23e62de3 | 248 | |
/*
 * Encapsulation headroom in bytes:
 *   IPv4: IP header (20) + UDP (8) + VXLAN (8) + Ethernet header (14)
 *   IPv6: IPv6 header (40) + UDP (8) + VXLAN (8) + Ethernet header (14)
 */
#define VXLAN_HEADROOM	(20 + 8 + 8 + 14)
#define VXLAN6_HEADROOM	(40 + 8 + 8 + 14)
253 | ||
254 | #if IS_ENABLED(CONFIG_VXLAN) | |
53cf5275 | 255 | void vxlan_get_rx_port(struct net_device *netdev); |
e6cd988c JG |
256 | #else |
257 | static inline void vxlan_get_rx_port(struct net_device *netdev) | |
258 | { | |
259 | } | |
260 | #endif | |
012a5729 | 261 | #endif |