]>
Commit | Line | Data |
---|---|---|
d1eb60cc | 1 | /* |
e0edde6f | 2 | * Copyright (c) 2007-2012 Nicira, Inc. |
d1eb60cc | 3 | * |
a9a29d22 JG |
4 | * This program is free software; you can redistribute it and/or |
5 | * modify it under the terms of version 2 of the GNU General Public | |
6 | * License as published by the Free Software Foundation. | |
7 | * | |
8 | * This program is distributed in the hope that it will be useful, but | |
9 | * WITHOUT ANY WARRANTY; without even the implied warranty of | |
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
11 | * General Public License for more details. | |
12 | * | |
13 | * You should have received a copy of the GNU General Public License | |
14 | * along with this program; if not, write to the Free Software | |
15 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA | |
16 | * 02110-1301, USA | |
d1eb60cc JG |
17 | */ |
18 | ||
19 | #ifndef TUNNEL_H | |
20 | #define TUNNEL_H 1 | |
21 | ||
842cf6f4 | 22 | #include <linux/version.h> |
2a4999f3 PS |
23 | #include <net/net_namespace.h> |
24 | #include <net/netns/generic.h> | |
842cf6f4 JG |
25 | |
26 | #include "flow.h" | |
d1eb60cc | 27 | #include "openvswitch/tunnel.h" |
d1eb60cc JG |
28 | #include "vport.h" |
29 | ||
5214f5c4 JG |
30 | /* |
31 | * The absolute minimum fragment size. Note that there are many other | |
32 | * definitions of the minimum MTU. | |
33 | */ | |
d1eb60cc JG |
34 | #define IP_MIN_MTU 68 |
35 | ||
36 | /* | |
842cf6f4 | 37 | * One of these goes in struct tnl_ops and in ovs_tnl_find_port(). |
d1eb60cc | 38 | * These values are in the same namespace as other TNL_T_* values, so |
842cf6f4 JG |
39 | * only the least significant 10 bits are available to define protocol |
40 | * identifiers. | |
d1eb60cc JG |
41 | */ |
42 | #define TNL_T_PROTO_GRE 0 | |
e90b1cf9 | 43 | #define TNL_T_PROTO_CAPWAP 1 |
d1eb60cc | 44 | |
842cf6f4 | 45 | /* These flags are only needed when calling ovs_tnl_find_port(). */ |
d1eb60cc JG |
46 | #define TNL_T_KEY_EXACT (1 << 10) |
47 | #define TNL_T_KEY_MATCH (1 << 11) | |
d1eb60cc | 48 | |
c19e6535 | 49 | /* Private flags not exposed to userspace in this form. */ |
6455100f PS |
50 | #define TNL_F_IN_KEY_MATCH (1 << 16) /* Store the key in tun_id to |
51 | * match in flow table. */ | |
52 | #define TNL_F_OUT_KEY_ACTION (1 << 17) /* Get the key from a SET_TUNNEL | |
53 | * action. */ | |
c19e6535 BP |
54 | |
55 | /* All public tunnel flags. */ | |
56 | #define TNL_F_PUBLIC (TNL_F_CSUM | TNL_F_TOS_INHERIT | TNL_F_TTL_INHERIT | \ | |
dc432c2e AE |
57 | TNL_F_DF_INHERIT | TNL_F_DF_DEFAULT | TNL_F_PMTUD | \ |
58 | TNL_F_HDR_CACHE | TNL_F_IPSEC) | |
c19e6535 | 59 | |
f686a33a PS |
60 | /** |
61 | * struct port_lookup_key - Tunnel port key, used as hash table key. | |
62 | * @in_key: Key to match on input, 0 for wildcard. | |
2a4999f3 | 63 | * @net: Network namespace of the port. |
f686a33a PS |
64 | * @saddr: IPv4 source address to match, 0 to accept any source address. |
65 | * @daddr: IPv4 destination of tunnel. | |
66 | * @tunnel_type: Set of TNL_T_* flags that define lookup. | |
67 | */ | |
68 | struct port_lookup_key { | |
69 | __be64 in_key; | |
2a4999f3 PS |
70 | #ifdef CONFIG_NET_NS |
71 | struct net *net; | |
72 | #endif | |
f686a33a PS |
73 | __be32 saddr; |
74 | __be32 daddr; | |
75 | u32 tunnel_type; | |
76 | }; | |
77 | ||
6455100f | 78 | #define PORT_KEY_LEN (offsetof(struct port_lookup_key, tunnel_type) + \ |
bd85a68f PS |
79 | FIELD_SIZEOF(struct port_lookup_key, tunnel_type)) |
80 | ||
2a4999f3 PS |
81 | static inline struct net *port_key_get_net(const struct port_lookup_key *key) |
82 | { | |
83 | return read_pnet(&key->net); | |
84 | } | |
85 | ||
86 | static inline void port_key_set_net(struct port_lookup_key *key, struct net *net) | |
87 | { | |
88 | write_pnet(&key->net, net); | |
89 | } | |
90 | ||
c19e6535 BP |
91 | /** |
92 | * struct tnl_mutable_config - modifiable configuration for a tunnel. | |
f686a33a PS |
93 | * @key: Used as key for tunnel port. Configured via OVS_TUNNEL_ATTR_* |
94 | * attributes. | |
c19e6535 BP |
95 | * @rcu: RCU callback head for deferred destruction. |
96 | * @seq: Sequence number for distinguishing configuration versions. | |
c19e6535 BP |
97 | * @tunnel_hlen: Tunnel header length. |
98 | * @eth_addr: Source address for packets generated by tunnel itself | |
99 | * (e.g. ICMP fragmentation needed messages). | |
c19e6535 BP |
100 | * @out_key: Key to use on output, 0 if this tunnel has no fixed output key. |
101 | * @flags: TNL_F_* flags. | |
c19e6535 BP |
102 | * @tos: IPv4 TOS value to use for tunnel, 0 if no fixed TOS. |
103 | * @ttl: IPv4 TTL value to use for tunnel, 0 if no fixed TTL. | |
104 | */ | |
d1eb60cc | 105 | struct tnl_mutable_config { |
f686a33a | 106 | struct port_lookup_key key; |
d1eb60cc JG |
107 | struct rcu_head rcu; |
108 | ||
c19e6535 | 109 | unsigned seq; |
d1eb60cc | 110 | |
c19e6535 | 111 | unsigned tunnel_hlen; |
842cf6f4 JG |
112 | |
113 | unsigned char eth_addr[ETH_ALEN]; | |
d1eb60cc | 114 | |
df2c07f4 | 115 | /* Configured via OVS_TUNNEL_ATTR_* attributes. */ |
c19e6535 BP |
116 | __be64 out_key; |
117 | u32 flags; | |
c19e6535 BP |
118 | u8 tos; |
119 | u8 ttl; | |
b37e6334 BP |
120 | |
121 | /* Multicast configuration. */ | |
122 | int mlink; | |
d1eb60cc JG |
123 | }; |
124 | ||
125 | struct tnl_ops { | |
842cf6f4 JG |
126 | u32 tunnel_type; /* Put the TNL_T_PROTO_* type in here. */ |
127 | u8 ipproto; /* The IP protocol for the tunnel. */ | |
d1eb60cc | 128 | |
5214f5c4 | 129 | /* |
842cf6f4 | 130 | * Returns the length of the tunnel header that will be added in |
5214f5c4 JG |
131 | * build_header() (i.e. excludes the IP header). Returns a negative |
132 | * error code if the configuration is invalid. | |
133 | */ | |
c19e6535 | 134 | int (*hdr_len)(const struct tnl_mutable_config *); |
5214f5c4 JG |
135 | |
136 | /* | |
842cf6f4 JG |
137 | * Builds the static portion of the tunnel header, which is stored in |
138 | * the header cache. In general the performance of this function is | |
139 | * not too important as we try to only call it when building the cache | |
140 | * so it is preferable to shift as much work as possible here. However, | |
141 | * in some circumstances caching is disabled and this function will be | |
142 | * called for every packet, so try not to make it too slow. | |
143 | */ | |
144 | void (*build_header)(const struct vport *, | |
145 | const struct tnl_mutable_config *, void *header); | |
146 | ||
147 | /* | |
148 | * Updates the cached header of a packet to match the actual packet | |
149 | * data. Typical things that might need to be updated are length, | |
150 | * checksum, etc. The IP header will have already been updated and this | |
151 | * is the final step before transmission. Returns a linked list of | |
152 | * completed SKBs (multiple packets may be generated in the event | |
153 | * of fragmentation). | |
154 | */ | |
155 | struct sk_buff *(*update_header)(const struct vport *, | |
156 | const struct tnl_mutable_config *, | |
157 | struct dst_entry *, struct sk_buff *); | |
158 | }; | |
159 | ||
160 | #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,20) | |
161 | /* | |
162 | * On these kernels we have a fast mechanism to tell if the ARP cache for a | |
163 | * particular destination has changed. | |
164 | */ | |
165 | #define HAVE_HH_SEQ | |
166 | #endif | |
167 | #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,27) | |
168 | /* | |
169 | * On these kernels we have a fast mechanism to tell if the routing table | |
170 | * has changed. | |
171 | */ | |
172 | #define HAVE_RT_GENID | |
173 | #endif | |
174 | #if !defined(HAVE_HH_SEQ) || !defined(HAVE_RT_GENID) | |
175 | /* If we can't detect all system changes directly we need to use a timeout. */ | |
176 | #define NEED_CACHE_TIMEOUT | |
177 | #endif | |
178 | struct tnl_cache { | |
179 | struct rcu_head rcu; | |
180 | ||
181 | int len; /* Length of data to be memcpy'd from cache. */ | |
c2310ade | 182 | int hh_len; /* Hardware hdr length, cached from hh_cache. */ |
842cf6f4 | 183 | |
6455100f PS |
184 | /* Sequence number of mutable->seq from which this cache was |
185 | * generated. */ | |
842cf6f4 JG |
186 | unsigned mutable_seq; |
187 | ||
188 | #ifdef HAVE_HH_SEQ | |
189 | /* | |
190 | * The sequence number from the seqlock protecting the hardware header | |
191 | * cache (in the ARP cache). Since every write increments the counter | |
192 | * this gives us an easy way to tell if it has changed. | |
193 | */ | |
194 | unsigned hh_seq; | |
195 | #endif | |
196 | ||
197 | #ifdef NEED_CACHE_TIMEOUT | |
198 | /* | |
199 | * If we don't have direct mechanisms to detect all important changes in | |
200 | * the system, fall back to an expiration time. This expiration time | |
201 | * can be relatively short since at high rates there will be millions of | |
202 | * packets per second, so we'll still get plenty of benefit from the | |
203 | * cache. Note that if something changes we may blackhole packets | |
204 | * until the expiration time (depending on what changed and the kernel | |
205 | * version we may be able to detect the change sooner). Expiration is | |
206 | * expressed as a time in jiffies. | |
5214f5c4 | 207 | */ |
842cf6f4 JG |
208 | unsigned long expiration; |
209 | #endif | |
210 | ||
211 | /* | |
212 | * The routing table entry that is the result of looking up the tunnel | |
213 | * endpoints. It also contains a sequence number (called a generation | |
214 | * ID) that can be compared to a global sequence to tell if the routing | |
215 | * table has changed (and therefore there is a potential that this | |
216 | * cached route has been invalidated). | |
217 | */ | |
218 | struct rtable *rt; | |
219 | ||
220 | /* | |
221 | * If the output device for tunnel traffic is an OVS internal device, | |
222 | * the flow of that datapath. Since all tunnel traffic will have the | |
223 | * same headers this allows us to cache the flow lookup. NULL if the | |
224 | * output device is not OVS or if there is no flow installed. | |
225 | */ | |
226 | struct sw_flow *flow; | |
227 | ||
228 | /* The cached header follows after padding for alignment. */ | |
d1eb60cc JG |
229 | }; |
230 | ||
231 | struct tnl_vport { | |
232 | struct rcu_head rcu; | |
3544358a | 233 | struct hlist_node hash_node; |
d1eb60cc JG |
234 | |
235 | char name[IFNAMSIZ]; | |
236 | const struct tnl_ops *tnl_ops; | |
237 | ||
e1040c77 | 238 | struct tnl_mutable_config __rcu *mutable; |
5214f5c4 | 239 | |
842cf6f4 JG |
240 | /* |
241 | * ID of last fragment sent (for tunnel protocols with direct support | |
242 | * for fragmentation). If the protocol relies on IP fragmentation then | |
243 | * this is not needed. | |
244 | */ | |
5214f5c4 | 245 | atomic_t frag_id; |
842cf6f4 JG |
246 | |
247 | spinlock_t cache_lock; | |
6455100f | 248 | struct tnl_cache __rcu *cache; /* Protected by RCU/cache_lock. */ |
842cf6f4 JG |
249 | |
250 | #ifdef NEED_CACHE_TIMEOUT | |
251 | /* | |
252 | * If we must rely on expiration time to invalidate the cache, this is | |
253 | * the interval. It is randomized within a range (defined by | |
254 | * MAX_CACHE_EXP in tunnel.c) to avoid synchronized expirations caused | |
255 | * by creation of a large number of tunnels at one time. | |
256 | */ | |
257 | unsigned long cache_exp_interval; | |
258 | #endif | |
d1eb60cc JG |
259 | }; |
260 | ||
850b6b3b JG |
261 | struct vport *ovs_tnl_create(const struct vport_parms *, const struct vport_ops *, |
262 | const struct tnl_ops *); | |
263 | void ovs_tnl_destroy(struct vport *); | |
264 | ||
265 | int ovs_tnl_set_options(struct vport *, struct nlattr *); | |
266 | int ovs_tnl_get_options(const struct vport *, struct sk_buff *); | |
267 | ||
268 | int ovs_tnl_set_addr(struct vport *vport, const unsigned char *addr); | |
269 | const char *ovs_tnl_get_name(const struct vport *vport); | |
270 | const unsigned char *ovs_tnl_get_addr(const struct vport *vport); | |
271 | int ovs_tnl_send(struct vport *vport, struct sk_buff *skb); | |
272 | void ovs_tnl_rcv(struct vport *vport, struct sk_buff *skb, u8 tos); | |
273 | ||
2a4999f3 PS |
274 | struct vport *ovs_tnl_find_port(struct net *net, __be32 saddr, __be32 daddr, |
275 | __be64 key, int tunnel_type, | |
850b6b3b JG |
276 | const struct tnl_mutable_config **mutable); |
277 | bool ovs_tnl_frag_needed(struct vport *vport, | |
278 | const struct tnl_mutable_config *mutable, | |
279 | struct sk_buff *skb, unsigned int mtu, __be64 flow_key); | |
280 | void ovs_tnl_free_linked_skbs(struct sk_buff *skb); | |
281 | ||
282 | int ovs_tnl_init(void); | |
283 | void ovs_tnl_exit(void); | |
d1eb60cc JG |
284 | static inline struct tnl_vport *tnl_vport_priv(const struct vport *vport) |
285 | { | |
286 | return vport_priv(vport); | |
287 | } | |
288 | ||
289 | #endif /* tunnel.h */ |