]> git.proxmox.com Git - ovs.git/blame - datapath/tunnel.h
Global replace of Nicira Networks.
[ovs.git] / datapath / tunnel.h
CommitLineData
d1eb60cc 1/*
e0edde6f 2 * Copyright (c) 2007-2012 Nicira, Inc.
d1eb60cc 3 *
a9a29d22
JG
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of version 2 of the GNU General Public
6 * License as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program; if not, write to the Free Software
15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16 * 02110-1301, USA
d1eb60cc
JG
17 */
18
19#ifndef TUNNEL_H
20#define TUNNEL_H 1
21
842cf6f4 22#include <linux/version.h>
2a4999f3
PS
23#include <net/net_namespace.h>
24#include <net/netns/generic.h>
842cf6f4
JG
25
26#include "flow.h"
d1eb60cc 27#include "openvswitch/tunnel.h"
d1eb60cc
JG
28#include "vport.h"
29
5214f5c4
JG
/*
 * Absolute lower bound on the size of a fragment. Be aware that the
 * minimum MTU is defined in many other ways elsewhere.
 */
#define IP_MIN_MTU	68
35
/*
 * Tunnel protocol identifiers. One of these is stored in struct tnl_ops
 * and passed to ovs_tnl_find_port(). They live in the same namespace as
 * the other TNL_T_* values, which leaves only the least significant
 * 10 bits for protocol identifiers.
 */
#define TNL_T_PROTO_GRE		0
#define TNL_T_PROTO_CAPWAP	1

/* Lookup flags; only callers of ovs_tnl_find_port() need these. */
#define TNL_T_KEY_EXACT		(1 << 10)
#define TNL_T_KEY_MATCH		(1 << 11)
d1eb60cc 48
/* Private flags; userspace never sees them in this form. */

/* Store the key in tun_id to match in flow table. */
#define TNL_F_IN_KEY_MATCH	(1 << 16)

/* Get the key from a SET_TUNNEL action. */
#define TNL_F_OUT_KEY_ACTION	(1 << 17)

/* The union of every public tunnel flag. */
#define TNL_F_PUBLIC		(TNL_F_CSUM | \
				 TNL_F_TOS_INHERIT | \
				 TNL_F_TTL_INHERIT | \
				 TNL_F_DF_INHERIT | \
				 TNL_F_DF_DEFAULT | \
				 TNL_F_PMTUD | \
				 TNL_F_HDR_CACHE | \
				 TNL_F_IPSEC)
c19e6535 59
f686a33a
PS
60/**
61 * struct port_lookup_key - Tunnel port key, used as hash table key.
62 * @in_key: Key to match on input, 0 for wildcard.
2a4999f3 63 * @net: Network namespace of the port.
f686a33a
PS
64 * @saddr: IPv4 source address to match, 0 to accept any source address.
65 * @daddr: IPv4 destination of tunnel.
66 * @tunnel_type: Set of TNL_T_* flags that define lookup.
67 */
68struct port_lookup_key {
69 __be64 in_key;
2a4999f3
PS
70#ifdef CONFIG_NET_NS
71 struct net *net;
72#endif
f686a33a
PS
73 __be32 saddr;
74 __be32 daddr;
75 u32 tunnel_type;
76};
77
/*
 * Length in bytes of struct port_lookup_key measured from its first byte
 * through the end of its tunnel_type member; any padding after that
 * member is not counted.
 */
#define PORT_KEY_LEN \
	(offsetof(struct port_lookup_key, tunnel_type) + \
	 FIELD_SIZEOF(struct port_lookup_key, tunnel_type))
80
2a4999f3
PS
81static inline struct net *port_key_get_net(const struct port_lookup_key *key)
82{
83 return read_pnet(&key->net);
84}
85
86static inline void port_key_set_net(struct port_lookup_key *key, struct net *net)
87{
88 write_pnet(&key->net, net);
89}
90
c19e6535
BP
91/**
92 * struct tnl_mutable_config - modifiable configuration for a tunnel.
f686a33a
PS
93 * @key: Used as key for tunnel port. Configured via OVS_TUNNEL_ATTR_*
94 * attributes.
c19e6535
BP
95 * @rcu: RCU callback head for deferred destruction.
96 * @seq: Sequence number for distinguishing configuration versions.
c19e6535
BP
97 * @tunnel_hlen: Tunnel header length.
98 * @eth_addr: Source address for packets generated by tunnel itself
99 * (e.g. ICMP fragmentation needed messages).
c19e6535
BP
100 * @out_key: Key to use on output, 0 if this tunnel has no fixed output key.
101 * @flags: TNL_F_* flags.
c19e6535
BP
102 * @tos: IPv4 TOS value to use for tunnel, 0 if no fixed TOS.
103 * @ttl: IPv4 TTL value to use for tunnel, 0 if no fixed TTL.
104 */
d1eb60cc 105struct tnl_mutable_config {
f686a33a 106 struct port_lookup_key key;
d1eb60cc
JG
107 struct rcu_head rcu;
108
c19e6535 109 unsigned seq;
d1eb60cc 110
c19e6535 111 unsigned tunnel_hlen;
842cf6f4
JG
112
113 unsigned char eth_addr[ETH_ALEN];
d1eb60cc 114
df2c07f4 115 /* Configured via OVS_TUNNEL_ATTR_* attributes. */
c19e6535
BP
116 __be64 out_key;
117 u32 flags;
c19e6535
BP
118 u8 tos;
119 u8 ttl;
b37e6334
BP
120
121 /* Multicast configuration. */
122 int mlink;
d1eb60cc
JG
123};
124
125struct tnl_ops {
842cf6f4
JG
126 u32 tunnel_type; /* Put the TNL_T_PROTO_* type in here. */
127 u8 ipproto; /* The IP protocol for the tunnel. */
d1eb60cc 128
5214f5c4 129 /*
842cf6f4 130 * Returns the length of the tunnel header that will be added in
5214f5c4
JG
131 * build_header() (i.e. excludes the IP header). Returns a negative
132 * error code if the configuration is invalid.
133 */
c19e6535 134 int (*hdr_len)(const struct tnl_mutable_config *);
5214f5c4
JG
135
136 /*
842cf6f4
JG
137 * Builds the static portion of the tunnel header, which is stored in
138 * the header cache. In general the performance of this function is
139 * not too important as we try to only call it when building the cache
140 * so it is preferable to shift as much work as possible here. However,
141 * in some circumstances caching is disabled and this function will be
142 * called for every packet, so try not to make it too slow.
143 */
144 void (*build_header)(const struct vport *,
145 const struct tnl_mutable_config *, void *header);
146
147 /*
148 * Updates the cached header of a packet to match the actual packet
149 * data. Typical things that might need to be updated are length,
150 * checksum, etc. The IP header will have already been updated and this
151 * is the final step before transmission. Returns a linked list of
152 * completed SKBs (multiple packets may be generated in the event
153 * of fragmentation).
154 */
155 struct sk_buff *(*update_header)(const struct vport *,
156 const struct tnl_mutable_config *,
157 struct dst_entry *, struct sk_buff *);
158};
159
160#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,20)
161/*
162 * On these kernels we have a fast mechanism to tell if the ARP cache for a
163 * particular destination has changed.
164 */
165#define HAVE_HH_SEQ
166#endif
167#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,27)
168/*
169 * On these kernels we have a fast mechanism to tell if the routing table
170 * has changed.
171 */
172#define HAVE_RT_GENID
173#endif
174#if !defined(HAVE_HH_SEQ) || !defined(HAVE_RT_GENID)
175/* If we can't detect all system changes directly we need to use a timeout. */
176#define NEED_CACHE_TIMEOUT
177#endif
178struct tnl_cache {
179 struct rcu_head rcu;
180
181 int len; /* Length of data to be memcpy'd from cache. */
c2310ade 182 int hh_len; /* Hardware hdr length, cached from hh_cache. */
842cf6f4 183
6455100f
PS
184 /* Sequence number of mutable->seq from which this cache was
185 * generated. */
842cf6f4
JG
186 unsigned mutable_seq;
187
188#ifdef HAVE_HH_SEQ
189 /*
190 * The sequence number from the seqlock protecting the hardware header
191 * cache (in the ARP cache). Since every write increments the counter
192 * this gives us an easy way to tell if it has changed.
193 */
194 unsigned hh_seq;
195#endif
196
197#ifdef NEED_CACHE_TIMEOUT
198 /*
199 * If we don't have direct mechanisms to detect all important changes in
200 * the system fall back to an expiration time. This expiration time
201 * can be relatively short since at high rates there will be millions of
202 * packets per second, so we'll still get plenty of benefit from the
203 * cache. Note that if something changes we may blackhole packets
204 * until the expiration time (depending on what changed and the kernel
205 * version we may be able to detect the change sooner). Expiration is
206 * expressed as a time in jiffies.
5214f5c4 207 */
842cf6f4
JG
208 unsigned long expiration;
209#endif
210
211 /*
212 * The routing table entry that is the result of looking up the tunnel
213 * endpoints. It also contains a sequence number (called a generation
214 * ID) that can be compared to a global sequence to tell if the routing
215 * table has changed (and therefore there is a potential that this
216 * cached route has been invalidated).
217 */
218 struct rtable *rt;
219
220 /*
221 * If the output device for tunnel traffic is an OVS internal device,
222 * the flow of that datapath. Since all tunnel traffic will have the
223 * same headers this allows us to cache the flow lookup. NULL if the
224 * output device is not OVS or if there is no flow installed.
225 */
226 struct sw_flow *flow;
227
228 /* The cached header follows after padding for alignment. */
d1eb60cc
JG
229};
230
231struct tnl_vport {
232 struct rcu_head rcu;
3544358a 233 struct hlist_node hash_node;
d1eb60cc
JG
234
235 char name[IFNAMSIZ];
236 const struct tnl_ops *tnl_ops;
237
e1040c77 238 struct tnl_mutable_config __rcu *mutable;
5214f5c4 239
842cf6f4
JG
240 /*
241 * ID of last fragment sent (for tunnel protocols with direct support
242 * fragmentation). If the protocol relies on IP fragmentation then
243 * this is not needed.
244 */
5214f5c4 245 atomic_t frag_id;
842cf6f4
JG
246
247 spinlock_t cache_lock;
6455100f 248 struct tnl_cache __rcu *cache; /* Protected by RCU/cache_lock. */
842cf6f4
JG
249
250#ifdef NEED_CACHE_TIMEOUT
251 /*
252 * If we must rely on expiration time to invalidate the cache, this is
253 * the interval. It is randomized within a range (defined by
254 * MAX_CACHE_EXP in tunnel.c) to avoid synchronized expirations caused
255 * by creation of a large number of tunnels at a one time.
256 */
257 unsigned long cache_exp_interval;
258#endif
d1eb60cc
JG
259};
260
850b6b3b
JG
261struct vport *ovs_tnl_create(const struct vport_parms *, const struct vport_ops *,
262 const struct tnl_ops *);
263void ovs_tnl_destroy(struct vport *);
264
265int ovs_tnl_set_options(struct vport *, struct nlattr *);
266int ovs_tnl_get_options(const struct vport *, struct sk_buff *);
267
268int ovs_tnl_set_addr(struct vport *vport, const unsigned char *addr);
269const char *ovs_tnl_get_name(const struct vport *vport);
270const unsigned char *ovs_tnl_get_addr(const struct vport *vport);
271int ovs_tnl_send(struct vport *vport, struct sk_buff *skb);
272void ovs_tnl_rcv(struct vport *vport, struct sk_buff *skb, u8 tos);
273
2a4999f3
PS
274struct vport *ovs_tnl_find_port(struct net *net, __be32 saddr, __be32 daddr,
275 __be64 key, int tunnel_type,
850b6b3b
JG
276 const struct tnl_mutable_config **mutable);
277bool ovs_tnl_frag_needed(struct vport *vport,
278 const struct tnl_mutable_config *mutable,
279 struct sk_buff *skb, unsigned int mtu, __be64 flow_key);
280void ovs_tnl_free_linked_skbs(struct sk_buff *skb);
281
282int ovs_tnl_init(void);
283void ovs_tnl_exit(void);
d1eb60cc
JG
/* Return the result of vport_priv(@vport) as a struct tnl_vport pointer. */
static inline struct tnl_vport *tnl_vport_priv(const struct vport *vport)
{
	struct tnl_vport *tnl_vport = vport_priv(vport);

	return tnl_vport;
}
288
289#endif /* tunnel.h */