]> git.proxmox.com Git - ovs.git/blob - lib/conntrack.h
netdev-dpdk: vhost get stats fix.
[ovs.git] / lib / conntrack.h
1 /*
2 * Copyright (c) 2015, 2016, 2017 Nicira, Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #ifndef CONNTRACK_H
18 #define CONNTRACK_H 1
19
20 #include <stdbool.h>
21
22 #include "latch.h"
23 #include "odp-netlink.h"
24 #include "openvswitch/hmap.h"
25 #include "openvswitch/list.h"
26 #include "openvswitch/thread.h"
27 #include "openvswitch/types.h"
28 #include "ovs-atomic.h"
29 #include "ovs-thread.h"
30 #include "packets.h"
31
32 /* Userspace connection tracker
33 * ============================
34 *
35 * This is a connection tracking module that keeps all the state in userspace.
36 *
37 * Usage
38 * =====
39 *
40 * struct conntrack ct;
41 *
42 * Initialization:
43 *
44 * conntrack_init(&ct);
45 *
46 * It is necessary to periodically issue a call to
47 *
48 * conntrack_run(&ct);
49 *
50 * to allow the module to clean up expired connections.
51 *
52 * To send a group of packets through the connection tracker:
53 *
54 * conntrack_execute(&ct, pkts, n_pkts, ...);
55 *
56 * Thread-safety
57 * =============
58 *
59 * conntrack_execute() can be called by multiple threads simultaneously.
60 */
61
/* Forward declarations: this header only passes these types by pointer in
 * the prototypes below.  'struct conntrack' is fully defined further down in
 * this header; 'struct dp_packet_batch' is defined elsewhere. */
62 struct dp_packet_batch;
63
64 struct conntrack;
65
/* Storage for a single IPv4 or IPv6 address.  The anonymous union overlays
 * four views of the same bytes: 'ipv4'/'ipv6' use the 'ovs_16aligned_*'
 * types, while 'ipv4_aligned'/'ipv6_aligned' use 'ovs_be32' and
 * 'struct in6_addr'.  Nothing in this struct records which view is in use,
 * so the user of a 'struct ct_addr' has to track that separately. */
66 struct ct_addr {
67 union {
68 ovs_16aligned_be32 ipv4;
69 union ovs_16aligned_in6_addr ipv6;
70 ovs_be32 ipv4_aligned;
71 struct in6_addr ipv6_aligned;
72 };
73 };
74
/* NAT action flags.  Each enumerator is a distinct single bit (1 << 0 through
 * 1 << 3), so several of them can be OR-ed together into one value.
 * NOTE(review): presumably these are the bits stored in
 * 'struct nat_action_info_t'.nat_action -- confirm against conntrack.c. */
75 enum nat_action_e {
76 NAT_ACTION_SRC = 1 << 0,
77 NAT_ACTION_SRC_PORT = 1 << 1,
78 NAT_ACTION_DST = 1 << 2,
79 NAT_ACTION_DST_PORT = 1 << 3,
80 };
81
/* NAT parameters handed to conntrack_execute() through its
 * 'nat_action_info' argument: an address pair, a port pair and an action
 * field.
 * NOTE(review): 'min_*'/'max_*' presumably bound the ranges NAT may pick
 * from, and 'nat_action' presumably holds 'enum nat_action_e' bits; whether
 * the bounds are inclusive and the byte order of the ports are not visible
 * from this header -- confirm against the definition in conntrack.c. */
82 struct nat_action_info_t {
83 struct ct_addr min_addr;
84 struct ct_addr max_addr;
85 uint16_t min_port;
86 uint16_t max_port;
87 uint16_t nat_action;
88 };
89
/* conntrack_init() initializes a caller-provided 'struct conntrack' (see
 * "Usage" at the top of this header); it must be called before any other
 * function on that object.
 * conntrack_destroy() is its counterpart.
 * NOTE(review): exactly what destroy releases (connections, locks, the
 * cleanup thread) is not visible from this header -- see conntrack.c. */
90 void conntrack_init(struct conntrack *);
91 void conntrack_destroy(struct conntrack *);
92
/* Sends the packets in the given batch through the connection tracker.  As
 * stated in the "Thread-safety" note at the top of this header, it can be
 * called by multiple threads simultaneously.
 *
 * NOTE(review): the parameter meanings below are inferred from names and
 * types only; confirm against the definition in conntrack.c:
 *   - 'dl_type': presumably the Ethernet type of the packets in the batch.
 *   - 'force', 'commit': presumably the matching flags of the ct() action.
 *   - 'zone': conntrack zone to operate in.
 *   - 'setmark', 'setlabel': optional mark/label updates (pointers, so
 *     presumably nullable).
 *   - 'helper': name of an ALG helper, presumably nullable.
 *   - 'nat_action_info': NAT parameters, presumably nullable when no NAT
 *     is requested.
 *   - 'now': current time; the unit is not stated here (the timeouts in
 *     this header are in milliseconds).
 * The meaning of the 'int' return value is not visible from this header. */
93 int conntrack_execute(struct conntrack *, struct dp_packet_batch *,
94 ovs_be16 dl_type, bool force, bool commit,
95 uint16_t zone, const uint32_t *setmark,
96 const struct ovs_key_ct_labels *setlabel,
97 const char *helper,
98 const struct nat_action_info_t *nat_action_info,
99 long long now);
100
/* Iteration state for the conntrack_dump_start() / conntrack_dump_next() /
 * conntrack_dump_done() functions declared below.
 * NOTE(review): field roles are inferred from names -- 'bucket' and
 * 'bucket_pos' presumably record the current bucket index and the position
 * inside that bucket's hmap, and 'zone' presumably only matters when
 * 'filter_zone' is true.  Confirm against conntrack.c. */
101 struct conntrack_dump {
102 struct conntrack *ct;
103 unsigned bucket;
104 struct hmap_position bucket_pos;
105 bool filter_zone;
106 uint16_t zone;
107 };
108
/* 'struct ct_dpif_entry' is only passed by pointer here, so a forward
 * declaration suffices. */
109 struct ct_dpif_entry;
110
/* Dump interface: start a dump on a tracker, fetch entries one at a time into
 * a 'struct ct_dpif_entry', then finish the dump.
 * NOTE(review): 'pzone' is a pointer, so presumably NULL means "do not filter
 * by zone".  The purpose of the trailing unnamed 'int *' of
 * conntrack_dump_start() and the meaning of the 'int' return values
 * (including how end-of-dump is signalled) are not visible from this header;
 * see the definitions in conntrack.c. */
111 int conntrack_dump_start(struct conntrack *, struct conntrack_dump *,
112 const uint16_t *pzone, int *);
113 int conntrack_dump_next(struct conntrack_dump *, struct ct_dpif_entry *);
114 int conntrack_dump_done(struct conntrack_dump *);
115
/* Flushes connections from the tracker.
 * NOTE(review): 'zone' is a pointer, so presumably NULL flushes every zone
 * and a non-NULL value restricts the flush to '*zone'; the meaning of the
 * return value is not visible here -- confirm against conntrack.c. */
116 int conntrack_flush(struct conntrack *, const uint16_t *zone);
117 \f
118 /* 'struct ct_lock' is a wrapper for an adaptive mutex. It's useful to try
119 * different types of locks (e.g. spinlocks).
 *
 * All users go through the ct_lock_*() inline functions below, so swapping
 * the underlying lock type only requires changing this struct and those
 * functions.  The struct is tagged OVS_LOCKABLE so that it can be named in
 * the locking annotations on those functions. */
120
121 struct OVS_LOCKABLE ct_lock {
122 struct ovs_mutex lock;
123 };
124
/* Wrapper around 'struct ovs_rwlock', parallel to 'struct ct_lock'; it is
 * operated on through the ct_rwlock_*() inline functions below. */
125 struct OVS_LOCKABLE ct_rwlock {
126 struct ovs_rwlock lock;
127 };
128
/* Initializes 'lock' by initializing the wrapped mutex with
 * ovs_mutex_init_adaptive(). */
129 static inline void ct_lock_init(struct ct_lock *lock)
130 {
131 ovs_mutex_init_adaptive(&lock->lock);
132 }
133
/* Acquires 'lock' by locking the wrapped mutex.
 *
 * Annotated OVS_ACQUIRES(lock) so that the acquisition is attributed to the
 * wrapper object for callers.  The body itself is marked
 * OVS_NO_THREAD_SAFETY_ANALYSIS, presumably because it operates on the inner
 * 'lock->lock' rather than on the wrapper named in the annotation. */
134 static inline void ct_lock_lock(struct ct_lock *lock)
135 OVS_ACQUIRES(lock)
136 OVS_NO_THREAD_SAFETY_ANALYSIS
137 {
138 ovs_mutex_lock(&lock->lock);
139 }
140
/* Releases 'lock' by unlocking the wrapped mutex.  Annotated
 * OVS_RELEASES(lock) as the counterpart of ct_lock_lock(); the body is
 * excluded from thread-safety analysis in the same way. */
141 static inline void ct_lock_unlock(struct ct_lock *lock)
142 OVS_RELEASES(lock)
143 OVS_NO_THREAD_SAFETY_ANALYSIS
144 {
145 ovs_mutex_unlock(&lock->lock);
146 }
147
/* Destroys 'lock' by calling ovs_mutex_destroy() on the wrapped mutex. */
148 static inline void ct_lock_destroy(struct ct_lock *lock)
149 {
150 ovs_mutex_destroy(&lock->lock);
151 }
152
/* Initializes 'lock' by calling ovs_rwlock_init() on the wrapped rwlock. */
153 static inline void ct_rwlock_init(struct ct_rwlock *lock)
154 {
155 ovs_rwlock_init(&lock->lock);
156 }
157
158
/* Acquires 'lock' for writing via ovs_rwlock_wrlock() on the wrapped rwlock.
 * Annotated OVS_ACQ_WRLOCK(lock) for callers; the body is excluded from
 * thread-safety analysis, presumably because it operates on the inner
 * 'lock->lock' rather than on the wrapper. */
159 static inline void ct_rwlock_wrlock(struct ct_rwlock *lock)
160 OVS_ACQ_WRLOCK(lock)
161 OVS_NO_THREAD_SAFETY_ANALYSIS
162 {
163 ovs_rwlock_wrlock(&lock->lock);
164 }
165
/* Acquires 'lock' for reading via ovs_rwlock_rdlock() on the wrapped rwlock.
 * Annotated OVS_ACQ_RDLOCK(lock) for callers; the body is excluded from
 * thread-safety analysis like the other ct_rwlock_*() wrappers. */
166 static inline void ct_rwlock_rdlock(struct ct_rwlock *lock)
167 OVS_ACQ_RDLOCK(lock)
168 OVS_NO_THREAD_SAFETY_ANALYSIS
169 {
170 ovs_rwlock_rdlock(&lock->lock);
171 }
172
/* Releases 'lock' via ovs_rwlock_unlock() on the wrapped rwlock.  The same
 * function is used whether the lock was taken with ct_rwlock_rdlock() or
 * ct_rwlock_wrlock(); it is the only unlock wrapper declared here. */
173 static inline void ct_rwlock_unlock(struct ct_rwlock *lock)
174 OVS_RELEASES(lock)
175 OVS_NO_THREAD_SAFETY_ANALYSIS
176 {
177 ovs_rwlock_unlock(&lock->lock);
178 }
179
/* Destroys 'lock' by calling ovs_rwlock_destroy() on the wrapped rwlock. */
180 static inline void ct_rwlock_destroy(struct ct_rwlock *lock)
181 {
182 ovs_rwlock_destroy(&lock->lock);
183 }
184
185 \f
186 /* Timeouts: all the possible timeout states passed to update_expiration()
187 * are listed here. The name will be prefixed with CT_TM_ and the value is in
188 * milliseconds.
 *
 * Each entry is written as CT_TIMEOUT(NAME, VAL).  Code that needs the list
 * defines CT_TIMEOUT() to produce what it wants, expands CT_TIMEOUTS, and
 * then #undefs CT_TIMEOUT again (see the uses right below in this header). */
189 #define CT_TIMEOUTS \
190 CT_TIMEOUT(TCP_FIRST_PACKET, 30 * 1000) \
191 CT_TIMEOUT(TCP_OPENING, 30 * 1000) \
192 CT_TIMEOUT(TCP_ESTABLISHED, 24 * 60 * 60 * 1000) \
193 CT_TIMEOUT(TCP_CLOSING, 15 * 60 * 1000) \
194 CT_TIMEOUT(TCP_FIN_WAIT, 45 * 1000) \
195 CT_TIMEOUT(TCP_CLOSED, 30 * 1000) \
196 CT_TIMEOUT(OTHER_FIRST, 60 * 1000) \
197 CT_TIMEOUT(OTHER_MULTIPLE, 60 * 1000) \
198 CT_TIMEOUT(OTHER_BIDIR, 30 * 1000) \
199 CT_TIMEOUT(ICMP_FIRST, 60 * 1000) \
200 CT_TIMEOUT(ICMP_REPLY, 30 * 1000)
201
202 /* The smallest of the above values: it is used as an upper bound for the
203 * interval between two rounds of cleanup of expired entries.
 *
 * The BUILD_ASSERT_DECL expansion of CT_TIMEOUTS that follows rejects, at
 * compile time, any timeout value smaller than this, so the two must be
 * kept in sync when a shorter timeout is added. */
204 #define CT_TM_MIN (30 * 1000)
205
/* Compile-time check that no timeout listed in CT_TIMEOUTS is smaller than
 * CT_TM_MIN: each CT_TIMEOUT(NAME, VAL) entry expands to one
 * BUILD_ASSERT_DECL.  'VAL' is parenthesized so that a timeout written with
 * a lower-precedence operator (e.g. a conditional expression) is still
 * compared as a whole. */
206 #define CT_TIMEOUT(NAME, VAL) BUILD_ASSERT_DECL((VAL) >= CT_TM_MIN);
207 CT_TIMEOUTS
208 #undef CT_TIMEOUT
209
/* One enumerator, CT_TM_<NAME>, per entry of CT_TIMEOUTS, in the order the
 * entries are listed (the VALUE argument is ignored here).  'N_CT_TM' comes
 * last, so it equals the number of timeouts and can be used as an array
 * size, as 'exp_lists' in 'struct conntrack_bucket' does. */
210 enum ct_timeout {
211 #define CT_TIMEOUT(NAME, VALUE) CT_TM_##NAME,
212 CT_TIMEOUTS
213 #undef CT_TIMEOUT
214 N_CT_TM
215 };
216
217 /* Locking:
218 *
219 * The connections are kept in different buckets, which are completely
220 * independent. The connection bucket is determined by the hash of its key.
221 *
222 * Each bucket has two locks. Acquisition order is, from outermost to
223 * innermost:
224 *
225 * cleanup_mutex
226 * lock
227 *
228 * */
/* One independent bucket of the connection tracker.  It carries two locks;
 * when both are needed, 'cleanup_mutex' is taken before 'lock'. */
229 struct conntrack_bucket {
230 /* Protects 'connections' and 'exp_lists'. Used in the fast path */
231 struct ct_lock lock;
232 /* Contains the connections in the bucket, indexed by 'struct conn_key' */
233 struct hmap connections OVS_GUARDED;
234 /* For each possible timeout we have a list of connections. When the
235 * timeout of a connection is updated, we move it to the back of the list.
236 * Since the connections in a list have the same relative timeout, the list
237 * will be ordered, with the oldest connections to the front. */
238 struct ovs_list exp_lists[N_CT_TM] OVS_GUARDED;
239
240 /* Protects 'next_cleanup'. Used to make sure that there's only one thread
241 * performing the cleanup. */
242 struct ovs_mutex cleanup_mutex;
243 long long next_cleanup OVS_GUARDED;
244 };
245
/* Number of buckets in a 'struct conntrack', expressed as a power of two:
 * CONNTRACK_BUCKETS = 2^CONNTRACK_BUCKETS_SHIFT = 256. */
246 #define CONNTRACK_BUCKETS_SHIFT 8
247 #define CONNTRACK_BUCKETS (1 << CONNTRACK_BUCKETS_SHIFT)
248
/* Top-level state of one userspace connection tracker instance: the array of
 * connection buckets plus the resources shared across buckets. */
249 struct conntrack {
250 /* Independent buckets containing the connections */
251 struct conntrack_bucket buckets[CONNTRACK_BUCKETS];
252
253 /* Salt for hashing a connection key. */
254 uint32_t hash_basis;
255
256 /* The thread performing periodic cleanup of the connection
257 * tracker */
258 pthread_t clean_thread;
259 /* Latch to destroy the 'clean_thread' */
260 struct latch clean_thread_exit;
261
262 /* Number of connections currently in the connection tracker. */
263 atomic_count n_conn;
264 /* Connections limit. When this limit is reached, no new connection
265 * will be accepted. */
266 atomic_uint n_conn_limit;
267
268 /* The following resources are referenced during nat connection
269 * creation and deletion. */
270 struct hmap nat_conn_keys OVS_GUARDED;
271 /* Hash table for alg expectations. Expectations are created
272 * by control connections to help create data connections. */
273 struct hmap alg_expectations OVS_GUARDED;
274 /* Expiry list for alg expectations. */
275 struct ovs_list alg_exp_list OVS_GUARDED;
276 /* This lock is used during NAT connection creation and deletion;
277 * it is taken after a bucket lock and given back before that
278 * bucket unlock.
279 * This lock is similarly used to guard alg_expectations and
280 * alg_exp_list. If a bucket lock is also held during the normal
281 * code flow, then it must be taken first and released last.
282 */
283 struct ct_rwlock resources_lock;
284
285 };
286
287 #endif /* conntrack.h */