]> git.proxmox.com Git - ovs.git/blame - lib/conntrack.h
ofp-port: Drop of useless indirection in ofputil_pull_ofp14_port_stats().
[ovs.git] / lib / conntrack.h
CommitLineData
a489b168 1/*
bd5e81a0 2 * Copyright (c) 2015, 2016, 2017 Nicira, Inc.
a489b168
DDP
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#ifndef CONNTRACK_H
18#define CONNTRACK_H 1
19
20#include <stdbool.h>
21
e6ef6cc6 22#include "latch.h"
a489b168
DDP
23#include "odp-netlink.h"
24#include "openvswitch/hmap.h"
e6ef6cc6 25#include "openvswitch/list.h"
a489b168
DDP
26#include "openvswitch/thread.h"
27#include "openvswitch/types.h"
28#include "ovs-atomic.h"
4cddb1f0
DB
29#include "ovs-thread.h"
30#include "packets.h"
4417ca3d 31#include "hindex.h"
a489b168
DDP
32
33/* Userspace connection tracker
34 * ============================
35 *
36 * This is a connection tracking module that keeps all the state in userspace.
37 *
38 * Usage
39 * =====
40 *
41 * struct conntrack ct;
42 *
43 * Initialization:
44 *
45 * conntrack_init(&ct);
46 *
47 * It is necessary to periodically issue a call to
48 *
49 * conntrack_run(&ct);
50 *
51 * to allow the module to clean up expired connections.
52 *
53 * To send a group of packets through the connection tracker:
54 *
55 * conntrack_execute(&ct, pkt_batch, ...);
56 *
57 * Thread-safety
58 * =============
59 *
60 * conntrack_execute() can be called by multiple threads simultaneously.
61 */
62
63struct dp_packet_batch;
64
65struct conntrack;
66
4cddb1f0
DB
/* An IPv4 or IPv6 address.  All four members share the same storage through
 * an anonymous union: two "16aligned" views and two "_aligned" views of the
 * same bytes.  NOTE(review): the 16aligned variants are presumably for
 * addresses read in place from packet data that is only 16-bit aligned --
 * confirm against the type definitions. */
67struct ct_addr {
68 union {
69 ovs_16aligned_be32 ipv4;
70 union ovs_16aligned_in6_addr ipv6;
71 ovs_be32 ipv4_aligned;
72 struct in6_addr ipv6_aligned;
73 };
74};
75
/* NAT action flags.  Each enumerator is a distinct bit (1 << 0 .. 1 << 3), so
 * the values can be OR-ed together.  NOTE(review): presumably stored in
 * 'nat_action' of 'struct nat_action_info_t' -- confirm in conntrack.c. */
76enum nat_action_e {
77 NAT_ACTION_SRC = 1 << 0,
78 NAT_ACTION_SRC_PORT = 1 << 1,
79 NAT_ACTION_DST = 1 << 2,
80 NAT_ACTION_DST_PORT = 1 << 3,
81};
82
/* Parameters of a NAT action, passed to conntrack_execute(): a minimum and
 * maximum address, a minimum and maximum port, and 'nat_action'.
 * NOTE(review): 'nat_action' looks like a bitmask of 'enum nat_action_e'
 * values; the byte order of the ports and whether the ranges are inclusive
 * are not visible in this header -- confirm in conntrack.c. */
83struct nat_action_info_t {
84 struct ct_addr min_addr;
85 struct ct_addr max_addr;
86 uint16_t min_port;
87 uint16_t max_port;
88 uint16_t nat_action;
89};
90
/* Initializes a connection tracker (see "Usage" at the top of this file). */
a489b168 91void conntrack_init(struct conntrack *);
a489b168
DDP
/* NOTE(review): presumably tears down what conntrack_init() set up; the
 * implementation is not visible in this header. */
92void conntrack_destroy(struct conntrack *);
93
bd7d93f8
DB
/* Sends the packets in 'pkt_batch' through the connection tracker 'ct'.  Per
 * the notes at the top of this file, this may be called by multiple threads
 * simultaneously.
 *
 * NOTE(review): the meaning of the remaining parameters and of the int
 * return value is defined by the implementation, not by this header.
 * 'setmark', 'setlabel', 'helper' and 'nat_action_info' are pointers and look
 * optional -- confirm the NULL handling in conntrack.c. */
94int conntrack_execute(struct conntrack *ct, struct dp_packet_batch *pkt_batch,
95 ovs_be16 dl_type, bool force, bool commit, uint16_t zone,
96 const uint32_t *setmark,
286de272 97 const struct ovs_key_ct_labels *setlabel,
bd7d93f8 98 ovs_be16 tp_src, ovs_be16 tp_dst, const char *helper,
94053e66
FA
99 const struct nat_action_info_t *nat_action_info,
100 long long now);
/* NOTE(review): presumably resets the connection tracking state carried by
 * 'packet'; takes no 'struct conntrack', so it appears to touch only the
 * packet -- confirm in conntrack.c. */
1fe178d2 101void conntrack_clear(struct dp_packet *packet);
4d4e68ed
DDP
102
/* Iteration state for the conntrack_dump_*() functions: the tracker being
 * dumped, the current bucket index, the position inside that bucket's hmap,
 * and a zone filter.  NOTE(review): presumably only connections in 'zone'
 * are reported when 'filter_zone' is true -- confirm in conntrack.c. */
103struct conntrack_dump {
104 struct conntrack *ct;
105 unsigned bucket;
106 struct hmap_position bucket_pos;
107 bool filter_zone;
108 uint16_t zone;
109};
110
111struct ct_dpif_entry;
271e48a0 112struct ct_dpif_tuple;
4d4e68ed
DDP
113
/* Connection dump interface: start / next / done, all operating on a
 * caller-provided 'struct conntrack_dump'.
 *
 * NOTE(review): 'pzone' is a pointer, so it is presumably optional (NULL
 * meaning "no zone filter", cf. 'filter_zone' in 'struct conntrack_dump').
 * The meaning of the trailing 'int *' argument and of the int return values
 * is not visible in this header -- confirm in conntrack.c. */
114int conntrack_dump_start(struct conntrack *, struct conntrack_dump *,
ded30c74 115 const uint16_t *pzone, int *);
4d4e68ed
DDP
116int conntrack_dump_next(struct conntrack_dump *, struct ct_dpif_entry *);
117int conntrack_dump_done(struct conntrack_dump *);
5d9cbb4c
DDP
118
/* Flush and limit-management interface.
 *
 * NOTE(review): the semantics below are inferred from names and signatures
 * only; confirm in conntrack.c.
 *  - conntrack_flush(): 'zone' is a pointer, presumably optional (NULL
 *    meaning all zones).
 *  - conntrack_flush_tuple(): presumably removes the connection matching the
 *    given tuple in 'zone'.
 *  - conntrack_set_maxconns() / conntrack_get_maxconns(): presumably write
 *    and read the connection limit ('n_conn_limit' in 'struct conntrack').
 *  - conntrack_get_nconns(): presumably reads the current connection count
 *    ('n_conn' in 'struct conntrack'). */
119int conntrack_flush(struct conntrack *, const uint16_t *zone);
271e48a0
YHW
120int conntrack_flush_tuple(struct conntrack *, const struct ct_dpif_tuple *,
121 uint16_t zone);
c92339ad
DB
122int conntrack_set_maxconns(struct conntrack *ct, uint32_t maxconns);
123int conntrack_get_maxconns(struct conntrack *ct, uint32_t *maxconns);
875075b3 124int conntrack_get_nconns(struct conntrack *ct, uint32_t *nconns);
a489b168
DDP
125\f
126/* 'struct ct_lock' is a wrapper for an adaptive mutex. The wrapper makes it
127 * easy to try different types of locks (e.g. spinlocks). */
128
/* Holds a single 'struct ovs_mutex'.  Use the ct_lock_*() helpers below
 * rather than touching 'lock' directly. */
129struct OVS_LOCKABLE ct_lock {
130 struct ovs_mutex lock;
131};
132
286de272
DB
/* Reader/writer counterpart of 'struct ct_lock': holds a single
 * 'struct ovs_rwlock'.  Use the ct_rwlock_*() helpers below rather than
 * touching 'lock' directly. */
133struct OVS_LOCKABLE ct_rwlock {
134 struct ovs_rwlock lock;
135};
136
a489b168
DDP
/* Initializes the mutex inside 'lock' via ovs_mutex_init_adaptive(). */
137static inline void ct_lock_init(struct ct_lock *lock)
138{
139 ovs_mutex_init_adaptive(&lock->lock);
140}
141
/* Locks the mutex inside 'lock'.  The function is annotated as acquiring
 * 'lock' (the wrapper), while its body is excluded from thread safety
 * analysis because it operates on the inner mutex. */
142static inline void ct_lock_lock(struct ct_lock *lock)
143 OVS_ACQUIRES(lock)
144 OVS_NO_THREAD_SAFETY_ANALYSIS
145{
146 ovs_mutex_lock(&lock->lock);
147}
148
/* Unlocks the mutex inside 'lock'.  Annotated as releasing 'lock'; the body
 * is excluded from thread safety analysis. */
149static inline void ct_lock_unlock(struct ct_lock *lock)
150 OVS_RELEASES(lock)
151 OVS_NO_THREAD_SAFETY_ANALYSIS
152{
153 ovs_mutex_unlock(&lock->lock);
154}
155
/* Destroys the mutex inside 'lock' via ovs_mutex_destroy(). */
156static inline void ct_lock_destroy(struct ct_lock *lock)
157{
158 ovs_mutex_destroy(&lock->lock);
159}
286de272
DB
160
/* Initializes the rwlock inside 'lock' via ovs_rwlock_init(). */
161static inline void ct_rwlock_init(struct ct_rwlock *lock)
162{
163 ovs_rwlock_init(&lock->lock);
164}
165
166
/* Takes the rwlock inside 'lock' for writing.  Annotated as a write-lock
 * acquisition of 'lock'; the body is excluded from thread safety analysis. */
167static inline void ct_rwlock_wrlock(struct ct_rwlock *lock)
168 OVS_ACQ_WRLOCK(lock)
169 OVS_NO_THREAD_SAFETY_ANALYSIS
170{
171 ovs_rwlock_wrlock(&lock->lock);
172}
173
/* Takes the rwlock inside 'lock' for reading.  Annotated as a read-lock
 * acquisition of 'lock'; the body is excluded from thread safety analysis. */
174static inline void ct_rwlock_rdlock(struct ct_rwlock *lock)
175 OVS_ACQ_RDLOCK(lock)
176 OVS_NO_THREAD_SAFETY_ANALYSIS
177{
178 ovs_rwlock_rdlock(&lock->lock);
179}
180
/* Releases the rwlock inside 'lock'; the same function is used after either
 * ct_rwlock_rdlock() or ct_rwlock_wrlock().  Annotated as releasing 'lock';
 * the body is excluded from thread safety analysis. */
181static inline void ct_rwlock_unlock(struct ct_rwlock *lock)
182 OVS_RELEASES(lock)
183 OVS_NO_THREAD_SAFETY_ANALYSIS
184{
185 ovs_rwlock_unlock(&lock->lock);
186}
187
/* Destroys the rwlock inside 'lock' via ovs_rwlock_destroy(). */
188static inline void ct_rwlock_destroy(struct ct_rwlock *lock)
189{
190 ovs_rwlock_destroy(&lock->lock);
191}
192
a489b168
DDP
193\f
194/* Timeouts: all the possible timeout states passed to update_expiration()
195 * are listed here. The name will be prefixed by CT_TM_ and the value is in
196 * milliseconds */
/* X-macro list: a user defines CT_TIMEOUT(NAME, VAL), expands CT_TIMEOUTS,
 * and then #undefs CT_TIMEOUT again (see the build assertions and
 * 'enum ct_timeout' below). */
197#define CT_TIMEOUTS \
198 CT_TIMEOUT(TCP_FIRST_PACKET, 30 * 1000) \
199 CT_TIMEOUT(TCP_OPENING, 30 * 1000) \
200 CT_TIMEOUT(TCP_ESTABLISHED, 24 * 60 * 60 * 1000) \
201 CT_TIMEOUT(TCP_CLOSING, 15 * 60 * 1000) \
202 CT_TIMEOUT(TCP_FIN_WAIT, 45 * 1000) \
203 CT_TIMEOUT(TCP_CLOSED, 30 * 1000) \
204 CT_TIMEOUT(OTHER_FIRST, 60 * 1000) \
205 CT_TIMEOUT(OTHER_MULTIPLE, 60 * 1000) \
206 CT_TIMEOUT(OTHER_BIDIR, 30 * 1000) \
b269a122
DDP
207 CT_TIMEOUT(ICMP_FIRST, 60 * 1000) \
208 CT_TIMEOUT(ICMP_REPLY, 30 * 1000)
a489b168 209
e6ef6cc6
DDP
210/* The smallest of the above values: it is used as an upper bound for the
211 * interval between two rounds of cleanup of expired entries */
212#define CT_TM_MIN (30 * 1000)
213
/* Compile-time check: expands CT_TIMEOUTS into one BUILD_ASSERT_DECL per
 * entry, so the build fails if any timeout is smaller than CT_TM_MIN. */
214#define CT_TIMEOUT(NAME, VAL) BUILD_ASSERT_DECL(VAL >= CT_TM_MIN);
215 CT_TIMEOUTS
216#undef CT_TIMEOUT
217
a489b168
DDP
/* One enumerator CT_TM_<NAME> per CT_TIMEOUTS entry, in list order.  The
 * enumerators carry no explicit values, so they count up from 0 and the
 * trailing N_CT_TM equals the number of timeouts (it sizes 'exp_lists' in
 * 'struct conntrack_bucket'). */
218enum ct_timeout {
219#define CT_TIMEOUT(NAME, VALUE) CT_TM_##NAME,
220 CT_TIMEOUTS
221#undef CT_TIMEOUT
222 N_CT_TM
223};
224
225/* Locking:
226 *
227 * The connections are kept in different buckets, which are completely
228 * independent. The connection bucket is determined by the hash of its key.
e6ef6cc6
DDP
229 *
230 * Each bucket has two locks. Acquisition order is, from outermost to
231 * innermost:
232 *
233 * cleanup_mutex
234 * lock
235 *
a489b168
DDP
236 * */
/* One bucket of the connection tracker, with its own pair of locks (see the
 * locking notes above). */
237struct conntrack_bucket {
e6ef6cc6 238 /* Protects 'connections' and 'exp_lists'. Used in the fast path */
a489b168 239 struct ct_lock lock;
e6ef6cc6 240 /* Contains the connections in the bucket, indexed by 'struct conn_key' */
a489b168 241 struct hmap connections OVS_GUARDED;
e6ef6cc6
DDP
242 /* For each possible timeout we have a list of connections. When the
243 * timeout of a connection is updated, we move it to the back of the list.
244 * Since the connections in a list have the same relative timeout, the list
245 * will be ordered, with the oldest connections to the front. */
246 struct ovs_list exp_lists[N_CT_TM] OVS_GUARDED;
247
248 /* Protects 'next_cleanup'. Used to make sure that there's only one thread
249 * performing the cleanup. */
250 struct ovs_mutex cleanup_mutex;
251 long long next_cleanup OVS_GUARDED;
a489b168
DDP
252};
253
/* Number of buckets in 'struct conntrack', kept a power of two by deriving
 * it from a shift: 1 << 8 = 256. */
254#define CONNTRACK_BUCKETS_SHIFT 8
255#define CONNTRACK_BUCKETS (1 << CONNTRACK_BUCKETS_SHIFT)
256
/* Top-level state of the userspace connection tracker: the bucket array, the
 * hash salt, the cleanup thread and its exit latch, the connection count and
 * limit, the NAT and ALG tables, and 'resources_lock'. */
257struct conntrack {
258 /* Independent buckets containing the connections */
259 struct conntrack_bucket buckets[CONNTRACK_BUCKETS];
260
261 /* Salt for hashing a connection key. */
262 uint32_t hash_basis;
263
e6ef6cc6
DDP
264 /* The thread performing periodic cleanup of the connection
265 * tracker */
266 pthread_t clean_thread;
267 /* Latch to destroy the 'clean_thread' */
268 struct latch clean_thread_exit;
269
a489b168
DDP
270 /* Number of connections currently in the connection tracker. */
271 atomic_count n_conn;
272 /* Connections limit. When this limit is reached, no new connection
273 * will be accepted. */
274 atomic_uint n_conn_limit;
286de272
DB
275
276 /* The following resources are referenced during nat connection
277 * creation and deletion. */
278 struct hmap nat_conn_keys OVS_GUARDED;
bd5e81a0
DB
279 /* Hash table for alg expectations. Expectations are created
280 * by control connections to help create data connections. */
281 struct hmap alg_expectations OVS_GUARDED;
4417ca3d
DB
282 /* Used to lookup alg expectations from the control context. */
283 struct hindex alg_expectation_refs OVS_GUARDED;
bd5e81a0
DB
284 /* Expiry list for alg expectations. */
285 struct ovs_list alg_exp_list OVS_GUARDED;
286de272
DB
286 /* This lock is used during NAT connection creation and deletion;
287 * it is taken after a bucket lock and given back before that
288 * bucket unlock.
bd5e81a0 289 * This lock is similarly used to guard alg_expectations and
4417ca3d
DB
290 * alg_expectation_refs. If a bucket lock is also held during
291 * the normal code flow, then the bucket lock must be taken first and
292 * released last.
286de272 293 */
8b934ced 294 struct ct_rwlock resources_lock;
286de272 295
a489b168
DDP
296};
297
298#endif /* conntrack.h */