]>
Commit | Line | Data |
---|---|---|
a489b168 | 1 | /* |
bd5e81a0 | 2 | * Copyright (c) 2015, 2016, 2017 Nicira, Inc. |
a489b168 DDP |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); | |
5 | * you may not use this file except in compliance with the License. | |
6 | * You may obtain a copy of the License at: | |
7 | * | |
8 | * http://www.apache.org/licenses/LICENSE-2.0 | |
9 | * | |
10 | * Unless required by applicable law or agreed to in writing, software | |
11 | * distributed under the License is distributed on an "AS IS" BASIS, | |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
13 | * See the License for the specific language governing permissions and | |
14 | * limitations under the License. | |
15 | */ | |
16 | ||
17 | #ifndef CONNTRACK_H | |
18 | #define CONNTRACK_H 1 | |
19 | ||
20 | #include <stdbool.h> | |
21 | ||
e6ef6cc6 | 22 | #include "latch.h" |
a489b168 DDP |
23 | #include "odp-netlink.h" |
24 | #include "openvswitch/hmap.h" | |
e6ef6cc6 | 25 | #include "openvswitch/list.h" |
a489b168 DDP |
26 | #include "openvswitch/thread.h" |
27 | #include "openvswitch/types.h" | |
28 | #include "ovs-atomic.h" | |
4cddb1f0 DB |
29 | #include "ovs-thread.h" |
30 | #include "packets.h" | |
4417ca3d | 31 | #include "hindex.h" |
a489b168 DDP |
32 | |
33 | /* Userspace connection tracker | |
34 | * ============================ | |
35 | * | |
36 | * This is a connection tracking module that keeps all the state in userspace. | |
37 | * | |
38 | * Usage | |
39 | * ===== | |
40 | * | |
41 | * struct conntrack ct; | |
42 | * | |
43 | * Initialization: | |
44 | * | |
45 | * conntrack_init(&ct); | |
46 | * | |
47 | * It is necessary to periodically issue a call to | |
48 | * | |
49 | * conntrack_run(&ct); | |
50 | * | |
51 | * to allow the module to clean up expired connections. | |
52 | * | |
53 | * To send a group of packets through the connection tracker: | |
54 | * | |
55 | * conntrack_execute(&ct, pkts, n_pkts, ...); | |
56 | * | |
57 | * Thread-safety | |
58 | * ============= | |
59 | * | |
60 | * conntrack_execute() can be called by multiple threads simultaneously. | |
61 | */ | |
62 | ||
63 | struct dp_packet_batch; | |
64 | ||
65 | struct conntrack; | |
66 | ||
4cddb1f0 DB |
67 | /* Holds one IPv4 or IPv6 address. All four members belong to a single anonymous union, so they share storage and are alternative views of the same bytes. */ struct ct_addr { |
68 | union { | |
69 | ovs_16aligned_be32 ipv4; /* IPv4 view; 16-bit-aligned variant, going by the type name. */ | |
70 | union ovs_16aligned_in6_addr ipv6; /* IPv6 view; 16-bit-aligned variant, going by the type name. */ | |
71 | ovs_be32 ipv4_aligned; /* IPv4 view as a plain ovs_be32. */ | |
72 | struct in6_addr ipv6_aligned; /* IPv6 view as a plain struct in6_addr. */ | |
73 | }; | |
74 | }; | |
75 | ||
76 | /* NAT action flags. Each enumerator occupies its own bit (bits 0 through 3), so values can be combined with bitwise OR. NOTE(review): presumably stored in nat_action_info_t.nat_action -- confirm against the implementation. */ enum nat_action_e { | |
77 | NAT_ACTION_SRC = 1 << 0, /* Bit 0; presumably source-address translation -- confirm. */ | |
78 | NAT_ACTION_SRC_PORT = 1 << 1, /* Bit 1; presumably source-port translation -- confirm. */ | |
79 | NAT_ACTION_DST = 1 << 2, /* Bit 2; presumably destination-address translation -- confirm. */ | |
80 | NAT_ACTION_DST_PORT = 1 << 3, /* Bit 3; presumably destination-port translation -- confirm. */ | |
81 | }; | |
82 | ||
83 | /* NAT parameters; conntrack_execute() takes a pointer to this struct as its 'nat_action_info' argument. */ struct nat_action_info_t { | |
84 | struct ct_addr min_addr; /* Lower address bound; presumably inclusive -- confirm. */ | |
85 | struct ct_addr max_addr; /* Upper address bound; presumably inclusive -- confirm. */ | |
86 | uint16_t min_port; /* Lower port bound; byte order is not visible in this header -- confirm. */ | |
87 | uint16_t max_port; /* Upper port bound; byte order is not visible in this header -- confirm. */ | |
88 | uint16_t nat_action; /* Presumably a combination of 'enum nat_action_e' flags -- confirm. */ | |
89 | }; | |
90 | ||
a489b168 | 91 | void conntrack_init(struct conntrack *); |
a489b168 DDP |
92 | void conntrack_destroy(struct conntrack *); |
93 | ||
bd7d93f8 DB |
94 | int conntrack_execute(struct conntrack *ct, struct dp_packet_batch *pkt_batch, |
95 | ovs_be16 dl_type, bool force, bool commit, uint16_t zone, | |
96 | const uint32_t *setmark, | |
286de272 | 97 | const struct ovs_key_ct_labels *setlabel, |
bd7d93f8 | 98 | ovs_be16 tp_src, ovs_be16 tp_dst, const char *helper, |
94053e66 FA |
99 | const struct nat_action_info_t *nat_action_info, |
100 | long long now); | |
1fe178d2 | 101 | void conntrack_clear(struct dp_packet *packet); |
4d4e68ed DDP |
102 | |
103 | /* Iteration state shared by conntrack_dump_start(), conntrack_dump_next() and conntrack_dump_done(), each of which takes a pointer to this struct. */ struct conntrack_dump { | |
104 | struct conntrack *ct; /* Connection tracker being dumped; presumably the one given to conntrack_dump_start() -- confirm. */ | |
105 | unsigned bucket; /* Presumably the index of the bucket currently being walked -- confirm. */ | |
106 | struct hmap_position bucket_pos; /* Presumably the position inside that bucket's hmap -- confirm. */ | |
107 | bool filter_zone; /* Presumably true when only entries in 'zone' should be reported -- confirm. */ | |
108 | uint16_t zone; /* Zone to filter on; presumably only meaningful when 'filter_zone' is true -- confirm. */ | |
109 | }; | |
110 | ||
111 | struct ct_dpif_entry; | |
271e48a0 | 112 | struct ct_dpif_tuple; |
4d4e68ed DDP |
113 | |
114 | int conntrack_dump_start(struct conntrack *, struct conntrack_dump *, | |
ded30c74 | 115 | const uint16_t *pzone, int *); |
4d4e68ed DDP |
116 | int conntrack_dump_next(struct conntrack_dump *, struct ct_dpif_entry *); |
117 | int conntrack_dump_done(struct conntrack_dump *); | |
5d9cbb4c DDP |
118 | |
119 | int conntrack_flush(struct conntrack *, const uint16_t *zone); | |
271e48a0 YHW |
120 | int conntrack_flush_tuple(struct conntrack *, const struct ct_dpif_tuple *, |
121 | uint16_t zone); | |
c92339ad DB |
122 | int conntrack_set_maxconns(struct conntrack *ct, uint32_t maxconns); |
123 | int conntrack_get_maxconns(struct conntrack *ct, uint32_t *maxconns); | |
875075b3 | 124 | int conntrack_get_nconns(struct conntrack *ct, uint32_t *nconns); |
a489b168 DDP |
125 | \f |
126 | /* 'struct ct_lock' is a wrapper for an adaptive mutex. It's useful to try | |
127 | * different types of locks (e.g. spinlocks) */ | |
128 | ||
129 | struct OVS_LOCKABLE ct_lock { | |
130 | struct ovs_mutex lock; /* The wrapped mutex; ct_lock_init() sets it up with ovs_mutex_init_adaptive(). */ | |
131 | }; | |
132 | ||
286de272 DB |
133 | /* Wrapper around a 'struct ovs_rwlock', operated on through the ct_rwlock_*() helpers in this header. */ struct OVS_LOCKABLE ct_rwlock { |
134 | struct ovs_rwlock lock; /* The wrapped read-write lock. */ | |
135 | }; | |
136 | ||
a489b168 DDP |
137 | /* Initializes 'lock' by setting up its wrapped mutex with ovs_mutex_init_adaptive(). */ static inline void ct_lock_init(struct ct_lock *lock) |
138 | { | |
139 | ovs_mutex_init_adaptive(&lock->lock); | |
140 | } | |
141 | ||
142 | /* Acquires the mutex wrapped by 'lock' via ovs_mutex_lock(). Annotated with OVS_ACQUIRES(lock). */ static inline void ct_lock_lock(struct ct_lock *lock) | |
143 | OVS_ACQUIRES(lock) | |
144 | OVS_NO_THREAD_SAFETY_ANALYSIS /* NOTE(review): presumably needed because the analysis cannot relate the wrapper to the inner mutex -- confirm. */ | |
145 | { | |
146 | ovs_mutex_lock(&lock->lock); | |
147 | } | |
148 | ||
149 | /* Releases the mutex wrapped by 'lock' via ovs_mutex_unlock(). Annotated with OVS_RELEASES(lock). */ static inline void ct_lock_unlock(struct ct_lock *lock) | |
150 | OVS_RELEASES(lock) | |
151 | OVS_NO_THREAD_SAFETY_ANALYSIS /* NOTE(review): presumably needed because the analysis cannot relate the wrapper to the inner mutex -- confirm. */ | |
152 | { | |
153 | ovs_mutex_unlock(&lock->lock); | |
154 | } | |
155 | ||
156 | /* Destroys the mutex wrapped by 'lock' via ovs_mutex_destroy(). */ static inline void ct_lock_destroy(struct ct_lock *lock) | |
157 | { | |
158 | ovs_mutex_destroy(&lock->lock); | |
159 | } | |
286de272 DB |
160 | |
161 | /* Initializes 'lock' by calling ovs_rwlock_init() on its wrapped read-write lock. */ static inline void ct_rwlock_init(struct ct_rwlock *lock) | |
162 | { | |
163 | ovs_rwlock_init(&lock->lock); | |
164 | } | |
165 | ||
166 | ||
167 | /* Takes the read-write lock wrapped by 'lock' for writing via ovs_rwlock_wrlock(). Annotated with OVS_ACQ_WRLOCK(lock). */ static inline void ct_rwlock_wrlock(struct ct_rwlock *lock) | |
168 | OVS_ACQ_WRLOCK(lock) | |
169 | OVS_NO_THREAD_SAFETY_ANALYSIS /* NOTE(review): presumably needed because the analysis cannot relate the wrapper to the inner lock -- confirm. */ | |
170 | { | |
171 | ovs_rwlock_wrlock(&lock->lock); | |
172 | } | |
173 | ||
174 | /* Takes the read-write lock wrapped by 'lock' for reading via ovs_rwlock_rdlock(). Annotated with OVS_ACQ_RDLOCK(lock). */ static inline void ct_rwlock_rdlock(struct ct_rwlock *lock) | |
175 | OVS_ACQ_RDLOCK(lock) | |
176 | OVS_NO_THREAD_SAFETY_ANALYSIS /* NOTE(review): presumably needed because the analysis cannot relate the wrapper to the inner lock -- confirm. */ | |
177 | { | |
178 | ovs_rwlock_rdlock(&lock->lock); | |
179 | } | |
180 | ||
181 | /* Releases the read-write lock wrapped by 'lock' via ovs_rwlock_unlock(); the same helper is used after either ct_rwlock_rdlock() or ct_rwlock_wrlock(), as there is no separate unlock variant here. Annotated with OVS_RELEASES(lock). */ static inline void ct_rwlock_unlock(struct ct_rwlock *lock) | |
182 | OVS_RELEASES(lock) | |
183 | OVS_NO_THREAD_SAFETY_ANALYSIS /* NOTE(review): presumably needed because the analysis cannot relate the wrapper to the inner lock -- confirm. */ | |
184 | { | |
185 | ovs_rwlock_unlock(&lock->lock); | |
186 | } | |
187 | ||
188 | /* Destroys the read-write lock wrapped by 'lock' via ovs_rwlock_destroy(). */ static inline void ct_rwlock_destroy(struct ct_rwlock *lock) | |
189 | { | |
190 | ovs_rwlock_destroy(&lock->lock); | |
191 | } | |
192 | ||
a489b168 DDP |
193 | \f |
194 | /* Timeouts: all the possible timeout states passed to update_expiration() | |
195 | * are listed here. The name will be prefixed by CT_TM_ and the value is in | |
196 | * milliseconds */ | |
197 | #define CT_TIMEOUTS \ | |
198 | CT_TIMEOUT(TCP_FIRST_PACKET, 30 * 1000) \ | |
199 | CT_TIMEOUT(TCP_OPENING, 30 * 1000) \ | |
200 | CT_TIMEOUT(TCP_ESTABLISHED, 24 * 60 * 60 * 1000) \ | |
201 | CT_TIMEOUT(TCP_CLOSING, 15 * 60 * 1000) \ | |
202 | CT_TIMEOUT(TCP_FIN_WAIT, 45 * 1000) \ | |
203 | CT_TIMEOUT(TCP_CLOSED, 30 * 1000) \ | |
204 | CT_TIMEOUT(OTHER_FIRST, 60 * 1000) \ | |
205 | CT_TIMEOUT(OTHER_MULTIPLE, 60 * 1000) \ | |
206 | CT_TIMEOUT(OTHER_BIDIR, 30 * 1000) \ | |
b269a122 DDP |
207 | CT_TIMEOUT(ICMP_FIRST, 60 * 1000) \ |
208 | CT_TIMEOUT(ICMP_REPLY, 30 * 1000) | |
a489b168 | 209 | |
e6ef6cc6 DDP |
210 | /* The smallest of the above values: it is used as an upper bound for the |
211 | * interval between two rounds of cleanup of expired entries */ | |
212 | #define CT_TM_MIN (30 * 1000) | |
213 | ||
214 | #define CT_TIMEOUT(NAME, VAL) BUILD_ASSERT_DECL(VAL >= CT_TM_MIN); | |
215 | CT_TIMEOUTS | |
216 | #undef CT_TIMEOUT | |
217 | ||
a489b168 DDP |
218 | enum ct_timeout { |
219 | #define CT_TIMEOUT(NAME, VALUE) CT_TM_##NAME, | |
220 | CT_TIMEOUTS | |
221 | #undef CT_TIMEOUT | |
222 | N_CT_TM | |
223 | }; | |
224 | ||
225 | /* Locking: | |
226 | * | |
227 | * The connections are kept in different buckets, which are completely | |
228 | * independent. The connection bucket is determined by the hash of its key. | |
e6ef6cc6 DDP |
229 | * |
230 | * Each bucket has two locks. Acquisition order is, from outermost to | |
231 | * innermost: | |
232 | * | |
233 | * cleanup_mutex | |
234 | * lock | |
235 | * | |
a489b168 DDP |
236 | * */ |
237 | struct conntrack_bucket { | |
e6ef6cc6 | 238 | /* Protects 'connections' and 'exp_lists'. Used in the fast path */ |
a489b168 | 239 | struct ct_lock lock; |
e6ef6cc6 | 240 | /* Contains the connections in the bucket, indexed by 'struct conn_key' */ |
a489b168 | 241 | struct hmap connections OVS_GUARDED; |
e6ef6cc6 DDP |
242 | /* For each possible timeout we have a list of connections. When the |
243 | * timeout of a connection is updated, we move it to the back of the list. | |
244 | * Since the connections in a list have the same relative timeout, the list | |
245 | * will be ordered, with the oldest connections to the front. */ | |
246 | struct ovs_list exp_lists[N_CT_TM] OVS_GUARDED; | |
247 | ||
248 | /* Protects 'next_cleanup'. Used to make sure that there's only one thread | |
249 | * performing the cleanup. */ | |
250 | struct ovs_mutex cleanup_mutex; | |
251 | long long next_cleanup OVS_GUARDED; | |
a489b168 DDP |
252 | }; |
253 | ||
254 | #define CONNTRACK_BUCKETS_SHIFT 8 | |
255 | #define CONNTRACK_BUCKETS (1 << CONNTRACK_BUCKETS_SHIFT) | |
256 | ||
257 | struct conntrack { | |
258 | /* Independent buckets containing the connections */ | |
259 | struct conntrack_bucket buckets[CONNTRACK_BUCKETS]; | |
260 | ||
261 | /* Salt for hashing a connection key. */ | |
262 | uint32_t hash_basis; | |
263 | ||
e6ef6cc6 DDP |
264 | /* The thread performing periodic cleanup of the connection |
265 | * tracker */ | |
266 | pthread_t clean_thread; | |
267 | /* Latch to destroy the 'clean_thread' */ | |
268 | struct latch clean_thread_exit; | |
269 | ||
a489b168 DDP |
270 | /* Number of connections currently in the connection tracker. */ |
271 | atomic_count n_conn; | |
272 | /* Connections limit. When this limit is reached, no new connection | |
273 | * will be accepted. */ | |
274 | atomic_uint n_conn_limit; | |
286de272 DB |
275 | |
276 | /* The following resources are referenced during nat connection | |
277 | * creation and deletion. */ | |
278 | struct hmap nat_conn_keys OVS_GUARDED; | |
bd5e81a0 DB |
279 | /* Hash table for alg expectations. Expectations are created |
280 | * by control connections to help create data connections. */ | |
281 | struct hmap alg_expectations OVS_GUARDED; | |
4417ca3d DB |
282 | /* Used to lookup alg expectations from the control context. */ |
283 | struct hindex alg_expectation_refs OVS_GUARDED; | |
bd5e81a0 DB |
284 | /* Expiry list for alg expectations. */ |
285 | struct ovs_list alg_exp_list OVS_GUARDED; | |
286de272 DB |
286 | /* This lock is used during NAT connection creation and deletion; |
287 | * it is taken after a bucket lock and given back before that | |
288 | * bucket unlock. | |
bd5e81a0 | 289 | * This lock is similarly used to guard alg_expectations and |
4417ca3d DB |
290 | * alg_expectation_refs. If a bucket lock is also held during |
291 | * the normal code flow, then the bucket lock must be taken first and released | |
292 | * last. | |
286de272 | 293 | */ |
8b934ced | 294 | struct ct_rwlock resources_lock; |
286de272 | 295 | |
a489b168 DDP |
296 | }; |
297 | ||
298 | #endif /* conntrack.h */ |