2 * Copyright (c) 2015, 2016, 2017 Nicira, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
23 #include "odp-netlink.h"
24 #include "openvswitch/hmap.h"
25 #include "openvswitch/list.h"
26 #include "openvswitch/thread.h"
27 #include "openvswitch/types.h"
28 #include "ovs-atomic.h"
29 #include "ovs-thread.h"
33 /* Userspace connection tracker
34 * ============================
36 * This is a connection tracking module that keeps all the state in userspace.
41 * struct conntrack ct;
45 * conntrack_init(&ct);
47 * It is necessary to periodically issue a call to
51 * to allow the module to clean up expired connections.
53 * To send a group of packets through the connection tracker:
55 * conntrack_execute(&ct, pkts, n_pkts, ...);
60 * conntrack_execute() can be called by multiple threads simultaneously.
/* Forward declaration; the prototypes visible in this header only refer to
 * packet batches through pointers. */
struct dp_packet_batch;
69 ovs_16aligned_be32 ipv4
;
70 union ovs_16aligned_in6_addr ipv6
;
71 ovs_be32 ipv4_aligned
;
72 struct in6_addr ipv6_aligned
;
77 NAT_ACTION_SRC
= 1 << 0,
78 NAT_ACTION_SRC_PORT
= 1 << 1,
79 NAT_ACTION_DST
= 1 << 2,
80 NAT_ACTION_DST_PORT
= 1 << 3,
83 struct nat_action_info_t
{
84 struct ct_addr min_addr
;
85 struct ct_addr max_addr
;
/* Prepares the connection tracker 'ct' for use; see the usage notes at the
 * top of this header. */
void conntrack_init(struct conntrack *ct);

/* NOTE(review): presumably the counterpart of conntrack_init(); the
 * definition is not visible from this header -- confirm what it releases. */
void conntrack_destroy(struct conntrack *ct);
94 int conntrack_execute(struct conntrack
*ct
, struct dp_packet_batch
*pkt_batch
,
95 ovs_be16 dl_type
, bool force
, bool commit
, uint16_t zone
,
96 const uint32_t *setmark
,
97 const struct ovs_key_ct_labels
*setlabel
,
98 ovs_be16 tp_src
, ovs_be16 tp_dst
, const char *helper
,
99 const struct nat_action_info_t
*nat_action_info
,
/* NOTE(review): presumably resets the connection-tracking state carried by
 * 'packet'; the definition is not visible from this header -- confirm. */
void conntrack_clear(struct dp_packet *packet);
103 struct conntrack_dump
{
104 struct conntrack
*ct
;
106 struct hmap_position bucket_pos
;
/* Forward declarations; the prototypes visible in this header only take
 * pointers to these types. */
struct ct_dpif_entry;
struct ct_dpif_tuple;
/* Dump interface: a dump is started on a tracker, entries are fetched one at
 * a time into a 'struct ct_dpif_entry', and the dump is then finished.
 *
 * NOTE(review): the meaning of the int return values, of 'pzone', and of the
 * trailing unnamed 'int *' argument is not visible from this header --
 * confirm against the implementation before relying on them. */
int conntrack_dump_start(struct conntrack *, struct conntrack_dump *,
                         const uint16_t *pzone, int *);
int conntrack_dump_next(struct conntrack_dump *, struct ct_dpif_entry *);
int conntrack_dump_done(struct conntrack_dump *);
/* NOTE(review): presumably removes tracked connections, optionally limited
 * to '*zone'; the handling of a null 'zone' and the return value are not
 * visible from this header -- confirm. */
int conntrack_flush(struct conntrack *, const uint16_t *zone);
120 int conntrack_flush_tuple(struct conntrack
*, const struct ct_dpif_tuple
*,
/* Accessors for the tracker's connection limit and current connection
 * count.  The getters report their result through the pointer argument.
 *
 * NOTE(review): the meaning of the int return values is not visible from
 * this header -- confirm against the implementation. */
int conntrack_set_maxconns(struct conntrack *ct, uint32_t maxconns);
int conntrack_get_maxconns(struct conntrack *ct, uint32_t *maxconns);
int conntrack_get_nconns(struct conntrack *ct, uint32_t *nconns);
126 /* 'struct ct_lock' is a wrapper for an adaptive mutex. It's useful to try
127 * different types of locks (e.g. spinlocks) */
129 struct OVS_LOCKABLE ct_lock
{
130 struct ovs_mutex lock
;
133 struct OVS_LOCKABLE ct_rwlock
{
134 struct ovs_rwlock lock
;
137 static inline void ct_lock_init(struct ct_lock
*lock
)
139 ovs_mutex_init_adaptive(&lock
->lock
);
142 static inline void ct_lock_lock(struct ct_lock
*lock
)
144 OVS_NO_THREAD_SAFETY_ANALYSIS
146 ovs_mutex_lock(&lock
->lock
);
149 static inline void ct_lock_unlock(struct ct_lock
*lock
)
151 OVS_NO_THREAD_SAFETY_ANALYSIS
153 ovs_mutex_unlock(&lock
->lock
);
156 static inline void ct_lock_destroy(struct ct_lock
*lock
)
158 ovs_mutex_destroy(&lock
->lock
);
161 static inline void ct_rwlock_init(struct ct_rwlock
*lock
)
163 ovs_rwlock_init(&lock
->lock
);
167 static inline void ct_rwlock_wrlock(struct ct_rwlock
*lock
)
169 OVS_NO_THREAD_SAFETY_ANALYSIS
171 ovs_rwlock_wrlock(&lock
->lock
);
174 static inline void ct_rwlock_rdlock(struct ct_rwlock
*lock
)
176 OVS_NO_THREAD_SAFETY_ANALYSIS
178 ovs_rwlock_rdlock(&lock
->lock
);
181 static inline void ct_rwlock_unlock(struct ct_rwlock
*lock
)
183 OVS_NO_THREAD_SAFETY_ANALYSIS
185 ovs_rwlock_unlock(&lock
->lock
);
188 static inline void ct_rwlock_destroy(struct ct_rwlock
*lock
)
190 ovs_rwlock_destroy(&lock
->lock
);
194 /* Timeouts: all the possible timeout states passed to update_expiration()
195 * are listed here. The name will be prefixed by CT_TM_ and the value is in
/* Table of timeouts in X-macro form: expands to one CT_TIMEOUT(NAME, VAL)
 * invocation per entry, so CT_TIMEOUT must be defined by the user before
 * CT_TIMEOUTS is expanded.
 *
 * NOTE(review): the values look like milliseconds (e.g. 24 * 60 * 60 * 1000
 * for one day) -- confirm the unit against the code that consumes them. */
#define CT_TIMEOUTS \
    CT_TIMEOUT(TCP_FIRST_PACKET, 30 * 1000) \
    CT_TIMEOUT(TCP_OPENING, 30 * 1000) \
    CT_TIMEOUT(TCP_ESTABLISHED, 24 * 60 * 60 * 1000) \
    CT_TIMEOUT(TCP_CLOSING, 15 * 60 * 1000) \
    CT_TIMEOUT(TCP_FIN_WAIT, 45 * 1000) \
    CT_TIMEOUT(TCP_CLOSED, 30 * 1000) \
    CT_TIMEOUT(OTHER_FIRST, 60 * 1000) \
    CT_TIMEOUT(OTHER_MULTIPLE, 60 * 1000) \
    CT_TIMEOUT(OTHER_BIDIR, 30 * 1000) \
    CT_TIMEOUT(ICMP_FIRST, 60 * 1000) \
    CT_TIMEOUT(ICMP_REPLY, 30 * 1000)
210 /* The smallest of the above values: it is used as an upper bound for the
211 * interval between two rounds of cleanup of expired entries */
/* Every value in CT_TIMEOUTS is build-asserted to be at least this large. */
#define CT_TM_MIN (30 * 1000)
/* Turns a CT_TIMEOUTS entry into a compile-time check that its value is not
 * below CT_TM_MIN.  VAL is parenthesized so that a compound expression is
 * compared as a whole rather than being split by operator precedence. */
#define CT_TIMEOUT(NAME, VAL) BUILD_ASSERT_DECL((VAL) >= CT_TM_MIN);
/* Turns a CT_TIMEOUTS entry into the comma-terminated identifier
 * CT_TM_<NAME>, suitable for an enumerator list; VALUE is ignored. */
#define CT_TIMEOUT(NAME, VALUE) CT_TM_##NAME,
227 * The connections are kept in different buckets, which are completely
228 * independent. The connection bucket is determined by the hash of its key.
230 * Each bucket has two locks. Acquisition order is, from outermost to
237 struct conntrack_bucket
{
238 /* Protects 'connections' and 'exp_lists'. Used in the fast path */
240 /* Contains the connections in the bucket, indexed by 'struct conn_key' */
241 struct hmap connections OVS_GUARDED
;
242 /* For each possible timeout we have a list of connections. When the
243 * timeout of a connection is updated, we move it to the back of the list.
244 * Since the connections in a list have the same relative timeout, the list
245 * will be ordered, with the oldest connections to the front. */
246 struct ovs_list exp_lists
[N_CT_TM
] OVS_GUARDED
;
248 /* Protects 'next_cleanup'. Used to make sure that there's only one thread
249 * performing the cleanup. */
250 struct ovs_mutex cleanup_mutex
;
251 long long next_cleanup OVS_GUARDED
;
/* Number of buckets, kept as a power of two by deriving it from the shift. */
#define CONNTRACK_BUCKETS_SHIFT 8
#define CONNTRACK_BUCKETS (1 << CONNTRACK_BUCKETS_SHIFT)
258 /* Independent buckets containing the connections */
259 struct conntrack_bucket buckets
[CONNTRACK_BUCKETS
];
261 /* Salt for hashing a connection key. */
264 /* The thread performing periodic cleanup of the connection
266 pthread_t clean_thread
;
267 /* Latch to destroy the 'clean_thread' */
268 struct latch clean_thread_exit
;
270 /* Number of connections currently in the connection tracker. */
272 /* Connections limit. When this limit is reached, no new connection
273 * will be accepted. */
274 atomic_uint n_conn_limit
;
276 /* The following resources are referenced during nat connection
277 * creation and deletion. */
278 struct hmap nat_conn_keys OVS_GUARDED
;
279 /* Hash table for alg expectations. Expectations are created
280 * by control connections to help create data connections. */
281 struct hmap alg_expectations OVS_GUARDED
;
282 /* Used to lookup alg expectations from the control context. */
283 struct hindex alg_expectation_refs OVS_GUARDED
;
284 /* Expiry list for alg expectations. */
285 struct ovs_list alg_exp_list OVS_GUARDED
;
286 /* This lock is used during NAT connection creation and deletion;
287 * it is taken after a bucket lock and given back before that
289 * This lock is similarly used to guard alg_expectations and
290 * alg_expectation_refs. If a bucket lock is also held during
291 * the normal code flow, then it must be taken first and released
294 struct ct_rwlock resources_lock
;
298 #endif /* conntrack.h */