]> git.proxmox.com Git - mirror_ovs.git/blame - lib/conntrack.c
conntrack: Fix 'reverse_nat_packet()' variable datatype.
[mirror_ovs.git] / lib / conntrack.c
CommitLineData
a489b168 1/*
4ea96698 2 * Copyright (c) 2015-2019 Nicira, Inc.
a489b168
DDP
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include <config.h>
bd5e81a0 18#include <ctype.h>
a489b168 19#include <errno.h>
ff6aa424 20#include <sys/types.h>
a489b168
DDP
21#include <netinet/in.h>
22#include <netinet/icmp6.h>
bd5e81a0 23#include <string.h>
a489b168
DDP
24
25#include "bitmap.h"
bd5e81a0 26#include "conntrack.h"
a489b168
DDP
27#include "conntrack-private.h"
28#include "coverage.h"
29#include "csum.h"
4d4e68ed 30#include "ct-dpif.h"
a489b168
DDP
31#include "dp-packet.h"
32#include "flow.h"
33#include "netdev.h"
34#include "odp-netlink.h"
35#include "openvswitch/hmap.h"
36#include "openvswitch/vlog.h"
37#include "ovs-rcu.h"
e6ef6cc6 38#include "ovs-thread.h"
fd016ae3 39#include "openvswitch/poll-loop.h"
a489b168
DDP
40#include "random.h"
41#include "timeval.h"
42
43VLOG_DEFINE_THIS_MODULE(conntrack);
44
45COVERAGE_DEFINE(conntrack_full);
e6ef6cc6 46COVERAGE_DEFINE(conntrack_long_cleanup);
a489b168
DDP
47
48struct conn_lookup_ctx {
49 struct conn_key key;
50 struct conn *conn;
51 uint32_t hash;
52 bool reply;
dbb597d3 53 bool icmp_related;
a489b168
DDP
54};
55
bd5e81a0
DB
56enum ftp_ctl_pkt {
57 /* Control packets with address and/or port specifiers. */
58 CT_FTP_CTL_INTEREST,
59 /* Control packets without address and/or port specifiers. */
60 CT_FTP_CTL_OTHER,
61 CT_FTP_CTL_INVALID,
62};
63
64enum ct_alg_mode {
65 CT_FTP_MODE_ACTIVE,
66 CT_FTP_MODE_PASSIVE,
7be77cb0 67 CT_TFTP_MODE,
bd5e81a0
DB
68};
69
94e71143
DB
70enum ct_alg_ctl_type {
71 CT_ALG_CTL_NONE,
72 CT_ALG_CTL_FTP,
73 CT_ALG_CTL_TFTP,
be38342d
DB
74 /* SIP is not enabled through Openflow and presently only used as
75 * an example of an alg that allows a wildcard src ip. */
76 CT_ALG_CTL_SIP,
94e71143
DB
77};
78
a489b168 79static bool conn_key_extract(struct conntrack *, struct dp_packet *,
66e4ad8a
DDP
80 ovs_be16 dl_type, struct conn_lookup_ctx *,
81 uint16_t zone);
a489b168
DDP
82static uint32_t conn_key_hash(const struct conn_key *, uint32_t basis);
83static void conn_key_reverse(struct conn_key *);
a489b168 84static bool valid_new(struct dp_packet *pkt, struct conn_key *);
967bb5c5 85static struct conn *new_conn(struct conntrack *ct, struct dp_packet *pkt,
e6ef6cc6 86 struct conn_key *, long long now);
967bb5c5 87static void delete_conn_cmn(struct conn *);
a489b168 88static void delete_conn(struct conn *);
967bb5c5
DB
89static void delete_conn_one(struct conn *conn);
90static enum ct_update_res conn_update(struct conntrack *ct, struct conn *conn,
91 struct dp_packet *pkt,
92 struct conn_lookup_ctx *ctx,
e6ef6cc6 93 long long now);
a489b168
DDP
94static bool conn_expired(struct conn *, long long now);
95static void set_mark(struct dp_packet *, struct conn *,
96 uint32_t val, uint32_t mask);
97static void set_label(struct dp_packet *, struct conn *,
98 const struct ovs_key_ct_labels *val,
99 const struct ovs_key_ct_labels *mask);
e6ef6cc6 100static void *clean_thread_main(void *f_);
a489b168 101
286de272
DB
102static bool
103nat_select_range_tuple(struct conntrack *ct, const struct conn *conn,
104 struct conn *nat_conn);
105
106static uint8_t
107reverse_icmp_type(uint8_t type);
108static uint8_t
109reverse_icmp6_type(uint8_t type);
110static inline bool
111extract_l3_ipv4(struct conn_key *key, const void *data, size_t size,
112 const char **new_data, bool validate_checksum);
113static inline bool
114extract_l3_ipv6(struct conn_key *key, const void *data, size_t size,
115 const char **new_data);
bd5e81a0 116static struct alg_exp_node *
be38342d
DB
117expectation_lookup(struct hmap *alg_expectations, const struct conn_key *key,
118 uint32_t basis, bool src_ip_wc);
bd5e81a0
DB
119
120static int
121repl_ftp_v4_addr(struct dp_packet *pkt, ovs_be32 v4_addr_rep,
122 char *ftp_data_v4_start,
cd7c99a6 123 size_t addr_offset_from_ftp_data_start, size_t addr_size);
bd5e81a0
DB
124
125static enum ftp_ctl_pkt
126process_ftp_ctl_v4(struct conntrack *ct,
127 struct dp_packet *pkt,
128 const struct conn *conn_for_expectation,
4417ca3d 129 ovs_be32 *v4_addr_rep,
bd5e81a0 130 char **ftp_data_v4_start,
cd7c99a6
DB
131 size_t *addr_offset_from_ftp_data_start,
132 size_t *addr_size);
bd5e81a0
DB
133
134static enum ftp_ctl_pkt
135detect_ftp_ctl_type(const struct conn_lookup_ctx *ctx,
136 struct dp_packet *pkt);
137
4417ca3d 138static void
967bb5c5 139expectation_clean(struct conntrack *ct, const struct conn_key *master_key);
4417ca3d 140
94e71143
DB
141static struct ct_l4_proto *l4_protos[] = {
142 [IPPROTO_TCP] = &ct_proto_tcp,
143 [IPPROTO_UDP] = &ct_proto_other,
144 [IPPROTO_ICMP] = &ct_proto_icmp4,
145 [IPPROTO_ICMPV6] = &ct_proto_icmp6,
146};
147
bd5e81a0
DB
148static void
149handle_ftp_ctl(struct conntrack *ct, const struct conn_lookup_ctx *ctx,
967bb5c5
DB
150 struct dp_packet *pkt, struct conn *ec, long long now,
151 enum ftp_ctl_pkt ftp_ctl, bool nat);
bd5e81a0 152
7be77cb0
DB
153static void
154handle_tftp_ctl(struct conntrack *ct,
94e71143 155 const struct conn_lookup_ctx *ctx OVS_UNUSED,
967bb5c5
DB
156 struct dp_packet *pkt, struct conn *conn_for_expectation,
157 long long now OVS_UNUSED, enum ftp_ctl_pkt ftp_ctl OVS_UNUSED,
158 bool nat OVS_UNUSED);
94e71143
DB
159
160typedef void (*alg_helper)(struct conntrack *ct,
161 const struct conn_lookup_ctx *ctx,
162 struct dp_packet *pkt,
967bb5c5 163 struct conn *conn_for_expectation,
94e71143
DB
164 long long now, enum ftp_ctl_pkt ftp_ctl,
165 bool nat);
166
167static alg_helper alg_helpers[] = {
168 [CT_ALG_CTL_NONE] = NULL,
169 [CT_ALG_CTL_FTP] = handle_ftp_ctl,
170 [CT_ALG_CTL_TFTP] = handle_tftp_ctl,
a489b168
DDP
171};
172
173long long ct_timeout_val[] = {
174#define CT_TIMEOUT(NAME, VAL) [CT_TM_##NAME] = VAL,
175 CT_TIMEOUTS
176#undef CT_TIMEOUT
177};
178
bd5e81a0
DB
179/* The maximum TCP or UDP port number. */
180#define CT_MAX_L4_PORT 65535
bd5e81a0
DB
181/* String buffer used for parsing FTP string messages.
182 * This is sized about twice what is needed to leave some
183 * margin of error. */
184#define LARGEST_FTP_MSG_OF_INTEREST 128
185/* FTP port string used in active mode. */
186#define FTP_PORT_CMD "PORT"
187/* FTP pasv string used in passive mode. */
188#define FTP_PASV_REPLY_CODE "227"
189/* Maximum decimal digits for port in FTP command.
190 * The port is represented as two 3 digit numbers with the
191 * high part a multiple of 256. */
192#define MAX_FTP_PORT_DGTS 3
193
194/* FTP extension EPRT string used for active mode. */
195#define FTP_EPRT_CMD "EPRT"
196/* FTP extension EPSV string used for passive mode. */
197#define FTP_EPSV_REPLY "EXTENDED PASSIVE"
198/* Maximum decimal digits for port in FTP extended command. */
199#define MAX_EXT_FTP_PORT_DGTS 5
200/* FTP extended command code for IPv6. */
201#define FTP_AF_V6 '2'
202/* Used to indicate a wildcard L4 source port number for ALGs.
203 * This is used for port numbers that we cannot predict in
204 * expectations. */
205#define ALG_WC_SRC_PORT 0
206
a489b168 207/* If the total number of connections goes above this value, no new connections
286de272 208 * are accepted; this is for CT_CONN_TYPE_DEFAULT connections. */
a489b168
DDP
209#define DEFAULT_N_CONN_LIMIT 3000000
210
5ed7a0b4
DB
211/* Does a member by member comparison of two conn_keys; this
212 * function must be kept in sync with struct conn_key; returns 0
213 * if the keys are equal or 1 if the keys are not equal. */
214static int
215conn_key_cmp(const struct conn_key *key1, const struct conn_key *key2)
216{
217 if (!memcmp(&key1->src.addr, &key2->src.addr, sizeof key1->src.addr) &&
218 !memcmp(&key1->dst.addr, &key2->dst.addr, sizeof key1->dst.addr) &&
219 (key1->src.icmp_id == key2->src.icmp_id) &&
220 (key1->src.icmp_type == key2->src.icmp_type) &&
221 (key1->src.icmp_code == key2->src.icmp_code) &&
222 (key1->dst.icmp_id == key2->dst.icmp_id) &&
223 (key1->dst.icmp_type == key2->dst.icmp_type) &&
224 (key1->dst.icmp_code == key2->dst.icmp_code) &&
225 (key1->dl_type == key2->dl_type) &&
226 (key1->zone == key2->zone) &&
227 (key1->nw_proto == key2->nw_proto)) {
228
229 return 0;
230 }
231 return 1;
232}
233
d8682ee5 234static void
dec0dbbc
DB
235ct_print_conn_info(const struct conn *c, const char *log_msg,
236 enum vlog_level vll, bool force, bool rl_on)
66f400f5
DB
237{
238#define CT_VLOG(RL_ON, LEVEL, ...) \
239 do { \
240 if (RL_ON) { \
241 static struct vlog_rate_limit rl_ = VLOG_RATE_LIMIT_INIT(5, 5); \
242 vlog_rate_limit(&this_module, LEVEL, &rl_, __VA_ARGS__); \
243 } else { \
244 vlog(&this_module, LEVEL, __VA_ARGS__); \
245 } \
246 } while (0)
247
248 if (OVS_UNLIKELY(force || vlog_is_enabled(&this_module, vll))) {
249 if (c->key.dl_type == htons(ETH_TYPE_IP)) {
250 CT_VLOG(rl_on, vll, "%s: src ip "IP_FMT" dst ip "IP_FMT" rev src "
251 "ip "IP_FMT" rev dst ip "IP_FMT" src/dst ports "
252 "%"PRIu16"/%"PRIu16" rev src/dst ports "
253 "%"PRIu16"/%"PRIu16" zone/rev zone "
254 "%"PRIu16"/%"PRIu16" nw_proto/rev nw_proto "
255 "%"PRIu8"/%"PRIu8, log_msg,
cda1b109
DB
256 IP_ARGS(c->key.src.addr.ipv4),
257 IP_ARGS(c->key.dst.addr.ipv4),
258 IP_ARGS(c->rev_key.src.addr.ipv4),
259 IP_ARGS(c->rev_key.dst.addr.ipv4),
66f400f5
DB
260 ntohs(c->key.src.port), ntohs(c->key.dst.port),
261 ntohs(c->rev_key.src.port), ntohs(c->rev_key.dst.port),
262 c->key.zone, c->rev_key.zone, c->key.nw_proto,
263 c->rev_key.nw_proto);
264 } else {
265 char ip6_s[INET6_ADDRSTRLEN];
266 inet_ntop(AF_INET6, &c->key.src.addr.ipv6, ip6_s, sizeof ip6_s);
267 char ip6_d[INET6_ADDRSTRLEN];
268 inet_ntop(AF_INET6, &c->key.dst.addr.ipv6, ip6_d, sizeof ip6_d);
269 char ip6_rs[INET6_ADDRSTRLEN];
270 inet_ntop(AF_INET6, &c->rev_key.src.addr.ipv6, ip6_rs,
271 sizeof ip6_rs);
272 char ip6_rd[INET6_ADDRSTRLEN];
273 inet_ntop(AF_INET6, &c->rev_key.dst.addr.ipv6, ip6_rd,
274 sizeof ip6_rd);
275
276 CT_VLOG(rl_on, vll, "%s: src ip %s dst ip %s rev src ip %s"
277 " rev dst ip %s src/dst ports %"PRIu16"/%"PRIu16
278 " rev src/dst ports %"PRIu16"/%"PRIu16" zone/rev zone "
279 "%"PRIu16"/%"PRIu16" nw_proto/rev nw_proto "
280 "%"PRIu8"/%"PRIu8, log_msg, ip6_s, ip6_d, ip6_rs,
281 ip6_rd, ntohs(c->key.src.port), ntohs(c->key.dst.port),
282 ntohs(c->rev_key.src.port), ntohs(c->rev_key.dst.port),
283 c->key.zone, c->rev_key.zone, c->key.nw_proto,
284 c->rev_key.nw_proto);
285 }
286 }
287}
288
a489b168
DDP
289/* Initializes the connection tracker 'ct'. The caller is responsible for
290 * calling 'conntrack_destroy()', when the instance is not needed anymore */
57593fd2
DB
291struct conntrack *
292conntrack_init(void)
a489b168 293{
57593fd2
DB
294 struct conntrack *ct = xzalloc(sizeof *ct);
295
967bb5c5
DB
296 ovs_rwlock_init(&ct->resources_lock);
297 ovs_rwlock_wrlock(&ct->resources_lock);
bd5e81a0 298 hmap_init(&ct->alg_expectations);
4417ca3d 299 hindex_init(&ct->alg_expectation_refs);
967bb5c5 300 ovs_rwlock_unlock(&ct->resources_lock);
a489b168 301
967bb5c5
DB
302 ovs_mutex_init_adaptive(&ct->ct_lock);
303 ovs_mutex_lock(&ct->ct_lock);
304 cmap_init(&ct->conns);
305 for (unsigned i = 0; i < ARRAY_SIZE(ct->exp_lists); i++) {
306 ovs_list_init(&ct->exp_lists[i]);
a489b168 307 }
967bb5c5
DB
308 ovs_mutex_unlock(&ct->ct_lock);
309
a489b168
DDP
310 ct->hash_basis = random_uint32();
311 atomic_count_init(&ct->n_conn, 0);
312 atomic_init(&ct->n_conn_limit, DEFAULT_N_CONN_LIMIT);
e6ef6cc6
DDP
313 latch_init(&ct->clean_thread_exit);
314 ct->clean_thread = ovs_thread_create("ct_clean", clean_thread_main, ct);
4ea96698 315 ct->ipf = ipf_init();
57593fd2
DB
316
317 return ct;
a489b168
DDP
318}
319
967bb5c5
DB
320static void
321conn_clean_cmn(struct conntrack *ct, struct conn *conn)
322 OVS_REQUIRES(ct->ct_lock)
323{
324 if (conn->alg) {
325 expectation_clean(ct, &conn->key);
326 }
327
328 uint32_t hash = conn_key_hash(&conn->key, ct->hash_basis);
329 cmap_remove(&ct->conns, &conn->cm_node, hash);
330}
331
332/* Must be called with 'conn' of 'conn_type' CT_CONN_TYPE_DEFAULT. Also
333 * removes the associated nat 'conn' from the lookup datastructures. */
334static void
335conn_clean(struct conntrack *ct, struct conn *conn)
336 OVS_REQUIRES(ct->ct_lock)
337{
338 ovs_assert(conn->conn_type == CT_CONN_TYPE_DEFAULT);
339
340 conn_clean_cmn(ct, conn);
341 if (conn->nat_conn) {
342 uint32_t hash = conn_key_hash(&conn->nat_conn->key, ct->hash_basis);
343 cmap_remove(&ct->conns, &conn->nat_conn->cm_node, hash);
344 }
345 ovs_list_remove(&conn->exp_node);
5f918a8a 346 conn->cleaned = true;
967bb5c5
DB
347 ovsrcu_postpone(delete_conn, conn);
348 atomic_count_dec(&ct->n_conn);
349}
350
351static void
352conn_clean_one(struct conntrack *ct, struct conn *conn)
353 OVS_REQUIRES(ct->ct_lock)
354{
355 conn_clean_cmn(ct, conn);
356 if (conn->conn_type == CT_CONN_TYPE_DEFAULT) {
357 ovs_list_remove(&conn->exp_node);
5f918a8a 358 conn->cleaned = true;
967bb5c5
DB
359 atomic_count_dec(&ct->n_conn);
360 }
361 ovsrcu_postpone(delete_conn_one, conn);
362}
363
364/* Destroys the connection tracker 'ct' and frees all the allocated memory.
365 * The caller of this function must already have shut down packet input
366 * and PMD threads (which would have been quiesced). */
a489b168
DDP
367void
368conntrack_destroy(struct conntrack *ct)
369{
967bb5c5 370 struct conn *conn;
e6ef6cc6
DDP
371 latch_set(&ct->clean_thread_exit);
372 pthread_join(ct->clean_thread, NULL);
373 latch_destroy(&ct->clean_thread_exit);
a489b168 374
967bb5c5
DB
375 ovs_mutex_lock(&ct->ct_lock);
376 CMAP_FOR_EACH (conn, cm_node, &ct->conns) {
377 conn_clean_one(ct, conn);
a489b168 378 }
967bb5c5
DB
379 cmap_destroy(&ct->conns);
380 ovs_mutex_unlock(&ct->ct_lock);
381 ovs_mutex_destroy(&ct->ct_lock);
bd5e81a0 382
967bb5c5 383 ovs_rwlock_wrlock(&ct->resources_lock);
bd5e81a0
DB
384 struct alg_exp_node *alg_exp_node;
385 HMAP_FOR_EACH_POP (alg_exp_node, node, &ct->alg_expectations) {
386 free(alg_exp_node);
387 }
bd5e81a0 388 hmap_destroy(&ct->alg_expectations);
4417ca3d 389 hindex_destroy(&ct->alg_expectation_refs);
967bb5c5
DB
390 ovs_rwlock_unlock(&ct->resources_lock);
391 ovs_rwlock_destroy(&ct->resources_lock);
392
4ea96698 393 ipf_destroy(ct->ipf);
21ffe409 394 free(ct);
a489b168
DDP
395}
396\f
967bb5c5
DB
397
398static bool
399conn_key_lookup(struct conntrack *ct, const struct conn_key *key,
400 uint32_t hash, long long now, struct conn **conn_out,
401 bool *reply)
a489b168 402{
967bb5c5
DB
403 struct conn *conn;
404 bool found = false;
405
406 CMAP_FOR_EACH_WITH_HASH (conn, cm_node, hash, &ct->conns) {
407 if (!conn_key_cmp(&conn->key, key) && !conn_expired(conn, now)) {
408 found = true;
409 if (reply) {
410 *reply = false;
411 }
412 break;
413 }
414 if (!conn_key_cmp(&conn->rev_key, key) && !conn_expired(conn, now)) {
415 found = true;
416 if (reply) {
417 *reply = true;
418 }
419 break;
420 }
421 }
a489b168 422
967bb5c5
DB
423 if (found && conn_out) {
424 *conn_out = conn;
425 } else if (conn_out) {
426 *conn_out = NULL;
427 }
428 return found;
a489b168
DDP
429}
430
4048c508
DB
431static bool
432conn_lookup(struct conntrack *ct, const struct conn_key *key,
433 long long now, struct conn **conn_out, bool *reply)
434{
435 uint32_t hash = conn_key_hash(key, ct->hash_basis);
436 return conn_key_lookup(ct, key, hash, now, conn_out, reply);
437}
438
a489b168 439static void
286de272 440write_ct_md(struct dp_packet *pkt, uint16_t zone, const struct conn *conn,
bd5e81a0 441 const struct conn_key *key, const struct alg_exp_node *alg_exp)
a489b168 442{
286de272 443 pkt->md.ct_state |= CS_TRACKED;
a489b168 444 pkt->md.ct_zone = zone;
967bb5c5
DB
445
446 if (conn) {
447 ovs_mutex_lock(&conn->lock);
448 pkt->md.ct_mark = conn->mark;
449 pkt->md.ct_label = conn->label;
450 ovs_mutex_unlock(&conn->lock);
451 } else {
452 pkt->md.ct_mark = 0;
453 pkt->md.ct_label = OVS_U128_ZERO;
454 }
daf4d3c1
JR
455
456 /* Use the original direction tuple if we have it. */
457 if (conn) {
bd5e81a0
DB
458 if (conn->alg_related) {
459 key = &conn->master_key;
460 } else {
461 key = &conn->key;
462 }
463 } else if (alg_exp) {
464 pkt->md.ct_mark = alg_exp->master_mark;
465 pkt->md.ct_label = alg_exp->master_label;
466 key = &alg_exp->master_key;
daf4d3c1 467 }
dec0dbbc 468
daf4d3c1 469 pkt->md.ct_orig_tuple_ipv6 = false;
dec0dbbc 470
daf4d3c1
JR
471 if (key) {
472 if (key->dl_type == htons(ETH_TYPE_IP)) {
473 pkt->md.ct_orig_tuple.ipv4 = (struct ovs_key_ct_tuple_ipv4) {
cda1b109
DB
474 key->src.addr.ipv4,
475 key->dst.addr.ipv4,
daf4d3c1
JR
476 key->nw_proto != IPPROTO_ICMP
477 ? key->src.port : htons(key->src.icmp_type),
478 key->nw_proto != IPPROTO_ICMP
479 ? key->dst.port : htons(key->src.icmp_code),
480 key->nw_proto,
481 };
286de272 482 } else {
daf4d3c1
JR
483 pkt->md.ct_orig_tuple_ipv6 = true;
484 pkt->md.ct_orig_tuple.ipv6 = (struct ovs_key_ct_tuple_ipv6) {
cda1b109
DB
485 key->src.addr.ipv6,
486 key->dst.addr.ipv6,
daf4d3c1
JR
487 key->nw_proto != IPPROTO_ICMPV6
488 ? key->src.port : htons(key->src.icmp_type),
489 key->nw_proto != IPPROTO_ICMPV6
490 ? key->dst.port : htons(key->src.icmp_code),
491 key->nw_proto,
492 };
493 }
494 } else {
495 memset(&pkt->md.ct_orig_tuple, 0, sizeof pkt->md.ct_orig_tuple);
496 }
bd5e81a0
DB
497}
498
499static uint8_t
500get_ip_proto(const struct dp_packet *pkt)
501{
502 uint8_t ip_proto;
503 struct eth_header *l2 = dp_packet_eth(pkt);
504 if (l2->eth_type == htons(ETH_TYPE_IPV6)) {
505 struct ovs_16aligned_ip6_hdr *nh6 = dp_packet_l3(pkt);
506 ip_proto = nh6->ip6_ctlun.ip6_un1.ip6_un1_nxt;
507 } else {
508 struct ip_header *l3_hdr = dp_packet_l3(pkt);
509 ip_proto = l3_hdr->ip_proto;
510 }
286de272 511
bd5e81a0
DB
512 return ip_proto;
513}
514
515static bool
94e71143 516is_ftp_ctl(const enum ct_alg_ctl_type ct_alg_ctl)
bd5e81a0 517{
94e71143 518 return ct_alg_ctl == CT_ALG_CTL_FTP;
bd5e81a0
DB
519}
520
94e71143 521static enum ct_alg_ctl_type
bd7d93f8
DB
522get_alg_ctl_type(const struct dp_packet *pkt, ovs_be16 tp_src, ovs_be16 tp_dst,
523 const char *helper)
7be77cb0 524{
94e71143
DB
525 /* CT_IPPORT_FTP/TFTP is used because IPPORT_FTP/TFTP in not defined
526 * in OSX, at least in in.h. Since these values will never change, remove
7be77cb0 527 * the external dependency. */
94e71143
DB
528 enum { CT_IPPORT_FTP = 21 };
529 enum { CT_IPPORT_TFTP = 69 };
bd7d93f8
DB
530 uint8_t ip_proto = get_ip_proto(pkt);
531 struct udp_header *uh = dp_packet_l4(pkt);
532 struct tcp_header *th = dp_packet_l4(pkt);
533 ovs_be16 ftp_src_port = htons(CT_IPPORT_FTP);
534 ovs_be16 ftp_dst_port = htons(CT_IPPORT_FTP);
535 ovs_be16 tftp_dst_port = htons(CT_IPPORT_TFTP);
536
537 if (OVS_UNLIKELY(tp_dst)) {
538 if (helper && !strncmp(helper, "ftp", strlen("ftp"))) {
539 ftp_dst_port = tp_dst;
540 } else if (helper && !strncmp(helper, "tftp", strlen("tftp"))) {
541 tftp_dst_port = tp_dst;
542 }
543 } else if (OVS_UNLIKELY(tp_src)) {
544 if (helper && !strncmp(helper, "ftp", strlen("ftp"))) {
545 ftp_src_port = tp_src;
546 }
547 }
7be77cb0 548
bd7d93f8 549 if (ip_proto == IPPROTO_UDP && uh->udp_dst == tftp_dst_port) {
94e71143
DB
550 return CT_ALG_CTL_TFTP;
551 } else if (ip_proto == IPPROTO_TCP &&
bd7d93f8 552 (th->tcp_src == ftp_src_port || th->tcp_dst == ftp_dst_port)) {
94e71143
DB
553 return CT_ALG_CTL_FTP;
554 }
555 return CT_ALG_CTL_NONE;
556}
557
be38342d
DB
558static bool
559alg_src_ip_wc(enum ct_alg_ctl_type alg_ctl_type)
560{
561 if (alg_ctl_type == CT_ALG_CTL_SIP) {
562 return true;
563 }
564 return false;
565}
566
94e71143
DB
567static void
568handle_alg_ctl(struct conntrack *ct, const struct conn_lookup_ctx *ctx,
569 struct dp_packet *pkt, enum ct_alg_ctl_type ct_alg_ctl,
967bb5c5 570 struct conn *conn, long long now, bool nat)
94e71143
DB
571{
572 /* ALG control packet handling with expectation creation. */
3a2a425b 573 if (OVS_UNLIKELY(alg_helpers[ct_alg_ctl] && conn && conn->alg)) {
967bb5c5
DB
574 ovs_mutex_lock(&conn->lock);
575 alg_helpers[ct_alg_ctl](ct, ctx, pkt, conn, now, CT_FTP_CTL_INTEREST,
576 nat);
577 ovs_mutex_unlock(&conn->lock);
94e71143 578 }
7be77cb0
DB
579}
580
286de272
DB
581static void
582pat_packet(struct dp_packet *pkt, const struct conn *conn)
583{
584 if (conn->nat_info->nat_action & NAT_ACTION_SRC) {
585 if (conn->key.nw_proto == IPPROTO_TCP) {
586 struct tcp_header *th = dp_packet_l4(pkt);
587 packet_set_tcp_port(pkt, conn->rev_key.dst.port, th->tcp_dst);
588 } else if (conn->key.nw_proto == IPPROTO_UDP) {
589 struct udp_header *uh = dp_packet_l4(pkt);
590 packet_set_udp_port(pkt, conn->rev_key.dst.port, uh->udp_dst);
591 }
592 } else if (conn->nat_info->nat_action & NAT_ACTION_DST) {
593 if (conn->key.nw_proto == IPPROTO_TCP) {
594 struct tcp_header *th = dp_packet_l4(pkt);
595 packet_set_tcp_port(pkt, th->tcp_src, conn->rev_key.src.port);
596 } else if (conn->key.nw_proto == IPPROTO_UDP) {
597 struct udp_header *uh = dp_packet_l4(pkt);
598 packet_set_udp_port(pkt, uh->udp_src, conn->rev_key.src.port);
599 }
600 }
601}
602
603static void
604nat_packet(struct dp_packet *pkt, const struct conn *conn, bool related)
605{
606 if (conn->nat_info->nat_action & NAT_ACTION_SRC) {
607 pkt->md.ct_state |= CS_SRC_NAT;
608 if (conn->key.dl_type == htons(ETH_TYPE_IP)) {
609 struct ip_header *nh = dp_packet_l3(pkt);
610 packet_set_ipv4_addr(pkt, &nh->ip_src,
cda1b109 611 conn->rev_key.dst.addr.ipv4);
286de272
DB
612 } else {
613 struct ovs_16aligned_ip6_hdr *nh6 = dp_packet_l3(pkt);
614 packet_set_ipv6_addr(pkt, conn->key.nw_proto,
615 nh6->ip6_src.be32,
cda1b109 616 &conn->rev_key.dst.addr.ipv6, true);
286de272
DB
617 }
618 if (!related) {
619 pat_packet(pkt, conn);
620 }
621 } else if (conn->nat_info->nat_action & NAT_ACTION_DST) {
622 pkt->md.ct_state |= CS_DST_NAT;
623 if (conn->key.dl_type == htons(ETH_TYPE_IP)) {
624 struct ip_header *nh = dp_packet_l3(pkt);
625 packet_set_ipv4_addr(pkt, &nh->ip_dst,
cda1b109 626 conn->rev_key.src.addr.ipv4);
286de272
DB
627 } else {
628 struct ovs_16aligned_ip6_hdr *nh6 = dp_packet_l3(pkt);
629 packet_set_ipv6_addr(pkt, conn->key.nw_proto,
630 nh6->ip6_dst.be32,
cda1b109 631 &conn->rev_key.src.addr.ipv6, true);
286de272
DB
632 }
633 if (!related) {
634 pat_packet(pkt, conn);
635 }
636 }
637}
638
639static void
640un_pat_packet(struct dp_packet *pkt, const struct conn *conn)
641{
642 if (conn->nat_info->nat_action & NAT_ACTION_SRC) {
643 if (conn->key.nw_proto == IPPROTO_TCP) {
644 struct tcp_header *th = dp_packet_l4(pkt);
645 packet_set_tcp_port(pkt, th->tcp_src, conn->key.src.port);
646 } else if (conn->key.nw_proto == IPPROTO_UDP) {
647 struct udp_header *uh = dp_packet_l4(pkt);
648 packet_set_udp_port(pkt, uh->udp_src, conn->key.src.port);
649 }
650 } else if (conn->nat_info->nat_action & NAT_ACTION_DST) {
651 if (conn->key.nw_proto == IPPROTO_TCP) {
652 struct tcp_header *th = dp_packet_l4(pkt);
653 packet_set_tcp_port(pkt, conn->key.dst.port, th->tcp_dst);
654 } else if (conn->key.nw_proto == IPPROTO_UDP) {
655 struct udp_header *uh = dp_packet_l4(pkt);
656 packet_set_udp_port(pkt, conn->key.dst.port, uh->udp_dst);
657 }
658 }
659}
660
edd1bef4
DB
661static void
662reverse_pat_packet(struct dp_packet *pkt, const struct conn *conn)
663{
664 if (conn->nat_info->nat_action & NAT_ACTION_SRC) {
665 if (conn->key.nw_proto == IPPROTO_TCP) {
666 struct tcp_header *th_in = dp_packet_l4(pkt);
667 packet_set_tcp_port(pkt, conn->key.src.port,
668 th_in->tcp_dst);
669 } else if (conn->key.nw_proto == IPPROTO_UDP) {
670 struct udp_header *uh_in = dp_packet_l4(pkt);
671 packet_set_udp_port(pkt, conn->key.src.port,
672 uh_in->udp_dst);
673 }
674 } else if (conn->nat_info->nat_action & NAT_ACTION_DST) {
675 if (conn->key.nw_proto == IPPROTO_TCP) {
676 struct tcp_header *th_in = dp_packet_l4(pkt);
677 packet_set_tcp_port(pkt, th_in->tcp_src,
678 conn->key.dst.port);
679 } else if (conn->key.nw_proto == IPPROTO_UDP) {
680 struct udp_header *uh_in = dp_packet_l4(pkt);
681 packet_set_udp_port(pkt, uh_in->udp_src,
682 conn->key.dst.port);
683 }
684 }
685}
686
687static void
688reverse_nat_packet(struct dp_packet *pkt, const struct conn *conn)
689{
690 char *tail = dp_packet_tail(pkt);
ba5ca284 691 uint8_t pad = dp_packet_l2_pad_size(pkt);
edd1bef4
DB
692 struct conn_key inner_key;
693 const char *inner_l4 = NULL;
694 uint16_t orig_l3_ofs = pkt->l3_ofs;
695 uint16_t orig_l4_ofs = pkt->l4_ofs;
696
697 if (conn->key.dl_type == htons(ETH_TYPE_IP)) {
698 struct ip_header *nh = dp_packet_l3(pkt);
699 struct icmp_header *icmp = dp_packet_l4(pkt);
700 struct ip_header *inner_l3 = (struct ip_header *) (icmp + 1);
ba5ca284
DB
701 /* This call is already verified to succeed during the code path from
702 * 'conn_key_extract()' which calls 'extract_l4_icmp()'. */
bd5e81a0
DB
703 extract_l3_ipv4(&inner_key, inner_l3, tail - ((char *)inner_l3) - pad,
704 &inner_l4, false);
edd1bef4
DB
705 pkt->l3_ofs += (char *) inner_l3 - (char *) nh;
706 pkt->l4_ofs += inner_l4 - (char *) icmp;
707
708 if (conn->nat_info->nat_action & NAT_ACTION_SRC) {
709 packet_set_ipv4_addr(pkt, &inner_l3->ip_src,
cda1b109 710 conn->key.src.addr.ipv4);
edd1bef4
DB
711 } else if (conn->nat_info->nat_action & NAT_ACTION_DST) {
712 packet_set_ipv4_addr(pkt, &inner_l3->ip_dst,
cda1b109 713 conn->key.dst.addr.ipv4);
edd1bef4 714 }
dec0dbbc 715
edd1bef4
DB
716 reverse_pat_packet(pkt, conn);
717 icmp->icmp_csum = 0;
718 icmp->icmp_csum = csum(icmp, tail - (char *) icmp - pad);
719 } else {
720 struct ovs_16aligned_ip6_hdr *nh6 = dp_packet_l3(pkt);
721 struct icmp6_error_header *icmp6 = dp_packet_l4(pkt);
722 struct ovs_16aligned_ip6_hdr *inner_l3_6 =
723 (struct ovs_16aligned_ip6_hdr *) (icmp6 + 1);
ba5ca284
DB
724 /* This call is already verified to succeed during the code path from
725 * 'conn_key_extract()' which calls 'extract_l4_icmp6()'. */
edd1bef4
DB
726 extract_l3_ipv6(&inner_key, inner_l3_6,
727 tail - ((char *)inner_l3_6) - pad,
728 &inner_l4);
729 pkt->l3_ofs += (char *) inner_l3_6 - (char *) nh6;
730 pkt->l4_ofs += inner_l4 - (char *) icmp6;
731
732 if (conn->nat_info->nat_action & NAT_ACTION_SRC) {
733 packet_set_ipv6_addr(pkt, conn->key.nw_proto,
734 inner_l3_6->ip6_src.be32,
cda1b109 735 &conn->key.src.addr.ipv6, true);
edd1bef4
DB
736 } else if (conn->nat_info->nat_action & NAT_ACTION_DST) {
737 packet_set_ipv6_addr(pkt, conn->key.nw_proto,
738 inner_l3_6->ip6_dst.be32,
cda1b109 739 &conn->key.dst.addr.ipv6, true);
edd1bef4
DB
740 }
741 reverse_pat_packet(pkt, conn);
edd1bef4 742 icmp6->icmp6_base.icmp6_cksum = 0;
76d85771
DB
743 icmp6->icmp6_base.icmp6_cksum = packet_csum_upperlayer6(nh6, icmp6,
744 IPPROTO_ICMPV6, tail - (char *) icmp6 - pad);
edd1bef4
DB
745 }
746 pkt->l3_ofs = orig_l3_ofs;
747 pkt->l4_ofs = orig_l4_ofs;
748}
749
286de272
DB
750static void
751un_nat_packet(struct dp_packet *pkt, const struct conn *conn,
752 bool related)
753{
754 if (conn->nat_info->nat_action & NAT_ACTION_SRC) {
755 pkt->md.ct_state |= CS_DST_NAT;
756 if (conn->key.dl_type == htons(ETH_TYPE_IP)) {
757 struct ip_header *nh = dp_packet_l3(pkt);
758 packet_set_ipv4_addr(pkt, &nh->ip_dst,
cda1b109 759 conn->key.src.addr.ipv4);
286de272
DB
760 } else {
761 struct ovs_16aligned_ip6_hdr *nh6 = dp_packet_l3(pkt);
762 packet_set_ipv6_addr(pkt, conn->key.nw_proto,
763 nh6->ip6_dst.be32,
cda1b109 764 &conn->key.src.addr.ipv6, true);
286de272 765 }
edd1bef4
DB
766
767 if (OVS_UNLIKELY(related)) {
768 reverse_nat_packet(pkt, conn);
769 } else {
286de272
DB
770 un_pat_packet(pkt, conn);
771 }
772 } else if (conn->nat_info->nat_action & NAT_ACTION_DST) {
773 pkt->md.ct_state |= CS_SRC_NAT;
774 if (conn->key.dl_type == htons(ETH_TYPE_IP)) {
775 struct ip_header *nh = dp_packet_l3(pkt);
776 packet_set_ipv4_addr(pkt, &nh->ip_src,
cda1b109 777 conn->key.dst.addr.ipv4);
286de272
DB
778 } else {
779 struct ovs_16aligned_ip6_hdr *nh6 = dp_packet_l3(pkt);
780 packet_set_ipv6_addr(pkt, conn->key.nw_proto,
781 nh6->ip6_src.be32,
cda1b109 782 &conn->key.dst.addr.ipv6, true);
286de272 783 }
edd1bef4
DB
784
785 if (OVS_UNLIKELY(related)) {
786 reverse_nat_packet(pkt, conn);
787 } else {
286de272
DB
788 un_pat_packet(pkt, conn);
789 }
790 }
791}
792
bd5e81a0 793static void
967bb5c5 794conn_seq_skew_set(struct conntrack *ct, const struct conn *conn_in,
bd5e81a0 795 long long now, int seq_skew, bool seq_skew_dir)
967bb5c5 796 OVS_NO_THREAD_SAFETY_ANALYSIS
bd5e81a0 797{
967bb5c5 798 struct conn *conn;
967bb5c5 799 ovs_mutex_unlock(&conn_in->lock);
4048c508 800 conn_lookup(ct, &conn_in->key, now, &conn, NULL);
967bb5c5
DB
801 ovs_mutex_lock(&conn_in->lock);
802
bd5e81a0
DB
803 if (conn && seq_skew) {
804 conn->seq_skew = seq_skew;
805 conn->seq_skew_dir = seq_skew_dir;
806 }
a720a7fa
DB
807}
808
3a2a425b
DB
809static bool
810ct_verify_helper(const char *helper, enum ct_alg_ctl_type ct_alg_ctl)
811{
812 if (ct_alg_ctl == CT_ALG_CTL_NONE) {
813 return true;
814 } else if (helper) {
815 if ((ct_alg_ctl == CT_ALG_CTL_FTP) &&
816 !strncmp(helper, "ftp", strlen("ftp"))) {
817 return true;
818 } else if ((ct_alg_ctl == CT_ALG_CTL_TFTP) &&
819 !strncmp(helper, "tftp", strlen("tftp"))) {
820 return true;
821 } else {
822 return false;
823 }
824 } else {
825 return false;
826 }
827}
828
a489b168
DDP
829static struct conn *
830conn_not_found(struct conntrack *ct, struct dp_packet *pkt,
286de272
DB
831 struct conn_lookup_ctx *ctx, bool commit, long long now,
832 const struct nat_action_info_t *nat_action_info,
967bb5c5 833 const char *helper, const struct alg_exp_node *alg_exp,
3a2a425b 834 enum ct_alg_ctl_type ct_alg_ctl)
967bb5c5 835 OVS_REQUIRES(ct->ct_lock)
a489b168 836{
a489b168 837 struct conn *nc = NULL;
967bb5c5 838 struct conn *nat_conn = NULL;
a489b168
DDP
839
840 if (!valid_new(pkt, &ctx->key)) {
286de272 841 pkt->md.ct_state = CS_INVALID;
a489b168
DDP
842 return nc;
843 }
dec0dbbc 844
286de272 845 pkt->md.ct_state = CS_NEW;
dec0dbbc 846
bd5e81a0
DB
847 if (alg_exp) {
848 pkt->md.ct_state |= CS_RELATED;
849 }
a489b168
DDP
850
851 if (commit) {
852 unsigned int n_conn_limit;
a489b168 853 atomic_read_relaxed(&ct->n_conn_limit, &n_conn_limit);
a489b168
DDP
854 if (atomic_count_get(&ct->n_conn) >= n_conn_limit) {
855 COVERAGE_INC(conntrack_full);
856 return nc;
857 }
858
967bb5c5 859 nc = new_conn(ct, pkt, &ctx->key, now);
a720a7fa 860 memcpy(&nc->key, &ctx->key, sizeof nc->key);
82b9ac94 861 memcpy(&nc->rev_key, &nc->key, sizeof nc->rev_key);
286de272 862 conn_key_reverse(&nc->rev_key);
a489b168 863
3a2a425b
DB
864 if (ct_verify_helper(helper, ct_alg_ctl)) {
865 nc->alg = nullable_xstrdup(helper);
bd5e81a0
DB
866 }
867
868 if (alg_exp) {
869 nc->alg_related = true;
870 nc->mark = alg_exp->master_mark;
871 nc->label = alg_exp->master_label;
872 nc->master_key = alg_exp->master_key;
873 }
874
286de272
DB
875 if (nat_action_info) {
876 nc->nat_info = xmemdup(nat_action_info, sizeof *nc->nat_info);
967bb5c5 877 nat_conn = xzalloc(sizeof *nat_conn);
a489b168 878
bd5e81a0 879 if (alg_exp) {
be38342d 880 if (alg_exp->nat_rpl_dst) {
bd5e81a0
DB
881 nc->rev_key.dst.addr = alg_exp->alg_nat_repl_addr;
882 nc->nat_info->nat_action = NAT_ACTION_SRC;
883 } else {
884 nc->rev_key.src.addr = alg_exp->alg_nat_repl_addr;
885 nc->nat_info->nat_action = NAT_ACTION_DST;
886 }
bd5e81a0 887 } else {
967bb5c5
DB
888 memcpy(nat_conn, nc, sizeof *nat_conn);
889 bool nat_res = nat_select_range_tuple(ct, nc, nat_conn);
286de272 890
bd5e81a0
DB
891 if (!nat_res) {
892 goto nat_res_exhaustion;
893 }
286de272 894
967bb5c5
DB
895 /* Update nc with nat adjustments made to nat_conn by
896 * nat_select_range_tuple(). */
897 memcpy(nc, nat_conn, sizeof *nc);
286de272 898 }
967bb5c5 899
dbb597d3 900 nat_packet(pkt, nc, ctx->icmp_related);
967bb5c5
DB
901 memcpy(&nat_conn->key, &nc->rev_key, sizeof nat_conn->key);
902 memcpy(&nat_conn->rev_key, &nc->key, sizeof nat_conn->rev_key);
903 nat_conn->conn_type = CT_CONN_TYPE_UN_NAT;
904 nat_conn->nat_info = NULL;
905 nat_conn->alg = NULL;
906 nat_conn->nat_conn = NULL;
907 uint32_t nat_hash = conn_key_hash(&nat_conn->key, ct->hash_basis);
908 cmap_insert(&ct->conns, &nat_conn->cm_node, nat_hash);
909 }
910
911 nc->nat_conn = nat_conn;
912 ovs_mutex_init_adaptive(&nc->lock);
913 nc->conn_type = CT_CONN_TYPE_DEFAULT;
914 cmap_insert(&ct->conns, &nc->cm_node, ctx->hash);
a489b168 915 atomic_count_inc(&ct->n_conn);
967bb5c5 916 ctx->conn = nc; /* For completeness. */
a489b168 917 }
bd5e81a0 918
a489b168 919 return nc;
bd5e81a0 920
967bb5c5
DB
921 /* This would be a user error or a DOS attack. A user error is prevented
922 * by allocating enough combinations of NAT addresses when combined with
923 * ephemeral ports. A DOS attack should be protected against with
924 * firewall rules or a separate firewall. Also using zone partitioning
925 * can limit DoS impact. */
bd5e81a0 926nat_res_exhaustion:
967bb5c5
DB
927 free(nat_conn);
928 ovs_list_remove(&nc->exp_node);
929 delete_conn_cmn(nc);
bd5e81a0
DB
930 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 5);
931 VLOG_WARN_RL(&rl, "Unable to NAT due to tuple space exhaustion - "
932 "if DoS attack, use firewalling and/or zone partitioning.");
933 return NULL;
a489b168
DDP
934}
935
286de272
DB
936static bool
937conn_update_state(struct conntrack *ct, struct dp_packet *pkt,
967bb5c5
DB
938 struct conn_lookup_ctx *ctx, struct conn *conn,
939 long long now)
286de272 940{
967bb5c5 941 ovs_assert(conn->conn_type == CT_CONN_TYPE_DEFAULT);
286de272
DB
942 bool create_new_conn = false;
943
dbb597d3 944 if (ctx->icmp_related) {
286de272
DB
945 pkt->md.ct_state |= CS_RELATED;
946 if (ctx->reply) {
947 pkt->md.ct_state |= CS_REPLY_DIR;
948 }
949 } else {
967bb5c5 950 if (conn->alg_related) {
bd5e81a0
DB
951 pkt->md.ct_state |= CS_RELATED;
952 }
dec0dbbc 953
967bb5c5 954 enum ct_update_res res = conn_update(ct, conn, pkt, ctx, now);
286de272
DB
955
956 switch (res) {
957 case CT_UPDATE_VALID:
958 pkt->md.ct_state |= CS_ESTABLISHED;
959 pkt->md.ct_state &= ~CS_NEW;
960 if (ctx->reply) {
961 pkt->md.ct_state |= CS_REPLY_DIR;
962 }
963 break;
964 case CT_UPDATE_INVALID:
965 pkt->md.ct_state = CS_INVALID;
966 break;
967 case CT_UPDATE_NEW:
967bb5c5 968 ovs_mutex_lock(&ct->ct_lock);
4048c508 969 if (conn_lookup(ct, &conn->key, now, NULL, NULL)) {
28274f77
DB
970 conn_clean(ct, conn);
971 }
967bb5c5 972 ovs_mutex_unlock(&ct->ct_lock);
286de272
DB
973 create_new_conn = true;
974 break;
975 default:
976 OVS_NOT_REACHED();
977 }
978 }
979 return create_new_conn;
980}
981
286de272
DB
982static void
983handle_nat(struct dp_packet *pkt, struct conn *conn,
984 uint16_t zone, bool reply, bool related)
985{
986 if (conn->nat_info &&
987 (!(pkt->md.ct_state & (CS_SRC_NAT | CS_DST_NAT)) ||
988 (pkt->md.ct_state & (CS_SRC_NAT | CS_DST_NAT) &&
989 zone != pkt->md.ct_zone))) {
bd5e81a0 990
286de272
DB
991 if (pkt->md.ct_state & (CS_SRC_NAT | CS_DST_NAT)) {
992 pkt->md.ct_state &= ~(CS_SRC_NAT | CS_DST_NAT);
993 }
994 if (reply) {
995 un_nat_packet(pkt, conn, related);
996 } else {
997 nat_packet(pkt, conn, related);
998 }
999 }
1000}
1001
f8016041
DB
1002static bool
1003check_orig_tuple(struct conntrack *ct, struct dp_packet *pkt,
1004 struct conn_lookup_ctx *ctx_in, long long now,
967bb5c5 1005 struct conn **conn,
f8016041 1006 const struct nat_action_info_t *nat_action_info)
f8016041 1007{
a0b89c51
DB
1008 if (!(pkt->md.ct_state & (CS_SRC_NAT | CS_DST_NAT)) ||
1009 (ctx_in->key.dl_type == htons(ETH_TYPE_IP) &&
f8016041
DB
1010 !pkt->md.ct_orig_tuple.ipv4.ipv4_proto) ||
1011 (ctx_in->key.dl_type == htons(ETH_TYPE_IPV6) &&
1012 !pkt->md.ct_orig_tuple.ipv6.ipv6_proto) ||
f8016041
DB
1013 nat_action_info) {
1014 return false;
1015 }
1016
967bb5c5
DB
1017 struct conn_key key;
1018 memset(&key, 0 , sizeof key);
f8016041
DB
1019
1020 if (ctx_in->key.dl_type == htons(ETH_TYPE_IP)) {
967bb5c5
DB
1021 key.src.addr.ipv4 = pkt->md.ct_orig_tuple.ipv4.ipv4_src;
1022 key.dst.addr.ipv4 = pkt->md.ct_orig_tuple.ipv4.ipv4_dst;
f8016041
DB
1023
1024 if (ctx_in->key.nw_proto == IPPROTO_ICMP) {
967bb5c5
DB
1025 key.src.icmp_id = ctx_in->key.src.icmp_id;
1026 key.dst.icmp_id = ctx_in->key.dst.icmp_id;
f8016041 1027 uint16_t src_port = ntohs(pkt->md.ct_orig_tuple.ipv4.src_port);
967bb5c5
DB
1028 key.src.icmp_type = (uint8_t) src_port;
1029 key.dst.icmp_type = reverse_icmp_type(key.src.icmp_type);
f8016041 1030 } else {
967bb5c5
DB
1031 key.src.port = pkt->md.ct_orig_tuple.ipv4.src_port;
1032 key.dst.port = pkt->md.ct_orig_tuple.ipv4.dst_port;
f8016041 1033 }
967bb5c5 1034 key.nw_proto = pkt->md.ct_orig_tuple.ipv4.ipv4_proto;
f8016041 1035 } else {
967bb5c5
DB
1036 key.src.addr.ipv6 = pkt->md.ct_orig_tuple.ipv6.ipv6_src;
1037 key.dst.addr.ipv6 = pkt->md.ct_orig_tuple.ipv6.ipv6_dst;
f8016041
DB
1038
1039 if (ctx_in->key.nw_proto == IPPROTO_ICMPV6) {
967bb5c5
DB
1040 key.src.icmp_id = ctx_in->key.src.icmp_id;
1041 key.dst.icmp_id = ctx_in->key.dst.icmp_id;
f8016041 1042 uint16_t src_port = ntohs(pkt->md.ct_orig_tuple.ipv6.src_port);
967bb5c5
DB
1043 key.src.icmp_type = (uint8_t) src_port;
1044 key.dst.icmp_type = reverse_icmp6_type(key.src.icmp_type);
f8016041 1045 } else {
967bb5c5
DB
1046 key.src.port = pkt->md.ct_orig_tuple.ipv6.src_port;
1047 key.dst.port = pkt->md.ct_orig_tuple.ipv6.dst_port;
f8016041 1048 }
967bb5c5 1049 key.nw_proto = pkt->md.ct_orig_tuple.ipv6.ipv6_proto;
f8016041
DB
1050 }
1051
967bb5c5
DB
1052 key.dl_type = ctx_in->key.dl_type;
1053 key.zone = pkt->md.ct_zone;
4048c508 1054 conn_lookup(ct, &key, now, conn, NULL);
f8016041
DB
1055 return *conn ? true : false;
1056}
1057
94e71143
DB
1058static bool
1059conn_update_state_alg(struct conntrack *ct, struct dp_packet *pkt,
1060 struct conn_lookup_ctx *ctx, struct conn *conn,
1061 const struct nat_action_info_t *nat_action_info,
1062 enum ct_alg_ctl_type ct_alg_ctl, long long now,
967bb5c5 1063 bool *create_new_conn)
94e71143
DB
1064{
1065 if (is_ftp_ctl(ct_alg_ctl)) {
1066 /* Keep sequence tracking in sync with the source of the
1067 * sequence skew. */
967bb5c5 1068 ovs_mutex_lock(&conn->lock);
94e71143
DB
1069 if (ctx->reply != conn->seq_skew_dir) {
1070 handle_ftp_ctl(ct, ctx, pkt, conn, now, CT_FTP_CTL_OTHER,
1071 !!nat_action_info);
967bb5c5
DB
1072 /* conn_update_state locks for unrelated fields, so unlock. */
1073 ovs_mutex_unlock(&conn->lock);
1074 *create_new_conn = conn_update_state(ct, pkt, ctx, conn, now);
94e71143 1075 } else {
967bb5c5
DB
1076 /* conn_update_state locks for unrelated fields, so unlock. */
1077 ovs_mutex_unlock(&conn->lock);
1078 *create_new_conn = conn_update_state(ct, pkt, ctx, conn, now);
1079 ovs_mutex_lock(&conn->lock);
030958a0
DB
1080 if (*create_new_conn == false) {
1081 handle_ftp_ctl(ct, ctx, pkt, conn, now, CT_FTP_CTL_OTHER,
1082 !!nat_action_info);
1083 }
967bb5c5 1084 ovs_mutex_unlock(&conn->lock);
94e71143
DB
1085 }
1086 return true;
1087 }
1088 return false;
1089}
1090
286de272 1091static void
a489b168
DDP
1092process_one(struct conntrack *ct, struct dp_packet *pkt,
1093 struct conn_lookup_ctx *ctx, uint16_t zone,
286de272
DB
1094 bool force, bool commit, long long now, const uint32_t *setmark,
1095 const struct ovs_key_ct_labels *setlabel,
bd5e81a0 1096 const struct nat_action_info_t *nat_action_info,
bd7d93f8 1097 ovs_be16 tp_src, ovs_be16 tp_dst, const char *helper)
a489b168 1098{
967bb5c5
DB
1099 bool create_new_conn = false;
1100 conn_key_lookup(ct, &ctx->key, ctx->hash, now, &ctx->conn, &ctx->reply);
1101 struct conn *conn = ctx->conn;
a489b168 1102
a76a37ef 1103 /* Delete found entry if in wrong direction. 'force' implies commit. */
a720a7fa 1104 if (OVS_UNLIKELY(force && ctx->reply && conn)) {
967bb5c5 1105 ovs_mutex_lock(&ct->ct_lock);
4048c508 1106 if (conn_lookup(ct, &conn->key, now, NULL, NULL)) {
28274f77
DB
1107 conn_clean(ct, conn);
1108 }
967bb5c5 1109 ovs_mutex_unlock(&ct->ct_lock);
a76a37ef
JR
1110 conn = NULL;
1111 }
1112
286de272
DB
1113 if (OVS_LIKELY(conn)) {
1114 if (conn->conn_type == CT_CONN_TYPE_UN_NAT) {
a489b168 1115
286de272 1116 ctx->reply = true;
967bb5c5 1117 struct conn *rev_conn = conn; /* Save for debugging. */
4048c508 1118 uint32_t hash = conn_key_hash(&conn->rev_key, ct->hash_basis);
967bb5c5 1119 conn_key_lookup(ct, &ctx->key, hash, now, &conn, &ctx->reply);
a489b168 1120
967bb5c5 1121 if (!conn) {
286de272 1122 pkt->md.ct_state |= CS_TRACKED | CS_INVALID;
967bb5c5
DB
1123 char *log_msg = xasprintf("Missing master conn %p", rev_conn);
1124 ct_print_conn_info(conn, log_msg, VLL_INFO, true, true);
1125 free(log_msg);
286de272 1126 return;
a489b168
DDP
1127 }
1128 }
286de272
DB
1129 }
1130
bd7d93f8
DB
1131 enum ct_alg_ctl_type ct_alg_ctl = get_alg_ctl_type(pkt, tp_src, tp_dst,
1132 helper);
bd5e81a0 1133
286de272 1134 if (OVS_LIKELY(conn)) {
94e71143
DB
1135 if (OVS_LIKELY(!conn_update_state_alg(ct, pkt, ctx, conn,
1136 nat_action_info,
967bb5c5 1137 ct_alg_ctl, now,
94e71143 1138 &create_new_conn))) {
967bb5c5 1139 create_new_conn = conn_update_state(ct, pkt, ctx, conn, now);
bd5e81a0 1140 }
286de272 1141 if (nat_action_info && !create_new_conn) {
dbb597d3 1142 handle_nat(pkt, conn, zone, ctx->reply, ctx->icmp_related);
286de272 1143 }
bd5e81a0 1144
a0b89c51 1145 } else if (check_orig_tuple(ct, pkt, ctx, now, &conn, nat_action_info)) {
967bb5c5 1146 create_new_conn = conn_update_state(ct, pkt, ctx, conn, now);
a489b168 1147 } else {
dbb597d3 1148 if (ctx->icmp_related) {
bd5e81a0
DB
1149 /* An icmp related conn should always be found; no new
1150 connection is created based on an icmp related packet. */
286de272 1151 pkt->md.ct_state = CS_INVALID;
5c2e106b 1152 } else {
286de272 1153 create_new_conn = true;
5c2e106b 1154 }
a489b168
DDP
1155 }
1156
bd5e81a0 1157 const struct alg_exp_node *alg_exp = NULL;
96bbcbf7 1158 struct alg_exp_node alg_exp_entry;
dec0dbbc 1159
286de272 1160 if (OVS_UNLIKELY(create_new_conn)) {
bd5e81a0 1161
967bb5c5 1162 ovs_rwlock_rdlock(&ct->resources_lock);
bd5e81a0 1163 alg_exp = expectation_lookup(&ct->alg_expectations, &ctx->key,
be38342d
DB
1164 ct->hash_basis,
1165 alg_src_ip_wc(ct_alg_ctl));
bd5e81a0 1166 if (alg_exp) {
c3f6bae2 1167 memcpy(&alg_exp_entry, alg_exp, sizeof alg_exp_entry);
bd5e81a0
DB
1168 alg_exp = &alg_exp_entry;
1169 }
967bb5c5 1170 ovs_rwlock_unlock(&ct->resources_lock);
bd5e81a0 1171
967bb5c5 1172 ovs_mutex_lock(&ct->ct_lock);
4048c508 1173 if (!conn_lookup(ct, &ctx->key, now, NULL, NULL)) {
28274f77
DB
1174 conn = conn_not_found(ct, pkt, ctx, commit, now, nat_action_info,
1175 helper, alg_exp, ct_alg_ctl);
1176 }
967bb5c5 1177 ovs_mutex_unlock(&ct->ct_lock);
286de272
DB
1178 }
1179
bd5e81a0
DB
1180 write_ct_md(pkt, zone, conn, &ctx->key, alg_exp);
1181
286de272
DB
1182 if (conn && setmark) {
1183 set_mark(pkt, conn, setmark[0], setmark[1]);
1184 }
a489b168 1185
286de272
DB
1186 if (conn && setlabel) {
1187 set_label(pkt, conn, &setlabel[0], &setlabel[1]);
1188 }
1189
967bb5c5 1190 handle_alg_ctl(ct, ctx, pkt, ct_alg_ctl, conn, now, !!nat_action_info);
a489b168
DDP
1191}
1192
1193/* Sends the packets in '*pkt_batch' through the connection tracker 'ct'. All
51b9a533 1194 * the packets must have the same 'dl_type' (IPv4 or IPv6) and should have
4ea96698
DB
1195 * the l3 and and l4 offset properly set. Performs fragment reassembly with
1196 * the help of ipf_preprocess_conntrack().
a489b168
DDP
1197 *
1198 * If 'commit' is true, the packets are allowed to create new entries in the
1199 * connection tables. 'setmark', if not NULL, should point to a two
1200 * elements array containing a value and a mask to set the connection mark.
1201 * 'setlabel' behaves similarly for the connection label.*/
1202int
1203conntrack_execute(struct conntrack *ct, struct dp_packet_batch *pkt_batch,
a76a37ef 1204 ovs_be16 dl_type, bool force, bool commit, uint16_t zone,
66e4ad8a 1205 const uint32_t *setmark,
a489b168 1206 const struct ovs_key_ct_labels *setlabel,
bd7d93f8 1207 ovs_be16 tp_src, ovs_be16 tp_dst, const char *helper,
94053e66
FA
1208 const struct nat_action_info_t *nat_action_info,
1209 long long now)
a489b168 1210{
4ea96698
DB
1211 ipf_preprocess_conntrack(ct->ipf, pkt_batch, now, dl_type, zone,
1212 ct->hash_basis);
1213
43495c45 1214 struct dp_packet *packet;
61ce32b9 1215 struct conn_lookup_ctx ctx;
a489b168 1216
e883448e 1217 DP_PACKET_BATCH_FOR_EACH (i, packet, pkt_batch) {
4ea96698
DB
1218 if (packet->md.ct_state == CS_INVALID
1219 || !conn_key_extract(ct, packet, dl_type, &ctx, zone)) {
43495c45
BB
1220 packet->md.ct_state = CS_INVALID;
1221 write_ct_md(packet, zone, NULL, NULL, NULL);
a489b168
DDP
1222 continue;
1223 }
94e71143 1224 process_one(ct, packet, &ctx, zone, force, commit, now, setmark,
bd7d93f8 1225 setlabel, nat_action_info, tp_src, tp_dst, helper);
a489b168
DDP
1226 }
1227
4ea96698
DB
1228 ipf_postprocess_conntrack(ct->ipf, pkt_batch, now, dl_type);
1229
a489b168
DDP
1230 return 0;
1231}
1232
1fe178d2
EG
1233void
1234conntrack_clear(struct dp_packet *packet)
1235{
1236 /* According to pkt_metadata_init(), ct_state == 0 is enough to make all of
1237 * the conntrack fields invalid. */
1238 packet->md.ct_state = 0;
1239}
1240
a489b168
DDP
1241static void
1242set_mark(struct dp_packet *pkt, struct conn *conn, uint32_t val, uint32_t mask)
1243{
967bb5c5 1244 ovs_mutex_lock(&conn->lock);
bd5e81a0
DB
1245 if (conn->alg_related) {
1246 pkt->md.ct_mark = conn->mark;
1247 } else {
1248 pkt->md.ct_mark = val | (pkt->md.ct_mark & ~(mask));
1249 conn->mark = pkt->md.ct_mark;
1250 }
967bb5c5 1251 ovs_mutex_unlock(&conn->lock);
a489b168
DDP
1252}
1253
1254static void
1255set_label(struct dp_packet *pkt, struct conn *conn,
1256 const struct ovs_key_ct_labels *val,
1257 const struct ovs_key_ct_labels *mask)
1258{
967bb5c5 1259 ovs_mutex_lock(&conn->lock);
bd5e81a0
DB
1260 if (conn->alg_related) {
1261 pkt->md.ct_label = conn->label;
1262 } else {
1263 ovs_u128 v, m;
a489b168 1264
bd5e81a0
DB
1265 memcpy(&v, val, sizeof v);
1266 memcpy(&m, mask, sizeof m);
a489b168 1267
bd5e81a0 1268 pkt->md.ct_label.u64.lo = v.u64.lo
a489b168 1269 | (pkt->md.ct_label.u64.lo & ~(m.u64.lo));
bd5e81a0 1270 pkt->md.ct_label.u64.hi = v.u64.hi
a489b168 1271 | (pkt->md.ct_label.u64.hi & ~(m.u64.hi));
bd5e81a0
DB
1272 conn->label = pkt->md.ct_label;
1273 }
967bb5c5 1274 ovs_mutex_unlock(&conn->lock);
a489b168 1275}
286de272 1276
a489b168 1277\f
e6ef6cc6
DDP
1278/* Delete the expired connections from 'ctb', up to 'limit'. Returns the
1279 * earliest expiration time among the remaining connections in 'ctb'. Returns
1280 * LLONG_MAX if 'ctb' is empty. The return value might be smaller than 'now',
1281 * if 'limit' is reached */
1282static long long
967bb5c5 1283ct_sweep(struct conntrack *ct, long long now, size_t limit)
e6ef6cc6
DDP
1284{
1285 struct conn *conn, *next;
1286 long long min_expiration = LLONG_MAX;
e6ef6cc6
DDP
1287 size_t count = 0;
1288
967bb5c5
DB
1289 ovs_mutex_lock(&ct->ct_lock);
1290
dec0dbbc 1291 for (unsigned i = 0; i < N_CT_TM; i++) {
967bb5c5
DB
1292 LIST_FOR_EACH_SAFE (conn, next, exp_node, &ct->exp_lists[i]) {
1293 ovs_mutex_lock(&conn->lock);
1294 if (now < conn->expiration || count >= limit) {
a720a7fa 1295 min_expiration = MIN(min_expiration, conn->expiration);
967bb5c5 1296 ovs_mutex_unlock(&conn->lock);
a720a7fa
DB
1297 if (count >= limit) {
1298 /* Do not check other lists. */
1299 COVERAGE_INC(conntrack_long_cleanup);
967bb5c5 1300 goto out;
e6ef6cc6 1301 }
a720a7fa 1302 break;
967bb5c5
DB
1303 } else {
1304 ovs_mutex_unlock(&conn->lock);
1305 conn_clean(ct, conn);
e6ef6cc6 1306 }
a720a7fa 1307 count++;
e6ef6cc6
DDP
1308 }
1309 }
967bb5c5
DB
1310
1311out:
1312 VLOG_DBG("conntrack cleanup %"PRIuSIZE" entries in %lld msec", count,
1313 time_msec() - now);
1314 ovs_mutex_unlock(&ct->ct_lock);
e6ef6cc6
DDP
1315 return min_expiration;
1316}
1317
1318/* Cleans up old connection entries from 'ct'. Returns the time when the
1319 * next expiration might happen. The return value might be smaller than
1320 * 'now', meaning that an internal limit has been reached, and some expired
1321 * connections have not been deleted. */
1322static long long
1323conntrack_clean(struct conntrack *ct, long long now)
1324{
e6ef6cc6 1325 unsigned int n_conn_limit;
e6ef6cc6 1326 atomic_read_relaxed(&ct->n_conn_limit, &n_conn_limit);
967bb5c5
DB
1327 size_t clean_max = n_conn_limit > 10 ? n_conn_limit / 10 : 1;
1328 long long min_exp = ct_sweep(ct, now, clean_max);
1329 long long next_wakeup = MIN(min_exp, now + CT_TM_MIN);
e6ef6cc6
DDP
1330
1331 return next_wakeup;
1332}
1333
1334/* Cleanup:
e6ef6cc6
DDP
1335 *
1336 * We must call conntrack_clean() periodically. Its return value
1337 * gives a hint on when the next cleanup must be done (either because
1338 * there is an actual connection that expires, or because a new connection
1339 * might be created with the minimum timeout).
1340 *
1341 * The logic below has two goals:
1342 *
6c54734e
DDP
1343 * - We want to reduce the number of wakeups and batch connection cleanup
1344 * when the load is not very high. CT_CLEAN_INTERVAL ensures that if we
1345 * are coping with the current cleanup tasks, then we wait at least
1346 * 5 seconds to do further cleanup.
e6ef6cc6 1347 *
967bb5c5 1348 * - We don't want to keep the map locked too long, as we might prevent
6c54734e 1349 * traffic from flowing. CT_CLEAN_MIN_INTERVAL ensures that if cleanup is
967bb5c5 1350 * behind, there is at least some 200ms blocks of time when the map will be
6c54734e 1351 * left alone, so the datapath can operate unhindered.
e6ef6cc6
DDP
1352 */
1353#define CT_CLEAN_INTERVAL 5000 /* 5 seconds */
1354#define CT_CLEAN_MIN_INTERVAL 200 /* 0.2 seconds */
1355
1356static void *
1357clean_thread_main(void *f_)
1358{
1359 struct conntrack *ct = f_;
1360
1361 while (!latch_is_set(&ct->clean_thread_exit)) {
1362 long long next_wake;
1363 long long now = time_msec();
e6ef6cc6
DDP
1364 next_wake = conntrack_clean(ct, now);
1365
1366 if (next_wake < now) {
1367 poll_timer_wait_until(now + CT_CLEAN_MIN_INTERVAL);
1368 } else {
1369 poll_timer_wait_until(MAX(next_wake, now + CT_CLEAN_INTERVAL));
1370 }
1371 latch_wait(&ct->clean_thread_exit);
1372 poll_block();
1373 }
1374
1375 return NULL;
1376}
1377\f
e917d3ee
DB
1378/* 'Data' is a pointer to the beginning of the L3 header and 'new_data' is
1379 * used to store a pointer to the first byte after the L3 header. 'Size' is
1380 * the size of the packet beyond the data pointer. */
a489b168
DDP
1381static inline bool
1382extract_l3_ipv4(struct conn_key *key, const void *data, size_t size,
1383 const char **new_data, bool validate_checksum)
1384{
e917d3ee
DB
1385 if (OVS_UNLIKELY(size < IP_HEADER_LEN)) {
1386 return false;
a489b168
DDP
1387 }
1388
dec0dbbc
DB
1389 const struct ip_header *ip = data;
1390 size_t ip_len = IP_IHL(ip->ip_ihl_ver) * 4;
a489b168 1391
e917d3ee
DB
1392 if (OVS_UNLIKELY(ip_len < IP_HEADER_LEN)) {
1393 return false;
1394 }
a489b168 1395
e917d3ee
DB
1396 if (OVS_UNLIKELY(size < ip_len)) {
1397 return false;
1398 }
a489b168 1399
e917d3ee
DB
1400 if (IP_IS_FRAGMENT(ip->ip_frag_off)) {
1401 return false;
a489b168
DDP
1402 }
1403
1404 if (validate_checksum && csum(data, ip_len) != 0) {
1405 return false;
1406 }
1407
e917d3ee
DB
1408 if (new_data) {
1409 *new_data = (char *) data + ip_len;
1410 }
1411
cda1b109
DB
1412 key->src.addr.ipv4 = get_16aligned_be32(&ip->ip_src);
1413 key->dst.addr.ipv4 = get_16aligned_be32(&ip->ip_dst);
a489b168
DDP
1414 key->nw_proto = ip->ip_proto;
1415
1416 return true;
1417}
1418
e917d3ee
DB
1419/* 'Data' is a pointer to the beginning of the L3 header and 'new_data' is
1420 * used to store a pointer to the first byte after the L3 header. 'Size' is
1421 * the size of the packet beyond the data pointer. */
a489b168
DDP
1422static inline bool
1423extract_l3_ipv6(struct conn_key *key, const void *data, size_t size,
1424 const char **new_data)
1425{
1426 const struct ovs_16aligned_ip6_hdr *ip6 = data;
286de272 1427
e917d3ee
DB
1428 if (OVS_UNLIKELY(size < sizeof *ip6)) {
1429 return false;
a489b168
DDP
1430 }
1431
1432 data = ip6 + 1;
1433 size -= sizeof *ip6;
dec0dbbc
DB
1434 uint8_t nw_proto = ip6->ip6_nxt;
1435 uint8_t nw_frag = 0;
a489b168 1436
523464ab
DB
1437 const struct ovs_16aligned_ip6_frag *frag_hdr;
1438 if (!parse_ipv6_ext_hdrs(&data, &size, &nw_proto, &nw_frag, &frag_hdr)) {
a489b168
DDP
1439 return false;
1440 }
1441
a489b168
DDP
1442 if (nw_frag) {
1443 return false;
1444 }
1445
c8b1ad49
DB
1446 if (new_data) {
1447 *new_data = data;
1448 }
1449
cda1b109
DB
1450 memcpy(&key->src.addr.ipv6, &ip6->ip6_src, sizeof key->src.addr);
1451 memcpy(&key->dst.addr.ipv6, &ip6->ip6_dst, sizeof key->dst.addr);
a489b168
DDP
1452 key->nw_proto = nw_proto;
1453
1454 return true;
1455}
1456
1457static inline bool
1458checksum_valid(const struct conn_key *key, const void *data, size_t size,
1459 const void *l3)
1460{
a489b168 1461 if (key->dl_type == htons(ETH_TYPE_IP)) {
76d85771
DB
1462 uint32_t csum = packet_csum_pseudoheader(l3);
1463 return csum_finish(csum_continue(csum, data, size)) == 0;
a489b168 1464 } else if (key->dl_type == htons(ETH_TYPE_IPV6)) {
76d85771 1465 return packet_csum_upperlayer6(l3, data, key->nw_proto, size) == 0;
a489b168
DDP
1466 } else {
1467 return false;
1468 }
a489b168
DDP
1469}
1470
1471static inline bool
1472check_l4_tcp(const struct conn_key *key, const void *data, size_t size,
324459a3 1473 const void *l3, bool validate_checksum)
a489b168
DDP
1474{
1475 const struct tcp_header *tcp = data;
40225b0c
BP
1476 if (size < sizeof *tcp) {
1477 return false;
1478 }
a489b168 1479
40225b0c 1480 size_t tcp_len = TCP_OFFSET(tcp->tcp_ctl) * 4;
a489b168
DDP
1481 if (OVS_UNLIKELY(tcp_len < TCP_HEADER_LEN || tcp_len > size)) {
1482 return false;
1483 }
1484
324459a3 1485 return validate_checksum ? checksum_valid(key, data, size, l3) : true;
a489b168
DDP
1486}
1487
1488static inline bool
1489check_l4_udp(const struct conn_key *key, const void *data, size_t size,
324459a3 1490 const void *l3, bool validate_checksum)
a489b168
DDP
1491{
1492 const struct udp_header *udp = data;
40225b0c
BP
1493 if (size < sizeof *udp) {
1494 return false;
1495 }
a489b168 1496
40225b0c 1497 size_t udp_len = ntohs(udp->udp_len);
a489b168
DDP
1498 if (OVS_UNLIKELY(udp_len < UDP_HEADER_LEN || udp_len > size)) {
1499 return false;
1500 }
1501
1502 /* Validation must be skipped if checksum is 0 on IPv4 packets */
1503 return (udp->udp_csum == 0 && key->dl_type == htons(ETH_TYPE_IP))
324459a3 1504 || (validate_checksum ? checksum_valid(key, data, size, l3) : true);
a489b168
DDP
1505}
1506
/* Checks the ICMP message of 'size' bytes at 'data'.
 *
 * When 'validate_checksum' is set, returns true only if the checksum over
 * the whole message is zero; otherwise always returns true. */
static inline bool
check_l4_icmp(const void *data, size_t size, bool validate_checksum)
{
    if (!validate_checksum) {
        return true;
    }
    return csum(data, size) == 0;
}
1512
/* Checks the ICMPv6 message of 'size' bytes at 'data'.
 *
 * When 'validate_checksum' is set, returns the result of checksum_valid()
 * for 'key', 'data', 'size' and 'l3'; otherwise always returns true. */
static inline bool
check_l4_icmp6(const struct conn_key *key, const void *data, size_t size,
               const void *l3, bool validate_checksum)
{
    if (!validate_checksum) {
        return true;
    }
    return checksum_valid(key, data, size, l3);
}
1519
1520static inline bool
6c2a9306
DB
1521extract_l4_tcp(struct conn_key *key, const void *data, size_t size,
1522 size_t *chk_len)
a489b168 1523{
6c2a9306 1524 if (OVS_UNLIKELY(size < (chk_len ? *chk_len : TCP_HEADER_LEN))) {
a489b168
DDP
1525 return false;
1526 }
1527
dec0dbbc 1528 const struct tcp_header *tcp = data;
a489b168
DDP
1529 key->src.port = tcp->tcp_src;
1530 key->dst.port = tcp->tcp_dst;
1531
1532 /* Port 0 is invalid */
1533 return key->src.port && key->dst.port;
1534}
1535
1536static inline bool
6c2a9306
DB
1537extract_l4_udp(struct conn_key *key, const void *data, size_t size,
1538 size_t *chk_len)
a489b168 1539{
6c2a9306 1540 if (OVS_UNLIKELY(size < (chk_len ? *chk_len : UDP_HEADER_LEN))) {
a489b168
DDP
1541 return false;
1542 }
1543
dec0dbbc 1544 const struct udp_header *udp = data;
a489b168
DDP
1545 key->src.port = udp->udp_src;
1546 key->dst.port = udp->udp_dst;
1547
1548 /* Port 0 is invalid */
1549 return key->src.port && key->dst.port;
1550}
1551
1552static inline bool extract_l4(struct conn_key *key, const void *data,
324459a3 1553 size_t size, bool *related, const void *l3,
6c2a9306 1554 bool validate_checksum, size_t *chk_len);
a489b168 1555
b269a122
DDP
1556static uint8_t
1557reverse_icmp_type(uint8_t type)
1558{
1559 switch (type) {
1560 case ICMP4_ECHO_REQUEST:
1561 return ICMP4_ECHO_REPLY;
1562 case ICMP4_ECHO_REPLY:
1563 return ICMP4_ECHO_REQUEST;
1564
1565 case ICMP4_TIMESTAMP:
1566 return ICMP4_TIMESTAMPREPLY;
1567 case ICMP4_TIMESTAMPREPLY:
1568 return ICMP4_TIMESTAMP;
1569
1570 case ICMP4_INFOREQUEST:
1571 return ICMP4_INFOREPLY;
1572 case ICMP4_INFOREPLY:
1573 return ICMP4_INFOREQUEST;
1574 default:
1575 OVS_NOT_REACHED();
1576 }
1577}
1578
a489b168
DDP
1579/* If 'related' is not NULL and the function is processing an ICMP
1580 * error packet, extract the l3 and l4 fields from the nested header
1581 * instead and set *related to true. If 'related' is NULL we're
1582 * already processing a nested header and no such recursion is
1583 * possible */
1584static inline int
1585extract_l4_icmp(struct conn_key *key, const void *data, size_t size,
6c2a9306 1586 bool *related, size_t *chk_len)
a489b168 1587{
6c2a9306 1588 if (OVS_UNLIKELY(size < (chk_len ? *chk_len : ICMP_HEADER_LEN))) {
a489b168
DDP
1589 return false;
1590 }
1591
dec0dbbc
DB
1592 const struct icmp_header *icmp = data;
1593
a489b168
DDP
1594 switch (icmp->icmp_type) {
1595 case ICMP4_ECHO_REQUEST:
1596 case ICMP4_ECHO_REPLY:
1597 case ICMP4_TIMESTAMP:
1598 case ICMP4_TIMESTAMPREPLY:
1599 case ICMP4_INFOREQUEST:
1600 case ICMP4_INFOREPLY:
b269a122
DDP
1601 if (icmp->icmp_code != 0) {
1602 return false;
1603 }
a489b168 1604 /* Separate ICMP connection: identified using id */
b269a122
DDP
1605 key->src.icmp_id = key->dst.icmp_id = icmp->icmp_fields.echo.id;
1606 key->src.icmp_type = icmp->icmp_type;
1607 key->dst.icmp_type = reverse_icmp_type(icmp->icmp_type);
a489b168
DDP
1608 break;
1609 case ICMP4_DST_UNREACH:
1610 case ICMP4_TIME_EXCEEDED:
1611 case ICMP4_PARAM_PROB:
1612 case ICMP4_SOURCEQUENCH:
1613 case ICMP4_REDIRECT: {
1614 /* ICMP packet part of another connection. We should
1615 * extract the key from embedded packet header */
1616 struct conn_key inner_key;
1617 const char *l3 = (const char *) (icmp + 1);
1618 const char *tail = (const char *) data + size;
1619 const char *l4;
a489b168
DDP
1620
1621 if (!related) {
1622 return false;
1623 }
1624
1625 memset(&inner_key, 0, sizeof inner_key);
1626 inner_key.dl_type = htons(ETH_TYPE_IP);
dec0dbbc 1627 bool ok = extract_l3_ipv4(&inner_key, l3, tail - l3, &l4, false);
a489b168
DDP
1628 if (!ok) {
1629 return false;
1630 }
1631
cda1b109 1632 if (inner_key.src.addr.ipv4 != key->dst.addr.ipv4) {
a489b168
DDP
1633 return false;
1634 }
1635
1636 key->src = inner_key.src;
1637 key->dst = inner_key.dst;
1638 key->nw_proto = inner_key.nw_proto;
6c2a9306 1639 size_t check_len = ICMP_ERROR_DATA_L4_LEN;
a489b168 1640
6c2a9306 1641 ok = extract_l4(key, l4, tail - l4, NULL, l3, false, &check_len);
a489b168
DDP
1642 if (ok) {
1643 conn_key_reverse(key);
1644 *related = true;
1645 }
1646 return ok;
1647 }
1648 default:
1649 return false;
1650 }
1651
1652 return true;
1653}
1654
b269a122
DDP
1655static uint8_t
1656reverse_icmp6_type(uint8_t type)
1657{
1658 switch (type) {
1659 case ICMP6_ECHO_REQUEST:
1660 return ICMP6_ECHO_REPLY;
1661 case ICMP6_ECHO_REPLY:
1662 return ICMP6_ECHO_REQUEST;
1663 default:
1664 OVS_NOT_REACHED();
1665 }
1666}
1667
a489b168
DDP
1668/* If 'related' is not NULL and the function is processing an ICMP
1669 * error packet, extract the l3 and l4 fields from the nested header
1670 * instead and set *related to true. If 'related' is NULL we're
1671 * already processing a nested header and no such recursion is
1672 * possible */
1673static inline bool
1674extract_l4_icmp6(struct conn_key *key, const void *data, size_t size,
1675 bool *related)
1676{
1677 const struct icmp6_header *icmp6 = data;
1678
1679 /* All the messages that we support need at least 4 bytes after
1680 * the header */
1681 if (size < sizeof *icmp6 + 4) {
1682 return false;
1683 }
1684
1685 switch (icmp6->icmp6_type) {
1686 case ICMP6_ECHO_REQUEST:
1687 case ICMP6_ECHO_REPLY:
b269a122
DDP
1688 if (icmp6->icmp6_code != 0) {
1689 return false;
1690 }
a489b168 1691 /* Separate ICMP connection: identified using id */
b269a122
DDP
1692 key->src.icmp_id = key->dst.icmp_id = *(ovs_be16 *) (icmp6 + 1);
1693 key->src.icmp_type = icmp6->icmp6_type;
1694 key->dst.icmp_type = reverse_icmp6_type(icmp6->icmp6_type);
a489b168
DDP
1695 break;
1696 case ICMP6_DST_UNREACH:
1697 case ICMP6_PACKET_TOO_BIG:
1698 case ICMP6_TIME_EXCEEDED:
1699 case ICMP6_PARAM_PROB: {
1700 /* ICMP packet part of another connection. We should
1701 * extract the key from embedded packet header */
1702 struct conn_key inner_key;
1703 const char *l3 = (const char *) icmp6 + 8;
1704 const char *tail = (const char *) data + size;
1705 const char *l4 = NULL;
a489b168
DDP
1706
1707 if (!related) {
1708 return false;
1709 }
1710
1711 memset(&inner_key, 0, sizeof inner_key);
1712 inner_key.dl_type = htons(ETH_TYPE_IPV6);
dec0dbbc 1713 bool ok = extract_l3_ipv6(&inner_key, l3, tail - l3, &l4);
a489b168
DDP
1714 if (!ok) {
1715 return false;
1716 }
1717
1718 /* pf doesn't do this, but it seems a good idea */
cda1b109
DB
1719 if (!ipv6_addr_equals(&inner_key.src.addr.ipv6,
1720 &key->dst.addr.ipv6)) {
a489b168
DDP
1721 return false;
1722 }
1723
1724 key->src = inner_key.src;
1725 key->dst = inner_key.dst;
1726 key->nw_proto = inner_key.nw_proto;
1727
6c2a9306 1728 ok = extract_l4(key, l4, tail - l4, NULL, l3, false, NULL);
a489b168
DDP
1729 if (ok) {
1730 conn_key_reverse(key);
1731 *related = true;
1732 }
1733 return ok;
1734 }
1735 default:
1736 return false;
1737 }
1738
1739 return true;
1740}
1741
1742/* Extract l4 fields into 'key', which must already contain valid l3
1743 * members.
1744 *
1745 * If 'related' is not NULL and an ICMP error packet is being
1746 * processed, the function will extract the key from the packet nested
1401f6de 1747 * in the ICMP payload and set '*related' to true.
a489b168 1748 *
9171c635
DB
1749 * 'size' here is the layer 4 size, which can be a nested size if parsing
1750 * an ICMP or ICMP6 header.
1751 *
a489b168 1752 * If 'related' is NULL, it means that we're already parsing a header nested
6c2a9306
DB
1753 * in an ICMP error. In this case, we skip the checksum and some length
1754 * validations. */
a489b168
DDP
1755static inline bool
1756extract_l4(struct conn_key *key, const void *data, size_t size, bool *related,
6c2a9306 1757 const void *l3, bool validate_checksum, size_t *chk_len)
a489b168
DDP
1758{
1759 if (key->nw_proto == IPPROTO_TCP) {
324459a3 1760 return (!related || check_l4_tcp(key, data, size, l3,
6c2a9306
DB
1761 validate_checksum))
1762 && extract_l4_tcp(key, data, size, chk_len);
a489b168 1763 } else if (key->nw_proto == IPPROTO_UDP) {
324459a3 1764 return (!related || check_l4_udp(key, data, size, l3,
6c2a9306
DB
1765 validate_checksum))
1766 && extract_l4_udp(key, data, size, chk_len);
a489b168
DDP
1767 } else if (key->dl_type == htons(ETH_TYPE_IP)
1768 && key->nw_proto == IPPROTO_ICMP) {
324459a3 1769 return (!related || check_l4_icmp(data, size, validate_checksum))
6c2a9306 1770 && extract_l4_icmp(key, data, size, related, chk_len);
a489b168
DDP
1771 } else if (key->dl_type == htons(ETH_TYPE_IPV6)
1772 && key->nw_proto == IPPROTO_ICMPV6) {
324459a3 1773 return (!related || check_l4_icmp6(key, data, size, l3,
6c2a9306
DB
1774 validate_checksum))
1775 && extract_l4_icmp6(key, data, size, related);
a489b168
DDP
1776 } else {
1777 return false;
1778 }
1779}
1780
1781static bool
66e4ad8a 1782conn_key_extract(struct conntrack *ct, struct dp_packet *pkt, ovs_be16 dl_type,
a489b168
DDP
1783 struct conn_lookup_ctx *ctx, uint16_t zone)
1784{
2482b0b0 1785 const struct eth_header *l2 = dp_packet_eth(pkt);
a489b168
DDP
1786 const struct ip_header *l3 = dp_packet_l3(pkt);
1787 const char *l4 = dp_packet_l4(pkt);
a489b168
DDP
1788
1789 memset(ctx, 0, sizeof *ctx);
1790
1791 if (!l2 || !l3 || !l4) {
1792 return false;
1793 }
1794
1795 ctx->key.zone = zone;
1796
1797 /* XXX In this function we parse the packet (again, it has already
1798 * gone through miniflow_extract()) for two reasons:
1799 *
1800 * 1) To extract the l3 addresses and l4 ports.
1801 * We already have the l3 and l4 headers' pointers. Extracting
1802 * the l3 addresses and the l4 ports is really cheap, since they
1803 * can be found at fixed locations.
66e4ad8a
DDP
1804 * 2) To extract the l4 type.
1805 * Extracting the l4 types, for IPv6 can be quite expensive, because
1806 * it's not at a fixed location.
a489b168
DDP
1807 *
1808 * Here's a way to avoid (2) with the help of the datapath.
66e4ad8a 1809 * The datapath doesn't keep the packet's extracted flow[1], so
a489b168 1810 * using that is not an option. We could use the packet's matching
66e4ad8a
DDP
1811 * megaflow, but we have to make sure that the l4 type (nw_proto)
1812 * is unwildcarded. This means either:
a489b168 1813 *
66e4ad8a
DDP
1814 * a) dpif-netdev unwildcards the l4 type when a new flow is installed
1815 * if the actions contains ct().
a489b168 1816 *
66e4ad8a
DDP
1817 * b) ofproto-dpif-xlate unwildcards the l4 type when translating a ct()
1818 * action. This is already done in different actions, but it's
1819 * unnecessary for the kernel.
a489b168
DDP
1820 *
1821 * ---
66e4ad8a 1822 * [1] The reasons for this are that keeping the flow increases
a489b168
DDP
1823 * (slightly) the cache footprint and increases computation
1824 * time as we move the packet around. Most importantly, the flow
1825 * should be updated by the actions and this can be slow, as
1826 * we use a sparse representation (miniflow).
1827 *
1828 */
dec0dbbc 1829 bool ok;
66e4ad8a 1830 ctx->key.dl_type = dl_type;
dec0dbbc 1831
a489b168 1832 if (ctx->key.dl_type == htons(ETH_TYPE_IP)) {
dec0dbbc 1833 bool hwol_bad_l3_csum = dp_packet_ip_checksum_bad(pkt);
324459a3
SC
1834 if (hwol_bad_l3_csum) {
1835 ok = false;
1836 } else {
dec0dbbc 1837 bool hwol_good_l3_csum = dp_packet_ip_checksum_valid(pkt);
324459a3 1838 /* Validate the checksum only when hwol is not supported. */
9171c635 1839 ok = extract_l3_ipv4(&ctx->key, l3, dp_packet_l3_size(pkt), NULL,
324459a3
SC
1840 !hwol_good_l3_csum);
1841 }
a489b168 1842 } else if (ctx->key.dl_type == htons(ETH_TYPE_IPV6)) {
9171c635 1843 ok = extract_l3_ipv6(&ctx->key, l3, dp_packet_l3_size(pkt), NULL);
a489b168
DDP
1844 } else {
1845 ok = false;
1846 }
1847
1848 if (ok) {
324459a3
SC
1849 bool hwol_bad_l4_csum = dp_packet_l4_checksum_bad(pkt);
1850 if (!hwol_bad_l4_csum) {
1851 bool hwol_good_l4_csum = dp_packet_l4_checksum_valid(pkt);
1852 /* Validate the checksum only when hwol is not supported. */
9171c635 1853 if (extract_l4(&ctx->key, l4, dp_packet_l4_size(pkt),
6c2a9306
DB
1854 &ctx->icmp_related, l3, !hwol_good_l4_csum,
1855 NULL)) {
324459a3
SC
1856 ctx->hash = conn_key_hash(&ctx->key, ct->hash_basis);
1857 return true;
1858 }
a489b168
DDP
1859 }
1860 }
1861
1862 return false;
1863}
92edd073
DB
1864
1865static uint32_t
cda1b109 1866ct_addr_hash_add(uint32_t hash, const union ct_addr *addr)
92edd073
DB
1867{
1868 BUILD_ASSERT_DECL(sizeof *addr % 4 == 0);
1869 return hash_add_bytes32(hash, (const uint32_t *) addr, sizeof *addr);
1870}
1871
1872static uint32_t
1873ct_endpoint_hash_add(uint32_t hash, const struct ct_endpoint *ep)
1874{
1875 BUILD_ASSERT_DECL(sizeof *ep % 4 == 0);
1876 return hash_add_bytes32(hash, (const uint32_t *) ep, sizeof *ep);
1877}
a489b168
DDP
1878\f
1879/* Symmetric */
1880static uint32_t
1881conn_key_hash(const struct conn_key *key, uint32_t basis)
1882{
1883 uint32_t hsrc, hdst, hash;
a489b168 1884 hsrc = hdst = basis;
6b1d4625
DB
1885 hsrc = ct_endpoint_hash_add(hsrc, &key->src);
1886 hdst = ct_endpoint_hash_add(hdst, &key->dst);
a489b168
DDP
1887
1888 /* Even if source and destination are swapped the hash will be the same. */
1889 hash = hsrc ^ hdst;
1890
1891 /* Hash the rest of the key(L3 and L4 types and zone). */
763b40b0 1892 return hash_words((uint32_t *) (&key->dst + 1),
a489b168
DDP
1893 (uint32_t *) (key + 1) - (uint32_t *) (&key->dst + 1),
1894 hash);
a489b168
DDP
1895}
1896
1897static void
1898conn_key_reverse(struct conn_key *key)
1899{
dec0dbbc 1900 struct ct_endpoint tmp = key->src;
a489b168
DDP
1901 key->src = key->dst;
1902 key->dst = tmp;
1903}
1904
286de272 1905static uint32_t
cda1b109 1906nat_ipv6_addrs_delta(struct in6_addr *ipv6_min, struct in6_addr *ipv6_max)
286de272 1907{
cda1b109
DB
1908 uint8_t *ipv6_min_hi = &ipv6_min->s6_addr[0];
1909 uint8_t *ipv6_min_lo = &ipv6_min->s6_addr[0] + sizeof(uint64_t);
1910 uint8_t *ipv6_max_hi = &ipv6_max->s6_addr[0];
1911 uint8_t *ipv6_max_lo = &ipv6_max->s6_addr[0] + sizeof(uint64_t);
286de272
DB
1912
1913 ovs_be64 addr6_64_min_hi;
1914 ovs_be64 addr6_64_min_lo;
1915 memcpy(&addr6_64_min_hi, ipv6_min_hi, sizeof addr6_64_min_hi);
1916 memcpy(&addr6_64_min_lo, ipv6_min_lo, sizeof addr6_64_min_lo);
1917
1918 ovs_be64 addr6_64_max_hi;
1919 ovs_be64 addr6_64_max_lo;
1920 memcpy(&addr6_64_max_hi, ipv6_max_hi, sizeof addr6_64_max_hi);
1921 memcpy(&addr6_64_max_lo, ipv6_max_lo, sizeof addr6_64_max_lo);
1922
1923 uint64_t diff;
dec0dbbc 1924
286de272
DB
1925 if (addr6_64_min_hi == addr6_64_max_hi &&
1926 ntohll(addr6_64_min_lo) <= ntohll(addr6_64_max_lo)) {
1927 diff = ntohll(addr6_64_max_lo) - ntohll(addr6_64_min_lo);
1928 } else if (ntohll(addr6_64_min_hi) + 1 == ntohll(addr6_64_max_hi) &&
1929 ntohll(addr6_64_min_lo) > ntohll(addr6_64_max_lo)) {
1930 diff = UINT64_MAX - (ntohll(addr6_64_min_lo) -
1931 ntohll(addr6_64_max_lo) - 1);
1932 } else {
1933 /* Limit address delta supported to 32 bits or 4 billion approximately.
1934 * Possibly, this should be visible to the user through a datapath
1935 * support check, however the practical impact is probably nil. */
1936 diff = 0xfffffffe;
1937 }
dec0dbbc 1938
286de272
DB
1939 if (diff > 0xfffffffe) {
1940 diff = 0xfffffffe;
1941 }
1942 return diff;
1943}
1944
1945/* This function must be used in tandem with nat_ipv6_addrs_delta(), which
1946 * restricts the input parameters. */
a489b168 1947static void
cda1b109 1948nat_ipv6_addr_increment(struct in6_addr *ipv6, uint32_t increment)
286de272 1949{
cda1b109
DB
1950 uint8_t *ipv6_hi = &ipv6->s6_addr[0];
1951 uint8_t *ipv6_lo = &ipv6->s6_addr[0] + sizeof(ovs_be64);
286de272
DB
1952 ovs_be64 addr6_64_hi;
1953 ovs_be64 addr6_64_lo;
1954 memcpy(&addr6_64_hi, ipv6_hi, sizeof addr6_64_hi);
1955 memcpy(&addr6_64_lo, ipv6_lo, sizeof addr6_64_lo);
1956
1957 if (UINT64_MAX - increment >= ntohll(addr6_64_lo)) {
1958 addr6_64_lo = htonll(increment + ntohll(addr6_64_lo));
1959 } else if (addr6_64_hi != OVS_BE64_MAX) {
1960 addr6_64_hi = htonll(1 + ntohll(addr6_64_hi));
1961 addr6_64_lo = htonll(increment - (UINT64_MAX -
1962 ntohll(addr6_64_lo) + 1));
1963 } else {
1964 OVS_NOT_REACHED();
1965 }
1966
1967 memcpy(ipv6_hi, &addr6_64_hi, sizeof addr6_64_hi);
1968 memcpy(ipv6_lo, &addr6_64_lo, sizeof addr6_64_lo);
286de272
DB
1969}
1970
1971static uint32_t
1972nat_range_hash(const struct conn *conn, uint32_t basis)
1973{
1974 uint32_t hash = basis;
286de272 1975
92edd073
DB
1976 hash = ct_addr_hash_add(hash, &conn->nat_info->min_addr);
1977 hash = ct_addr_hash_add(hash, &conn->nat_info->max_addr);
1978 hash = hash_add(hash,
1979 (conn->nat_info->max_port << 16)
1980 | conn->nat_info->min_port);
92edd073
DB
1981 hash = ct_endpoint_hash_add(hash, &conn->key.src);
1982 hash = ct_endpoint_hash_add(hash, &conn->key.dst);
286de272
DB
1983 hash = hash_add(hash, (OVS_FORCE uint32_t) conn->key.dl_type);
1984 hash = hash_add(hash, conn->key.nw_proto);
1985 hash = hash_add(hash, conn->key.zone);
92edd073
DB
1986
1987 /* The purpose of the second parameter is to distinguish hashes of data of
1988 * different length; our data always has the same length so there is no
1989 * value in counting. */
1990 return hash_finish(hash, 0);
286de272
DB
1991}
1992
1993static bool
1994nat_select_range_tuple(struct conntrack *ct, const struct conn *conn,
1995 struct conn *nat_conn)
1996{
bd5e81a0
DB
1997 enum { MIN_NAT_EPHEMERAL_PORT = 1024,
1998 MAX_NAT_EPHEMERAL_PORT = 65535 };
286de272
DB
1999
2000 uint16_t min_port;
2001 uint16_t max_port;
2002 uint16_t first_port;
286de272
DB
2003 uint32_t hash = nat_range_hash(conn, ct->hash_basis);
2004
2005 if ((conn->nat_info->nat_action & NAT_ACTION_SRC) &&
2006 (!(conn->nat_info->nat_action & NAT_ACTION_SRC_PORT))) {
2007 min_port = ntohs(conn->key.src.port);
2008 max_port = ntohs(conn->key.src.port);
2009 first_port = min_port;
2010 } else if ((conn->nat_info->nat_action & NAT_ACTION_DST) &&
2011 (!(conn->nat_info->nat_action & NAT_ACTION_DST_PORT))) {
2012 min_port = ntohs(conn->key.dst.port);
2013 max_port = ntohs(conn->key.dst.port);
2014 first_port = min_port;
2015 } else {
2016 uint16_t deltap = conn->nat_info->max_port - conn->nat_info->min_port;
2017 uint32_t port_index = hash % (deltap + 1);
2018 first_port = conn->nat_info->min_port + port_index;
2019 min_port = conn->nat_info->min_port;
2020 max_port = conn->nat_info->max_port;
2021 }
2022
2023 uint32_t deltaa = 0;
2024 uint32_t address_index;
cda1b109 2025 union ct_addr ct_addr;
286de272 2026 memset(&ct_addr, 0, sizeof ct_addr);
cda1b109 2027 union ct_addr max_ct_addr;
286de272
DB
2028 memset(&max_ct_addr, 0, sizeof max_ct_addr);
2029 max_ct_addr = conn->nat_info->max_addr;
2030
2031 if (conn->key.dl_type == htons(ETH_TYPE_IP)) {
cda1b109
DB
2032 deltaa = ntohl(conn->nat_info->max_addr.ipv4) -
2033 ntohl(conn->nat_info->min_addr.ipv4);
286de272 2034 address_index = hash % (deltaa + 1);
cda1b109
DB
2035 ct_addr.ipv4 = htonl(
2036 ntohl(conn->nat_info->min_addr.ipv4) + address_index);
286de272 2037 } else {
cda1b109
DB
2038 deltaa = nat_ipv6_addrs_delta(&conn->nat_info->min_addr.ipv6,
2039 &conn->nat_info->max_addr.ipv6);
286de272
DB
2040 /* deltaa must be within 32 bits for full hash coverage. A 64 or
2041 * 128 bit hash is unnecessary and hence not used here. Most code
2042 * is kept common with V4; nat_ipv6_addrs_delta() will do the
2043 * enforcement via max_ct_addr. */
2044 max_ct_addr = conn->nat_info->min_addr;
cda1b109 2045 nat_ipv6_addr_increment(&max_ct_addr.ipv6, deltaa);
286de272 2046 address_index = hash % (deltaa + 1);
cda1b109
DB
2047 ct_addr.ipv6 = conn->nat_info->min_addr.ipv6;
2048 nat_ipv6_addr_increment(&ct_addr.ipv6, address_index);
286de272
DB
2049 }
2050
2051 uint16_t port = first_port;
2052 bool all_ports_tried = false;
32b2c81f
DB
2053 /* For DNAT or for specified port ranges, we don't use ephemeral ports. */
2054 bool ephemeral_ports_tried
2055 = conn->nat_info->nat_action & NAT_ACTION_DST ||
2056 conn->nat_info->nat_action & NAT_ACTION_SRC_PORT
2057 ? true : false;
cda1b109 2058 union ct_addr first_addr = ct_addr;
4cd0481c
DB
2059 bool pat_enabled = conn->key.nw_proto != IPPROTO_ICMP &&
2060 conn->key.nw_proto != IPPROTO_ICMPV6;
286de272
DB
2061
2062 while (true) {
2063 if (conn->nat_info->nat_action & NAT_ACTION_SRC) {
2064 nat_conn->rev_key.dst.addr = ct_addr;
e32cd4c6 2065 if (pat_enabled) {
2066 nat_conn->rev_key.dst.port = htons(port);
2067 }
286de272 2068 } else {
1c8689d7 2069 nat_conn->rev_key.src.addr = ct_addr;
e32cd4c6 2070 if (pat_enabled) {
2071 nat_conn->rev_key.src.port = htons(port);
2072 }
286de272
DB
2073 }
2074
e32cd4c6 2075 bool found = conn_lookup(ct, &nat_conn->rev_key, time_msec(), NULL,
2076 NULL);
967bb5c5 2077 if (!found) {
286de272 2078 return true;
4cd0481c 2079 } else if (pat_enabled && !all_ports_tried) {
286de272
DB
2080 if (min_port == max_port) {
2081 all_ports_tried = true;
2082 } else if (port == max_port) {
2083 port = min_port;
2084 } else {
2085 port++;
2086 }
2087 if (port == first_port) {
2088 all_ports_tried = true;
2089 }
2090 } else {
2091 if (memcmp(&ct_addr, &max_ct_addr, sizeof ct_addr)) {
2092 if (conn->key.dl_type == htons(ETH_TYPE_IP)) {
cda1b109 2093 ct_addr.ipv4 = htonl(ntohl(ct_addr.ipv4) + 1);
286de272 2094 } else {
cda1b109 2095 nat_ipv6_addr_increment(&ct_addr.ipv6, 1);
286de272
DB
2096 }
2097 } else {
2098 ct_addr = conn->nat_info->min_addr;
2099 }
2100 if (!memcmp(&ct_addr, &first_addr, sizeof ct_addr)) {
4cd0481c 2101 if (pat_enabled && !ephemeral_ports_tried) {
ac04639a 2102 ephemeral_ports_tried = true;
286de272 2103 ct_addr = conn->nat_info->min_addr;
8417e688 2104 first_addr = ct_addr;
286de272
DB
2105 min_port = MIN_NAT_EPHEMERAL_PORT;
2106 max_port = MAX_NAT_EPHEMERAL_PORT;
2107 } else {
2108 break;
2109 }
2110 }
2111 first_port = min_port;
2112 port = first_port;
2113 all_ports_tried = false;
2114 }
2115 }
2116 return false;
2117}
2118
a489b168 2119static enum ct_update_res
967bb5c5
DB
2120conn_update(struct conntrack *ct, struct conn *conn, struct dp_packet *pkt,
2121 struct conn_lookup_ctx *ctx, long long now)
a489b168 2122{
967bb5c5
DB
2123 ovs_mutex_lock(&conn->lock);
2124 enum ct_update_res update_res =
2125 l4_protos[conn->key.nw_proto]->conn_update(ct, conn, pkt, ctx->reply,
2126 now);
2127 ovs_mutex_unlock(&conn->lock);
2128 return update_res;
a489b168
DDP
2129}
2130
2131static bool
2132conn_expired(struct conn *conn, long long now)
2133{
286de272 2134 if (conn->conn_type == CT_CONN_TYPE_DEFAULT) {
967bb5c5
DB
2135 ovs_mutex_lock(&conn->lock);
2136 bool expired = now >= conn->expiration ? true : false;
2137 ovs_mutex_unlock(&conn->lock);
2138 return expired;
286de272
DB
2139 }
2140 return false;
a489b168
DDP
2141}
2142
2143static bool
2144valid_new(struct dp_packet *pkt, struct conn_key *key)
2145{
2146 return l4_protos[key->nw_proto]->valid_new(pkt);
2147}
2148
2149static struct conn *
967bb5c5
DB
2150new_conn(struct conntrack *ct, struct dp_packet *pkt, struct conn_key *key,
2151 long long now)
a489b168 2152{
967bb5c5 2153 return l4_protos[key->nw_proto]->new_conn(ct, pkt, now);
a489b168
DDP
2154}
2155
2156static void
967bb5c5 2157delete_conn_cmn(struct conn *conn)
a489b168 2158{
286de272 2159 free(conn->nat_info);
bd5e81a0 2160 free(conn->alg);
a489b168
DDP
2161 free(conn);
2162}
967bb5c5
DB
2163
2164static void
2165delete_conn(struct conn *conn)
2166{
2167 ovs_assert(conn->conn_type == CT_CONN_TYPE_DEFAULT);
2168 ovs_mutex_destroy(&conn->lock);
2169 free(conn->nat_conn);
2170 delete_conn_cmn(conn);
2171}
2172
2173/* Only used by conn_clean_one(). */
2174static void
2175delete_conn_one(struct conn *conn)
2176{
2177 if (conn->conn_type == CT_CONN_TYPE_DEFAULT) {
2178 ovs_mutex_destroy(&conn->lock);
2179 }
2180 delete_conn_cmn(conn);
2181}
4d4e68ed 2182\f
271e48a0
YHW
2183/* Convert a conntrack address 'a' into an IP address 'b' based on 'dl_type'.
2184 *
2185 * Note that 'dl_type' should be either "ETH_TYPE_IP" or "ETH_TYPE_IPv6"
2186 * in network-byte order. */
4d4e68ed 2187static void
cda1b109 2188ct_endpoint_to_ct_dpif_inet_addr(const union ct_addr *a,
4d4e68ed
DDP
2189 union ct_dpif_inet_addr *b,
2190 ovs_be16 dl_type)
2191{
2192 if (dl_type == htons(ETH_TYPE_IP)) {
cda1b109 2193 b->ip = a->ipv4;
4d4e68ed 2194 } else if (dl_type == htons(ETH_TYPE_IPV6)){
cda1b109 2195 b->in6 = a->ipv6;
4d4e68ed
DDP
2196 }
2197}
2198
271e48a0
YHW
2199/* Convert an IP address 'a' into a conntrack address 'b' based on 'dl_type'.
2200 *
2201 * Note that 'dl_type' should be either "ETH_TYPE_IP" or "ETH_TYPE_IPv6"
2202 * in network-byte order. */
2203static void
2204ct_dpif_inet_addr_to_ct_endpoint(const union ct_dpif_inet_addr *a,
cda1b109 2205 union ct_addr *b, ovs_be16 dl_type)
271e48a0
YHW
2206{
2207 if (dl_type == htons(ETH_TYPE_IP)) {
cda1b109 2208 b->ipv4 = a->ip;
271e48a0 2209 } else if (dl_type == htons(ETH_TYPE_IPV6)){
cda1b109 2210 b->ipv6 = a->in6;
271e48a0
YHW
2211 }
2212}
2213
4d4e68ed
DDP
2214static void
2215conn_key_to_tuple(const struct conn_key *key, struct ct_dpif_tuple *tuple)
2216{
2217 if (key->dl_type == htons(ETH_TYPE_IP)) {
2218 tuple->l3_type = AF_INET;
2219 } else if (key->dl_type == htons(ETH_TYPE_IPV6)) {
2220 tuple->l3_type = AF_INET6;
2221 }
2222 tuple->ip_proto = key->nw_proto;
2223 ct_endpoint_to_ct_dpif_inet_addr(&key->src.addr, &tuple->src,
2224 key->dl_type);
2225 ct_endpoint_to_ct_dpif_inet_addr(&key->dst.addr, &tuple->dst,
2226 key->dl_type);
2227
2228 if (key->nw_proto == IPPROTO_ICMP || key->nw_proto == IPPROTO_ICMPV6) {
b269a122
DDP
2229 tuple->icmp_id = key->src.icmp_id;
2230 tuple->icmp_type = key->src.icmp_type;
2231 tuple->icmp_code = key->src.icmp_code;
4d4e68ed
DDP
2232 } else {
2233 tuple->src_port = key->src.port;
2234 tuple->dst_port = key->dst.port;
2235 }
2236}
2237
271e48a0
YHW
2238static void
2239tuple_to_conn_key(const struct ct_dpif_tuple *tuple, uint16_t zone,
2240 struct conn_key *key)
2241{
2242 if (tuple->l3_type == AF_INET) {
2243 key->dl_type = htons(ETH_TYPE_IP);
2244 } else if (tuple->l3_type == AF_INET6) {
2245 key->dl_type = htons(ETH_TYPE_IPV6);
2246 }
2247 key->nw_proto = tuple->ip_proto;
2248 ct_dpif_inet_addr_to_ct_endpoint(&tuple->src, &key->src.addr,
2249 key->dl_type);
2250 ct_dpif_inet_addr_to_ct_endpoint(&tuple->dst, &key->dst.addr,
2251 key->dl_type);
2252
2253 if (tuple->ip_proto == IPPROTO_ICMP || tuple->ip_proto == IPPROTO_ICMPV6) {
2254 key->src.icmp_id = tuple->icmp_id;
2255 key->src.icmp_type = tuple->icmp_type;
2256 key->src.icmp_code = tuple->icmp_code;
2257 key->dst.icmp_id = tuple->icmp_id;
2258 key->dst.icmp_type = reverse_icmp_type(tuple->icmp_type);
2259 key->dst.icmp_code = tuple->icmp_code;
2260 } else {
2261 key->src.port = tuple->src_port;
2262 key->dst.port = tuple->dst_port;
2263 }
2264 key->zone = zone;
2265}
2266
4d4e68ed
DDP
2267static void
2268conn_to_ct_dpif_entry(const struct conn *conn, struct ct_dpif_entry *entry,
f1a0469e 2269 long long now)
4d4e68ed 2270{
4d4e68ed
DDP
2271 memset(entry, 0, sizeof *entry);
2272 conn_key_to_tuple(&conn->key, &entry->tuple_orig);
2273 conn_key_to_tuple(&conn->rev_key, &entry->tuple_reply);
2274
2275 entry->zone = conn->key.zone;
4d4e68ed 2276
967bb5c5
DB
2277 ovs_mutex_lock(&conn->lock);
2278 entry->mark = conn->mark;
286de272 2279 memcpy(&entry->labels, &conn->label, sizeof entry->labels);
4d4e68ed 2280
dec0dbbc 2281 long long expiration = conn->expiration - now;
4d4e68ed 2282
dec0dbbc 2283 struct ct_l4_proto *class = l4_protos[conn->key.nw_proto];
4d4e68ed
DDP
2284 if (class->conn_get_protoinfo) {
2285 class->conn_get_protoinfo(conn, &entry->protoinfo);
2286 }
f1a0469e 2287 ovs_mutex_unlock(&conn->lock);
bd5e81a0 2288
f1a0469e 2289 entry->timeout = (expiration > 0) ? expiration / 1000 : 0;
bd5e81a0
DB
2290
2291 if (conn->alg) {
2292 /* Caller is responsible for freeing. */
2293 entry->helper.name = xstrdup(conn->alg);
2294 }
4d4e68ed
DDP
2295}
2296
4ea96698
DB
2297struct ipf *
2298conntrack_ipf_ctx(struct conntrack *ct)
2299{
2300 return ct->ipf;
2301}
2302
4d4e68ed
DDP
2303int
2304conntrack_dump_start(struct conntrack *ct, struct conntrack_dump *dump,
ded30c74 2305 const uint16_t *pzone, int *ptot_bkts)
4d4e68ed
DDP
2306{
2307 memset(dump, 0, sizeof(*dump));
dec0dbbc 2308
4d4e68ed
DDP
2309 if (pzone) {
2310 dump->zone = *pzone;
2311 dump->filter_zone = true;
2312 }
4d4e68ed 2313
dec0dbbc 2314 dump->ct = ct;
967bb5c5 2315 *ptot_bkts = 1; /* Need to clean up the callers. */
4d4e68ed
DDP
2316 return 0;
2317}
2318
2319int
2320conntrack_dump_next(struct conntrack_dump *dump, struct ct_dpif_entry *entry)
2321{
2322 struct conntrack *ct = dump->ct;
2323 long long now = time_msec();
2324
967bb5c5
DB
2325 for (;;) {
2326 struct cmap_node *cm_node = cmap_next_position(&ct->conns,
2327 &dump->cm_pos);
2328 if (!cm_node) {
2329 break;
4d4e68ed 2330 }
967bb5c5
DB
2331 struct conn *conn;
2332 INIT_CONTAINER(conn, cm_node, cm_node);
2333 if ((!dump->filter_zone || conn->key.zone == dump->zone) &&
2334 (conn->conn_type != CT_CONN_TYPE_UN_NAT)) {
f1a0469e 2335 conn_to_ct_dpif_entry(conn, entry, now);
4d4e68ed
DDP
2336 return 0;
2337 }
2338 }
967bb5c5 2339
4d4e68ed
DDP
2340 return EOF;
2341}
2342
2343int
2344conntrack_dump_done(struct conntrack_dump *dump OVS_UNUSED)
2345{
2346 return 0;
2347}
5d9cbb4c
DDP
2348
2349int
2350conntrack_flush(struct conntrack *ct, const uint16_t *zone)
2351{
967bb5c5
DB
2352 struct conn *conn;
2353
2354 ovs_mutex_lock(&ct->ct_lock);
2355 CMAP_FOR_EACH (conn, cm_node, &ct->conns) {
2356 if (!zone || *zone == conn->key.zone) {
2357 conn_clean_one(ct, conn);
5d9cbb4c 2358 }
5d9cbb4c 2359 }
967bb5c5 2360 ovs_mutex_unlock(&ct->ct_lock);
bd5e81a0 2361
5d9cbb4c
DDP
2362 return 0;
2363}
bd5e81a0 2364
271e48a0
YHW
2365int
2366conntrack_flush_tuple(struct conntrack *ct, const struct ct_dpif_tuple *tuple,
2367 uint16_t zone)
2368{
271e48a0 2369 int error = 0;
4048c508
DB
2370 struct conn_key key;
2371 struct conn *conn;
271e48a0 2372
4048c508
DB
2373 memset(&key, 0, sizeof(key));
2374 tuple_to_conn_key(tuple, zone, &key);
967bb5c5 2375 ovs_mutex_lock(&ct->ct_lock);
4048c508 2376 conn_lookup(ct, &key, time_msec(), &conn, NULL);
271e48a0 2377
4048c508
DB
2378 if (conn && conn->conn_type == CT_CONN_TYPE_DEFAULT) {
2379 conn_clean(ct, conn);
271e48a0 2380 } else {
a1d5eeff 2381 VLOG_WARN("Must flush tuple using the original pre-NATed tuple");
271e48a0
YHW
2382 error = ENOENT;
2383 }
967bb5c5
DB
2384
2385 ovs_mutex_unlock(&ct->ct_lock);
271e48a0
YHW
2386 return error;
2387}
2388
c92339ad
DB
2389int
2390conntrack_set_maxconns(struct conntrack *ct, uint32_t maxconns)
2391{
2392 atomic_store_relaxed(&ct->n_conn_limit, maxconns);
2393 return 0;
2394}
2395
2396int
2397conntrack_get_maxconns(struct conntrack *ct, uint32_t *maxconns)
2398{
2399 atomic_read_relaxed(&ct->n_conn_limit, maxconns);
2400 return 0;
2401}
2402
875075b3
DB
2403int
2404conntrack_get_nconns(struct conntrack *ct, uint32_t *nconns)
2405{
2406 *nconns = atomic_count_get(&ct->n_conn);
2407 return 0;
2408}
2409
bd5e81a0
DB
2410/* This function must be called with the ct->resources read lock taken. */
2411static struct alg_exp_node *
be38342d
DB
2412expectation_lookup(struct hmap *alg_expectations, const struct conn_key *key,
2413 uint32_t basis, bool src_ip_wc)
bd5e81a0 2414{
c3f6bae2
DB
2415 struct conn_key check_key;
2416 memcpy(&check_key, key, sizeof check_key);
bd5e81a0 2417 check_key.src.port = ALG_WC_SRC_PORT;
dec0dbbc 2418
be38342d
DB
2419 if (src_ip_wc) {
2420 memset(&check_key.src.addr, 0, sizeof check_key.src.addr);
2421 }
dec0dbbc 2422
bd5e81a0
DB
2423 struct alg_exp_node *alg_exp_node;
2424
bd5e81a0 2425 HMAP_FOR_EACH_WITH_HASH (alg_exp_node, node,
dec0dbbc 2426 conn_key_hash(&check_key, basis),
bd5e81a0
DB
2427 alg_expectations) {
2428 if (!conn_key_cmp(&alg_exp_node->key, &check_key)) {
2429 return alg_exp_node;
2430 }
2431 }
2432 return NULL;
2433}
2434
4417ca3d
DB
2435/* This function must be called with the ct->resources write lock taken. */
2436static void
2437expectation_remove(struct hmap *alg_expectations,
2438 const struct conn_key *key, uint32_t basis)
2439{
2440 struct alg_exp_node *alg_exp_node;
2441
2442 HMAP_FOR_EACH_WITH_HASH (alg_exp_node, node, conn_key_hash(key, basis),
2443 alg_expectations) {
2444 if (!conn_key_cmp(&alg_exp_node->key, key)) {
2445 hmap_remove(alg_expectations, &alg_exp_node->node);
2446 break;
2447 }
2448 }
2449}
2450
2451/* This function must be called with the ct->resources read lock taken. */
2452static struct alg_exp_node *
2453expectation_ref_lookup_unique(const struct hindex *alg_expectation_refs,
2454 const struct conn_key *master_key,
2455 const struct conn_key *alg_exp_key,
2456 uint32_t basis)
2457{
2458 struct alg_exp_node *alg_exp_node;
2459
2460 HINDEX_FOR_EACH_WITH_HASH (alg_exp_node, node_ref,
2461 conn_key_hash(master_key, basis),
2462 alg_expectation_refs) {
2463 if (!conn_key_cmp(&alg_exp_node->master_key, master_key) &&
2464 !conn_key_cmp(&alg_exp_node->key, alg_exp_key)) {
2465 return alg_exp_node;
2466 }
2467 }
2468 return NULL;
2469}
2470
2471/* This function must be called with the ct->resources write lock taken. */
2472static void
2473expectation_ref_create(struct hindex *alg_expectation_refs,
2474 struct alg_exp_node *alg_exp_node,
2475 uint32_t basis)
2476{
2477 if (!expectation_ref_lookup_unique(alg_expectation_refs,
2478 &alg_exp_node->master_key,
2479 &alg_exp_node->key, basis)) {
2480 hindex_insert(alg_expectation_refs, &alg_exp_node->node_ref,
2481 conn_key_hash(&alg_exp_node->master_key, basis));
2482 }
2483}
2484
2485static void
967bb5c5 2486expectation_clean(struct conntrack *ct, const struct conn_key *master_key)
4417ca3d 2487{
967bb5c5 2488 ovs_rwlock_wrlock(&ct->resources_lock);
4417ca3d
DB
2489
2490 struct alg_exp_node *node, *next;
2491 HINDEX_FOR_EACH_WITH_HASH_SAFE (node, next, node_ref,
967bb5c5 2492 conn_key_hash(master_key, ct->hash_basis),
4417ca3d
DB
2493 &ct->alg_expectation_refs) {
2494 if (!conn_key_cmp(&node->master_key, master_key)) {
967bb5c5
DB
2495 expectation_remove(&ct->alg_expectations, &node->key,
2496 ct->hash_basis);
4417ca3d
DB
2497 hindex_remove(&ct->alg_expectation_refs, &node->node_ref);
2498 free(node);
2499 }
2500 }
2501
967bb5c5 2502 ovs_rwlock_unlock(&ct->resources_lock);
4417ca3d
DB
2503}
2504
bd5e81a0 2505static void
be38342d
DB
2506expectation_create(struct conntrack *ct, ovs_be16 dst_port,
2507 const struct conn *master_conn, bool reply, bool src_ip_wc,
2508 bool skip_nat)
bd5e81a0 2509{
cda1b109
DB
2510 union ct_addr src_addr;
2511 union ct_addr dst_addr;
2512 union ct_addr alg_nat_repl_addr;
be38342d 2513 struct alg_exp_node *alg_exp_node = xzalloc(sizeof *alg_exp_node);
bd5e81a0 2514
be38342d 2515 if (reply) {
bd5e81a0
DB
2516 src_addr = master_conn->key.src.addr;
2517 dst_addr = master_conn->key.dst.addr;
efa29a89 2518 alg_exp_node->nat_rpl_dst = true;
be38342d
DB
2519 if (skip_nat) {
2520 alg_nat_repl_addr = dst_addr;
efa29a89
DM
2521 } else if (master_conn->nat_info &&
2522 master_conn->nat_info->nat_action & NAT_ACTION_DST) {
2523 alg_nat_repl_addr = master_conn->rev_key.src.addr;
2524 alg_exp_node->nat_rpl_dst = false;
be38342d
DB
2525 } else {
2526 alg_nat_repl_addr = master_conn->rev_key.dst.addr;
2527 }
be38342d
DB
2528 } else {
2529 src_addr = master_conn->rev_key.src.addr;
2530 dst_addr = master_conn->rev_key.dst.addr;
efa29a89 2531 alg_exp_node->nat_rpl_dst = false;
be38342d
DB
2532 if (skip_nat) {
2533 alg_nat_repl_addr = src_addr;
efa29a89
DM
2534 } else if (master_conn->nat_info &&
2535 master_conn->nat_info->nat_action & NAT_ACTION_DST) {
2536 alg_nat_repl_addr = master_conn->key.dst.addr;
2537 alg_exp_node->nat_rpl_dst = true;
be38342d
DB
2538 } else {
2539 alg_nat_repl_addr = master_conn->key.src.addr;
2540 }
be38342d
DB
2541 }
2542 if (src_ip_wc) {
2543 memset(&src_addr, 0, sizeof src_addr);
bd5e81a0
DB
2544 }
2545
bd5e81a0
DB
2546 alg_exp_node->key.dl_type = master_conn->key.dl_type;
2547 alg_exp_node->key.nw_proto = master_conn->key.nw_proto;
2548 alg_exp_node->key.zone = master_conn->key.zone;
2549 alg_exp_node->key.src.addr = src_addr;
2550 alg_exp_node->key.dst.addr = dst_addr;
2551 alg_exp_node->key.src.port = ALG_WC_SRC_PORT;
2552 alg_exp_node->key.dst.port = dst_port;
2553 alg_exp_node->master_mark = master_conn->mark;
2554 alg_exp_node->master_label = master_conn->label;
82b9ac94
DB
2555 memcpy(&alg_exp_node->master_key, &master_conn->key,
2556 sizeof alg_exp_node->master_key);
bd5e81a0
DB
2557 /* Take the write lock here because it is almost 100%
2558 * likely that the lookup will fail and
2559 * expectation_create() will be called below. */
967bb5c5 2560 ovs_rwlock_wrlock(&ct->resources_lock);
bd5e81a0 2561 struct alg_exp_node *alg_exp = expectation_lookup(
be38342d 2562 &ct->alg_expectations, &alg_exp_node->key, ct->hash_basis, src_ip_wc);
bd5e81a0
DB
2563 if (alg_exp) {
2564 free(alg_exp_node);
967bb5c5 2565 ovs_rwlock_unlock(&ct->resources_lock);
bd5e81a0
DB
2566 return;
2567 }
2568
2569 alg_exp_node->alg_nat_repl_addr = alg_nat_repl_addr;
4417ca3d 2570 hmap_insert(&ct->alg_expectations, &alg_exp_node->node,
dec0dbbc 2571 conn_key_hash(&alg_exp_node->key, ct->hash_basis));
4417ca3d
DB
2572 expectation_ref_create(&ct->alg_expectation_refs, alg_exp_node,
2573 ct->hash_basis);
967bb5c5 2574 ovs_rwlock_unlock(&ct->resources_lock);
bd5e81a0
DB
2575}
2576
bd5e81a0
DB
/* Replaces, in place, the first 'substr_size' bytes at 'substr' with the
 * 'rep_str_size' bytes at 'rep_str'.
 *
 * 'total_size' is the number of valid bytes starting at 'substr', including
 * the bytes being replaced.  The 'total_size - substr_size' bytes that follow
 * the old substring are moved so that they follow the replacement.
 *
 * The caller must ensure that 'substr_size <= total_size' and, when the
 * replacement is longer than the original, that the buffer has room for the
 * extra 'rep_str_size - substr_size' bytes.
 *
 * The sizes are 'size_t': the callers pass 'size_t' values and a pointer
 * difference, which an 8-bit parameter would silently truncate whenever more
 * than 255 bytes follow 'substr'. */
static void
replace_substring(char *substr, size_t substr_size,
                  size_t total_size, char *rep_str,
                  size_t rep_str_size)
{
    /* memmove(), not memcpy(): the source and destination ranges overlap. */
    memmove(substr + rep_str_size, substr + substr_size,
            total_size - substr_size);
    memcpy(substr, rep_str, rep_str_size);
}
2586
cd7c99a6
DB
/* Replaces every occurrence of byte 'c1' with 'c2' in the null-terminated
 * string 'str', in place. */
static void
repl_bytes(char *str, char c1, char c2)
{
    for (char *p = str; *p != '\0'; p++) {
        if (*p == c1) {
            *p = c2;
        }
    }
}
2597
/* Replaces the 'size' bytes at 'pkt_str', which points into the data of
 * 'pkt', with the 'repl_size' bytes at 'repl_str', moving the rest of the
 * packet data up to its tail accordingly.  The packet size is then set to
 * 'orig_used_size' adjusted by the difference between the two lengths. */
static void
modify_packet(struct dp_packet *pkt, char *pkt_str, size_t size,
              char *repl_str, size_t repl_size,
              uint32_t orig_used_size)
{
    /* Number of bytes from the start of the substring to the packet tail. */
    size_t bytes_to_tail = (const char *) dp_packet_tail(pkt) - pkt_str;

    replace_substring(pkt_str, size, bytes_to_tail, repl_str, repl_size);
    dp_packet_set_size(pkt, orig_used_size + (int) repl_size - (int) size);
}
2608
bd5e81a0
DB
2609/* Replace IPV4 address in FTP message with NATed address. */
2610static int
2611repl_ftp_v4_addr(struct dp_packet *pkt, ovs_be32 v4_addr_rep,
2612 char *ftp_data_start,
cd7c99a6
DB
2613 size_t addr_offset_from_ftp_data_start,
2614 size_t addr_size OVS_UNUSED)
bd5e81a0
DB
2615{
2616 enum { MAX_FTP_V4_NAT_DELTA = 8 };
2617
2618 /* Do conservative check for pathological MTU usage. */
2619 uint32_t orig_used_size = dp_packet_size(pkt);
cd7c99a6
DB
2620 if (orig_used_size + MAX_FTP_V4_NAT_DELTA >
2621 dp_packet_get_allocated(pkt)) {
2622
bd5e81a0 2623 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 5);
cd7c99a6
DB
2624 VLOG_WARN_RL(&rl, "Unsupported effective MTU %u used with FTP V4",
2625 dp_packet_get_allocated(pkt));
bd5e81a0
DB
2626 return 0;
2627 }
2628
cd7c99a6
DB
2629 char v4_addr_str[INET_ADDRSTRLEN] = {0};
2630 ovs_assert(inet_ntop(AF_INET, &v4_addr_rep, v4_addr_str,
2631 sizeof v4_addr_str));
2632 repl_bytes(v4_addr_str, '.', ',');
2633 modify_packet(pkt, ftp_data_start + addr_offset_from_ftp_data_start,
2634 addr_size, v4_addr_str, strlen(v4_addr_str),
2635 orig_used_size);
2636 return (int) strlen(v4_addr_str) - (int) addr_size;
bd5e81a0
DB
2637}
2638
/* Returns a pointer to the first decimal digit in the null-terminated string
 * 'str', or to its terminating null byte if it contains no digit. */
static char *
skip_non_digits(char *str)
{
    /* The cast is required: passing a negative 'char' (any byte >= 0x80
     * where 'char' is signed) to isdigit() is undefined behaviour. */
    while (*str != 0 && !isdigit((unsigned char) *str)) {
        str++;
    }
    return str;
}
2647
/* Scans the leading run of decimal digits in 'str', overwrites the byte
 * that follows the scanned digits with a null byte, and returns a pointer to
 * that byte.
 *
 * At most 'max_digits' + 1 digits are scanned, because the bound in the loop
 * condition is inclusive.  NOTE(review): presumably the extra digit lets the
 * caller's range check reject over-long numbers instead of silently
 * truncating them -- confirm against the callers before tightening it.
 *
 * The function never returns NULL. */
static char *
terminate_number_str(char *str, uint8_t max_digits)
{
    uint8_t digits_found = 0;

    /* The cast is required: passing a negative 'char' (any byte >= 0x80
     * where 'char' is signed) to isdigit() is undefined behaviour. */
    while (isdigit((unsigned char) *str) && digits_found <= max_digits) {
        str++;
        digits_found++;
    }

    *str = 0;
    return str;
}
2660
2661
2662static void
2663get_ftp_ctl_msg(struct dp_packet *pkt, char *ftp_msg)
2664{
2665 struct tcp_header *th = dp_packet_l4(pkt);
2666 char *tcp_hdr = (char *) th;
2667 uint32_t tcp_payload_len = tcp_payload_length(pkt);
2668 size_t tcp_payload_of_interest = MIN(tcp_payload_len,
2669 LARGEST_FTP_MSG_OF_INTEREST);
2670 size_t tcp_hdr_len = TCP_OFFSET(th->tcp_ctl) * 4;
2671
2672 ovs_strlcpy(ftp_msg, tcp_hdr + tcp_hdr_len,
2673 tcp_payload_of_interest);
2674}
2675
2676static enum ftp_ctl_pkt
2677detect_ftp_ctl_type(const struct conn_lookup_ctx *ctx,
2678 struct dp_packet *pkt)
2679{
bd5e81a0
DB
2680 char ftp_msg[LARGEST_FTP_MSG_OF_INTEREST + 1] = {0};
2681 get_ftp_ctl_msg(pkt, ftp_msg);
dec0dbbc 2682
bd5e81a0
DB
2683 if (ctx->key.dl_type == htons(ETH_TYPE_IPV6)) {
2684 if (strncasecmp(ftp_msg, FTP_EPRT_CMD, strlen(FTP_EPRT_CMD)) &&
2685 !strcasestr(ftp_msg, FTP_EPSV_REPLY)) {
2686 return CT_FTP_CTL_OTHER;
2687 }
2688 } else {
2689 if (strncasecmp(ftp_msg, FTP_PORT_CMD, strlen(FTP_PORT_CMD)) &&
2690 strncasecmp(ftp_msg, FTP_PASV_REPLY_CODE,
2691 strlen(FTP_PASV_REPLY_CODE))) {
2692 return CT_FTP_CTL_OTHER;
2693 }
2694 }
2695
2696 return CT_FTP_CTL_INTEREST;
2697}
2698
2699static enum ftp_ctl_pkt
2700process_ftp_ctl_v4(struct conntrack *ct,
2701 struct dp_packet *pkt,
2702 const struct conn *conn_for_expectation,
4417ca3d 2703 ovs_be32 *v4_addr_rep,
bd5e81a0 2704 char **ftp_data_v4_start,
cd7c99a6
DB
2705 size_t *addr_offset_from_ftp_data_start,
2706 size_t *addr_size)
bd5e81a0
DB
2707{
2708 struct tcp_header *th = dp_packet_l4(pkt);
2709 size_t tcp_hdr_len = TCP_OFFSET(th->tcp_ctl) * 4;
2710 char *tcp_hdr = (char *) th;
2711 *ftp_data_v4_start = tcp_hdr + tcp_hdr_len;
2712 char ftp_msg[LARGEST_FTP_MSG_OF_INTEREST + 1] = {0};
2713 get_ftp_ctl_msg(pkt, ftp_msg);
bd5e81a0
DB
2714 char *ftp = ftp_msg;
2715 enum ct_alg_mode mode;
dec0dbbc 2716
23bea975 2717 if (!strncasecmp(ftp, FTP_PORT_CMD, strlen(FTP_PORT_CMD))) {
bd5e81a0
DB
2718 ftp = ftp_msg + strlen(FTP_PORT_CMD);
2719 mode = CT_FTP_MODE_ACTIVE;
2720 } else {
2721 ftp = ftp_msg + strlen(FTP_PASV_REPLY_CODE);
2722 mode = CT_FTP_MODE_PASSIVE;
2723 }
2724
2725 /* Find first space. */
2726 ftp = strchr(ftp, ' ');
2727 if (!ftp) {
2728 return CT_FTP_CTL_INVALID;
2729 }
2730
2731 /* Find the first digit, after space. */
2732 ftp = skip_non_digits(ftp);
2733 if (*ftp == 0) {
2734 return CT_FTP_CTL_INVALID;
2735 }
2736
2737 char *ip_addr_start = ftp;
2738 *addr_offset_from_ftp_data_start = ip_addr_start - ftp_msg;
bd5e81a0 2739
dec0dbbc 2740 uint8_t comma_count = 0;
bd5e81a0
DB
2741 while (comma_count < 4 && *ftp) {
2742 if (*ftp == ',') {
2743 comma_count++;
2744 if (comma_count == 4) {
2745 *ftp = 0;
2746 } else {
2747 *ftp = '.';
2748 }
2749 }
2750 ftp++;
2751 }
2752 if (comma_count != 4) {
2753 return CT_FTP_CTL_INVALID;
2754 }
2755
2756 struct in_addr ip_addr;
2757 int rc2 = inet_pton(AF_INET, ip_addr_start, &ip_addr);
2758 if (rc2 != 1) {
2759 return CT_FTP_CTL_INVALID;
2760 }
2761
cd7c99a6 2762 *addr_size = ftp - ip_addr_start - 1;
bd5e81a0
DB
2763 char *save_ftp = ftp;
2764 ftp = terminate_number_str(ftp, MAX_FTP_PORT_DGTS);
2765 if (!ftp) {
2766 return CT_FTP_CTL_INVALID;
2767 }
2768 int value;
2769 if (!str_to_int(save_ftp, 10, &value)) {
2770 return CT_FTP_CTL_INVALID;
2771 }
2772
2773 /* This is derived from the L4 port maximum is 65535. */
2774 if (value > 255) {
2775 return CT_FTP_CTL_INVALID;
2776 }
2777
2778 uint16_t port_hs = value;
2779 port_hs <<= 8;
2780
2781 /* Skip over comma. */
2782 ftp++;
2783 save_ftp = ftp;
2784 bool digit_found = false;
2785 while (isdigit(*ftp)) {
2786 ftp++;
2787 digit_found = true;
2788 }
2789 if (!digit_found) {
2790 return CT_FTP_CTL_INVALID;
2791 }
2792 *ftp = 0;
2793 if (!str_to_int(save_ftp, 10, &value)) {
2794 return CT_FTP_CTL_INVALID;
2795 }
2796
2797 if (value > 255) {
2798 return CT_FTP_CTL_INVALID;
2799 }
2800
78a0b272 2801 port_hs |= value;
bd5e81a0
DB
2802 ovs_be16 port = htons(port_hs);
2803 ovs_be32 conn_ipv4_addr;
2804
2805 switch (mode) {
2806 case CT_FTP_MODE_ACTIVE:
cda1b109
DB
2807 *v4_addr_rep = conn_for_expectation->rev_key.dst.addr.ipv4;
2808 conn_ipv4_addr = conn_for_expectation->key.src.addr.ipv4;
bd5e81a0
DB
2809 break;
2810 case CT_FTP_MODE_PASSIVE:
cda1b109
DB
2811 *v4_addr_rep = conn_for_expectation->key.dst.addr.ipv4;
2812 conn_ipv4_addr = conn_for_expectation->rev_key.src.addr.ipv4;
bd5e81a0 2813 break;
7be77cb0 2814 case CT_TFTP_MODE:
bd5e81a0
DB
2815 default:
2816 OVS_NOT_REACHED();
2817 }
2818
2819 ovs_be32 ftp_ipv4_addr;
2820 ftp_ipv4_addr = ip_addr.s_addr;
2821 /* Although most servers will block this exploit, there may be some
2822 * less well managed. */
2823 if (ftp_ipv4_addr != conn_ipv4_addr && ftp_ipv4_addr != *v4_addr_rep) {
2824 return CT_FTP_CTL_INVALID;
2825 }
2826
be38342d
DB
2827 expectation_create(ct, port, conn_for_expectation,
2828 !!(pkt->md.ct_state & CS_REPLY_DIR), false, false);
bd5e81a0
DB
2829 return CT_FTP_CTL_INTEREST;
2830}
2831
/* Returns a pointer to the first byte of 'str' that is not a hexadecimal
 * digit, ':' or '.', which may be the terminating NUL.
 *
 * 'str' holds bytes copied from packet payload, so each byte is converted to
 * 'unsigned char' before being handed to isxdigit(); passing a negative
 * 'char' value to a <ctype.h> function is undefined behavior. */
static char *
skip_ipv6_digits(char *str)
{
    while (isxdigit((unsigned char) *str) || *str == ':' || *str == '.') {
        str++;
    }
    return str;
}
2840
2841static enum ftp_ctl_pkt
2842process_ftp_ctl_v6(struct conntrack *ct,
2843 struct dp_packet *pkt,
2844 const struct conn *conn_for_expectation,
cda1b109 2845 union ct_addr *v6_addr_rep, char **ftp_data_start,
bd5e81a0
DB
2846 size_t *addr_offset_from_ftp_data_start,
2847 size_t *addr_size, enum ct_alg_mode *mode)
2848{
2849 struct tcp_header *th = dp_packet_l4(pkt);
2850 size_t tcp_hdr_len = TCP_OFFSET(th->tcp_ctl) * 4;
2851 char *tcp_hdr = (char *) th;
2852 char ftp_msg[LARGEST_FTP_MSG_OF_INTEREST + 1] = {0};
bd5e81a0
DB
2853 get_ftp_ctl_msg(pkt, ftp_msg);
2854 *ftp_data_start = tcp_hdr + tcp_hdr_len;
bd5e81a0
DB
2855 char *ftp = ftp_msg;
2856 struct in6_addr ip6_addr;
dec0dbbc 2857
23bea975 2858 if (!strncasecmp(ftp, FTP_EPRT_CMD, strlen(FTP_EPRT_CMD))) {
bd5e81a0
DB
2859 ftp = ftp_msg + strlen(FTP_EPRT_CMD);
2860 ftp = skip_non_digits(ftp);
2861 if (*ftp != FTP_AF_V6 || isdigit(ftp[1])) {
2862 return CT_FTP_CTL_INVALID;
2863 }
2864 /* Jump over delimiter. */
2865 ftp += 2;
2866
bd5e81a0 2867 memset(&ip6_addr, 0, sizeof ip6_addr);
dec0dbbc 2868 char *ip_addr_start = ftp;
bd5e81a0
DB
2869 *addr_offset_from_ftp_data_start = ip_addr_start - ftp_msg;
2870 ftp = skip_ipv6_digits(ftp);
2871 *ftp = 0;
2872 *addr_size = ftp - ip_addr_start;
2873 int rc2 = inet_pton(AF_INET6, ip_addr_start, &ip6_addr);
2874 if (rc2 != 1) {
2875 return CT_FTP_CTL_INVALID;
2876 }
2877 ftp++;
2878 *mode = CT_FTP_MODE_ACTIVE;
2879 } else {
2880 ftp = ftp_msg + strcspn(ftp_msg, "(");
2881 ftp = skip_non_digits(ftp);
2882 if (!isdigit(*ftp)) {
2883 return CT_FTP_CTL_INVALID;
2884 }
2885
2886 /* Not used for passive mode. */
2887 *addr_offset_from_ftp_data_start = 0;
2888 *addr_size = 0;
2889
2890 *mode = CT_FTP_MODE_PASSIVE;
2891 }
2892
2893 char *save_ftp = ftp;
2894 ftp = terminate_number_str(ftp, MAX_EXT_FTP_PORT_DGTS);
2895 if (!ftp) {
2896 return CT_FTP_CTL_INVALID;
2897 }
dec0dbbc 2898
bd5e81a0
DB
2899 int value;
2900 if (!str_to_int(save_ftp, 10, &value)) {
2901 return CT_FTP_CTL_INVALID;
2902 }
2903 if (value > CT_MAX_L4_PORT) {
2904 return CT_FTP_CTL_INVALID;
2905 }
2906
2907 uint16_t port_hs = value;
2908 ovs_be16 port = htons(port_hs);
2909
2910 switch (*mode) {
2911 case CT_FTP_MODE_ACTIVE:
2912 *v6_addr_rep = conn_for_expectation->rev_key.dst.addr;
2913 /* Although most servers will block this exploit, there may be some
2914 * less well managed. */
cda1b109
DB
2915 if (memcmp(&ip6_addr, &v6_addr_rep->ipv6, sizeof ip6_addr) &&
2916 memcmp(&ip6_addr, &conn_for_expectation->key.src.addr.ipv6,
bd5e81a0
DB
2917 sizeof ip6_addr)) {
2918 return CT_FTP_CTL_INVALID;
2919 }
2920 break;
2921 case CT_FTP_MODE_PASSIVE:
2922 *v6_addr_rep = conn_for_expectation->key.dst.addr;
2923 break;
7be77cb0 2924 case CT_TFTP_MODE:
bd5e81a0
DB
2925 default:
2926 OVS_NOT_REACHED();
2927 }
2928
be38342d
DB
2929 expectation_create(ct, port, conn_for_expectation,
2930 !!(pkt->md.ct_state & CS_REPLY_DIR), false, false);
bd5e81a0
DB
2931 return CT_FTP_CTL_INTEREST;
2932}
2933
2934static int
cda1b109 2935repl_ftp_v6_addr(struct dp_packet *pkt, union ct_addr v6_addr_rep,
bd5e81a0
DB
2936 char *ftp_data_start,
2937 size_t addr_offset_from_ftp_data_start,
2938 size_t addr_size, enum ct_alg_mode mode)
2939{
2940 /* This is slightly bigger than really possible. */
2941 enum { MAX_FTP_V6_NAT_DELTA = 45 };
2942
2943 if (mode == CT_FTP_MODE_PASSIVE) {
2944 return 0;
2945 }
2946
2947 /* Do conservative check for pathological MTU usage. */
2948 uint32_t orig_used_size = dp_packet_size(pkt);
cd7c99a6
DB
2949 if (orig_used_size + MAX_FTP_V6_NAT_DELTA >
2950 dp_packet_get_allocated(pkt)) {
2951
bd5e81a0 2952 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 5);
cd7c99a6
DB
2953 VLOG_WARN_RL(&rl, "Unsupported effective MTU %u used with FTP V6",
2954 dp_packet_get_allocated(pkt));
bd5e81a0
DB
2955 return 0;
2956 }
2957
298530b8 2958 char v6_addr_str[INET6_ADDRSTRLEN] = {0};
cda1b109 2959 ovs_assert(inet_ntop(AF_INET6, &v6_addr_rep.ipv6, v6_addr_str,
298530b8 2960 sizeof v6_addr_str));
cd7c99a6
DB
2961 modify_packet(pkt, ftp_data_start + addr_offset_from_ftp_data_start,
2962 addr_size, v6_addr_str, strlen(v6_addr_str),
2963 orig_used_size);
2964 return (int) strlen(v6_addr_str) - (int) addr_size;
bd5e81a0
DB
2965}
2966
d13d7115
DB
2967/* Increment/decrement a TCP sequence number. */
2968static void
2969adj_seqnum(ovs_16aligned_be32 *val, int32_t inc)
2970{
2971 put_16aligned_be32(val, htonl(ntohl(get_16aligned_be32(val)) + inc));
2972}
2973
bd5e81a0
DB
2974static void
2975handle_ftp_ctl(struct conntrack *ct, const struct conn_lookup_ctx *ctx,
967bb5c5 2976 struct dp_packet *pkt, struct conn *ec, long long now,
253e4dc0 2977 enum ftp_ctl_pkt ftp_ctl, bool nat)
bd5e81a0
DB
2978{
2979 struct ip_header *l3_hdr = dp_packet_l3(pkt);
2980 ovs_be32 v4_addr_rep = 0;
cda1b109 2981 union ct_addr v6_addr_rep;
faa0826d 2982 size_t addr_offset_from_ftp_data_start = 0;
bd5e81a0
DB
2983 size_t addr_size = 0;
2984 char *ftp_data_start;
bd5e81a0
DB
2985 enum ct_alg_mode mode = CT_FTP_MODE_ACTIVE;
2986
2987 if (detect_ftp_ctl_type(ctx, pkt) != ftp_ctl) {
2988 return;
2989 }
2990
bd5e81a0
DB
2991 struct ovs_16aligned_ip6_hdr *nh6 = dp_packet_l3(pkt);
2992 int64_t seq_skew = 0;
dec0dbbc 2993
253e4dc0 2994 if (ftp_ctl == CT_FTP_CTL_INTEREST) {
bd5e81a0
DB
2995 enum ftp_ctl_pkt rc;
2996 if (ctx->key.dl_type == htons(ETH_TYPE_IPV6)) {
253e4dc0 2997 rc = process_ftp_ctl_v6(ct, pkt, ec,
4417ca3d 2998 &v6_addr_rep, &ftp_data_start,
bd5e81a0
DB
2999 &addr_offset_from_ftp_data_start,
3000 &addr_size, &mode);
3001 } else {
253e4dc0 3002 rc = process_ftp_ctl_v4(ct, pkt, ec,
4417ca3d 3003 &v4_addr_rep, &ftp_data_start,
cd7c99a6
DB
3004 &addr_offset_from_ftp_data_start,
3005 &addr_size);
bd5e81a0
DB
3006 }
3007 if (rc == CT_FTP_CTL_INVALID) {
3008 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 5);
3009 VLOG_WARN_RL(&rl, "Invalid FTP control packet format");
3010 pkt->md.ct_state |= CS_TRACKED | CS_INVALID;
3011 return;
3012 } else if (rc == CT_FTP_CTL_INTEREST) {
3013 uint16_t ip_len;
dec0dbbc 3014
bd5e81a0 3015 if (ctx->key.dl_type == htons(ETH_TYPE_IPV6)) {
253e4dc0
DM
3016 if (nat) {
3017 seq_skew = repl_ftp_v6_addr(pkt, v6_addr_rep,
3018 ftp_data_start,
3019 addr_offset_from_ftp_data_start,
3020 addr_size, mode);
3021 }
3022
bd5e81a0 3023 if (seq_skew) {
253e4dc0
DM
3024 ip_len = ntohs(nh6->ip6_ctlun.ip6_un1.ip6_un1_plen) +
3025 seq_skew;
bd5e81a0 3026 nh6->ip6_ctlun.ip6_un1.ip6_un1_plen = htons(ip_len);
bd5e81a0
DB
3027 }
3028 } else {
253e4dc0
DM
3029 if (nat) {
3030 seq_skew = repl_ftp_v4_addr(pkt, v4_addr_rep,
3031 ftp_data_start,
cd7c99a6
DB
3032 addr_offset_from_ftp_data_start,
3033 addr_size);
253e4dc0 3034 }
bd5e81a0 3035 if (seq_skew) {
253e4dc0 3036 ip_len = ntohs(l3_hdr->ip_tot_len) + seq_skew;
bd5e81a0
DB
3037 l3_hdr->ip_csum = recalc_csum16(l3_hdr->ip_csum,
3038 l3_hdr->ip_tot_len, htons(ip_len));
3039 l3_hdr->ip_tot_len = htons(ip_len);
bd5e81a0
DB
3040 }
3041 }
3042 } else {
3043 OVS_NOT_REACHED();
3044 }
bd5e81a0
DB
3045 }
3046
3047 struct tcp_header *th = dp_packet_l4(pkt);
dec0dbbc 3048
253e4dc0 3049 if (nat && ec->seq_skew != 0) {
d13d7115
DB
3050 ctx->reply != ec->seq_skew_dir ?
3051 adj_seqnum(&th->tcp_ack, -ec->seq_skew) :
3052 adj_seqnum(&th->tcp_seq, ec->seq_skew);
bd5e81a0
DB
3053 }
3054
bd5e81a0 3055 th->tcp_csum = 0;
bd5e81a0 3056 if (ctx->key.dl_type == htons(ETH_TYPE_IPV6)) {
76d85771
DB
3057 th->tcp_csum = packet_csum_upperlayer6(nh6, th, ctx->key.nw_proto,
3058 dp_packet_l4_size(pkt));
bd5e81a0 3059 } else {
76d85771
DB
3060 uint32_t tcp_csum = packet_csum_pseudoheader(l3_hdr);
3061 th->tcp_csum = csum_finish(
3062 csum_continue(tcp_csum, th, dp_packet_l4_size(pkt)));
bd5e81a0 3063 }
253e4dc0
DM
3064
3065 if (seq_skew) {
967bb5c5 3066 conn_seq_skew_set(ct, ec, now, seq_skew + ec->seq_skew,
253e4dc0
DM
3067 ctx->reply);
3068 }
bd5e81a0 3069}
7be77cb0
DB
3070
3071static void
3072handle_tftp_ctl(struct conntrack *ct,
94e71143 3073 const struct conn_lookup_ctx *ctx OVS_UNUSED,
967bb5c5
DB
3074 struct dp_packet *pkt, struct conn *conn_for_expectation,
3075 long long now OVS_UNUSED, enum ftp_ctl_pkt ftp_ctl OVS_UNUSED,
3076 bool nat OVS_UNUSED)
7be77cb0 3077{
be38342d
DB
3078 expectation_create(ct, conn_for_expectation->key.src.port,
3079 conn_for_expectation,
3080 !!(pkt->md.ct_state & CS_REPLY_DIR), false, false);
7be77cb0 3081}