]>
Commit | Line | Data |
---|---|---|
ac0630a2 RB |
1 | /* |
2 | * Licensed under the Apache License, Version 2.0 (the "License"); | |
3 | * you may not use this file except in compliance with the License. | |
4 | * You may obtain a copy of the License at: | |
5 | * | |
6 | * http://www.apache.org/licenses/LICENSE-2.0 | |
7 | * | |
8 | * Unless required by applicable law or agreed to in writing, software | |
9 | * distributed under the License is distributed on an "AS IS" BASIS, | |
10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
11 | * See the License for the specific language governing permissions and | |
12 | * limitations under the License. | |
13 | */ | |
14 | ||
15 | #include <config.h> | |
16 | ||
17 | #include <getopt.h> | |
18 | #include <stdlib.h> | |
19 | #include <stdio.h> | |
20 | ||
b511690b | 21 | #include "bitmap.h" |
ac0630a2 | 22 | #include "command-line.h" |
67d9b930 | 23 | #include "daemon.h" |
ac0630a2 | 24 | #include "dirs.h" |
3e8a2ad1 | 25 | #include "openvswitch/dynamic-string.h" |
ac0630a2 | 26 | #include "fatal-signal.h" |
4edcdcf4 | 27 | #include "hash.h" |
ee89ea7b TW |
28 | #include "openvswitch/hmap.h" |
29 | #include "openvswitch/json.h" | |
8b2ed684 | 30 | #include "ovn/lex.h" |
b86f4767 | 31 | #include "ovn/lib/chassis-index.h" |
06a26dd2 | 32 | #include "ovn/lib/logical-fields.h" |
16936e4d | 33 | #include "ovn/lib/ovn-l7.h" |
e3df8838 BP |
34 | #include "ovn/lib/ovn-nb-idl.h" |
35 | #include "ovn/lib/ovn-sb-idl.h" | |
218351dd | 36 | #include "ovn/lib/ovn-util.h" |
a6095f81 | 37 | #include "ovn/actions.h" |
064d7f84 | 38 | #include "packets.h" |
fd016ae3 | 39 | #include "openvswitch/poll-loop.h" |
5868eb24 | 40 | #include "smap.h" |
7a15be69 | 41 | #include "sset.h" |
ac0630a2 RB |
42 | #include "stream.h" |
43 | #include "stream-ssl.h" | |
7b303ff9 | 44 | #include "unixctl.h" |
ac0630a2 | 45 | #include "util.h" |
4edcdcf4 | 46 | #include "uuid.h" |
ac0630a2 RB |
47 | #include "openvswitch/vlog.h" |
48 | ||
2e2762d4 | 49 | VLOG_DEFINE_THIS_MODULE(ovn_northd); |
ac0630a2 | 50 | |
7b303ff9 AW |
51 | static unixctl_cb_func ovn_northd_exit; |
52 | ||
/* State shared across one ovn-northd processing pass: the IDL connections to
 * the OVN northbound and southbound databases, plus the transaction (if any)
 * currently open on each. */
struct northd_context {
    struct ovsdb_idl *ovnnb_idl;      /* OVN_Northbound DB connection. */
    struct ovsdb_idl *ovnsb_idl;      /* OVN_Southbound DB connection. */
    struct ovsdb_idl_txn *ovnnb_txn;  /* Open northbound transaction. */
    struct ovsdb_idl_txn *ovnsb_txn;  /* Open southbound transaction. */
};
59 | ||
ac0630a2 | 60 | static const char *ovnnb_db; |
ec78987f | 61 | static const char *ovnsb_db; |
ac0630a2 | 62 | |
8639f9be ND |
63 | #define MAC_ADDR_PREFIX 0x0A0000000000ULL |
64 | #define MAC_ADDR_SPACE 0xffffff | |
65 | ||
66 | /* MAC address management (macam) table of "struct eth_addr"s, that holds the | |
67 | * MAC addresses allocated by the OVN ipam module. */ | |
68 | static struct hmap macam = HMAP_INITIALIZER(&macam); | |
b511690b GS |
69 | |
70 | #define MAX_OVN_TAGS 4096 | |
880fcd14 BP |
71 | \f |
72 | /* Pipeline stages. */ | |
ac0630a2 | 73 | |
880fcd14 BP |
74 | /* The two pipelines in an OVN logical flow table. */ |
75 | enum ovn_pipeline { | |
76 | P_IN, /* Ingress pipeline. */ | |
77 | P_OUT /* Egress pipeline. */ | |
78 | }; | |
091e3af9 | 79 | |
880fcd14 BP |
80 | /* The two purposes for which ovn-northd uses OVN logical datapaths. */ |
81 | enum ovn_datapath_type { | |
82 | DP_SWITCH, /* OVN logical switch. */ | |
83 | DP_ROUTER /* OVN logical router. */ | |
091e3af9 JP |
84 | }; |
85 | ||
880fcd14 BP |
86 | /* Returns an "enum ovn_stage" built from the arguments. |
87 | * | |
88 | * (It's better to use ovn_stage_build() for type-safety reasons, but inline | |
89 | * functions can't be used in enums or switch cases.) */ | |
90 | #define OVN_STAGE_BUILD(DP_TYPE, PIPELINE, TABLE) \ | |
91 | (((DP_TYPE) << 9) | ((PIPELINE) << 8) | (TABLE)) | |
92 | ||
93 | /* A stage within an OVN logical switch or router. | |
091e3af9 | 94 | * |
880fcd14 BP |
95 | * An "enum ovn_stage" indicates whether the stage is part of a logical switch |
96 | * or router, whether the stage is part of the ingress or egress pipeline, and | |
97 | * the table within that pipeline. The first three components are combined to | |
685f4dfe | 98 | * form the stage's full name, e.g. S_SWITCH_IN_PORT_SEC_L2, |
880fcd14 BP |
99 | * S_ROUTER_OUT_DELIVERY. */ |
100 | enum ovn_stage { | |
1a03fc7d BS |
101 | #define PIPELINE_STAGES \ |
102 | /* Logical switch ingress stages. */ \ | |
103 | PIPELINE_STAGE(SWITCH, IN, PORT_SEC_L2, 0, "ls_in_port_sec_l2") \ | |
104 | PIPELINE_STAGE(SWITCH, IN, PORT_SEC_IP, 1, "ls_in_port_sec_ip") \ | |
105 | PIPELINE_STAGE(SWITCH, IN, PORT_SEC_ND, 2, "ls_in_port_sec_nd") \ | |
106 | PIPELINE_STAGE(SWITCH, IN, PRE_ACL, 3, "ls_in_pre_acl") \ | |
107 | PIPELINE_STAGE(SWITCH, IN, PRE_LB, 4, "ls_in_pre_lb") \ | |
108 | PIPELINE_STAGE(SWITCH, IN, PRE_STATEFUL, 5, "ls_in_pre_stateful") \ | |
109 | PIPELINE_STAGE(SWITCH, IN, ACL, 6, "ls_in_acl") \ | |
110 | PIPELINE_STAGE(SWITCH, IN, QOS_MARK, 7, "ls_in_qos_mark") \ | |
111 | PIPELINE_STAGE(SWITCH, IN, LB, 8, "ls_in_lb") \ | |
112 | PIPELINE_STAGE(SWITCH, IN, STATEFUL, 9, "ls_in_stateful") \ | |
113 | PIPELINE_STAGE(SWITCH, IN, ARP_ND_RSP, 10, "ls_in_arp_rsp") \ | |
114 | PIPELINE_STAGE(SWITCH, IN, DHCP_OPTIONS, 11, "ls_in_dhcp_options") \ | |
115 | PIPELINE_STAGE(SWITCH, IN, DHCP_RESPONSE, 12, "ls_in_dhcp_response") \ | |
302eda27 NS |
116 | PIPELINE_STAGE(SWITCH, IN, DNS_LOOKUP, 13, "ls_in_dns_lookup") \ |
117 | PIPELINE_STAGE(SWITCH, IN, DNS_RESPONSE, 14, "ls_in_dns_response") \ | |
118 | PIPELINE_STAGE(SWITCH, IN, L2_LKUP, 15, "ls_in_l2_lkup") \ | |
e0c9e58b JP |
119 | \ |
120 | /* Logical switch egress stages. */ \ | |
7a15be69 GS |
121 | PIPELINE_STAGE(SWITCH, OUT, PRE_LB, 0, "ls_out_pre_lb") \ |
122 | PIPELINE_STAGE(SWITCH, OUT, PRE_ACL, 1, "ls_out_pre_acl") \ | |
123 | PIPELINE_STAGE(SWITCH, OUT, PRE_STATEFUL, 2, "ls_out_pre_stateful") \ | |
124 | PIPELINE_STAGE(SWITCH, OUT, LB, 3, "ls_out_lb") \ | |
125 | PIPELINE_STAGE(SWITCH, OUT, ACL, 4, "ls_out_acl") \ | |
1a03fc7d BS |
126 | PIPELINE_STAGE(SWITCH, OUT, QOS_MARK, 5, "ls_out_qos_mark") \ |
127 | PIPELINE_STAGE(SWITCH, OUT, STATEFUL, 6, "ls_out_stateful") \ | |
128 | PIPELINE_STAGE(SWITCH, OUT, PORT_SEC_IP, 7, "ls_out_port_sec_ip") \ | |
129 | PIPELINE_STAGE(SWITCH, OUT, PORT_SEC_L2, 8, "ls_out_port_sec_l2") \ | |
e0c9e58b JP |
130 | \ |
131 | /* Logical router ingress stages. */ \ | |
4364646c ZKL |
132 | PIPELINE_STAGE(ROUTER, IN, ADMISSION, 0, "lr_in_admission") \ |
133 | PIPELINE_STAGE(ROUTER, IN, IP_INPUT, 1, "lr_in_ip_input") \ | |
134 | PIPELINE_STAGE(ROUTER, IN, DEFRAG, 2, "lr_in_defrag") \ | |
135 | PIPELINE_STAGE(ROUTER, IN, UNSNAT, 3, "lr_in_unsnat") \ | |
136 | PIPELINE_STAGE(ROUTER, IN, DNAT, 4, "lr_in_dnat") \ | |
137 | PIPELINE_STAGE(ROUTER, IN, ND_RA_OPTIONS, 5, "lr_in_nd_ra_options") \ | |
138 | PIPELINE_STAGE(ROUTER, IN, ND_RA_RESPONSE, 6, "lr_in_nd_ra_response") \ | |
139 | PIPELINE_STAGE(ROUTER, IN, IP_ROUTING, 7, "lr_in_ip_routing") \ | |
140 | PIPELINE_STAGE(ROUTER, IN, ARP_RESOLVE, 8, "lr_in_arp_resolve") \ | |
141 | PIPELINE_STAGE(ROUTER, IN, GW_REDIRECT, 9, "lr_in_gw_redirect") \ | |
142 | PIPELINE_STAGE(ROUTER, IN, ARP_REQUEST, 10, "lr_in_arp_request") \ | |
e0c9e58b JP |
143 | \ |
144 | /* Logical router egress stages. */ \ | |
06a26dd2 MS |
145 | PIPELINE_STAGE(ROUTER, OUT, UNDNAT, 0, "lr_out_undnat") \ |
146 | PIPELINE_STAGE(ROUTER, OUT, SNAT, 1, "lr_out_snat") \ | |
147 | PIPELINE_STAGE(ROUTER, OUT, EGR_LOOP, 2, "lr_out_egr_loop") \ | |
148 | PIPELINE_STAGE(ROUTER, OUT, DELIVERY, 3, "lr_out_delivery") | |
880fcd14 BP |
149 | |
150 | #define PIPELINE_STAGE(DP_TYPE, PIPELINE, STAGE, TABLE, NAME) \ | |
151 | S_##DP_TYPE##_##PIPELINE##_##STAGE \ | |
152 | = OVN_STAGE_BUILD(DP_##DP_TYPE, P_##PIPELINE, TABLE), | |
153 | PIPELINE_STAGES | |
154 | #undef PIPELINE_STAGE | |
091e3af9 JP |
155 | }; |
156 | ||
6bb4a18e JP |
157 | /* Due to various hard-coded priorities need to implement ACLs, the |
158 | * northbound database supports a smaller range of ACL priorities than | |
159 | * are available to logical flows. This value is added to an ACL | |
160 | * priority to determine the ACL's logical flow priority. */ | |
161 | #define OVN_ACL_PRI_OFFSET 1000 | |
162 | ||
06a26dd2 | 163 | /* Register definitions specific to switches. */ |
4364646c ZKL |
164 | #define REGBIT_CONNTRACK_DEFRAG "reg0[0]" |
165 | #define REGBIT_CONNTRACK_COMMIT "reg0[1]" | |
166 | #define REGBIT_CONNTRACK_NAT "reg0[2]" | |
167 | #define REGBIT_DHCP_OPTS_RESULT "reg0[3]" | |
302eda27 | 168 | #define REGBIT_DNS_LOOKUP_RESULT "reg0[4]" |
4364646c | 169 | #define REGBIT_ND_RA_OPTS_RESULT "reg0[5]" |
facf8652 | 170 | |
06a26dd2 MS |
171 | /* Register definitions for switches and routers. */ |
172 | #define REGBIT_NAT_REDIRECT "reg9[0]" | |
173 | /* Indicate that this packet has been recirculated using egress | |
174 | * loopback. This allows certain checks to be bypassed, such as a | |
175 | * logical router dropping packets with source IP address equals | |
176 | * one of the logical router's own IP addresses. */ | |
177 | #define REGBIT_EGRESS_LOOPBACK "reg9[1]" | |
178 | ||
880fcd14 BP |
179 | /* Returns an "enum ovn_stage" built from the arguments. */ |
180 | static enum ovn_stage | |
181 | ovn_stage_build(enum ovn_datapath_type dp_type, enum ovn_pipeline pipeline, | |
182 | uint8_t table) | |
183 | { | |
184 | return OVN_STAGE_BUILD(dp_type, pipeline, table); | |
185 | } | |
186 | ||
187 | /* Returns the pipeline to which 'stage' belongs. */ | |
188 | static enum ovn_pipeline | |
189 | ovn_stage_get_pipeline(enum ovn_stage stage) | |
190 | { | |
191 | return (stage >> 8) & 1; | |
192 | } | |
193 | ||
194 | /* Returns the table to which 'stage' belongs. */ | |
195 | static uint8_t | |
196 | ovn_stage_get_table(enum ovn_stage stage) | |
197 | { | |
198 | return stage & 0xff; | |
199 | } | |
200 | ||
/* Returns the human-readable name for 'stage' (e.g. "ls_in_port_sec_l2"), or
 * "<unknown>" if 'stage' is not one of the values generated from
 * PIPELINE_STAGES.  The switch body is expanded from the same X-macro list
 * that defines the enum, so names cannot drift out of sync. */
static const char *
ovn_stage_to_str(enum ovn_stage stage)
{
    switch (stage) {
#define PIPELINE_STAGE(DP_TYPE, PIPELINE, STAGE, TABLE, NAME) \
        case S_##DP_TYPE##_##PIPELINE##_##STAGE: return NAME;
    PIPELINE_STAGES
#undef PIPELINE_STAGE
        default: return "<unknown>";
    }
}
9a9961d2 BP |
213 | |
214 | /* Returns the type of the datapath to which a flow with the given 'stage' may | |
215 | * be added. */ | |
216 | static enum ovn_datapath_type | |
217 | ovn_stage_to_datapath_type(enum ovn_stage stage) | |
218 | { | |
219 | switch (stage) { | |
220 | #define PIPELINE_STAGE(DP_TYPE, PIPELINE, STAGE, TABLE, NAME) \ | |
221 | case S_##DP_TYPE##_##PIPELINE##_##STAGE: return DP_##DP_TYPE; | |
222 | PIPELINE_STAGES | |
223 | #undef PIPELINE_STAGE | |
224 | default: OVS_NOT_REACHED(); | |
225 | } | |
226 | } | |
880fcd14 | 227 | \f |
ac0630a2 RB |
/* Prints this daemon's command-line usage to stdout, followed by the shared
 * daemon, logging, and stream option summaries. */
static void
usage(void)
{
    printf("\
%s: OVN northbound management daemon\n\
usage: %s [OPTIONS]\n\
\n\
Options:\n\
  --ovnnb-db=DATABASE       connect to ovn-nb database at DATABASE\n\
                            (default: %s)\n\
  --ovnsb-db=DATABASE       connect to ovn-sb database at DATABASE\n\
                            (default: %s)\n\
  -h, --help                display this help message\n\
  -o, --options             list available options\n\
  -V, --version             display version information\n\
", program_name, program_name, default_nb_db(), default_sb_db());
    daemon_usage();
    vlog_usage();
    stream_usage("database", true, true, false);
}
248 | \f | |
5868eb24 BP |
/* One in-use tunnel ID (datapath or port key) in a "tunnel ID set": an hmap
 * keyed by hash_int(tnlid, 0). */
struct tnlid_node {
    struct hmap_node hmap_node;  /* In the owning tunnel ID set. */
    uint32_t tnlid;              /* The tunnel ID itself. */
};
253 | ||
254 | static void | |
255 | destroy_tnlids(struct hmap *tnlids) | |
4edcdcf4 | 256 | { |
4ec3d7c7 DDP |
257 | struct tnlid_node *node; |
258 | HMAP_FOR_EACH_POP (node, hmap_node, tnlids) { | |
5868eb24 BP |
259 | free(node); |
260 | } | |
261 | hmap_destroy(tnlids); | |
262 | } | |
263 | ||
264 | static void | |
265 | add_tnlid(struct hmap *set, uint32_t tnlid) | |
266 | { | |
267 | struct tnlid_node *node = xmalloc(sizeof *node); | |
268 | hmap_insert(set, &node->hmap_node, hash_int(tnlid, 0)); | |
269 | node->tnlid = tnlid; | |
4edcdcf4 RB |
270 | } |
271 | ||
4edcdcf4 | 272 | static bool |
5868eb24 | 273 | tnlid_in_use(const struct hmap *set, uint32_t tnlid) |
4edcdcf4 | 274 | { |
5868eb24 BP |
275 | const struct tnlid_node *node; |
276 | HMAP_FOR_EACH_IN_BUCKET (node, hmap_node, hash_int(tnlid, 0), set) { | |
277 | if (node->tnlid == tnlid) { | |
278 | return true; | |
279 | } | |
280 | } | |
281 | return false; | |
282 | } | |
4edcdcf4 | 283 | |
5868eb24 BP |
284 | static uint32_t |
285 | allocate_tnlid(struct hmap *set, const char *name, uint32_t max, | |
286 | uint32_t *hint) | |
287 | { | |
288 | for (uint32_t tnlid = *hint + 1; tnlid != *hint; | |
289 | tnlid = tnlid + 1 <= max ? tnlid + 1 : 1) { | |
290 | if (!tnlid_in_use(set, tnlid)) { | |
291 | add_tnlid(set, tnlid); | |
292 | *hint = tnlid; | |
293 | return tnlid; | |
294 | } | |
4edcdcf4 RB |
295 | } |
296 | ||
5868eb24 BP |
297 | static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1); |
298 | VLOG_WARN_RL(&rl, "all %s tunnel ids exhausted", name); | |
299 | return 0; | |
300 | } | |
301 | \f | |
a6095f81 BS |
/* One allocated QoS qdisc queue ID, scoped to a particular chassis.  Stored
 * in an hmap keyed by uuid_hash(chassis_uuid), so several queue IDs for the
 * same chassis share a bucket. */
struct ovn_chassis_qdisc_queues {
    struct hmap_node key_node;   /* In the owning queue-ID set. */
    uint32_t queue_id;           /* Allocated qdisc queue ID. */
    struct uuid chassis_uuid;    /* Chassis that owns the queue. */
};
307 | ||
308 | static void | |
309 | destroy_chassis_queues(struct hmap *set) | |
310 | { | |
311 | struct ovn_chassis_qdisc_queues *node; | |
312 | HMAP_FOR_EACH_POP (node, key_node, set) { | |
313 | free(node); | |
314 | } | |
315 | hmap_destroy(set); | |
316 | } | |
317 | ||
318 | static void | |
319 | add_chassis_queue(struct hmap *set, struct uuid *chassis_uuid, | |
320 | uint32_t queue_id) | |
321 | { | |
322 | struct ovn_chassis_qdisc_queues *node = xmalloc(sizeof *node); | |
323 | node->queue_id = queue_id; | |
324 | memcpy(&node->chassis_uuid, chassis_uuid, sizeof node->chassis_uuid); | |
325 | hmap_insert(set, &node->key_node, uuid_hash(chassis_uuid)); | |
326 | } | |
327 | ||
328 | static bool | |
329 | chassis_queueid_in_use(const struct hmap *set, struct uuid *chassis_uuid, | |
330 | uint32_t queue_id) | |
331 | { | |
332 | const struct ovn_chassis_qdisc_queues *node; | |
333 | HMAP_FOR_EACH_WITH_HASH (node, key_node, uuid_hash(chassis_uuid), set) { | |
334 | if (uuid_equals(chassis_uuid, &node->chassis_uuid) | |
335 | && node->queue_id == queue_id) { | |
336 | return true; | |
337 | } | |
338 | } | |
339 | return false; | |
340 | } | |
341 | ||
/* Allocates and returns a qdisc queue ID for 'chassis' that is not yet
 * recorded in 'set'.  On success the new (chassis, queue) pair is added to
 * 'set'.  Returns 0 (never a valid queue ID here) with a rate-limited
 * warning if the whole range is in use.
 *
 * NOTE(review): the scan starts at QDISC_MIN_QUEUE_ID + 1, so the minimum
 * queue ID itself is never handed out -- confirm this is intentional. */
static uint32_t
allocate_chassis_queueid(struct hmap *set, struct sbrec_chassis *chassis)
{
    for (uint32_t queue_id = QDISC_MIN_QUEUE_ID + 1;
         queue_id <= QDISC_MAX_QUEUE_ID;
         queue_id++) {
        if (!chassis_queueid_in_use(set, &chassis->header_.uuid, queue_id)) {
            add_chassis_queue(set, &chassis->header_.uuid, queue_id);
            return queue_id;
        }
    }

    static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
    VLOG_WARN_RL(&rl, "all %s queue ids exhausted", chassis->name);
    return 0;
}
358 | ||
359 | static void | |
360 | free_chassis_queueid(struct hmap *set, struct sbrec_chassis *chassis, | |
361 | uint32_t queue_id) | |
362 | { | |
363 | struct ovn_chassis_qdisc_queues *node; | |
364 | HMAP_FOR_EACH_WITH_HASH (node, key_node, | |
365 | uuid_hash(&chassis->header_.uuid), | |
366 | set) { | |
367 | if (uuid_equals(&chassis->header_.uuid, &node->chassis_uuid) | |
368 | && node->queue_id == queue_id) { | |
369 | hmap_remove(set, &node->key_node); | |
370 | break; | |
371 | } | |
372 | } | |
373 | } | |
374 | ||
375 | static inline bool | |
376 | port_has_qos_params(const struct smap *opts) | |
377 | { | |
378 | return (smap_get(opts, "qos_max_rate") || | |
379 | smap_get(opts, "qos_burst")); | |
380 | } | |
381 | \f | |
161ea2c8 NS |
382 | |
/* Per-logical-switch IP address management (IPAM) configuration, parsed from
 * the switch's other_config column by init_ipam_info_for_datapath(). */
struct ipam_info {
    uint32_t start_ipv4;            /* First assignable IPv4 (host order). */
    size_t total_ipv4s;             /* Number of addresses in the subnet. */
    unsigned long *allocated_ipv4s; /* A bitmap of allocated IPv4s */
    bool ipv6_prefix_set;           /* True if 'ipv6_prefix' parsed OK. */
    struct in6_addr ipv6_prefix;    /* Prefix for IPv6 assignment. */
};
390 | ||
9975d7be BP |
/* The 'key' comes from nbs->header_.uuid or nbr->header_.uuid or
 * sb->external_ids:logical-switch. */
struct ovn_datapath {
    struct hmap_node key_node;  /* Index on 'key'. */
    struct uuid key;            /* (nbs/nbr)->header_.uuid. */

    /* Exactly one of 'nbs' and 'nbr' is nonnull for a northbound-backed
     * datapath; both may be NULL transiently for a southbound-only record. */
    const struct nbrec_logical_switch *nbs;  /* May be NULL. */
    const struct nbrec_logical_router *nbr;  /* May be NULL. */
    const struct sbrec_datapath_binding *sb; /* May be NULL. */

    struct ovs_list list;  /* In list of similar records. */

    /* Logical switch data. */
    struct ovn_port **router_ports;  /* Ports peered with router ports. */
    size_t n_router_ports;

    struct hmap port_tnlids;   /* In-use port tunnel keys (tnlid_node). */
    uint32_t port_key_hint;    /* Round-robin hint for port key allocation. */

    bool has_unknown;          /* Any port with an "unknown" address? */

    /* IPAM data. */
    struct ipam_info *ipam_info;  /* NULL unless IPAM is configured. */

    /* OVN northd only needs to know about the logical router gateway port for
     * NAT on a distributed router.  This "distributed gateway port" is
     * populated only when there is a "redirect-chassis" specified for one of
     * the ports on the logical router.  Otherwise this will be NULL. */
    struct ovn_port *l3dgw_port;
    /* The "derived" OVN port representing the instance of l3dgw_port on
     * the "redirect-chassis". */
    struct ovn_port *l3redirect_port;
    struct ovn_port *localnet_port;  /* Localnet port, if any. */
};

/* One MAC address handed out by the OVN IPAM module, in the global 'macam'
 * hmap. */
struct macam_node {
    struct hmap_node hmap_node;
    struct eth_addr mac_addr;   /* Allocated MAC address. */
};
430 | ||
8639f9be ND |
431 | static void |
432 | cleanup_macam(struct hmap *macam) | |
433 | { | |
434 | struct macam_node *node; | |
435 | HMAP_FOR_EACH_POP (node, hmap_node, macam) { | |
436 | free(node); | |
437 | } | |
438 | } | |
439 | ||
5868eb24 BP |
440 | static struct ovn_datapath * |
441 | ovn_datapath_create(struct hmap *datapaths, const struct uuid *key, | |
9975d7be BP |
442 | const struct nbrec_logical_switch *nbs, |
443 | const struct nbrec_logical_router *nbr, | |
5868eb24 BP |
444 | const struct sbrec_datapath_binding *sb) |
445 | { | |
446 | struct ovn_datapath *od = xzalloc(sizeof *od); | |
447 | od->key = *key; | |
448 | od->sb = sb; | |
9975d7be BP |
449 | od->nbs = nbs; |
450 | od->nbr = nbr; | |
5868eb24 BP |
451 | hmap_init(&od->port_tnlids); |
452 | od->port_key_hint = 0; | |
453 | hmap_insert(datapaths, &od->key_node, uuid_hash(&od->key)); | |
454 | return od; | |
455 | } | |
456 | ||
/* Unindexes 'od' from 'datapaths' and frees it along with its owned
 * resources (port tunnel ID set, IPAM state, router-port array).  Safe to
 * call with od == NULL. */
static void
ovn_datapath_destroy(struct hmap *datapaths, struct ovn_datapath *od)
{
    if (od) {
        /* Don't remove od->list.  It is used within build_datapaths() as a
         * private list and once we've exited that function it is not safe to
         * use it. */
        hmap_remove(datapaths, &od->key_node);
        destroy_tnlids(&od->port_tnlids);
        if (od->ipam_info) {
            bitmap_free(od->ipam_info->allocated_ipv4s);
            free(od->ipam_info);
        }
        free(od->router_ports);
        free(od);
    }
}
474 | ||
9a9961d2 BP |
475 | /* Returns 'od''s datapath type. */ |
476 | static enum ovn_datapath_type | |
477 | ovn_datapath_get_type(const struct ovn_datapath *od) | |
478 | { | |
479 | return od->nbs ? DP_SWITCH : DP_ROUTER; | |
480 | } | |
481 | ||
5868eb24 BP |
482 | static struct ovn_datapath * |
483 | ovn_datapath_find(struct hmap *datapaths, const struct uuid *uuid) | |
484 | { | |
485 | struct ovn_datapath *od; | |
486 | ||
487 | HMAP_FOR_EACH_WITH_HASH (od, key_node, uuid_hash(uuid), datapaths) { | |
488 | if (uuid_equals(uuid, &od->key)) { | |
489 | return od; | |
490 | } | |
491 | } | |
492 | return NULL; | |
493 | } | |
494 | ||
/* Maps a southbound Datapath_Binding row 'sb' back to its ovn_datapath by
 * reading the logical-switch or logical-router UUID from external_ids.
 * Returns NULL if neither key is present or no datapath matches. */
static struct ovn_datapath *
ovn_datapath_from_sbrec(struct hmap *datapaths,
                        const struct sbrec_datapath_binding *sb)
{
    struct uuid key;

    if (!smap_get_uuid(&sb->external_ids, "logical-switch", &key) &&
        !smap_get_uuid(&sb->external_ids, "logical-router", &key)) {
        return NULL;
    }
    return ovn_datapath_find(datapaths, &key);
}
507 | ||
5412db30 J |
508 | static bool |
509 | lrouter_is_enabled(const struct nbrec_logical_router *lrouter) | |
510 | { | |
511 | return !lrouter->enabled || *lrouter->enabled; | |
512 | } | |
513 | ||
161ea2c8 NS |
/* Parses the IPAM configuration for logical switch 'od' from its
 * other_config column and fills in od->ipam_info.  A no-op for routers.
 *
 * Recognized other_config keys:
 *   - "ipv6_prefix": prefix for IPv6 address assignment.
 *   - "subnet":      IPv4 CIDR (e.g. "10.0.0.0/24") for dynamic IPv4
 *                    assignment.
 *   - "exclude_ips": space-separated IPv4 addresses and "a..b" ranges to
 *                    reserve out of "subnet". */
static void
init_ipam_info_for_datapath(struct ovn_datapath *od)
{
    if (!od->nbs) {
        return;
    }

    const char *subnet_str = smap_get(&od->nbs->other_config, "subnet");
    const char *ipv6_prefix = smap_get(&od->nbs->other_config, "ipv6_prefix");

    if (ipv6_prefix) {
        od->ipam_info = xzalloc(sizeof *od->ipam_info);
        od->ipam_info->ipv6_prefix_set = ipv6_parse(
            ipv6_prefix, &od->ipam_info->ipv6_prefix);
    }

    if (!subnet_str) {
        return;
    }

    ovs_be32 subnet, mask;
    char *error = ip_parse_masked(subnet_str, &subnet, &mask);
    /* Reject parse failures, /32 subnets, and non-CIDR masks. */
    if (error || mask == OVS_BE32_MAX || !ip_is_cidr(mask)) {
        static struct vlog_rate_limit rl
            = VLOG_RATE_LIMIT_INIT(5, 1);
        VLOG_WARN_RL(&rl, "bad 'subnet' %s", subnet_str);
        free(error);
        return;
    }

    /* ipam_info may already have been allocated for "ipv6_prefix" above. */
    if (!od->ipam_info) {
        od->ipam_info = xzalloc(sizeof *od->ipam_info);
    }
    /* First assignable address is network address + 1; the inverted mask's
     * host bits give the subnet size. */
    od->ipam_info->start_ipv4 = ntohl(subnet) + 1;
    od->ipam_info->total_ipv4s = ~ntohl(mask);
    od->ipam_info->allocated_ipv4s =
        bitmap_allocate(od->ipam_info->total_ipv4s);

    /* Mark first IP as taken */
    bitmap_set1(od->ipam_info->allocated_ipv4s, 0);

    /* Check if there are any reserved IPs (list) to be excluded from IPAM */
    const char *exclude_ip_list = smap_get(&od->nbs->other_config,
                                           "exclude_ips");
    if (!exclude_ip_list) {
        return;
    }

    struct lexer lexer;
    lexer_init(&lexer, exclude_ip_list);
    /* exclude_ip_list could be in the format -
     * "10.0.0.4 10.0.0.10 10.0.0.20..10.0.0.50 10.0.0.100..10.0.0.110".
     */
    lexer_get(&lexer);
    while (lexer.token.type != LEX_T_END) {
        if (lexer.token.type != LEX_T_INTEGER) {
            lexer_syntax_error(&lexer, "expecting address");
            break;
        }
        /* NOTE(review): assumes an integer token here is in IPv4 form; a
         * plain decimal integer would be misread -- confirm the lexer's
         * token.format before relying on this. */
        uint32_t start = ntohl(lexer.token.value.ipv4);
        lexer_get(&lexer);

        uint32_t end = start + 1;
        if (lexer_match(&lexer, LEX_T_ELLIPSIS)) {
            if (lexer.token.type != LEX_T_INTEGER) {
                lexer_syntax_error(&lexer, "expecting address range");
                break;
            }
            /* 'end' is exclusive. */
            end = ntohl(lexer.token.value.ipv4) + 1;
            lexer_get(&lexer);
        }

        /* Clamp start...end to fit the subnet. */
        start = MAX(od->ipam_info->start_ipv4, start);
        end = MIN(od->ipam_info->start_ipv4 + od->ipam_info->total_ipv4s, end);
        if (end > start) {
            bitmap_set_multiple(od->ipam_info->allocated_ipv4s,
                                start - od->ipam_info->start_ipv4,
                                end - start, 1);
        } else {
            lexer_error(&lexer, "excluded addresses not in subnet");
        }
    }
    if (lexer.error) {
        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
        VLOG_WARN_RL(&rl, "logical switch "UUID_FMT": bad exclude_ips (%s)",
                     UUID_ARGS(&od->key), lexer.error);
    }
    lexer_destroy(&lexer);
}
604 | ||
c5fec4f6 BP |
/* Rewrites the external_ids column of 'od''s southbound Datapath_Binding row
 * so that it records the backing northbound row's UUID (under
 * "logical-switch" or "logical-router"), its name, and, when present, the
 * Neutron name as "name2".  'od->sb' must be nonnull. */
static void
ovn_datapath_update_external_ids(struct ovn_datapath *od)
{
    /* Get the logical-switch or logical-router UUID to set in
     * external-ids. */
    char uuid_s[UUID_LEN + 1];
    sprintf(uuid_s, UUID_FMT, UUID_ARGS(&od->key));
    const char *key = od->nbs ? "logical-switch" : "logical-router";

    /* Get names to set in external-ids. */
    const char *name = od->nbs ? od->nbs->name : od->nbr->name;
    const char *name2 = (od->nbs
                         ? smap_get(&od->nbs->external_ids,
                                    "neutron:network_name")
                         : smap_get(&od->nbr->external_ids,
                                    "neutron:router_name"));

    /* Set external-ids. */
    struct smap ids = SMAP_INITIALIZER(&ids);
    smap_add(&ids, key, uuid_s);
    smap_add(&ids, "name", name);
    if (name2 && name2[0]) {
        smap_add(&ids, "name2", name2);
    }
    sbrec_datapath_binding_set_external_ids(od->sb, &ids);
    smap_destroy(&ids);
}
632 | ||
5868eb24 BP |
/* Three-way join of southbound Datapath_Binding rows against northbound
 * logical switches and routers.  Initializes 'datapaths' with one
 * ovn_datapath per logical datapath, then partitions them into:
 *
 *   - 'sb_only':  southbound rows with no northbound match (to be deleted),
 *   - 'nb_only':  northbound rows with no southbound row yet (to be added),
 *   - 'both':     matched pairs.
 *
 * Also deletes, inline, southbound rows that are unusable: rows lacking both
 * external-ids keys and rows duplicating another row's key. */
static void
join_datapaths(struct northd_context *ctx, struct hmap *datapaths,
               struct ovs_list *sb_only, struct ovs_list *nb_only,
               struct ovs_list *both)
{
    hmap_init(datapaths);
    ovs_list_init(sb_only);
    ovs_list_init(nb_only);
    ovs_list_init(both);

    /* Pass 1: index every well-formed southbound row; start them all in
     * 'sb_only' and move them out as northbound matches appear. */
    const struct sbrec_datapath_binding *sb, *sb_next;
    SBREC_DATAPATH_BINDING_FOR_EACH_SAFE (sb, sb_next, ctx->ovnsb_idl) {
        struct uuid key;
        if (!smap_get_uuid(&sb->external_ids, "logical-switch", &key) &&
            !smap_get_uuid(&sb->external_ids, "logical-router", &key)) {
            ovsdb_idl_txn_add_comment(
                ctx->ovnsb_txn,
                "deleting Datapath_Binding "UUID_FMT" that lacks "
                "external-ids:logical-switch and "
                "external-ids:logical-router",
                UUID_ARGS(&sb->header_.uuid));
            sbrec_datapath_binding_delete(sb);
            continue;
        }

        if (ovn_datapath_find(datapaths, &key)) {
            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
            VLOG_INFO_RL(
                &rl, "deleting Datapath_Binding "UUID_FMT" with "
                "duplicate external-ids:logical-switch/router "UUID_FMT,
                UUID_ARGS(&sb->header_.uuid), UUID_ARGS(&key));
            sbrec_datapath_binding_delete(sb);
            continue;
        }

        struct ovn_datapath *od = ovn_datapath_create(datapaths, &key,
                                                      NULL, NULL, sb);
        ovs_list_push_back(sb_only, &od->list);
    }

    /* Pass 2: match northbound logical switches, creating datapaths for
     * those with no southbound row. */
    const struct nbrec_logical_switch *nbs;
    NBREC_LOGICAL_SWITCH_FOR_EACH (nbs, ctx->ovnnb_idl) {
        struct ovn_datapath *od = ovn_datapath_find(datapaths,
                                                    &nbs->header_.uuid);
        if (od) {
            od->nbs = nbs;
            ovs_list_remove(&od->list);
            ovs_list_push_back(both, &od->list);
            ovn_datapath_update_external_ids(od);
        } else {
            od = ovn_datapath_create(datapaths, &nbs->header_.uuid,
                                     nbs, NULL, NULL);
            ovs_list_push_back(nb_only, &od->list);
        }

        init_ipam_info_for_datapath(od);
    }

    /* Pass 3: same for northbound logical routers, skipping disabled ones.
     * A UUID collision between a switch and a router is reported and the
     * router is ignored. */
    const struct nbrec_logical_router *nbr;
    NBREC_LOGICAL_ROUTER_FOR_EACH (nbr, ctx->ovnnb_idl) {
        if (!lrouter_is_enabled(nbr)) {
            continue;
        }

        struct ovn_datapath *od = ovn_datapath_find(datapaths,
                                                    &nbr->header_.uuid);
        if (od) {
            if (!od->nbs) {
                od->nbr = nbr;
                ovs_list_remove(&od->list);
                ovs_list_push_back(both, &od->list);
                ovn_datapath_update_external_ids(od);
            } else {
                /* Can't happen! */
                static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
                VLOG_WARN_RL(&rl,
                             "duplicate UUID "UUID_FMT" in OVN_Northbound",
                             UUID_ARGS(&nbr->header_.uuid));
                continue;
            }
        } else {
            od = ovn_datapath_create(datapaths, &nbr->header_.uuid,
                                     NULL, nbr, NULL);
            ovs_list_push_back(nb_only, &od->list);
        }
    }
}
720 | ||
/* Allocates a fresh datapath tunnel key from the 24-bit key space, avoiding
 * keys already present in 'dp_tnlids'.  Returns 0 on exhaustion.  The static
 * 'hint' makes successive allocations round-robin across calls. */
static uint32_t
ovn_datapath_allocate_key(struct hmap *dp_tnlids)
{
    static uint32_t hint;
    return allocate_tnlid(dp_tnlids, "datapath", (1u << 24) - 1, &hint);
}
727 | ||
0bac7164 BP |
728 | /* Updates the southbound Datapath_Binding table so that it contains the |
729 | * logical switches and routers specified by the northbound database. | |
730 | * | |
731 | * Initializes 'datapaths' to contain a "struct ovn_datapath" for every logical | |
732 | * switch and router. */ | |
5868eb24 BP |
733 | static void |
734 | build_datapaths(struct northd_context *ctx, struct hmap *datapaths) | |
735 | { | |
736 | struct ovs_list sb_only, nb_only, both; | |
737 | ||
738 | join_datapaths(ctx, datapaths, &sb_only, &nb_only, &both); | |
739 | ||
417e7e66 | 740 | if (!ovs_list_is_empty(&nb_only)) { |
5868eb24 BP |
741 | /* First index the in-use datapath tunnel IDs. */ |
742 | struct hmap dp_tnlids = HMAP_INITIALIZER(&dp_tnlids); | |
743 | struct ovn_datapath *od; | |
744 | LIST_FOR_EACH (od, list, &both) { | |
745 | add_tnlid(&dp_tnlids, od->sb->tunnel_key); | |
746 | } | |
747 | ||
748 | /* Add southbound record for each unmatched northbound record. */ | |
749 | LIST_FOR_EACH (od, list, &nb_only) { | |
750 | uint16_t tunnel_key = ovn_datapath_allocate_key(&dp_tnlids); | |
751 | if (!tunnel_key) { | |
752 | break; | |
753 | } | |
754 | ||
755 | od->sb = sbrec_datapath_binding_insert(ctx->ovnsb_txn); | |
c5fec4f6 | 756 | ovn_datapath_update_external_ids(od); |
5868eb24 BP |
757 | sbrec_datapath_binding_set_tunnel_key(od->sb, tunnel_key); |
758 | } | |
759 | destroy_tnlids(&dp_tnlids); | |
760 | } | |
761 | ||
762 | /* Delete southbound records without northbound matches. */ | |
763 | struct ovn_datapath *od, *next; | |
764 | LIST_FOR_EACH_SAFE (od, next, list, &sb_only) { | |
417e7e66 | 765 | ovs_list_remove(&od->list); |
5868eb24 BP |
766 | sbrec_datapath_binding_delete(od->sb); |
767 | ovn_datapath_destroy(datapaths, od); | |
768 | } | |
769 | } | |
770 | \f | |
/* A logical switch port or logical router port, joined with its southbound
 * Port_Binding row. */
struct ovn_port {
    struct hmap_node key_node;  /* Index on 'key'. */
    char *key;                  /* nbs->name, nbr->name, sb->logical_port. */
    char *json_key;             /* 'key', quoted for use in JSON. */

    const struct sbrec_port_binding *sb;  /* May be NULL. */

    /* Logical switch port data. */
    const struct nbrec_logical_switch_port *nbsp; /* May be NULL. */

    struct lport_addresses *lsp_addrs;  /* Logical switch port addresses. */
    unsigned int n_lsp_addrs;

    struct lport_addresses *ps_addrs;   /* Port security addresses. */
    unsigned int n_ps_addrs;

    /* Logical router port data. */
    const struct nbrec_logical_router_port *nbrp; /* May be NULL. */

    struct lport_addresses lrp_networks;

    bool derived;  /* Indicates whether this is an additional port
                    * derived from nbsp or nbrp. */

    /* The port's peer:
     *
     *     - A switch port S of type "router" has a router port R as a peer,
     *       and R in turn has S has its peer.
     *
     *     - Two connected logical router ports have each other as peer. */
    struct ovn_port *peer;

    struct ovn_datapath *od;  /* Datapath this port belongs to. */

    struct ovs_list list;  /* In list of similar records. */
};
807 | ||
808 | static struct ovn_port * | |
809 | ovn_port_create(struct hmap *ports, const char *key, | |
0ee00741 HK |
810 | const struct nbrec_logical_switch_port *nbsp, |
811 | const struct nbrec_logical_router_port *nbrp, | |
5868eb24 BP |
812 | const struct sbrec_port_binding *sb) |
813 | { | |
814 | struct ovn_port *op = xzalloc(sizeof *op); | |
9975d7be BP |
815 | |
816 | struct ds json_key = DS_EMPTY_INITIALIZER; | |
817 | json_string_escape(key, &json_key); | |
818 | op->json_key = ds_steal_cstr(&json_key); | |
819 | ||
820 | op->key = xstrdup(key); | |
5868eb24 | 821 | op->sb = sb; |
0ee00741 HK |
822 | op->nbsp = nbsp; |
823 | op->nbrp = nbrp; | |
41a15b71 | 824 | op->derived = false; |
5868eb24 BP |
825 | hmap_insert(ports, &op->key_node, hash_string(op->key, 0)); |
826 | return op; | |
827 | } | |
828 | ||
829 | static void | |
830 | ovn_port_destroy(struct hmap *ports, struct ovn_port *port) | |
831 | { | |
832 | if (port) { | |
833 | /* Don't remove port->list. It is used within build_ports() as a | |
834 | * private list and once we've exited that function it is not safe to | |
835 | * use it. */ | |
836 | hmap_remove(ports, &port->key_node); | |
e93b43d6 JP |
837 | |
838 | for (int i = 0; i < port->n_lsp_addrs; i++) { | |
839 | destroy_lport_addresses(&port->lsp_addrs[i]); | |
840 | } | |
841 | free(port->lsp_addrs); | |
842 | ||
843 | for (int i = 0; i < port->n_ps_addrs; i++) { | |
844 | destroy_lport_addresses(&port->ps_addrs[i]); | |
845 | } | |
846 | free(port->ps_addrs); | |
847 | ||
4685e523 | 848 | destroy_lport_addresses(&port->lrp_networks); |
9975d7be BP |
849 | free(port->json_key); |
850 | free(port->key); | |
5868eb24 BP |
851 | free(port); |
852 | } | |
853 | } | |
854 | ||
855 | static struct ovn_port * | |
856 | ovn_port_find(struct hmap *ports, const char *name) | |
857 | { | |
858 | struct ovn_port *op; | |
859 | ||
860 | HMAP_FOR_EACH_WITH_HASH (op, key_node, hash_string(name, 0), ports) { | |
861 | if (!strcmp(op->key, name)) { | |
862 | return op; | |
863 | } | |
864 | } | |
865 | return NULL; | |
866 | } | |
867 | ||
868 | static uint32_t | |
869 | ovn_port_allocate_key(struct ovn_datapath *od) | |
870 | { | |
871 | return allocate_tnlid(&od->port_tnlids, "port", | |
872 | (1u << 15) - 1, &od->port_key_hint); | |
873 | } | |
874 | ||
41a15b71 MS |
/* Returns the name of the chassis-redirect port derived from 'port_name'
 * ("cr-<port_name>").  The caller must free() the returned string. */
static char *
chassis_redirect_name(const char *port_name)
{
    char *name = xasprintf("cr-%s", port_name);
    return name;
}
880 | ||
8639f9be ND |
881 | static bool |
882 | ipam_is_duplicate_mac(struct eth_addr *ea, uint64_t mac64, bool warn) | |
883 | { | |
884 | struct macam_node *macam_node; | |
885 | HMAP_FOR_EACH_WITH_HASH (macam_node, hmap_node, hash_uint64(mac64), | |
886 | &macam) { | |
887 | if (eth_addr_equals(*ea, macam_node->mac_addr)) { | |
888 | if (warn) { | |
889 | static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1); | |
890 | VLOG_WARN_RL(&rl, "Duplicate MAC set: "ETH_ADDR_FMT, | |
891 | ETH_ADDR_ARGS(macam_node->mac_addr)); | |
892 | } | |
893 | return true; | |
894 | } | |
895 | } | |
896 | return false; | |
897 | } | |
898 | ||
8639f9be ND |
899 | static void |
900 | ipam_insert_mac(struct eth_addr *ea, bool check) | |
901 | { | |
902 | if (!ea) { | |
903 | return; | |
904 | } | |
905 | ||
906 | uint64_t mac64 = eth_addr_to_uint64(*ea); | |
907 | /* If the new MAC was not assigned by this address management system or | |
908 | * check is true and the new MAC is a duplicate, do not insert it into the | |
909 | * macam hmap. */ | |
910 | if (((mac64 ^ MAC_ADDR_PREFIX) >> 24) | |
911 | || (check && ipam_is_duplicate_mac(ea, mac64, true))) { | |
912 | return; | |
913 | } | |
914 | ||
915 | struct macam_node *new_macam_node = xmalloc(sizeof *new_macam_node); | |
916 | new_macam_node->mac_addr = *ea; | |
917 | hmap_insert(&macam, &new_macam_node->hmap_node, hash_uint64(mac64)); | |
918 | } | |
919 | ||
/* Marks host-order IPv4 address 'ip' as allocated in 'od''s IPAM bitmap,
 * if 'od' has IPAM enabled and 'ip' falls inside the managed range
 * [start_ipv4, start_ipv4 + total_ipv4s).  Out-of-range addresses are
 * silently ignored. */
static void
ipam_insert_ip(struct ovn_datapath *od, uint32_t ip)
{
    if (!od || !od->ipam_info || !od->ipam_info->allocated_ipv4s) {
        return;
    }

    if (ip >= od->ipam_info->start_ipv4 &&
        ip < (od->ipam_info->start_ipv4 + od->ipam_info->total_ipv4s)) {
        /* Bitmap index is the offset from the start of the range. */
        bitmap_set1(od->ipam_info->allocated_ipv4s,
                    ip - od->ipam_info->start_ipv4);
    }
}
933 | ||
/* Registers the statically configured MAC (and, if the switch has a subnet
 * configured, IPv4 addresses) from one entry of a logical switch port's
 * "addresses" column, so that IPAM/MACAM will not re-allocate them.
 * "unknown", "router", and dynamic entries are skipped here. */
static void
ipam_insert_lsp_addresses(struct ovn_datapath *od, struct ovn_port *op,
                          char *address)
{
    if (!od || !op || !address || !strcmp(address, "unknown")
        || !strcmp(address, "router") || is_dynamic_lsp_address(address)) {
        return;
    }

    struct lport_addresses laddrs;
    if (!extract_lsp_addresses(address, &laddrs)) {
        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
        VLOG_WARN_RL(&rl, "Extract addresses failed.");
        return;
    }
    ipam_insert_mac(&laddrs.ea, true);

    /* IP is only added to IPAM if the switch's subnet option
     * is set, whereas MAC is always added to MACAM. */
    if (!od->ipam_info || !od->ipam_info->allocated_ipv4s) {
        destroy_lport_addresses(&laddrs);
        return;
    }

    for (size_t j = 0; j < laddrs.n_ipv4_addrs; j++) {
        /* ipam_insert_ip() takes the address in host byte order. */
        uint32_t ip = ntohl(laddrs.ipv4_addrs[j].addr);
        ipam_insert_ip(od, ip);
    }

    destroy_lport_addresses(&laddrs);
}
965 | ||
/* Seeds the MAC and IPv4 allocators with the addresses already used by
 * port 'op'.  For a switch port, every "addresses" entry (and any
 * previously assigned dynamic_addresses) is registered.  For a router
 * port, the port's MAC is always registered, and its IPv4 networks are
 * registered against the *peer* switch's IPAM, but only when that peer
 * switch actually has other_config:subnet set. */
static void
ipam_add_port_addresses(struct ovn_datapath *od, struct ovn_port *op)
{
    if (!od || !op) {
        return;
    }

    if (op->nbsp) {
        /* Add all the port's addresses to address data structures. */
        for (size_t i = 0; i < op->nbsp->n_addresses; i++) {
            ipam_insert_lsp_addresses(od, op, op->nbsp->addresses[i]);
        }
        if (op->nbsp->dynamic_addresses) {
            ipam_insert_lsp_addresses(od, op, op->nbsp->dynamic_addresses);
        }
    } else if (op->nbrp) {
        struct lport_addresses lrp_networks;
        if (!extract_lrp_networks(op->nbrp, &lrp_networks)) {
            static struct vlog_rate_limit rl
                = VLOG_RATE_LIMIT_INIT(1, 1);
            VLOG_WARN_RL(&rl, "Extract addresses failed.");
            return;
        }
        ipam_insert_mac(&lrp_networks.ea, true);

        /* Router port IPs count against the attached switch's subnet, so
         * there must be a peered switch with a subnet configured. */
        if (!op->peer || !op->peer->nbsp || !op->peer->od || !op->peer->od->nbs
            || !smap_get(&op->peer->od->nbs->other_config, "subnet")) {
            destroy_lport_addresses(&lrp_networks);
            return;
        }

        for (size_t i = 0; i < lrp_networks.n_ipv4_addrs; i++) {
            uint32_t ip = ntohl(lrp_networks.ipv4_addrs[i].addr);
            ipam_insert_ip(op->peer->od, ip);
        }

        destroy_lport_addresses(&lrp_networks);
    }
}
1005 | ||
1006 | static uint64_t | |
1007 | ipam_get_unused_mac(void) | |
1008 | { | |
1009 | /* Stores the suffix of the most recently ipam-allocated MAC address. */ | |
1010 | static uint32_t last_mac; | |
1011 | ||
1012 | uint64_t mac64; | |
1013 | struct eth_addr mac; | |
1014 | uint32_t mac_addr_suffix, i; | |
1015 | for (i = 0; i < MAC_ADDR_SPACE - 1; i++) { | |
1016 | /* The tentative MAC's suffix will be in the interval (1, 0xfffffe). */ | |
1017 | mac_addr_suffix = ((last_mac + i) % (MAC_ADDR_SPACE - 1)) + 1; | |
1018 | mac64 = MAC_ADDR_PREFIX | mac_addr_suffix; | |
1019 | eth_addr_from_uint64(mac64, &mac); | |
1020 | if (!ipam_is_duplicate_mac(&mac, mac64, false)) { | |
1021 | last_mac = mac_addr_suffix; | |
1022 | break; | |
1023 | } | |
1024 | } | |
1025 | ||
1026 | if (i == MAC_ADDR_SPACE) { | |
1027 | static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1); | |
1028 | VLOG_WARN_RL(&rl, "MAC address space exhausted."); | |
1029 | mac64 = 0; | |
1030 | } | |
1031 | ||
1032 | return mac64; | |
1033 | } | |
1034 | ||
/* Returns the first unallocated host-order IPv4 address in 'od''s managed
 * subnet, or 0 if 'od' has no IPAM or the subnet is exhausted.  The
 * returned address is NOT marked allocated here; the caller does that via
 * ipam_insert_ip().  Note that the scan stops at total_ipv4s - 1, so the
 * last address of the range (presumably the broadcast address — confirm
 * against ipam_info initialization) is never handed out. */
static uint32_t
ipam_get_unused_ip(struct ovn_datapath *od)
{
    if (!od || !od->ipam_info || !od->ipam_info->allocated_ipv4s) {
        return 0;
    }

    size_t new_ip_index = bitmap_scan(od->ipam_info->allocated_ipv4s, 0, 0,
                                      od->ipam_info->total_ipv4s - 1);
    if (new_ip_index == od->ipam_info->total_ipv4s - 1) {
        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
        VLOG_WARN_RL( &rl, "Subnet address space has been exhausted.");
        return 0;
    }

    return od->ipam_info->start_ipv4 + new_ip_index;
}
1052 | ||
/* Allocates dynamic addresses for switch port 'op' whose "addresses" entry
 * is 'addrspec' (either "dynamic" or "<mac> dynamic").  Writes the result
 * into the port's northbound dynamic_addresses column as
 * "<mac>[ <ipv4>][ <ipv6>]".  Returns true if an address record was
 * written, false if nothing could be allocated. */
static bool
ipam_allocate_addresses(struct ovn_datapath *od, struct ovn_port *op,
                        const char *addrspec)
{
    if (!op->nbsp || !od->ipam_info) {
        return false;
    }

    /* Get or generate MAC address. */
    struct eth_addr mac;
    bool dynamic_mac;
    int n = 0;
    /* "<mac> dynamic" means the MAC is fixed and only IPs are dynamic. */
    if (ovs_scan(addrspec, ETH_ADDR_SCAN_FMT" dynamic%n",
                 ETH_ADDR_SCAN_ARGS(mac), &n)
        && addrspec[n] == '\0') {
        dynamic_mac = false;
    } else {
        uint64_t mac64 = ipam_get_unused_mac();
        if (!mac64) {
            return false;
        }
        eth_addr_from_uint64(mac64, &mac);
        dynamic_mac = true;
    }

    /* Generate IPv4 address, if desirable. */
    bool dynamic_ip4 = od->ipam_info->allocated_ipv4s != NULL;
    uint32_t ip4 = dynamic_ip4 ? ipam_get_unused_ip(od) : 0;

    /* Generate IPv6 address, if desirable.  The IPv6 address is derived
     * from the MAC via EUI-64, so it needs no separate allocator. */
    bool dynamic_ip6 = od->ipam_info->ipv6_prefix_set;
    struct in6_addr ip6;
    if (dynamic_ip6) {
        in6_generate_eui64(mac, &od->ipam_info->ipv6_prefix, &ip6);
    }

    /* If we didn't generate anything, bail out. */
    if (!dynamic_ip4 && !dynamic_ip6) {
        return false;
    }

    /* Save the dynamic addresses. */
    struct ds new_addr = DS_EMPTY_INITIALIZER;
    ds_put_format(&new_addr, ETH_ADDR_FMT, ETH_ADDR_ARGS(mac));
    if (dynamic_ip4 && ip4) {
        ipam_insert_ip(od, ip4);
        ds_put_format(&new_addr, " "IP_FMT, IP_ARGS(htonl(ip4)));
    }
    if (dynamic_ip6) {
        char ip6_s[INET6_ADDRSTRLEN + 1];
        ipv6_string_mapped(ip6_s, &ip6);
        ds_put_format(&new_addr, " %s", ip6_s);
    }
    /* A statically specified MAC still gets recorded in MACAM (with a
     * duplicate check); a freshly allocated one is inserted unchecked. */
    ipam_insert_mac(&mac, !dynamic_mac);
    nbrec_logical_switch_port_set_dynamic_addresses(op->nbsp,
                                                    ds_cstr(&new_addr));
    ds_destroy(&new_addr);
    return true;
}
1112 | ||
static void
build_ipam(struct hmap *datapaths, struct hmap *ports)
{
    /* IPAM generally stands for IP address management.  In non-virtualized
     * world, MAC addresses come with the hardware.  But, with virtualized
     * workloads, they need to be assigned and managed.  This function
     * does both IP address management (ipam) and MAC address management
     * (macam). */

    /* If the switch's other_config:subnet is set, allocate new addresses for
     * ports that have the "dynamic" keyword in their addresses column. */
    struct ovn_datapath *od;
    HMAP_FOR_EACH (od, key_node, datapaths) {
        /* Only logical switches with IPAM configured participate. */
        if (!od->nbs || !od->ipam_info) {
            continue;
        }

        struct ovn_port *op;
        for (size_t i = 0; i < od->nbs->n_ports; i++) {
            const struct nbrec_logical_switch_port *nbsp =
                od->nbs->ports[i];

            if (!nbsp) {
                continue;
            }

            op = ovn_port_find(ports, nbsp->name);
            if (!op || (op->nbsp && op->peer)) {
                /* Do not allocate addresses for logical switch ports that
                 * have a peer. */
                continue;
            }

            /* At most one "dynamic" entry is acted on per port (note the
             * break below); allocation is skipped if dynamic_addresses was
             * already filled in on a previous run. */
            for (size_t j = 0; j < nbsp->n_addresses; j++) {
                if (is_dynamic_lsp_address(nbsp->addresses[j])
                    && !nbsp->dynamic_addresses) {
                    if (!ipam_allocate_addresses(od, op, nbsp->addresses[j])
                        || !extract_lsp_addresses(nbsp->dynamic_addresses,
                                        &op->lsp_addrs[op->n_lsp_addrs])) {
                        static struct vlog_rate_limit rl
                            = VLOG_RATE_LIMIT_INIT(1, 1);
                        VLOG_INFO_RL(&rl, "Failed to allocate address.");
                    } else {
                        op->n_lsp_addrs++;
                    }
                    break;
                }
            }

            /* If the port no longer requests any addresses, clear any
             * stale dynamic assignment. */
            if (!nbsp->n_addresses && nbsp->dynamic_addresses) {
                nbrec_logical_switch_port_set_dynamic_addresses(op->nbsp,
                                                                NULL);
            }
        }
    }
}
1169 | \f | |
b511690b GS |
/* Tag allocation for nested containers.
 *
 * For a logical switch port with 'parent_name' and a request to allocate tags,
 * keeps a track of all allocated tags.  One node exists per distinct parent
 * port name, hashed on 'parent_name'. */
struct tag_alloc_node {
    struct hmap_node hmap_node;     /* In the tag_alloc_table hmap. */
    char *parent_name;              /* Owned copy of the parent port name. */
    unsigned long *allocated_tags;  /* A bitmap to track allocated tags. */
};
1179 | ||
1180 | static void | |
1181 | tag_alloc_destroy(struct hmap *tag_alloc_table) | |
1182 | { | |
1183 | struct tag_alloc_node *node; | |
1184 | HMAP_FOR_EACH_POP (node, hmap_node, tag_alloc_table) { | |
1185 | bitmap_free(node->allocated_tags); | |
1186 | free(node->parent_name); | |
1187 | free(node); | |
1188 | } | |
1189 | hmap_destroy(tag_alloc_table); | |
1190 | } | |
1191 | ||
/* Returns the tag_alloc_node for 'parent_name' in 'tag_alloc_table',
 * creating and inserting a fresh one (with only tag 0 marked used) if it
 * does not exist yet. */
static struct tag_alloc_node *
tag_alloc_get_node(struct hmap *tag_alloc_table, const char *parent_name)
{
    /* If a node for the 'parent_name' exists, return it. */
    struct tag_alloc_node *tag_alloc_node;
    HMAP_FOR_EACH_WITH_HASH (tag_alloc_node, hmap_node,
                             hash_string(parent_name, 0),
                             tag_alloc_table) {
        if (!strcmp(tag_alloc_node->parent_name, parent_name)) {
            return tag_alloc_node;
        }
    }

    /* Create a new node. */
    tag_alloc_node = xmalloc(sizeof *tag_alloc_node);
    tag_alloc_node->parent_name = xstrdup(parent_name);
    tag_alloc_node->allocated_tags = bitmap_allocate(MAX_OVN_TAGS);
    /* Tag 0 is invalid for nested containers. */
    bitmap_set1(tag_alloc_node->allocated_tags, 0);
    hmap_insert(tag_alloc_table, &tag_alloc_node->hmap_node,
                hash_string(parent_name, 0));

    return tag_alloc_node;
}
1216 | ||
/* Marks the tag already assigned to nested-container port 'nbsp' as used
 * in 'tag_alloc_table', so it is never allocated to a sibling. */
static void
tag_alloc_add_existing_tags(struct hmap *tag_alloc_table,
                            const struct nbrec_logical_switch_port *nbsp)
{
    /* Add the tags of already existing nested containers.  If there is no
     * 'nbsp->parent_name' or no 'nbsp->tag' set, there is nothing to do. */
    if (!nbsp->parent_name || !nbsp->parent_name[0] || !nbsp->tag) {
        return;
    }

    struct tag_alloc_node *tag_alloc_node;
    tag_alloc_node = tag_alloc_get_node(tag_alloc_table, nbsp->parent_name);
    bitmap_set1(tag_alloc_node->allocated_tags, *nbsp->tag);
}
1231 | ||
/* Satisfies 'nbsp''s tag_request by writing its 'tag' column: a request of
 * 0 on a nested-container port means "allocate one for me" from the
 * parent's tag space; any other nonzero request is copied verbatim. */
static void
tag_alloc_create_new_tag(struct hmap *tag_alloc_table,
                         const struct nbrec_logical_switch_port *nbsp)
{
    if (!nbsp->tag_request) {
        return;
    }

    if (nbsp->parent_name && nbsp->parent_name[0]
        && *nbsp->tag_request == 0) {
        /* For nested containers that need allocation, do the allocation. */

        if (nbsp->tag) {
            /* This has already been allocated. */
            return;
        }

        struct tag_alloc_node *tag_alloc_node;
        int64_t tag;
        tag_alloc_node = tag_alloc_get_node(tag_alloc_table,
                                            nbsp->parent_name);
        /* Find the first free tag in [1, MAX_OVN_TAGS); 0 is pre-reserved. */
        tag = bitmap_scan(tag_alloc_node->allocated_tags, 0, 1, MAX_OVN_TAGS);
        if (tag == MAX_OVN_TAGS) {
            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
            VLOG_ERR_RL(&rl, "out of vlans for logical switch ports with "
                        "parent %s", nbsp->parent_name);
            return;
        }
        bitmap_set1(tag_alloc_node->allocated_tags, tag);
        nbrec_logical_switch_port_set_tag(nbsp, &tag, 1);
    } else if (*nbsp->tag_request != 0) {
        /* For everything else, copy the contents of 'tag_request' to 'tag'. */
        nbrec_logical_switch_port_set_tag(nbsp, nbsp->tag_request, 1);
    }
}
1267 | \f | |
8639f9be | 1268 | |
6c4f7a8a NS |
/*
 * This function checks if the MAC in "address" parameter (if present) is
 * different from the one stored in Logical_Switch_Port.dynamic_addresses
 * and updates it.  The remainder of dynamic_addresses (the dynamically
 * assigned IPs) is preserved as-is.
 */
static void
check_and_update_mac_in_dynamic_addresses(
    const char *address,
    const struct nbrec_logical_switch_port *nbsp)
{
    if (!nbsp->dynamic_addresses) {
        return;
    }
    int buf_index = 0;
    struct eth_addr ea;
    /* If 'address' does not start with a literal MAC there is nothing to
     * reconcile. */
    if (!ovs_scan_len(address, &buf_index,
                      ETH_ADDR_SCAN_FMT, ETH_ADDR_SCAN_ARGS(ea))) {
        return;
    }

    struct eth_addr present_ea;
    buf_index = 0;
    if (ovs_scan_len(nbsp->dynamic_addresses, &buf_index,
                     ETH_ADDR_SCAN_FMT, ETH_ADDR_SCAN_ARGS(present_ea))
        && !eth_addr_equals(ea, present_ea)) {
        /* MAC address has changed.  Update it, keeping the rest of the
         * dynamic_addresses string (starting at 'buf_index', just past the
         * old MAC) intact. */
        char *new_addr = xasprintf(
            ETH_ADDR_FMT"%s", ETH_ADDR_ARGS(ea),
            &nbsp->dynamic_addresses[buf_index]);
        nbrec_logical_switch_port_set_dynamic_addresses(
            nbsp, new_addr);
        free(new_addr);
    }
}
1303 | ||
5868eb24 BP |
/* Correlates northbound logical switch/router ports with southbound
 * Port_Binding records.  Every port ends up as an ovn_port in 'ports',
 * and on exactly one of three lists: 'sb_only' (stale SB binding with no
 * NB match), 'nb_only' (NB port with no SB binding yet), or 'both'.
 * Also derives chassis-redirect ("cr-") ports for distributed gateway
 * ports, records chassis qdisc queue IDs, seeds IPAM/MACAM and the tag
 * allocator, and wires up peer pointers between router ports and switch
 * ports of type "router". */
static void
join_logical_ports(struct northd_context *ctx,
                   struct hmap *datapaths, struct hmap *ports,
                   struct hmap *chassis_qdisc_queues,
                   struct hmap *tag_alloc_table, struct ovs_list *sb_only,
                   struct ovs_list *nb_only, struct ovs_list *both)
{
    hmap_init(ports);
    ovs_list_init(sb_only);
    ovs_list_init(nb_only);
    ovs_list_init(both);

    /* Start by assuming every SB binding is stale; ports are moved off
     * 'sb_only' as their NB counterparts are found below. */
    const struct sbrec_port_binding *sb;
    SBREC_PORT_BINDING_FOR_EACH (sb, ctx->ovnsb_idl) {
        struct ovn_port *op = ovn_port_create(ports, sb->logical_port,
                                              NULL, NULL, sb);
        ovs_list_push_back(sb_only, &op->list);
    }

    struct ovn_datapath *od;
    HMAP_FOR_EACH (od, key_node, datapaths) {
        if (od->nbs) {
            /* Logical switch: process its Logical_Switch_Ports. */
            for (size_t i = 0; i < od->nbs->n_ports; i++) {
                const struct nbrec_logical_switch_port *nbsp
                    = od->nbs->ports[i];
                struct ovn_port *op = ovn_port_find(ports, nbsp->name);
                if (op) {
                    if (op->nbsp || op->nbrp) {
                        static struct vlog_rate_limit rl
                            = VLOG_RATE_LIMIT_INIT(5, 1);
                        VLOG_WARN_RL(&rl, "duplicate logical port %s",
                                     nbsp->name);
                        continue;
                    }
                    op->nbsp = nbsp;
                    ovs_list_remove(&op->list);

                    /* Remember any qdisc queue already bound on a chassis
                     * so it is not re-allocated. */
                    uint32_t queue_id = smap_get_int(&op->sb->options,
                                                     "qdisc_queue_id", 0);
                    if (queue_id && op->sb->chassis) {
                        add_chassis_queue(
                            chassis_qdisc_queues, &op->sb->chassis->header_.uuid,
                            queue_id);
                    }

                    ovs_list_push_back(both, &op->list);

                    /* This port exists due to a SB binding, but should
                     * not have been initialized fully. */
                    ovs_assert(!op->n_lsp_addrs && !op->n_ps_addrs);
                } else {
                    op = ovn_port_create(ports, nbsp->name, nbsp, NULL, NULL);
                    ovs_list_push_back(nb_only, &op->list);
                }

                if (!strcmp(nbsp->type, "localnet")) {
                   od->localnet_port = op;
                }

                /* Parse each "addresses" entry into op->lsp_addrs.
                 * "unknown" and "router" entries are handled elsewhere
                 * ("router" is filled in from the peer, below). */
                op->lsp_addrs
                    = xmalloc(sizeof *op->lsp_addrs * nbsp->n_addresses);
                for (size_t j = 0; j < nbsp->n_addresses; j++) {
                    if (!strcmp(nbsp->addresses[j], "unknown")
                        || !strcmp(nbsp->addresses[j], "router")) {
                        continue;
                    }
                    if (is_dynamic_lsp_address(nbsp->addresses[j])) {
                        if (nbsp->dynamic_addresses) {
                            check_and_update_mac_in_dynamic_addresses(
                                nbsp->addresses[j], nbsp);
                            if (!extract_lsp_addresses(nbsp->dynamic_addresses,
                                            &op->lsp_addrs[op->n_lsp_addrs])) {
                                static struct vlog_rate_limit rl
                                    = VLOG_RATE_LIMIT_INIT(1, 1);
                                VLOG_INFO_RL(&rl, "invalid syntax '%s' in "
                                                  "logical switch port "
                                                  "dynamic_addresses. No "
                                                  "MAC address found",
                                                  op->nbsp->dynamic_addresses);
                                continue;
                            }
                        } else {
                            /* Dynamic address not assigned yet; build_ipam()
                             * will allocate it later. */
                            continue;
                        }
                    } else if (!extract_lsp_addresses(nbsp->addresses[j],
                                           &op->lsp_addrs[op->n_lsp_addrs])) {
                        static struct vlog_rate_limit rl
                            = VLOG_RATE_LIMIT_INIT(1, 1);
                        VLOG_INFO_RL(&rl, "invalid syntax '%s' in logical "
                                          "switch port addresses. No MAC "
                                          "address found",
                                          op->nbsp->addresses[j]);
                        continue;
                    }
                    op->n_lsp_addrs++;
                }

                /* Parse the port_security column the same way. */
                op->ps_addrs
                    = xmalloc(sizeof *op->ps_addrs * nbsp->n_port_security);
                for (size_t j = 0; j < nbsp->n_port_security; j++) {
                    if (!extract_lsp_addresses(nbsp->port_security[j],
                                               &op->ps_addrs[op->n_ps_addrs])) {
                        static struct vlog_rate_limit rl
                            = VLOG_RATE_LIMIT_INIT(1, 1);
                        VLOG_INFO_RL(&rl, "invalid syntax '%s' in port "
                                          "security. No MAC address found",
                                          op->nbsp->port_security[j]);
                        continue;
                    }
                    op->n_ps_addrs++;
                }

                op->od = od;
                ipam_add_port_addresses(od, op);
                tag_alloc_add_existing_tags(tag_alloc_table, nbsp);
            }
        } else {
            /* Logical router: process its Logical_Router_Ports. */
            for (size_t i = 0; i < od->nbr->n_ports; i++) {
                const struct nbrec_logical_router_port *nbrp
                    = od->nbr->ports[i];

                struct lport_addresses lrp_networks;
                if (!extract_lrp_networks(nbrp, &lrp_networks)) {
                    static struct vlog_rate_limit rl
                        = VLOG_RATE_LIMIT_INIT(5, 1);
                    VLOG_WARN_RL(&rl, "bad 'mac' %s", nbrp->mac);
                    continue;
                }

                /* A router port without any network is useless; skip it. */
                if (!lrp_networks.n_ipv4_addrs && !lrp_networks.n_ipv6_addrs) {
                    continue;
                }

                struct ovn_port *op = ovn_port_find(ports, nbrp->name);
                if (op) {
                    if (op->nbsp || op->nbrp) {
                        static struct vlog_rate_limit rl
                            = VLOG_RATE_LIMIT_INIT(5, 1);
                        VLOG_WARN_RL(&rl, "duplicate logical router port %s",
                                     nbrp->name);
                        continue;
                    }
                    op->nbrp = nbrp;
                    ovs_list_remove(&op->list);
                    ovs_list_push_back(both, &op->list);

                    /* This port exists but should not have been
                     * initialized fully. */
                    ovs_assert(!op->lrp_networks.n_ipv4_addrs
                               && !op->lrp_networks.n_ipv6_addrs);
                } else {
                    op = ovn_port_create(ports, nbrp->name, NULL, nbrp, NULL);
                    ovs_list_push_back(nb_only, &op->list);
                }

                op->lrp_networks = lrp_networks;
                op->od = od;
                ipam_add_port_addresses(op->od, op);

                const char *redirect_chassis = smap_get(&op->nbrp->options,
                                                        "redirect-chassis");
                if (redirect_chassis || op->nbrp->n_gateway_chassis) {
                    /* Additional "derived" ovn_port crp represents the
                     * instance of op on the "redirect-chassis". */
                    const char *gw_chassis = smap_get(&op->od->nbr->options,
                                                   "chassis");
                    if (gw_chassis) {
                        static struct vlog_rate_limit rl
                            = VLOG_RATE_LIMIT_INIT(1, 1);
                        VLOG_WARN_RL(&rl, "Bad configuration: "
                                     "redirect-chassis configured on port %s "
                                     "on L3 gateway router", nbrp->name);
                        continue;
                    }
                    if (od->l3dgw_port || od->l3redirect_port) {
                        static struct vlog_rate_limit rl
                            = VLOG_RATE_LIMIT_INIT(1, 1);
                        VLOG_WARN_RL(&rl, "Bad configuration: multiple ports "
                                     "with redirect-chassis on same logical "
                                     "router %s", od->nbr->name);
                        continue;
                    }

                    char *redirect_name = chassis_redirect_name(nbrp->name);
                    struct ovn_port *crp = ovn_port_find(ports, redirect_name);
                    if (crp) {
                        crp->derived = true;
                        crp->nbrp = nbrp;
                        ovs_list_remove(&crp->list);
                        ovs_list_push_back(both, &crp->list);
                    } else {
                        crp = ovn_port_create(ports, redirect_name,
                                              NULL, nbrp, NULL);
                        crp->derived = true;
                        ovs_list_push_back(nb_only, &crp->list);
                    }
                    crp->od = od;
                    free(redirect_name);

                    /* Set l3dgw_port and l3redirect_port in od, for later
                     * use during flow creation. */
                    od->l3dgw_port = op;
                    od->l3redirect_port = crp;
                }
            }
        }
    }

    /* Connect logical router ports, and logical switch ports of type "router",
     * to their peers. */
    struct ovn_port *op;
    HMAP_FOR_EACH (op, key_node, ports) {
        if (op->nbsp && !strcmp(op->nbsp->type, "router") && !op->derived) {
            const char *peer_name = smap_get(&op->nbsp->options, "router-port");
            if (!peer_name) {
                continue;
            }

            struct ovn_port *peer = ovn_port_find(ports, peer_name);
            if (!peer || !peer->nbrp) {
                continue;
            }

            peer->peer = op;
            op->peer = peer;
            op->od->router_ports = xrealloc(
                op->od->router_ports,
                sizeof *op->od->router_ports * (op->od->n_router_ports + 1));
            op->od->router_ports[op->od->n_router_ports++] = op;

            /* Fill op->lsp_addrs for op->nbsp->addresses[] with
             * contents "router", which was skipped in the loop above. */
            for (size_t j = 0; j < op->nbsp->n_addresses; j++) {
                if (!strcmp(op->nbsp->addresses[j], "router")) {
                    if (extract_lrp_networks(peer->nbrp,
                                             &op->lsp_addrs[op->n_lsp_addrs])) {
                        op->n_lsp_addrs++;
                    }
                    break;
                }
            }
        } else if (op->nbrp && op->nbrp->peer && !op->derived) {
            struct ovn_port *peer = ovn_port_find(ports, op->nbrp->peer);
            if (peer) {
                if (peer->nbrp) {
                    op->peer = peer;
                } else if (peer->nbsp) {
                    /* An ovn_port for a switch port of type "router" does have
                     * a router port as its peer (see the case above for
                     * "router" ports), but this is set via options:router-port
                     * in Logical_Switch_Port and does not involve the
                     * Logical_Router_Port's 'peer' column. */
                    static struct vlog_rate_limit rl =
                        VLOG_RATE_LIMIT_INIT(5, 1);
                    VLOG_WARN_RL(&rl, "Bad configuration: The peer of router "
                                 "port %s is a switch port", op->key);
                }
            }
        }
    }
}
1565 | ||
e914fb54 MS |
1566 | static void |
1567 | ip_address_and_port_from_lb_key(const char *key, char **ip_address, | |
485d373b | 1568 | uint16_t *port, int *addr_family); |
e914fb54 MS |
1569 | |
/* Adds to 'all_ips' every distinct VIP address configured on the load
 * balancers of logical router 'od'.  VIP keys may be "IP" or "IP:port";
 * only the IP part is collected.  '*addr_family' is overwritten for each
 * VIP parsed, so on return it reflects the address family of the last VIP
 * seen (callers presumably expect a homogeneous set — confirm). */
static void
get_router_load_balancer_ips(const struct ovn_datapath *od,
                             struct sset *all_ips, int *addr_family)
{
    if (!od->nbr) {
        return;
    }

    for (int i = 0; i < od->nbr->n_load_balancer; i++) {
        struct nbrec_load_balancer *lb = od->nbr->load_balancer[i];
        struct smap *vips = &lb->vips;
        struct smap_node *node;

        SMAP_FOR_EACH (node, vips) {
            /* node->key contains IP:port or just IP. */
            char *ip_address = NULL;
            uint16_t port;

            ip_address_and_port_from_lb_key(node->key, &ip_address, &port,
                                            addr_family);
            if (!ip_address) {
                continue;
            }

            if (!sset_contains(all_ips, ip_address)) {
                sset_add(all_ips, ip_address);
            }

            free(ip_address);
        }
    }
}
1602 | ||
f40c5588 MS |
1603 | /* Returns an array of strings, each consisting of a MAC address followed |
1604 | * by one or more IP addresses, and if the port is a distributed gateway | |
1605 | * port, followed by 'is_chassis_resident("LPORT_NAME")', where the | |
1606 | * LPORT_NAME is the name of the L3 redirect port or the name of the | |
1607 | * logical_port specified in a NAT rule. These strings include the | |
1608 | * external IP addresses of all NAT rules defined on that router, and all | |
1609 | * of the IP addresses used in load balancer VIPs defined on that router. | |
e914fb54 | 1610 | * |
f40c5588 MS |
1611 | * The caller must free each of the n returned strings with free(), |
1612 | * and must free the returned array when it is no longer needed. */ | |
1613 | static char ** | |
1614 | get_nat_addresses(const struct ovn_port *op, size_t *n) | |
e914fb54 | 1615 | { |
f40c5588 | 1616 | size_t n_nats = 0; |
e914fb54 MS |
1617 | struct eth_addr mac; |
1618 | if (!op->nbrp || !op->od || !op->od->nbr | |
1619 | || (!op->od->nbr->n_nat && !op->od->nbr->n_load_balancer) | |
1620 | || !eth_addr_from_string(op->nbrp->mac, &mac)) { | |
f40c5588 | 1621 | *n = n_nats; |
e914fb54 MS |
1622 | return NULL; |
1623 | } | |
1624 | ||
f40c5588 MS |
1625 | struct ds c_addresses = DS_EMPTY_INITIALIZER; |
1626 | ds_put_format(&c_addresses, ETH_ADDR_FMT, ETH_ADDR_ARGS(mac)); | |
1627 | bool central_ip_address = false; | |
1628 | ||
1629 | char **addresses; | |
1630 | addresses = xmalloc(sizeof *addresses * (op->od->nbr->n_nat + 1)); | |
e914fb54 MS |
1631 | |
1632 | /* Get NAT IP addresses. */ | |
f40c5588 | 1633 | for (size_t i = 0; i < op->od->nbr->n_nat; i++) { |
e914fb54 MS |
1634 | const struct nbrec_nat *nat = op->od->nbr->nat[i]; |
1635 | ovs_be32 ip, mask; | |
1636 | ||
1637 | char *error = ip_parse_masked(nat->external_ip, &ip, &mask); | |
1638 | if (error || mask != OVS_BE32_MAX) { | |
1639 | free(error); | |
1640 | continue; | |
1641 | } | |
26b9e08d MS |
1642 | |
1643 | /* Determine whether this NAT rule satisfies the conditions for | |
1644 | * distributed NAT processing. */ | |
1645 | if (op->od->l3redirect_port && !strcmp(nat->type, "dnat_and_snat") | |
1646 | && nat->logical_port && nat->external_mac) { | |
1647 | /* Distributed NAT rule. */ | |
f40c5588 MS |
1648 | if (eth_addr_from_string(nat->external_mac, &mac)) { |
1649 | struct ds address = DS_EMPTY_INITIALIZER; | |
1650 | ds_put_format(&address, ETH_ADDR_FMT, ETH_ADDR_ARGS(mac)); | |
1651 | ds_put_format(&address, " %s", nat->external_ip); | |
1652 | ds_put_format(&address, " is_chassis_resident(\"%s\")", | |
1653 | nat->logical_port); | |
1654 | addresses[n_nats++] = ds_steal_cstr(&address); | |
1655 | } | |
26b9e08d MS |
1656 | } else { |
1657 | /* Centralized NAT rule, either on gateway router or distributed | |
1658 | * router. */ | |
f40c5588 MS |
1659 | ds_put_format(&c_addresses, " %s", nat->external_ip); |
1660 | central_ip_address = true; | |
26b9e08d | 1661 | } |
e914fb54 MS |
1662 | } |
1663 | ||
1664 | /* A set to hold all load-balancer vips. */ | |
1665 | struct sset all_ips = SSET_INITIALIZER(&all_ips); | |
485d373b MM |
1666 | int addr_family; |
1667 | get_router_load_balancer_ips(op->od, &all_ips, &addr_family); | |
e914fb54 MS |
1668 | |
1669 | const char *ip_address; | |
1670 | SSET_FOR_EACH (ip_address, &all_ips) { | |
f40c5588 MS |
1671 | ds_put_format(&c_addresses, " %s", ip_address); |
1672 | central_ip_address = true; | |
e914fb54 MS |
1673 | } |
1674 | sset_destroy(&all_ips); | |
1675 | ||
f40c5588 MS |
1676 | if (central_ip_address) { |
1677 | /* Gratuitous ARP for centralized NAT rules on distributed gateway | |
1678 | * ports should be restricted to the "redirect-chassis". */ | |
1679 | if (op->od->l3redirect_port) { | |
1680 | ds_put_format(&c_addresses, " is_chassis_resident(%s)", | |
1681 | op->od->l3redirect_port->json_key); | |
1682 | } | |
1683 | ||
1684 | addresses[n_nats++] = ds_steal_cstr(&c_addresses); | |
26b9e08d MS |
1685 | } |
1686 | ||
f40c5588 MS |
1687 | *n = n_nats; |
1688 | ||
1689 | return addresses; | |
e914fb54 MS |
1690 | } |
1691 | ||
b86f4767 | 1692 | static bool |
1693 | gateway_chassis_equal(const struct nbrec_gateway_chassis *nb_gwc, | |
1694 | const struct sbrec_chassis *nb_gwc_c, | |
1695 | const struct sbrec_gateway_chassis *sb_gwc) | |
1696 | { | |
79371ff5 | 1697 | bool equal = !strcmp(nb_gwc->name, sb_gwc->name) |
1698 | && nb_gwc->priority == sb_gwc->priority | |
1699 | && smap_equal(&nb_gwc->options, &sb_gwc->options) | |
1700 | && smap_equal(&nb_gwc->external_ids, &sb_gwc->external_ids); | |
1701 | ||
1702 | if (!equal) { | |
1703 | return false; | |
1704 | } | |
1705 | ||
1706 | /* If everything else matched and we were unable to find the SBDB | |
1707 | * Chassis entry at this time, assume a match and return true. | |
1708 | * This happens when an ovn-controller is restarting and the Chassis | |
1709 | * entry is gone away momentarily */ | |
1710 | return !nb_gwc_c | |
1711 | || (sb_gwc->chassis && !strcmp(nb_gwc_c->name, | |
1712 | sb_gwc->chassis->name)); | |
b86f4767 | 1713 | } |
1714 | ||
1715 | static bool | |
1716 | sbpb_gw_chassis_needs_update( | |
1717 | const struct sbrec_port_binding *port_binding, | |
1718 | const struct nbrec_logical_router_port *lrp, | |
1719 | const struct chassis_index *chassis_index) | |
1720 | { | |
1721 | if (!lrp || !port_binding) { | |
1722 | return false; | |
1723 | } | |
1724 | ||
1725 | /* These arrays are used to collect valid Gateway_Chassis and valid | |
1726 | * Chassis records from the Logical_Router_Port Gateway_Chassis list, | |
1727 | * we ignore the ones we can't match on the SBDB */ | |
1728 | struct nbrec_gateway_chassis **lrp_gwc = xzalloc(lrp->n_gateway_chassis * | |
1729 | sizeof *lrp_gwc); | |
1730 | const struct sbrec_chassis **lrp_gwc_c = xzalloc(lrp->n_gateway_chassis * | |
1731 | sizeof *lrp_gwc_c); | |
1732 | ||
1733 | /* Count the number of gateway chassis chassis names from the logical | |
1734 | * router port that we are able to match on the southbound database */ | |
1735 | int lrp_n_gateway_chassis = 0; | |
1736 | int n; | |
1737 | for (n = 0; n < lrp->n_gateway_chassis; n++) { | |
1738 | ||
1739 | if (!lrp->gateway_chassis[n]->chassis_name) { | |
1740 | continue; | |
1741 | } | |
1742 | ||
1743 | const struct sbrec_chassis *chassis = | |
1744 | chassis_lookup_by_name(chassis_index, | |
1745 | lrp->gateway_chassis[n]->chassis_name); | |
1746 | ||
79371ff5 | 1747 | lrp_gwc_c[lrp_n_gateway_chassis] = chassis; |
1748 | lrp_gwc[lrp_n_gateway_chassis] = lrp->gateway_chassis[n]; | |
1749 | lrp_n_gateway_chassis++; | |
1750 | if (!chassis) { | |
b86f4767 | 1751 | static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1); |
1752 | VLOG_WARN_RL( | |
1753 | &rl, "Chassis name %s referenced in NBDB via Gateway_Chassis " | |
1754 | "on logical router port %s does not exist in SBDB", | |
1755 | lrp->gateway_chassis[n]->chassis_name, lrp->name); | |
1756 | } | |
1757 | } | |
1758 | ||
1759 | /* Basic check, different amount of Gateway_Chassis means that we | |
1760 | * need to update southbound database Port_Binding */ | |
1761 | if (lrp_n_gateway_chassis != port_binding->n_gateway_chassis) { | |
1762 | free(lrp_gwc_c); | |
1763 | free(lrp_gwc); | |
1764 | return true; | |
1765 | } | |
1766 | ||
1767 | for (n = 0; n < lrp_n_gateway_chassis; n++) { | |
1768 | int i; | |
1769 | /* For each of the valid gw chassis on the lrp, check if there's | |
1770 | * a match on the Port_Binding list, we assume order is not | |
1771 | * persisted */ | |
1772 | for (i = 0; i < port_binding->n_gateway_chassis; i++) { | |
1773 | if (gateway_chassis_equal(lrp_gwc[n], | |
1774 | lrp_gwc_c[n], | |
1775 | port_binding->gateway_chassis[i])) { | |
1776 | break; /* we found a match */ | |
1777 | } | |
1778 | } | |
1779 | ||
1780 | /* if no Port_Binding gateway chassis matched for the entry... */ | |
1781 | if (i == port_binding->n_gateway_chassis) { | |
1782 | free(lrp_gwc_c); | |
1783 | free(lrp_gwc); | |
1784 | return true; /* found no match for this gateway chassis on lrp */ | |
1785 | } | |
1786 | } | |
1787 | ||
1788 | /* no need for update, all ports matched */ | |
1789 | free(lrp_gwc_c); | |
1790 | free(lrp_gwc); | |
1791 | return false; | |
1792 | } | |
1793 | ||
1794 | /* This functions translates the gw chassis on the nb database | |
1795 | * to sb database entries, the only difference is that SB database | |
1796 | * Gateway_Chassis table references the chassis directly instead | |
1797 | * of using the name */ | |
5868eb24 | 1798 | static void |
b86f4767 | 1799 | copy_gw_chassis_from_nbrp_to_sbpb( |
1800 | struct northd_context *ctx, | |
1801 | const struct nbrec_logical_router_port *lrp, | |
1802 | const struct chassis_index *chassis_index, | |
1803 | const struct sbrec_port_binding *port_binding) { | |
1804 | ||
1805 | if (!lrp || !port_binding || !lrp->n_gateway_chassis) { | |
1806 | return; | |
1807 | } | |
1808 | ||
1809 | struct sbrec_gateway_chassis **gw_chassis = NULL; | |
1810 | int n_gwc = 0; | |
1811 | int n; | |
1812 | ||
1813 | /* XXX: This can be improved. This code will generate a set of new | |
1814 | * Gateway_Chassis and push them all in a single transaction, instead | |
1815 | * this would be more optimal if we just add/update/remove the rows in | |
1816 | * the southbound db that need to change. We don't expect lots of | |
1817 | * changes to the Gateway_Chassis table, but if that proves to be wrong | |
1818 | * we should optimize this. */ | |
1819 | for (n = 0; n < lrp->n_gateway_chassis; n++) { | |
1820 | struct nbrec_gateway_chassis *lrp_gwc = lrp->gateway_chassis[n]; | |
1821 | if (!lrp_gwc->chassis_name) { | |
1822 | continue; | |
1823 | } | |
1824 | ||
1825 | const struct sbrec_chassis *chassis = | |
1826 | chassis_lookup_by_name(chassis_index, lrp_gwc->chassis_name); | |
1827 | ||
b86f4767 | 1828 | gw_chassis = xrealloc(gw_chassis, (n_gwc + 1) * sizeof *gw_chassis); |
1829 | ||
1830 | struct sbrec_gateway_chassis *pb_gwc = | |
1831 | sbrec_gateway_chassis_insert(ctx->ovnsb_txn); | |
1832 | ||
1833 | sbrec_gateway_chassis_set_name(pb_gwc, lrp_gwc->name); | |
1834 | sbrec_gateway_chassis_set_priority(pb_gwc, lrp_gwc->priority); | |
1835 | sbrec_gateway_chassis_set_chassis(pb_gwc, chassis); | |
1836 | sbrec_gateway_chassis_set_options(pb_gwc, &lrp_gwc->options); | |
1837 | sbrec_gateway_chassis_set_external_ids(pb_gwc, &lrp_gwc->external_ids); | |
1838 | ||
1839 | gw_chassis[n_gwc++] = pb_gwc; | |
1840 | } | |
1841 | sbrec_port_binding_set_gateway_chassis(port_binding, gw_chassis, n_gwc); | |
1842 | free(gw_chassis); | |
1843 | } | |
1844 | ||
/* Synchronizes the southbound Port_Binding row 'op->sb' with the northbound
 * configuration of logical port 'op': sets its datapath, type, options,
 * parent port, tag, MAC addresses, NAT addresses, gateway chassis, and
 * external IDs.  Router ports ('op->nbrp') and switch ports ('op->nbsp')
 * take entirely separate paths.  'chassis_qdisc_queues' tracks per-chassis
 * QoS queue IDs for switch ports. */
static void
ovn_port_update_sbrec(struct northd_context *ctx,
                      const struct ovn_port *op,
                      const struct chassis_index *chassis_index,
                      struct hmap *chassis_qdisc_queues)
{
    sbrec_port_binding_set_datapath(op->sb, op->od->sb);
    if (op->nbrp) {
        /* If the router is for l3 gateway, it resides on a chassis
         * and its port type is "l3gateway". */
        const char *chassis_name = smap_get(&op->od->nbr->options, "chassis");
        if (op->derived) {
            sbrec_port_binding_set_type(op->sb, "chassisredirect");
        } else if (chassis_name) {
            sbrec_port_binding_set_type(op->sb, "l3gateway");
        } else {
            sbrec_port_binding_set_type(op->sb, "patch");
        }

        struct smap new;
        smap_init(&new);
        if (op->derived) {
            const char *redirect_chassis = smap_get(&op->nbrp->options,
                                                    "redirect-chassis");
            /* gateway_chassis and redirect-chassis are alternative ways to
             * configure the same thing; gateway_chassis wins if both set. */
            if (op->nbrp->n_gateway_chassis && redirect_chassis) {
                static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
                VLOG_WARN_RL(
                    &rl, "logical router port %s has both options:"
                    "redirect-chassis and gateway_chassis populated "
                    "redirect-chassis will be ignored in favour of "
                    "gateway chassis", op->nbrp->name);
            }

            if (op->nbrp->n_gateway_chassis) {
                /* Rewrite the SB gateway chassis only when it is actually
                 * out of sync with the NB configuration. */
                if (sbpb_gw_chassis_needs_update(op->sb, op->nbrp,
                                                 chassis_index)) {
                    copy_gw_chassis_from_nbrp_to_sbpb(ctx, op->nbrp,
                                                      chassis_index, op->sb);
                }

            } else if (redirect_chassis) {
                /* Handle ports that had redirect-chassis option attached
                 * to them, and for backwards compatibility convert them
                 * to a single Gateway_Chassis entry */
                const struct sbrec_chassis *chassis =
                    chassis_lookup_by_name(chassis_index, redirect_chassis);
                if (chassis) {
                    /* If we found the chassis, and the gw chassis on record
                     * differs from what we expect go ahead and update */
                    if (op->sb->n_gateway_chassis != 1
                        || !op->sb->gateway_chassis[0]->chassis
                        || strcmp(op->sb->gateway_chassis[0]->chassis->name,
                                  chassis->name)
                        || op->sb->gateway_chassis[0]->priority != 0) {
                        /* Construct a single Gateway_Chassis entry on the
                         * Port_Binding attached to the redirect_chassis
                         * name */
                        struct sbrec_gateway_chassis *gw_chassis =
                            sbrec_gateway_chassis_insert(ctx->ovnsb_txn);

                        char *gwc_name = xasprintf("%s_%s", op->nbrp->name,
                                                   chassis->name);

                        /* XXX: Again, here, we could just update an existing
                         * Gateway_Chassis, instead of creating a new one
                         * and replacing it */
                        sbrec_gateway_chassis_set_name(gw_chassis, gwc_name);
                        sbrec_gateway_chassis_set_priority(gw_chassis, 0);
                        sbrec_gateway_chassis_set_chassis(gw_chassis, chassis);
                        sbrec_gateway_chassis_set_external_ids(gw_chassis,
                                &op->nbrp->external_ids);
                        sbrec_port_binding_set_gateway_chassis(op->sb,
                                                               &gw_chassis, 1);
                        free(gwc_name);
                    }
                } else {
                    VLOG_WARN("chassis name '%s' from redirect from logical "
                              " router port '%s' redirect-chassis not found",
                              redirect_chassis, op->nbrp->name);
                    /* Stale gateway chassis with no matching SB chassis are
                     * cleared rather than left dangling. */
                    if (op->sb->n_gateway_chassis) {
                        sbrec_port_binding_set_gateway_chassis(op->sb, NULL,
                                                               0);
                    }
                }
            }
            smap_add(&new, "distributed-port", op->nbrp->name);
        } else {
            if (op->peer) {
                smap_add(&new, "peer", op->peer->key);
            }
            if (chassis_name) {
                smap_add(&new, "l3gateway-chassis", chassis_name);
            }
        }
        sbrec_port_binding_set_options(op->sb, &new);
        smap_destroy(&new);

        /* Router ports have no parent/tag/MAC columns; clear them. */
        sbrec_port_binding_set_parent_port(op->sb, NULL);
        sbrec_port_binding_set_tag(op->sb, NULL, 0);
        sbrec_port_binding_set_mac(op->sb, NULL, 0);

        struct smap ids = SMAP_INITIALIZER(&ids);
        sbrec_port_binding_set_external_ids(op->sb, &ids);
    } else {
        if (strcmp(op->nbsp->type, "router")) {
            /* Ordinary (non-router-facing) switch port.  Manage the QoS
             * queue id: allocate one when QoS params appear on a bound
             * port, release it when they disappear. */
            uint32_t queue_id = smap_get_int(
                &op->sb->options, "qdisc_queue_id", 0);
            bool has_qos = port_has_qos_params(&op->nbsp->options);
            struct smap options;

            if (op->sb->chassis && has_qos && !queue_id) {
                queue_id = allocate_chassis_queueid(chassis_qdisc_queues,
                                                    op->sb->chassis);
            } else if (!has_qos && queue_id) {
                free_chassis_queueid(chassis_qdisc_queues,
                                     op->sb->chassis,
                                     queue_id);
                queue_id = 0;
            }

            smap_clone(&options, &op->nbsp->options);
            if (queue_id) {
                smap_add_format(&options,
                                "qdisc_queue_id", "%d", queue_id);
            }
            sbrec_port_binding_set_options(op->sb, &options);
            smap_destroy(&options);
            if (ovn_is_known_nb_lsp_type(op->nbsp->type)) {
                sbrec_port_binding_set_type(op->sb, op->nbsp->type);
            } else {
                static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
                VLOG_WARN_RL(
                    &rl, "Unknown port type '%s' set on logical switch '%s'.",
                    op->nbsp->type, op->nbsp->name);
            }
        } else {
            /* Switch port of type "router": it patches to a router port. */
            const char *chassis = NULL;
            if (op->peer && op->peer->od && op->peer->od->nbr) {
                chassis = smap_get(&op->peer->od->nbr->options, "chassis");
            }

            /* A switch port connected to a gateway router is also of
             * type "l3gateway". */
            if (chassis) {
                sbrec_port_binding_set_type(op->sb, "l3gateway");
            } else {
                sbrec_port_binding_set_type(op->sb, "patch");
            }

            const char *router_port = smap_get(&op->nbsp->options,
                                               "router-port");
            if (router_port || chassis) {
                struct smap new;
                smap_init(&new);
                if (router_port) {
                    smap_add(&new, "peer", router_port);
                }
                if (chassis) {
                    smap_add(&new, "l3gateway-chassis", chassis);
                }
                sbrec_port_binding_set_options(op->sb, &new);
                smap_destroy(&new);
            }

            const char *nat_addresses = smap_get(&op->nbsp->options,
                                           "nat-addresses");
            /* "router" means derive NAT addresses from the peer router's
             * NAT rules and load balancer VIPs. */
            if (nat_addresses && !strcmp(nat_addresses, "router")) {
                if (op->peer && op->peer->od
                    && (chassis || op->peer->od->l3redirect_port)) {
                    size_t n_nats;
                    char **nats = get_nat_addresses(op->peer, &n_nats);
                    if (n_nats) {
                        sbrec_port_binding_set_nat_addresses(op->sb,
                            (const char **) nats, n_nats);
                        for (size_t i = 0; i < n_nats; i++) {
                            free(nats[i]);
                        }
                        free(nats);
                    } else {
                        sbrec_port_binding_set_nat_addresses(op->sb, NULL, 0);
                    }
                } else {
                    sbrec_port_binding_set_nat_addresses(op->sb, NULL, 0);
                }
            /* Only accept manual specification of ethernet address
             * followed by IPv4 addresses on type "l3gateway" ports. */
            } else if (nat_addresses && chassis) {
                struct lport_addresses laddrs;
                if (!extract_lsp_addresses(nat_addresses, &laddrs)) {
                    static struct vlog_rate_limit rl =
                        VLOG_RATE_LIMIT_INIT(1, 1);
                    VLOG_WARN_RL(&rl, "Error extracting nat-addresses.");
                    sbrec_port_binding_set_nat_addresses(op->sb, NULL, 0);
                } else {
                    sbrec_port_binding_set_nat_addresses(op->sb,
                                                         &nat_addresses, 1);
                    destroy_lport_addresses(&laddrs);
                }
            } else {
                sbrec_port_binding_set_nat_addresses(op->sb, NULL, 0);
            }
        }
        sbrec_port_binding_set_parent_port(op->sb, op->nbsp->parent_name);
        sbrec_port_binding_set_tag(op->sb, op->nbsp->tag, op->nbsp->n_tag);
        sbrec_port_binding_set_mac(op->sb, (const char **) op->nbsp->addresses,
                                   op->nbsp->n_addresses);

        struct smap ids = SMAP_INITIALIZER(&ids);
        smap_clone(&ids, &op->nbsp->external_ids);
        /* Mirror Neutron's port name into a plain "name" key for
         * convenience. */
        const char *name = smap_get(&ids, "neutron:port_name");
        if (name && name[0]) {
            smap_add(&ids, "name", name);
        }
        sbrec_port_binding_set_external_ids(op->sb, &ids);
        smap_destroy(&ids);
    }
}
2062 | ||
6e31816f CSV |
2063 | /* Remove mac_binding entries that refer to logical_ports which are |
2064 | * deleted. */ | |
2065 | static void | |
2066 | cleanup_mac_bindings(struct northd_context *ctx, struct hmap *ports) | |
2067 | { | |
2068 | const struct sbrec_mac_binding *b, *n; | |
2069 | SBREC_MAC_BINDING_FOR_EACH_SAFE (b, n, ctx->ovnsb_idl) { | |
2070 | if (!ovn_port_find(ports, b->logical_port)) { | |
2071 | sbrec_mac_binding_delete(b); | |
2072 | } | |
2073 | } | |
2074 | } | |
2075 | ||
/* Updates the southbound Port_Binding table so that it contains the logical
 * switch ports specified by the northbound database.
 *
 * Initializes 'ports' to contain a "struct ovn_port" for every logical port,
 * using the "struct ovn_datapath"s in 'datapaths' to look up logical
 * datapaths. */
static void
build_ports(struct northd_context *ctx, struct hmap *datapaths,
            const struct chassis_index *chassis_index, struct hmap *ports)
{
    /* 'sb_only': SB rows with no NB match; 'nb_only': NB rows with no SB
     * match; 'both': matched pairs. */
    struct ovs_list sb_only, nb_only, both;
    struct hmap tag_alloc_table = HMAP_INITIALIZER(&tag_alloc_table);
    struct hmap chassis_qdisc_queues = HMAP_INITIALIZER(&chassis_qdisc_queues);

    join_logical_ports(ctx, datapaths, ports, &chassis_qdisc_queues,
                       &tag_alloc_table, &sb_only, &nb_only, &both);

    struct ovn_port *op, *next;
    /* For logical ports that are in both databases, update the southbound
     * record based on northbound data.  Also index the in-use tunnel_keys.
     * For logical ports that are in NB database, do any tag allocation
     * needed. */
    LIST_FOR_EACH_SAFE (op, next, list, &both) {
        if (op->nbsp) {
            tag_alloc_create_new_tag(&tag_alloc_table, op->nbsp);
        }
        ovn_port_update_sbrec(ctx, op, chassis_index, &chassis_qdisc_queues);

        /* Record the existing tunnel key as in use and remember the highest
         * one seen as an allocation hint for new ports. */
        add_tnlid(&op->od->port_tnlids, op->sb->tunnel_key);
        if (op->sb->tunnel_key > op->od->port_key_hint) {
            op->od->port_key_hint = op->sb->tunnel_key;
        }
    }

    /* Add southbound record for each unmatched northbound record. */
    LIST_FOR_EACH_SAFE (op, next, list, &nb_only) {
        uint16_t tunnel_key = ovn_port_allocate_key(op->od);
        if (!tunnel_key) {
            /* Key space exhausted for this datapath; skip the port. */
            continue;
        }

        op->sb = sbrec_port_binding_insert(ctx->ovnsb_txn);
        ovn_port_update_sbrec(ctx, op, chassis_index, &chassis_qdisc_queues);

        sbrec_port_binding_set_logical_port(op->sb, op->key);
        sbrec_port_binding_set_tunnel_key(op->sb, tunnel_key);
    }

    /* Note before the deletion loop whether any SB-only ports exist, since
     * the list is consumed below. */
    bool remove_mac_bindings = false;
    if (!ovs_list_is_empty(&sb_only)) {
        remove_mac_bindings = true;
    }

    /* Delete southbound records without northbound matches. */
    LIST_FOR_EACH_SAFE(op, next, list, &sb_only) {
        ovs_list_remove(&op->list);
        sbrec_port_binding_delete(op->sb);
        ovn_port_destroy(ports, op);
    }
    /* MAC bindings referring to the just-deleted ports are now stale. */
    if (remove_mac_bindings) {
        cleanup_mac_bindings(ctx, ports);
    }

    tag_alloc_destroy(&tag_alloc_table);
    destroy_chassis_queues(&chassis_qdisc_queues);
}
2142 | \f | |
/* Multicast group keys share the tunnel key space with port keys; groups
 * occupy the range below. */
#define OVN_MIN_MULTICAST 32768
#define OVN_MAX_MULTICAST 65535

/* A named multicast group together with its tunnel key. */
struct multicast_group {
    const char *name;
    uint16_t key;               /* OVN_MIN_MULTICAST...OVN_MAX_MULTICAST. */
};

/* NOTE(review): presumably the flood group covers all ports of a datapath
 * and the unknown group ports accepting unknown-destination traffic —
 * confirm against the Multicast_Group consumers. */
#define MC_FLOOD "_MC_flood"
static const struct multicast_group mc_flood = { MC_FLOOD, 65535 };

#define MC_UNKNOWN "_MC_unknown"
static const struct multicast_group mc_unknown = { MC_UNKNOWN, 65534 };
2156 | ||
2157 | static bool | |
2158 | multicast_group_equal(const struct multicast_group *a, | |
2159 | const struct multicast_group *b) | |
2160 | { | |
2161 | return !strcmp(a->name, b->name) && a->key == b->key; | |
2162 | } | |
2163 | ||
/* Multicast group entry. */
struct ovn_multicast {
    struct hmap_node hmap_node; /* Index on 'datapath' and 'key'. */
    struct ovn_datapath *datapath;
    const struct multicast_group *group;

    /* Member ports; array grows on demand (see ovn_multicast_add). */
    struct ovn_port **ports;
    size_t n_ports, allocated_ports;
};
2173 | ||
2174 | static uint32_t | |
2175 | ovn_multicast_hash(const struct ovn_datapath *datapath, | |
2176 | const struct multicast_group *group) | |
2177 | { | |
2178 | return hash_pointer(datapath, group->key); | |
2179 | } | |
2180 | ||
2181 | static struct ovn_multicast * | |
2182 | ovn_multicast_find(struct hmap *mcgroups, struct ovn_datapath *datapath, | |
2183 | const struct multicast_group *group) | |
2184 | { | |
2185 | struct ovn_multicast *mc; | |
2186 | ||
2187 | HMAP_FOR_EACH_WITH_HASH (mc, hmap_node, | |
2188 | ovn_multicast_hash(datapath, group), mcgroups) { | |
2189 | if (mc->datapath == datapath | |
2190 | && multicast_group_equal(mc->group, group)) { | |
2191 | return mc; | |
4edcdcf4 RB |
2192 | } |
2193 | } | |
5868eb24 BP |
2194 | return NULL; |
2195 | } | |
2196 | ||
2197 | static void | |
2198 | ovn_multicast_add(struct hmap *mcgroups, const struct multicast_group *group, | |
2199 | struct ovn_port *port) | |
2200 | { | |
2201 | struct ovn_datapath *od = port->od; | |
2202 | struct ovn_multicast *mc = ovn_multicast_find(mcgroups, od, group); | |
2203 | if (!mc) { | |
2204 | mc = xmalloc(sizeof *mc); | |
2205 | hmap_insert(mcgroups, &mc->hmap_node, ovn_multicast_hash(od, group)); | |
2206 | mc->datapath = od; | |
2207 | mc->group = group; | |
2208 | mc->n_ports = 0; | |
2209 | mc->allocated_ports = 4; | |
2210 | mc->ports = xmalloc(mc->allocated_ports * sizeof *mc->ports); | |
2211 | } | |
2212 | if (mc->n_ports >= mc->allocated_ports) { | |
2213 | mc->ports = x2nrealloc(mc->ports, &mc->allocated_ports, | |
2214 | sizeof *mc->ports); | |
2215 | } | |
2216 | mc->ports[mc->n_ports++] = port; | |
2217 | } | |
4edcdcf4 | 2218 | |
5868eb24 BP |
2219 | static void |
2220 | ovn_multicast_destroy(struct hmap *mcgroups, struct ovn_multicast *mc) | |
2221 | { | |
2222 | if (mc) { | |
2223 | hmap_remove(mcgroups, &mc->hmap_node); | |
2224 | free(mc->ports); | |
2225 | free(mc); | |
2226 | } | |
2227 | } | |
4edcdcf4 | 2228 | |
5868eb24 BP |
2229 | static void |
2230 | ovn_multicast_update_sbrec(const struct ovn_multicast *mc, | |
2231 | const struct sbrec_multicast_group *sb) | |
2232 | { | |
2233 | struct sbrec_port_binding **ports = xmalloc(mc->n_ports * sizeof *ports); | |
2234 | for (size_t i = 0; i < mc->n_ports; i++) { | |
2235 | ports[i] = CONST_CAST(struct sbrec_port_binding *, mc->ports[i]->sb); | |
2236 | } | |
2237 | sbrec_multicast_group_set_ports(sb, ports, mc->n_ports); | |
2238 | free(ports); | |
4edcdcf4 | 2239 | } |
bd39395f | 2240 | \f |
/* Logical flow generation.
 *
 * This code generates the Logical_Flow table in the southbound database, as a
 * function of most of the northbound database.
 */

/* One prospective Logical_Flow row, collected in an hmap before being
 * written to the southbound database. */
struct ovn_lflow {
    struct hmap_node hmap_node;

    struct ovn_datapath *od;    /* Datapath the flow belongs to. */
    enum ovn_stage stage;       /* Logical pipeline stage. */
    uint16_t priority;          /* Priority within the stage. */
    char *match;                /* Match expression (owned, see
                                 * ovn_lflow_destroy). */
    char *actions;              /* Actions string (owned). */
    char *stage_hint;           /* Optional debugging hint; may be NULL. */
    const char *where;          /* Source location that created the flow
                                 * (OVS_SOURCE_LOCATOR); not owned. */
};
2258 | ||
2259 | static size_t | |
5868eb24 | 2260 | ovn_lflow_hash(const struct ovn_lflow *lflow) |
bd39395f | 2261 | { |
5868eb24 | 2262 | size_t hash = uuid_hash(&lflow->od->key); |
880fcd14 | 2263 | hash = hash_2words((lflow->stage << 16) | lflow->priority, hash); |
5868eb24 BP |
2264 | hash = hash_string(lflow->match, hash); |
2265 | return hash_string(lflow->actions, hash); | |
bd39395f BP |
2266 | } |
2267 | ||
5868eb24 BP |
2268 | static bool |
2269 | ovn_lflow_equal(const struct ovn_lflow *a, const struct ovn_lflow *b) | |
2270 | { | |
2271 | return (a->od == b->od | |
880fcd14 | 2272 | && a->stage == b->stage |
5868eb24 BP |
2273 | && a->priority == b->priority |
2274 | && !strcmp(a->match, b->match) | |
2275 | && !strcmp(a->actions, b->actions)); | |
2276 | } | |
2277 | ||
2278 | static void | |
2279 | ovn_lflow_init(struct ovn_lflow *lflow, struct ovn_datapath *od, | |
d8026bbf | 2280 | enum ovn_stage stage, uint16_t priority, |
17bfa2aa HZ |
2281 | char *match, char *actions, char *stage_hint, |
2282 | const char *where) | |
bd39395f | 2283 | { |
5868eb24 | 2284 | lflow->od = od; |
880fcd14 | 2285 | lflow->stage = stage; |
5868eb24 BP |
2286 | lflow->priority = priority; |
2287 | lflow->match = match; | |
2288 | lflow->actions = actions; | |
17bfa2aa | 2289 | lflow->stage_hint = stage_hint; |
d8026bbf | 2290 | lflow->where = where; |
bd39395f BP |
2291 | } |
2292 | ||
48605550 | 2293 | /* Adds a row with the specified contents to the Logical_Flow table. */ |
bd39395f | 2294 | static void |
d8026bbf BP |
2295 | ovn_lflow_add_at(struct hmap *lflow_map, struct ovn_datapath *od, |
2296 | enum ovn_stage stage, uint16_t priority, | |
17bfa2aa HZ |
2297 | const char *match, const char *actions, |
2298 | const char *stage_hint, const char *where) | |
5868eb24 | 2299 | { |
9a9961d2 BP |
2300 | ovs_assert(ovn_stage_to_datapath_type(stage) == ovn_datapath_get_type(od)); |
2301 | ||
5868eb24 | 2302 | struct ovn_lflow *lflow = xmalloc(sizeof *lflow); |
880fcd14 | 2303 | ovn_lflow_init(lflow, od, stage, priority, |
17bfa2aa HZ |
2304 | xstrdup(match), xstrdup(actions), |
2305 | nullable_xstrdup(stage_hint), where); | |
5868eb24 BP |
2306 | hmap_insert(lflow_map, &lflow->hmap_node, ovn_lflow_hash(lflow)); |
2307 | } | |
2308 | ||
/* Adds a row with the specified contents to the Logical_Flow table.
 * OVS_SOURCE_LOCATOR records the C source location of the call site so the
 * origin of each logical flow can be traced. */
#define ovn_lflow_add_with_hint(LFLOW_MAP, OD, STAGE, PRIORITY, MATCH, \
                                ACTIONS, STAGE_HINT) \
    ovn_lflow_add_at(LFLOW_MAP, OD, STAGE, PRIORITY, MATCH, ACTIONS, \
                     STAGE_HINT, OVS_SOURCE_LOCATOR)

/* Same as above, with no stage hint. */
#define ovn_lflow_add(LFLOW_MAP, OD, STAGE, PRIORITY, MATCH, ACTIONS) \
    ovn_lflow_add_with_hint(LFLOW_MAP, OD, STAGE, PRIORITY, MATCH, \
                            ACTIONS, NULL)
d8026bbf | 2318 | |
5868eb24 BP |
2319 | static struct ovn_lflow * |
2320 | ovn_lflow_find(struct hmap *lflows, struct ovn_datapath *od, | |
880fcd14 | 2321 | enum ovn_stage stage, uint16_t priority, |
5868eb24 BP |
2322 | const char *match, const char *actions) |
2323 | { | |
2324 | struct ovn_lflow target; | |
880fcd14 | 2325 | ovn_lflow_init(&target, od, stage, priority, |
d8026bbf | 2326 | CONST_CAST(char *, match), CONST_CAST(char *, actions), |
17bfa2aa | 2327 | NULL, NULL); |
5868eb24 BP |
2328 | |
2329 | struct ovn_lflow *lflow; | |
2330 | HMAP_FOR_EACH_WITH_HASH (lflow, hmap_node, ovn_lflow_hash(&target), | |
2331 | lflows) { | |
2332 | if (ovn_lflow_equal(lflow, &target)) { | |
2333 | return lflow; | |
bd39395f BP |
2334 | } |
2335 | } | |
5868eb24 BP |
2336 | return NULL; |
2337 | } | |
bd39395f | 2338 | |
5868eb24 BP |
2339 | static void |
2340 | ovn_lflow_destroy(struct hmap *lflows, struct ovn_lflow *lflow) | |
2341 | { | |
2342 | if (lflow) { | |
2343 | hmap_remove(lflows, &lflow->hmap_node); | |
2344 | free(lflow->match); | |
2345 | free(lflow->actions); | |
17bfa2aa | 2346 | free(lflow->stage_hint); |
5868eb24 BP |
2347 | free(lflow); |
2348 | } | |
bd39395f BP |
2349 | } |
2350 | ||
bd39395f | 2351 | /* Appends port security constraints on L2 address field 'eth_addr_field' |
e93b43d6 JP |
2352 | * (e.g. "eth.src" or "eth.dst") to 'match'. 'ps_addrs', with 'n_ps_addrs' |
2353 | * elements, is the collection of port_security constraints from an | |
2354 | * OVN_NB Logical_Switch_Port row generated by extract_lsp_addresses(). */ | |
bd39395f | 2355 | static void |
685f4dfe | 2356 | build_port_security_l2(const char *eth_addr_field, |
e93b43d6 JP |
2357 | struct lport_addresses *ps_addrs, |
2358 | unsigned int n_ps_addrs, | |
685f4dfe | 2359 | struct ds *match) |
bd39395f | 2360 | { |
e93b43d6 JP |
2361 | if (!n_ps_addrs) { |
2362 | return; | |
2363 | } | |
bd39395f | 2364 | |
e93b43d6 | 2365 | ds_put_format(match, " && %s == {", eth_addr_field); |
f7cb14cd | 2366 | |
e93b43d6 JP |
2367 | for (size_t i = 0; i < n_ps_addrs; i++) { |
2368 | ds_put_format(match, "%s ", ps_addrs[i].ea_s); | |
bd39395f | 2369 | } |
f7cb14cd | 2370 | ds_chomp(match, ' '); |
bd39395f | 2371 | ds_put_cstr(match, "}"); |
bd39395f BP |
2372 | } |
2373 | ||
/* Appends an IPv6 neighbor-discovery port-security clause to 'match'.
 *
 * The clause constrains nd.sll/nd.tll to be either all-zeros or the
 * port's own MAC 'ea'.  If 'n_ipv6_addrs' is nonzero, it additionally
 * constrains nd.target to the port's link-local address (derived from
 * 'ea') or one of the addresses in 'ipv6_addrs'.
 *
 * NOTE(review): the parenthesization below is deliberately asymmetric —
 * the nd.tll group and the nd.target group are closed together by the
 * final ")))" / "))" — so the sll and tll alternatives share the target
 * restriction. */
static void
build_port_security_ipv6_nd_flow(
    struct ds *match, struct eth_addr ea, struct ipv6_netaddr *ipv6_addrs,
    int n_ipv6_addrs)
{
    ds_put_format(match, " && ip6 && nd && ((nd.sll == "ETH_ADDR_FMT" || "
                  "nd.sll == "ETH_ADDR_FMT") || ((nd.tll == "ETH_ADDR_FMT" || "
                  "nd.tll == "ETH_ADDR_FMT")", ETH_ADDR_ARGS(eth_addr_zero),
                  ETH_ADDR_ARGS(ea), ETH_ADDR_ARGS(eth_addr_zero),
                  ETH_ADDR_ARGS(ea));
    if (!n_ipv6_addrs) {
        /* No IPv6 addresses configured: close the open groups and stop. */
        ds_put_cstr(match, "))");
        return;
    }

    /* Always allow the link-local address derived from the MAC as an
     * ND target, in addition to the configured addresses. */
    char ip6_str[INET6_ADDRSTRLEN + 1];
    struct in6_addr lla;
    in6_generate_lla(ea, &lla);
    memset(ip6_str, 0, sizeof(ip6_str));
    ipv6_string_mapped(ip6_str, &lla);
    ds_put_format(match, " && (nd.target == %s", ip6_str);

    for(int i = 0; i < n_ipv6_addrs; i++) {
        memset(ip6_str, 0, sizeof(ip6_str));
        ipv6_string_mapped(ip6_str, &ipv6_addrs[i].addr);
        ds_put_format(match, " || nd.target == %s", ip6_str);
    }

    ds_put_format(match, ")))");
}
2404 | ||
/* Appends an IPv6 L3 port-security clause to 'match', constraining
 * ip6.src (ingress pipeline) or ip6.dst (egress pipeline) to the set of
 * allowed addresses: the link-local address derived from 'ea', the
 * 'n_ipv6_addrs' entries of 'ipv6_addrs', and (egress only) the
 * ff00::/8 multicast range. */
static void
build_port_security_ipv6_flow(
    enum ovn_pipeline pipeline, struct ds *match, struct eth_addr ea,
    struct ipv6_netaddr *ipv6_addrs, int n_ipv6_addrs)
{
    char ip6_str[INET6_ADDRSTRLEN + 1];

    ds_put_format(match, " && %s == {",
                  pipeline == P_IN ? "ip6.src" : "ip6.dst");

    /* Allow link-local address. */
    struct in6_addr lla;
    in6_generate_lla(ea, &lla);
    ipv6_string_mapped(ip6_str, &lla);
    ds_put_format(match, "%s, ", ip6_str);

    /* Allow ip6.dst=ff00::/8 for multicast packets */
    if (pipeline == P_OUT) {
        ds_put_cstr(match, "ff00::/8, ");
    }
    for(int i = 0; i < n_ipv6_addrs; i++) {
        ipv6_string_mapped(ip6_str, &ipv6_addrs[i].addr);
        ds_put_format(match, "%s, ", ip6_str);
    }
    /* Replace ", " by "}". */
    ds_chomp(match, ' ');
    ds_chomp(match, ',');
    ds_put_cstr(match, "}");
}
2434 | ||
/**
 * Build port security constraints on ARP and IPv6 ND fields
 * and add logical flows to S_SWITCH_IN_PORT_SEC_ND stage.
 *
 * For each port security of the logical port, following
 * logical flows are added
 *   - If the port security has no IP (both IPv4 and IPv6) or
 *     if it has IPv4 address(es)
 *      - Priority 90 flow to allow ARP packets for known MAC addresses
 *        in the eth.src and arp.spa fields. If the port security
 *        has IPv4 addresses, allow known IPv4 addresses in the arp.tpa field.
 *
 *   - If the port security has no IP (both IPv4 and IPv6) or
 *     if it has IPv6 address(es)
 *     - Priority 90 flow to allow IPv6 ND packets for known MAC addresses
 *       in the eth.src and nd.sll/nd.tll fields. If the port security
 *       has IPv6 addresses, allow known IPv6 addresses in the nd.target field
 *       for IPv6 Neighbor Advertisement packet.
 *
 *   - Priority 80 flow to drop ARP and IPv6 ND packets.
 */
static void
build_port_security_nd(struct ovn_port *op, struct hmap *lflows)
{
    /* 'match' is reused (ds_clear) across all flows built below and
     * destroyed once at the end. */
    struct ds match = DS_EMPTY_INITIALIZER;

    for (size_t i = 0; i < op->n_ps_addrs; i++) {
        struct lport_addresses *ps = &op->ps_addrs[i];

        /* A port-security entry with a MAC but no IPs allows both ARP
         * and ND from that MAC, unrestricted by address. */
        bool no_ip = !(ps->n_ipv4_addrs || ps->n_ipv6_addrs);

        ds_clear(&match);
        if (ps->n_ipv4_addrs || no_ip) {
            ds_put_format(&match,
                          "inport == %s && eth.src == %s && arp.sha == %s",
                          op->json_key, ps->ea_s, ps->ea_s);

            if (ps->n_ipv4_addrs) {
                ds_put_cstr(&match, " && arp.spa == {");
                for (size_t j = 0; j < ps->n_ipv4_addrs; j++) {
                    /* When the netmask is applied, if the host portion is
                     * non-zero, the host can only use the specified
                     * address in the arp.spa.  If zero, the host is allowed
                     * to use any address in the subnet. */
                    if (ps->ipv4_addrs[j].plen == 32
                        || ps->ipv4_addrs[j].addr & ~ps->ipv4_addrs[j].mask) {
                        ds_put_cstr(&match, ps->ipv4_addrs[j].addr_s);
                    } else {
                        ds_put_format(&match, "%s/%d",
                                      ps->ipv4_addrs[j].network_s,
                                      ps->ipv4_addrs[j].plen);
                    }
                    ds_put_cstr(&match, ", ");
                }
                /* Replace the trailing ", " with "}". */
                ds_chomp(&match, ' ');
                ds_chomp(&match, ',');
                ds_put_cstr(&match, "}");
            }
            ovn_lflow_add(lflows, op->od, S_SWITCH_IN_PORT_SEC_ND, 90,
                          ds_cstr(&match), "next;");
        }

        if (ps->n_ipv6_addrs || no_ip) {
            ds_clear(&match);
            ds_put_format(&match, "inport == %s && eth.src == %s",
                          op->json_key, ps->ea_s);
            build_port_security_ipv6_nd_flow(&match, ps->ea, ps->ipv6_addrs,
                                             ps->n_ipv6_addrs);
            ovn_lflow_add(lflows, op->od, S_SWITCH_IN_PORT_SEC_ND, 90,
                          ds_cstr(&match), "next;");
        }
    }

    /* Catch-all: drop any ARP/ND from this port that did not match one of
     * the priority-90 allow flows above. */
    ds_clear(&match);
    ds_put_format(&match, "inport == %s && (arp || nd)", op->json_key);
    ovn_lflow_add(lflows, op->od, S_SWITCH_IN_PORT_SEC_ND, 80,
                  ds_cstr(&match), "drop;");
    ds_destroy(&match);
}
2514 | ||
/**
 * Build port security constraints on IPv4 and IPv6 src and dst fields
 * and add logical flows to S_SWITCH_(IN/OUT)_PORT_SEC_IP stage.
 *
 * For each port security of the logical port, following
 * logical flows are added
 *   - If the port security has IPv4 addresses,
 *     - Priority 90 flow to allow IPv4 packets for known IPv4 addresses
 *
 *   - If the port security has IPv6 addresses,
 *     - Priority 90 flow to allow IPv6 packets for known IPv6 addresses
 *
 *   - If the port security has IPv4 addresses or IPv6 addresses or both
 *     - Priority 80 flow to drop all IPv4 and IPv6 traffic
 */
static void
build_port_security_ip(enum ovn_pipeline pipeline, struct ovn_port *op,
                       struct hmap *lflows)
{
    /* Select the port field and stage for the requested direction. */
    char *port_direction;
    enum ovn_stage stage;
    if (pipeline == P_IN) {
        port_direction = "inport";
        stage = S_SWITCH_IN_PORT_SEC_IP;
    } else {
        port_direction = "outport";
        stage = S_SWITCH_OUT_PORT_SEC_IP;
    }

    for (size_t i = 0; i < op->n_ps_addrs; i++) {
        struct lport_addresses *ps = &op->ps_addrs[i];

        /* MAC-only port security entries place no L3 restriction. */
        if (!(ps->n_ipv4_addrs || ps->n_ipv6_addrs)) {
            continue;
        }

        if (ps->n_ipv4_addrs) {
            struct ds match = DS_EMPTY_INITIALIZER;
            if (pipeline == P_IN) {
                /* Permit use of the unspecified address for DHCP discovery */
                struct ds dhcp_match = DS_EMPTY_INITIALIZER;
                ds_put_format(&dhcp_match, "inport == %s"
                              " && eth.src == %s"
                              " && ip4.src == 0.0.0.0"
                              " && ip4.dst == 255.255.255.255"
                              " && udp.src == 68 && udp.dst == 67",
                              op->json_key, ps->ea_s);
                ovn_lflow_add(lflows, op->od, stage, 90,
                              ds_cstr(&dhcp_match), "next;");
                ds_destroy(&dhcp_match);
                ds_put_format(&match, "inport == %s && eth.src == %s"
                              " && ip4.src == {", op->json_key,
                              ps->ea_s);
            } else {
                /* Egress direction also admits broadcast and multicast. */
                ds_put_format(&match, "outport == %s && eth.dst == %s"
                              " && ip4.dst == {255.255.255.255, 224.0.0.0/4, ",
                              op->json_key, ps->ea_s);
            }

            for (int j = 0; j < ps->n_ipv4_addrs; j++) {
                ovs_be32 mask = ps->ipv4_addrs[j].mask;
                /* When the netmask is applied, if the host portion is
                 * non-zero, the host can only use the specified
                 * address.  If zero, the host is allowed to use any
                 * address in the subnet.
                 */
                if (ps->ipv4_addrs[j].plen == 32
                    || ps->ipv4_addrs[j].addr & ~mask) {
                    ds_put_format(&match, "%s", ps->ipv4_addrs[j].addr_s);
                    if (pipeline == P_OUT && ps->ipv4_addrs[j].plen != 32) {
                        /* Host is also allowed to receive packets to the
                         * broadcast address in the specified subnet. */
                        ds_put_format(&match, ", %s",
                                      ps->ipv4_addrs[j].bcast_s);
                    }
                } else {
                    /* host portion is zero */
                    ds_put_format(&match, "%s/%d", ps->ipv4_addrs[j].network_s,
                                  ps->ipv4_addrs[j].plen);
                }
                ds_put_cstr(&match, ", ");
            }

            /* Replace ", " by "}". */
            ds_chomp(&match, ' ');
            ds_chomp(&match, ',');
            ds_put_cstr(&match, "}");
            ovn_lflow_add(lflows, op->od, stage, 90, ds_cstr(&match), "next;");
            ds_destroy(&match);
        }

        if (ps->n_ipv6_addrs) {
            struct ds match = DS_EMPTY_INITIALIZER;
            if (pipeline == P_IN) {
                /* Permit use of unspecified address for duplicate address
                 * detection */
                struct ds dad_match = DS_EMPTY_INITIALIZER;
                ds_put_format(&dad_match, "inport == %s"
                              " && eth.src == %s"
                              " && ip6.src == ::"
                              " && ip6.dst == ff02::/16"
                              " && icmp6.type == {131, 135, 143}", op->json_key,
                              ps->ea_s);
                ovn_lflow_add(lflows, op->od, stage, 90,
                              ds_cstr(&dad_match), "next;");
                ds_destroy(&dad_match);
            }
            ds_put_format(&match, "%s == %s && %s == %s",
                          port_direction, op->json_key,
                          pipeline == P_IN ? "eth.src" : "eth.dst", ps->ea_s);
            build_port_security_ipv6_flow(pipeline, &match, ps->ea,
                                          ps->ipv6_addrs, ps->n_ipv6_addrs);
            ovn_lflow_add(lflows, op->od, stage, 90,
                          ds_cstr(&match), "next;");
            ds_destroy(&match);
        }

        /* Catch-all: drop IP traffic from/to this MAC that did not match
         * one of the priority-90 allow flows above. */
        char *match = xasprintf("%s == %s && %s == %s && ip",
                                port_direction, op->json_key,
                                pipeline == P_IN ? "eth.src" : "eth.dst",
                                ps->ea_s);
        ovn_lflow_add(lflows, op->od, stage, 80, match, "drop;");
        free(match);
    }

}
2641 | ||
95a9a275 | 2642 | static bool |
80f408f4 | 2643 | lsp_is_enabled(const struct nbrec_logical_switch_port *lsp) |
95a9a275 | 2644 | { |
80f408f4 | 2645 | return !lsp->enabled || *lsp->enabled; |
95a9a275 RB |
2646 | } |
2647 | ||
4c7bf534 | 2648 | static bool |
80f408f4 | 2649 | lsp_is_up(const struct nbrec_logical_switch_port *lsp) |
4c7bf534 | 2650 | { |
80f408f4 | 2651 | return !lsp->up || *lsp->up; |
4c7bf534 NS |
2652 | } |
2653 | ||
/* Builds the native-DHCPv4 logical actions for logical port 'op' offering
 * 'offer_ip'.
 *
 * On success, appends:
 *   - to 'options_action': a put_dhcp_opts(...) action listing offerip and
 *     the port's configured DHCPv4 options (sorted by key);
 *   - to 'response_action': the packet rewrite that turns the request into
 *     a reply from the configured DHCP server and loops it back;
 *   - to 'ipv4_addr_match': a match on ip4.src == offer_ip and ip4.dst in
 *     {server_id, 255.255.255.255};
 * and returns true.
 *
 * Returns false (leaving the output strings untouched) if DHCPv4 is
 * disabled for the port, the configured cidr is invalid, 'offer_ip' is not
 * inside the cidr, or a mandatory option is missing. */
static bool
build_dhcpv4_action(struct ovn_port *op, ovs_be32 offer_ip,
                    struct ds *options_action, struct ds *response_action,
                    struct ds *ipv4_addr_match)
{
    if (!op->nbsp->dhcpv4_options) {
        /* CMS has disabled native DHCPv4 for this lport. */
        return false;
    }

    ovs_be32 host_ip, mask;
    char *error = ip_parse_masked(op->nbsp->dhcpv4_options->cidr, &host_ip,
                                  &mask);
    if (error || ((offer_ip ^ host_ip) & mask)) {
        /* Either
         *  - cidr defined is invalid or
         *  - the offer ip of the logical port doesn't belong to the cidr
         *    defined in the DHCPv4 options.
         *  */
        free(error);
        return false;
    }

    const char *server_ip = smap_get(
        &op->nbsp->dhcpv4_options->options, "server_id");
    const char *server_mac = smap_get(
        &op->nbsp->dhcpv4_options->options, "server_mac");
    const char *lease_time = smap_get(
        &op->nbsp->dhcpv4_options->options, "lease_time");

    if (!(server_ip && server_mac && lease_time)) {
        /* "server_id", "server_mac" and "lease_time" should be
         * present in the dhcp_options. */
        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
        VLOG_WARN_RL(&rl, "Required DHCPv4 options not defined for lport - %s",
                     op->json_key);
        return false;
    }

    /* Work on a private copy of the options so the NB row is not
     * modified. */
    struct smap dhcpv4_options = SMAP_INITIALIZER(&dhcpv4_options);
    smap_clone(&dhcpv4_options, &op->nbsp->dhcpv4_options->options);

    /* server_mac is not DHCPv4 option, delete it from the smap. */
    smap_remove(&dhcpv4_options, "server_mac");
    char *netmask = xasprintf(IP_FMT, IP_ARGS(mask));
    smap_add(&dhcpv4_options, "netmask", netmask);
    free(netmask);

    ds_put_format(options_action,
                  REGBIT_DHCP_OPTS_RESULT" = put_dhcp_opts(offerip = "
                  IP_FMT", ", IP_ARGS(offer_ip));

    /* We're not using SMAP_FOR_EACH because we want a consistent order of the
     * options on different architectures (big or little endian, SSE4.2) */
    const struct smap_node **sorted_opts = smap_sort(&dhcpv4_options);
    for (size_t i = 0; i < smap_count(&dhcpv4_options); i++) {
        const struct smap_node *node = sorted_opts[i];
        ds_put_format(options_action, "%s = %s, ", node->key, node->value);
    }
    free(sorted_opts);

    /* Drop the trailing ", " and close the action. */
    ds_chomp(options_action, ' ');
    ds_chomp(options_action, ',');
    ds_put_cstr(options_action, "); next;");

    ds_put_format(response_action, "eth.dst = eth.src; eth.src = %s; "
                  "ip4.dst = "IP_FMT"; ip4.src = %s; udp.src = 67; "
                  "udp.dst = 68; outport = inport; flags.loopback = 1; "
                  "output;",
                  server_mac, IP_ARGS(offer_ip), server_ip);

    ds_put_format(ipv4_addr_match,
                  "ip4.src == "IP_FMT" && ip4.dst == {%s, 255.255.255.255}",
                  IP_ARGS(offer_ip), server_ip);
    smap_destroy(&dhcpv4_options);
    return true;
}
2731 | ||
33ac3c83 NS |
2732 | static bool |
2733 | build_dhcpv6_action(struct ovn_port *op, struct in6_addr *offer_ip, | |
2734 | struct ds *options_action, struct ds *response_action) | |
2735 | { | |
2736 | if (!op->nbsp->dhcpv6_options) { | |
2737 | /* CMS has disabled native DHCPv6 for this lport. */ | |
2738 | return false; | |
2739 | } | |
2740 | ||
2741 | struct in6_addr host_ip, mask; | |
2742 | ||
2743 | char *error = ipv6_parse_masked(op->nbsp->dhcpv6_options->cidr, &host_ip, | |
2744 | &mask); | |
2745 | if (error) { | |
2746 | free(error); | |
2747 | return false; | |
2748 | } | |
2749 | struct in6_addr ip6_mask = ipv6_addr_bitxor(offer_ip, &host_ip); | |
2750 | ip6_mask = ipv6_addr_bitand(&ip6_mask, &mask); | |
2751 | if (!ipv6_mask_is_any(&ip6_mask)) { | |
2752 | /* offer_ip doesn't belongs to the cidr defined in lport's DHCPv6 | |
2753 | * options.*/ | |
2754 | return false; | |
2755 | } | |
2756 | ||
7c76bf4e | 2757 | const struct smap *options_map = &op->nbsp->dhcpv6_options->options; |
33ac3c83 | 2758 | /* "server_id" should be the MAC address. */ |
7c76bf4e | 2759 | const char *server_mac = smap_get(options_map, "server_id"); |
33ac3c83 NS |
2760 | struct eth_addr ea; |
2761 | if (!server_mac || !eth_addr_from_string(server_mac, &ea)) { | |
2762 | /* "server_id" should be present in the dhcpv6_options. */ | |
2763 | static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1); | |
2764 | VLOG_WARN_RL(&rl, "server_id not present in the DHCPv6 options" | |
2765 | " for lport %s", op->json_key); | |
2766 | return false; | |
2767 | } | |
2768 | ||
2769 | /* Get the link local IP of the DHCPv6 server from the server MAC. */ | |
2770 | struct in6_addr lla; | |
2771 | in6_generate_lla(ea, &lla); | |
2772 | ||
2773 | char server_ip[INET6_ADDRSTRLEN + 1]; | |
2774 | ipv6_string_mapped(server_ip, &lla); | |
2775 | ||
2776 | char ia_addr[INET6_ADDRSTRLEN + 1]; | |
2777 | ipv6_string_mapped(ia_addr, offer_ip); | |
2778 | ||
2779 | ds_put_format(options_action, | |
40df4566 ZKL |
2780 | REGBIT_DHCP_OPTS_RESULT" = put_dhcpv6_opts("); |
2781 | ||
2782 | /* Check whether the dhcpv6 options should be configured as stateful. | |
2783 | * Only reply with ia_addr option for dhcpv6 stateful address mode. */ | |
7c76bf4e | 2784 | if (!smap_get_bool(options_map, "dhcpv6_stateless", false)) { |
40df4566 | 2785 | ipv6_string_mapped(ia_addr, offer_ip); |
40df4566 ZKL |
2786 | ds_put_format(options_action, "ia_addr = %s, ", ia_addr); |
2787 | } | |
2788 | ||
7c76bf4e DDP |
2789 | /* We're not using SMAP_FOR_EACH because we want a consistent order of the |
2790 | * options on different architectures (big or little endian, SSE4.2) */ | |
2791 | const struct smap_node **sorted_opts = smap_sort(options_map); | |
2792 | for (size_t i = 0; i < smap_count(options_map); i++) { | |
2793 | const struct smap_node *node = sorted_opts[i]; | |
40df4566 ZKL |
2794 | if (strcmp(node->key, "dhcpv6_stateless")) { |
2795 | ds_put_format(options_action, "%s = %s, ", node->key, node->value); | |
2796 | } | |
33ac3c83 | 2797 | } |
7c76bf4e DDP |
2798 | free(sorted_opts); |
2799 | ||
33ac3c83 NS |
2800 | ds_chomp(options_action, ' '); |
2801 | ds_chomp(options_action, ','); | |
2802 | ds_put_cstr(options_action, "); next;"); | |
2803 | ||
2804 | ds_put_format(response_action, "eth.dst = eth.src; eth.src = %s; " | |
2805 | "ip6.dst = ip6.src; ip6.src = %s; udp.src = 547; " | |
2806 | "udp.dst = 546; outport = inport; flags.loopback = 1; " | |
2807 | "output;", | |
2808 | server_mac, server_ip); | |
40df4566 | 2809 | |
33ac3c83 NS |
2810 | return true; |
2811 | } | |
2812 | ||
78aab811 JP |
2813 | static bool |
2814 | has_stateful_acl(struct ovn_datapath *od) | |
2815 | { | |
9975d7be BP |
2816 | for (size_t i = 0; i < od->nbs->n_acls; i++) { |
2817 | struct nbrec_acl *acl = od->nbs->acls[i]; | |
78aab811 JP |
2818 | if (!strcmp(acl->action, "allow-related")) { |
2819 | return true; | |
2820 | } | |
2821 | } | |
2822 | ||
2823 | return false; | |
2824 | } | |
2825 | ||
/* Populates the ingress and egress Pre-ACL stages for datapath 'od'.
 *
 * By default everything passes (priority 0).  If the datapath has any
 * stateful ("allow-related") ACL, IP traffic is flagged for conntrack
 * defragmentation (priority 100), except traffic to/from router and
 * localnet ports and ND traffic, which bypass conntrack (priority 110). */
static void
build_pre_acls(struct ovn_datapath *od, struct hmap *lflows)
{
    bool has_stateful = has_stateful_acl(od);

    /* Ingress and Egress Pre-ACL Table (Priority 0): Packets are
     * allowed by default. */
    ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 0, "1", "next;");
    ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 0, "1", "next;");

    /* If there are any stateful ACL rules in this datapath, we must
     * send all IP packets through the conntrack action, which handles
     * defragmentation, in order to match L4 headers. */
    if (has_stateful) {
        for (size_t i = 0; i < od->n_router_ports; i++) {
            struct ovn_port *op = od->router_ports[i];
            /* Can't use ct() for router ports.  Consider the
             * following configuration: lp1(10.0.0.2) on
             * hostA--ls1--lr0--ls2--lp2(10.0.1.2) on hostB, For a
             * ping from lp1 to lp2, First, the response will go
             * through ct() with a zone for lp2 in the ls2 ingress
             * pipeline on hostB.  That ct zone knows about this
             * connection.  Next, it goes through ct() with the zone
             * for the router port in the egress pipeline of ls2 on
             * hostB.  This zone does not know about the connection,
             * as the icmp request went through the logical router
             * on hostA, not hostB.  This would only work with
             * distributed conntrack state across all chassis. */
            struct ds match_in = DS_EMPTY_INITIALIZER;
            struct ds match_out = DS_EMPTY_INITIALIZER;

            ds_put_format(&match_in, "ip && inport == %s", op->json_key);
            ds_put_format(&match_out, "ip && outport == %s", op->json_key);
            ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 110,
                          ds_cstr(&match_in), "next;");
            ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 110,
                          ds_cstr(&match_out), "next;");

            ds_destroy(&match_in);
            ds_destroy(&match_out);
        }
        /* The localnet port gets the same conntrack bypass as router
         * ports. */
        if (od->localnet_port) {
            struct ds match_in = DS_EMPTY_INITIALIZER;
            struct ds match_out = DS_EMPTY_INITIALIZER;

            ds_put_format(&match_in, "ip && inport == %s",
                          od->localnet_port->json_key);
            ds_put_format(&match_out, "ip && outport == %s",
                          od->localnet_port->json_key);
            ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 110,
                          ds_cstr(&match_in), "next;");
            ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 110,
                          ds_cstr(&match_out), "next;");

            ds_destroy(&match_in);
            ds_destroy(&match_out);
        }

        /* Ingress and Egress Pre-ACL Table (Priority 110).
         *
         * Not to do conntrack on ND packets. */
        ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 110, "nd", "next;");
        ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 110, "(nd_rs || nd_ra)",
                      "next;");
        ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 110, "nd", "next;");
        ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 110,
                      "(nd_rs || nd_ra)", "next;");

        /* Ingress and Egress Pre-ACL Table (Priority 100).
         *
         * Regardless of whether the ACL is "from-lport" or "to-lport",
         * we need rules in both the ingress and egress table, because
         * the return traffic needs to be followed.
         *
         * 'REGBIT_CONNTRACK_DEFRAG' is set to let the pre-stateful table send
         * it to conntrack for tracking and defragmentation. */
        ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 100, "ip",
                      REGBIT_CONNTRACK_DEFRAG" = 1; next;");
        ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 100, "ip",
                      REGBIT_CONNTRACK_DEFRAG" = 1; next;");
    }
}
78aab811 | 2908 | |
7a15be69 GS |
2909 | /* For a 'key' of the form "IP:port" or just "IP", sets 'port' and |
2910 | * 'ip_address'. The caller must free() the memory allocated for | |
2911 | * 'ip_address'. */ | |
2912 | static void | |
2913 | ip_address_and_port_from_lb_key(const char *key, char **ip_address, | |
485d373b | 2914 | uint16_t *port, int *addr_family) |
7a15be69 | 2915 | { |
485d373b MM |
2916 | struct sockaddr_storage ss; |
2917 | char ip_addr_buf[INET6_ADDRSTRLEN]; | |
2918 | char *error; | |
7a15be69 | 2919 | |
485d373b MM |
2920 | error = ipv46_parse(key, PORT_OPTIONAL, &ss); |
2921 | if (error) { | |
7a15be69 | 2922 | static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1); |
485d373b MM |
2923 | VLOG_WARN_RL(&rl, "bad ip address or port for load balancer key %s", |
2924 | key); | |
7a15be69 GS |
2925 | free(error); |
2926 | return; | |
2927 | } | |
2928 | ||
485d373b MM |
2929 | if (ss.ss_family == AF_INET) { |
2930 | struct sockaddr_in *sin = ALIGNED_CAST(struct sockaddr_in *, &ss); | |
2931 | *port = sin->sin_port == 0 ? 0 : ntohs(sin->sin_port); | |
2932 | inet_ntop(AF_INET, &sin->sin_addr, ip_addr_buf, sizeof ip_addr_buf); | |
2933 | } else { | |
2934 | struct sockaddr_in6 *sin6 = ALIGNED_CAST(struct sockaddr_in6 *, &ss); | |
2935 | *port = sin6->sin6_port == 0 ? 0 : ntohs(sin6->sin6_port); | |
2936 | inet_ntop(AF_INET6, &sin6->sin6_addr, ip_addr_buf, sizeof ip_addr_buf); | |
7a15be69 GS |
2937 | } |
2938 | ||
485d373b MM |
2939 | *ip_address = xstrdup(ip_addr_buf); |
2940 | *addr_family = ss.ss_family; | |
7a15be69 GS |
2941 | } |
2942 | ||
302eda27 NS |
2943 | /* |
2944 | * Returns true if logical switch is configured with DNS records, false | |
2945 | * otherwise. | |
2946 | */ | |
2947 | static bool | |
2948 | ls_has_dns_records(const struct nbrec_logical_switch *nbs) | |
2949 | { | |
2950 | for (size_t i = 0; i < nbs->n_dns_records; i++) { | |
2951 | if (!smap_is_empty(&nbs->dns_records[i]->records)) { | |
2952 | return true; | |
2953 | } | |
2954 | } | |
2955 | ||
2956 | return false; | |
2957 | } | |
2958 | ||
7a15be69 GS |
2959 | static void |
2960 | build_pre_lb(struct ovn_datapath *od, struct hmap *lflows) | |
2961 | { | |
2962 | /* Allow all packets to go to next tables by default. */ | |
2963 | ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_LB, 0, "1", "next;"); | |
2964 | ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_LB, 0, "1", "next;"); | |
2965 | ||
2966 | struct sset all_ips = SSET_INITIALIZER(&all_ips); | |
61591ad9 | 2967 | bool vip_configured = false; |
485d373b | 2968 | int addr_family = AF_INET; |
61591ad9 GS |
2969 | for (int i = 0; i < od->nbs->n_load_balancer; i++) { |
2970 | struct nbrec_load_balancer *lb = od->nbs->load_balancer[i]; | |
7a15be69 GS |
2971 | struct smap *vips = &lb->vips; |
2972 | struct smap_node *node; | |
7a15be69 GS |
2973 | |
2974 | SMAP_FOR_EACH (node, vips) { | |
2975 | vip_configured = true; | |
2976 | ||
2977 | /* node->key contains IP:port or just IP. */ | |
2978 | char *ip_address = NULL; | |
2979 | uint16_t port; | |
485d373b MM |
2980 | ip_address_and_port_from_lb_key(node->key, &ip_address, &port, |
2981 | &addr_family); | |
7a15be69 GS |
2982 | if (!ip_address) { |
2983 | continue; | |
2984 | } | |
2985 | ||
2986 | if (!sset_contains(&all_ips, ip_address)) { | |
2987 | sset_add(&all_ips, ip_address); | |
2988 | } | |
2989 | ||
2990 | free(ip_address); | |
2991 | ||
2992 | /* Ignore L4 port information in the key because fragmented packets | |
2993 | * may not have L4 information. The pre-stateful table will send | |
2994 | * the packet through ct() action to de-fragment. In stateful | |
2995 | * table, we will eventually look at L4 information. */ | |
2996 | } | |
61591ad9 | 2997 | } |
7a15be69 | 2998 | |
61591ad9 GS |
2999 | /* 'REGBIT_CONNTRACK_DEFRAG' is set to let the pre-stateful table send |
3000 | * packet to conntrack for defragmentation. */ | |
3001 | const char *ip_address; | |
3002 | SSET_FOR_EACH(ip_address, &all_ips) { | |
485d373b MM |
3003 | char *match; |
3004 | ||
3005 | if (addr_family == AF_INET) { | |
3006 | match = xasprintf("ip && ip4.dst == %s", ip_address); | |
3007 | } else { | |
3008 | match = xasprintf("ip && ip6.dst == %s", ip_address); | |
3009 | } | |
61591ad9 GS |
3010 | ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_LB, |
3011 | 100, match, REGBIT_CONNTRACK_DEFRAG" = 1; next;"); | |
3012 | free(match); | |
3013 | } | |
7a15be69 | 3014 | |
61591ad9 | 3015 | sset_destroy(&all_ips); |
7a15be69 | 3016 | |
61591ad9 GS |
3017 | if (vip_configured) { |
3018 | ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_LB, | |
3019 | 100, "ip", REGBIT_CONNTRACK_DEFRAG" = 1; next;"); | |
7a15be69 GS |
3020 | } |
3021 | } | |
3022 | ||
facf8652 GS |
3023 | static void |
3024 | build_pre_stateful(struct ovn_datapath *od, struct hmap *lflows) | |
3025 | { | |
3026 | /* Ingress and Egress pre-stateful Table (Priority 0): Packets are | |
3027 | * allowed by default. */ | |
3028 | ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_STATEFUL, 0, "1", "next;"); | |
3029 | ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_STATEFUL, 0, "1", "next;"); | |
3030 | ||
3031 | /* If REGBIT_CONNTRACK_DEFRAG is set as 1, then the packets should be | |
3032 | * sent to conntrack for tracking and defragmentation. */ | |
3033 | ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_STATEFUL, 100, | |
3034 | REGBIT_CONNTRACK_DEFRAG" == 1", "ct_next;"); | |
3035 | ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_STATEFUL, 100, | |
3036 | REGBIT_CONNTRACK_DEFRAG" == 1", "ct_next;"); | |
3037 | } | |
3038 | ||
d383eed5 JP |
3039 | static void |
3040 | build_acl_log(struct ds *actions, const struct nbrec_acl *acl) | |
3041 | { | |
3042 | if (!acl->log) { | |
3043 | return; | |
3044 | } | |
3045 | ||
3046 | ds_put_cstr(actions, "log("); | |
3047 | ||
3048 | if (acl->name) { | |
3049 | ds_put_format(actions, "name=\"%s\", ", acl->name); | |
3050 | } | |
3051 | ||
3052 | /* If a severity level isn't specified, default to "info". */ | |
3053 | if (acl->severity) { | |
3054 | ds_put_format(actions, "severity=%s, ", acl->severity); | |
3055 | } else { | |
3056 | ds_put_format(actions, "severity=info, "); | |
3057 | } | |
3058 | ||
3059 | if (!strcmp(acl->action, "drop")) { | |
3060 | ds_put_cstr(actions, "verdict=drop, "); | |
3061 | } else if (!strcmp(acl->action, "reject")) { | |
3062 | ds_put_cstr(actions, "verdict=reject, "); | |
3063 | } else if (!strcmp(acl->action, "allow") | |
3064 | || !strcmp(acl->action, "allow-related")) { | |
3065 | ds_put_cstr(actions, "verdict=allow, "); | |
3066 | } | |
3067 | ||
3068 | ds_chomp(actions, ' '); | |
3069 | ds_chomp(actions, ','); | |
3070 | ds_put_cstr(actions, "); "); | |
3071 | } | |
3072 | ||
2d018f9b GS |
/* Builds the logical flows for the ingress and egress ACL stages
 * (S_SWITCH_IN_ACL, S_SWITCH_OUT_ACL) of logical switch datapath 'od',
 * adding them to 'lflows'.
 *
 * The generated flows fall into four groups:
 *   - default-allow flows at priority 0, plus (only when the datapath has
 *     stateful ACLs) conntrack commit/allow/drop infrastructure flows at
 *     priorities 1 and 65535;
 *   - one or more flows per ACL row configured in the northbound database;
 *   - priority-34000 egress flows that allow DHCPv4/DHCPv6 replies from
 *     ovn-controller out to logical ports with DHCP options configured;
 *   - a priority-34000 egress flow that allows DNS replies from
 *     ovn-controller, if the datapath has DNS records. */
static void
build_acls(struct ovn_datapath *od, struct hmap *lflows)
{
    bool has_stateful = has_stateful_acl(od);

    /* Ingress and Egress ACL Table (Priority 0): Packets are allowed by
     * default. A related rule at priority 1 is added below if there
     * are any stateful ACLs in this datapath. */
    ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, 0, "1", "next;");
    ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, 0, "1", "next;");

    if (has_stateful) {
        /* Ingress and Egress ACL Table (Priority 1).
         *
         * By default, traffic is allowed.  This is partially handled by
         * the Priority 0 ACL flows added earlier, but we also need to
         * commit IP flows.  This is because, while the initiater's
         * direction may not have any stateful rules, the server's may
         * and then its return traffic would not have an associated
         * conntrack entry and would return "+invalid".
         *
         * We use "ct_commit" for a connection that is not already known
         * by the connection tracker.  Once a connection is committed,
         * subsequent packets will hit the flow at priority 0 that just
         * uses "next;"
         *
         * We also check for established connections that have ct_label.blocked
         * set on them.  That's a connection that was disallowed, but is
         * now allowed by policy again since it hit this default-allow flow.
         * We need to set ct_label.blocked=0 to let the connection continue,
         * which will be done by ct_commit() in the "stateful" stage.
         * Subsequent packets will hit the flow at priority 0 that just
         * uses "next;". */
        ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, 1,
                      "ip && (!ct.est || (ct.est && ct_label.blocked == 1))",
                      REGBIT_CONNTRACK_COMMIT" = 1; next;");
        ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, 1,
                      "ip && (!ct.est || (ct.est && ct_label.blocked == 1))",
                      REGBIT_CONNTRACK_COMMIT" = 1; next;");

        /* Ingress and Egress ACL Table (Priority 65535).
         *
         * Always drop traffic that's in an invalid state.  Also drop
         * reply direction packets for connections that have been marked
         * for deletion (bit 0 of ct_label is set).
         *
         * This is enforced at a higher priority than ACLs can be defined. */
        ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX,
                      "ct.inv || (ct.est && ct.rpl && ct_label.blocked == 1)",
                      "drop;");
        ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX,
                      "ct.inv || (ct.est && ct.rpl && ct_label.blocked == 1)",
                      "drop;");

        /* Ingress and Egress ACL Table (Priority 65535).
         *
         * Allow reply traffic that is part of an established
         * conntrack entry that has not been marked for deletion
         * (bit 0 of ct_label).  We only match traffic in the
         * reply direction because we want traffic in the request
         * direction to hit the currently defined policy from ACLs.
         *
         * This is enforced at a higher priority than ACLs can be defined. */
        ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX,
                      "ct.est && !ct.rel && !ct.new && !ct.inv "
                      "&& ct.rpl && ct_label.blocked == 0",
                      "next;");
        ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX,
                      "ct.est && !ct.rel && !ct.new && !ct.inv "
                      "&& ct.rpl && ct_label.blocked == 0",
                      "next;");

        /* Ingress and Egress ACL Table (Priority 65535).
         *
         * Allow traffic that is related to an existing conntrack entry that
         * has not been marked for deletion (bit 0 of ct_label).
         *
         * This is enforced at a higher priority than ACLs can be defined.
         *
         * NOTE: This does not support related data sessions (eg,
         * a dynamically negotiated FTP data channel), but will allow
         * related traffic such as an ICMP Port Unreachable through
         * that's generated from a non-listening UDP port. */
        ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX,
                      "!ct.est && ct.rel && !ct.new && !ct.inv "
                      "&& ct_label.blocked == 0",
                      "next;");
        ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX,
                      "!ct.est && ct.rel && !ct.new && !ct.inv "
                      "&& ct_label.blocked == 0",
                      "next;");

        /* Ingress and Egress ACL Table (Priority 65535).
         *
         * Not to do conntrack on ND packets. */
        ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX, "nd", "next;");
        ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX, "nd", "next;");
    }

    /* Ingress or Egress ACL Table (Various priorities). */
    for (size_t i = 0; i < od->nbs->n_acls; i++) {
        struct nbrec_acl *acl = od->nbs->acls[i];
        bool ingress = !strcmp(acl->direction, "from-lport") ? true :false;
        enum ovn_stage stage = ingress ? S_SWITCH_IN_ACL : S_SWITCH_OUT_ACL;

        /* The first 32 bits of the northbound ACL row's UUID tie each
         * generated logical flow back to the ACL that produced it. */
        char *stage_hint = xasprintf("%08x", acl->header_.uuid.parts[0]);
        if (!strcmp(acl->action, "allow")
            || !strcmp(acl->action, "allow-related")) {
            /* If there are any stateful flows, we must even commit "allow"
             * actions.  This is because, while the initiater's
             * direction may not have any stateful rules, the server's
             * may and then its return traffic would not have an
             * associated conntrack entry and would return "+invalid". */
            if (!has_stateful) {
                struct ds actions = DS_EMPTY_INITIALIZER;
                build_acl_log(&actions, acl);
                ds_put_cstr(&actions, "next;");
                ovn_lflow_add_with_hint(lflows, od, stage,
                                        acl->priority + OVN_ACL_PRI_OFFSET,
                                        acl->match, ds_cstr(&actions),
                                        stage_hint);
                ds_destroy(&actions);
            } else {
                struct ds match = DS_EMPTY_INITIALIZER;
                struct ds actions = DS_EMPTY_INITIALIZER;

                /* Commit the connection tracking entry if it's a new
                 * connection that matches this ACL.  After this commit,
                 * the reply traffic is allowed by a flow we create at
                 * priority 65535, defined earlier.
                 *
                 * It's also possible that a known connection was marked for
                 * deletion after a policy was deleted, but the policy was
                 * re-added while that connection is still known.  We catch
                 * that case here and un-set ct_label.blocked (which will be done
                 * by ct_commit in the "stateful" stage) to indicate that the
                 * connection should be allowed to resume.
                 */
                ds_put_format(&match, "((ct.new && !ct.est)"
                              " || (!ct.new && ct.est && !ct.rpl "
                              "&& ct_label.blocked == 1)) "
                              "&& (%s)", acl->match);
                ds_put_cstr(&actions, REGBIT_CONNTRACK_COMMIT" = 1; ");
                build_acl_log(&actions, acl);
                ds_put_cstr(&actions, "next;");
                ovn_lflow_add_with_hint(lflows, od, stage,
                                        acl->priority + OVN_ACL_PRI_OFFSET,
                                        ds_cstr(&match),
                                        ds_cstr(&actions),
                                        stage_hint);

                /* Match on traffic in the request direction for an established
                 * connection tracking entry that has not been marked for
                 * deletion.  There is no need to commit here, so we can just
                 * proceed to the next table.  We use this to ensure that this
                 * connection is still allowed by the currently defined
                 * policy. */
                ds_clear(&match);
                ds_clear(&actions);
                ds_put_format(&match,
                              "!ct.new && ct.est && !ct.rpl"
                              " && ct_label.blocked == 0 && (%s)",
                              acl->match);

                build_acl_log(&actions, acl);
                ds_put_cstr(&actions, "next;");
                ovn_lflow_add_with_hint(lflows, od, stage,
                                        acl->priority + OVN_ACL_PRI_OFFSET,
                                        ds_cstr(&match), ds_cstr(&actions),
                                        stage_hint);

                ds_destroy(&match);
                ds_destroy(&actions);
            }
        } else if (!strcmp(acl->action, "drop")
                   || !strcmp(acl->action, "reject")) {
            struct ds match = DS_EMPTY_INITIALIZER;
            struct ds actions = DS_EMPTY_INITIALIZER;

            /* XXX Need to support "reject", treat it as "drop;" for now. */
            if (!strcmp(acl->action, "reject")) {
                VLOG_INFO("reject is not a supported action");
            }

            /* The implementation of "drop" differs if stateful ACLs are in
             * use for this datapath.  In that case, the actions differ
             * depending on whether the connection was previously committed
             * to the connection tracker with ct_commit. */
            if (has_stateful) {
                /* If the packet is not part of an established connection, then
                 * we can simply drop it. */
                ds_put_format(&match,
                              "(!ct.est || (ct.est && ct_label.blocked == 1)) "
                              "&& (%s)",
                              acl->match);
                ds_clear(&actions);
                build_acl_log(&actions, acl);
                ds_put_cstr(&actions, "/* drop */");
                ovn_lflow_add_with_hint(lflows, od, stage,
                                        acl->priority + OVN_ACL_PRI_OFFSET,
                                        ds_cstr(&match), ds_cstr(&actions),
                                        stage_hint);

                /* For an existing connection without ct_label set, we've
                 * encountered a policy change.  ACLs previously allowed
                 * this connection and we committed the connection tracking
                 * entry.  Current policy says that we should drop this
                 * connection.  First, we set bit 0 of ct_label to indicate
                 * that this connection is set for deletion.  By not
                 * specifying "next;", we implicitly drop the packet after
                 * updating conntrack state.  We would normally defer
                 * ct_commit() to the "stateful" stage, but since we're
                 * dropping the packet, we go ahead and do it here. */
                ds_clear(&match);
                ds_clear(&actions);
                ds_put_format(&match,
                              "ct.est && ct_label.blocked == 0 && (%s)",
                              acl->match);
                ds_put_cstr(&actions, "ct_commit(ct_label=1/1); ");
                build_acl_log(&actions, acl);
                ds_put_cstr(&actions, "/* drop */");
                ovn_lflow_add_with_hint(lflows, od, stage,
                                        acl->priority + OVN_ACL_PRI_OFFSET,
                                        ds_cstr(&match), ds_cstr(&actions),
                                        stage_hint);

            } else {
                /* There are no stateful ACLs in use on this datapath,
                 * so a "drop" ACL is simply the "drop" logical flow action
                 * in all cases. */
                ds_clear(&actions);
                build_acl_log(&actions, acl);
                ds_put_cstr(&actions, "/* drop */");
                ovn_lflow_add_with_hint(lflows, od, stage,
                                        acl->priority + OVN_ACL_PRI_OFFSET,
                                        acl->match, ds_cstr(&actions),
                                        stage_hint);
            }
            ds_destroy(&match);
            ds_destroy(&actions);
        }
        free(stage_hint);
    }

    /* Add 34000 priority flow to allow DHCP reply from ovn-controller to all
     * logical ports of the datapath if the CMS has configured DHCPv4 options.
     * */
    for (size_t i = 0; i < od->nbs->n_ports; i++) {
        if (od->nbs->ports[i]->dhcpv4_options) {
            const char *server_id = smap_get(
                &od->nbs->ports[i]->dhcpv4_options->options, "server_id");
            const char *server_mac = smap_get(
                &od->nbs->ports[i]->dhcpv4_options->options, "server_mac");
            const char *lease_time = smap_get(
                &od->nbs->ports[i]->dhcpv4_options->options, "lease_time");
            if (server_id && server_mac && lease_time) {
                struct ds match = DS_EMPTY_INITIALIZER;
                const char *actions =
                    has_stateful ? "ct_commit; next;" : "next;";
                ds_put_format(&match, "outport == \"%s\" && eth.src == %s "
                              "&& ip4.src == %s && udp && udp.src == 67 "
                              "&& udp.dst == 68", od->nbs->ports[i]->name,
                              server_mac, server_id);
                ovn_lflow_add(
                    lflows, od, S_SWITCH_OUT_ACL, 34000, ds_cstr(&match),
                    actions);
                ds_destroy(&match);
            }
        }

        if (od->nbs->ports[i]->dhcpv6_options) {
            /* For DHCPv6 the "server_id" option carries the server's MAC
             * address; it is parsed as an Ethernet address just below. */
            const char *server_mac = smap_get(
                &od->nbs->ports[i]->dhcpv6_options->options, "server_id");
            struct eth_addr ea;
            if (server_mac && eth_addr_from_string(server_mac, &ea)) {
                /* Get the link local IP of the DHCPv6 server from the
                 * server MAC. */
                struct in6_addr lla;
                in6_generate_lla(ea, &lla);

                char server_ip[INET6_ADDRSTRLEN + 1];
                ipv6_string_mapped(server_ip, &lla);

                struct ds match = DS_EMPTY_INITIALIZER;
                const char *actions = has_stateful ? "ct_commit; next;" :
                    "next;";
                ds_put_format(&match, "outport == \"%s\" && eth.src == %s "
                              "&& ip6.src == %s && udp && udp.src == 547 "
                              "&& udp.dst == 546", od->nbs->ports[i]->name,
                              server_mac, server_ip);
                ovn_lflow_add(
                    lflows, od, S_SWITCH_OUT_ACL, 34000, ds_cstr(&match),
                    actions);
                ds_destroy(&match);
            }
        }
    }

    /* Add a 34000 priority flow to advance the DNS reply from ovn-controller,
     * if the CMS has configured DNS records for the datapath.
     */
    if (ls_has_dns_records(od->nbs)) {
        const char *actions = has_stateful ? "ct_commit; next;" : "next;";
        ovn_lflow_add(
            lflows, od, S_SWITCH_OUT_ACL, 34000, "udp.src == 53",
            actions);
    }
}
3381 | ||
1a03fc7d BS |
3382 | static void |
3383 | build_qos(struct ovn_datapath *od, struct hmap *lflows) { | |
3384 | ovn_lflow_add(lflows, od, S_SWITCH_IN_QOS_MARK, 0, "1", "next;"); | |
3385 | ovn_lflow_add(lflows, od, S_SWITCH_OUT_QOS_MARK, 0, "1", "next;"); | |
3386 | ||
3387 | for (size_t i = 0; i < od->nbs->n_qos_rules; i++) { | |
3388 | struct nbrec_qos *qos = od->nbs->qos_rules[i]; | |
3389 | bool ingress = !strcmp(qos->direction, "from-lport") ? true :false; | |
3390 | enum ovn_stage stage = ingress ? S_SWITCH_IN_QOS_MARK : S_SWITCH_OUT_QOS_MARK; | |
3391 | ||
3392 | if (!strcmp(qos->key_action, "dscp")) { | |
3393 | struct ds dscp_action = DS_EMPTY_INITIALIZER; | |
3394 | ||
3395 | ds_put_format(&dscp_action, "ip.dscp = %d; next;", | |
3396 | (uint8_t)qos->value_action); | |
3397 | ovn_lflow_add(lflows, od, stage, | |
3398 | qos->priority, | |
3399 | qos->match, ds_cstr(&dscp_action)); | |
3400 | ds_destroy(&dscp_action); | |
3401 | } | |
3402 | } | |
3403 | } | |
3404 | ||
7a15be69 GS |
3405 | static void |
3406 | build_lb(struct ovn_datapath *od, struct hmap *lflows) | |
3407 | { | |
3408 | /* Ingress and Egress LB Table (Priority 0): Packets are allowed by | |
3409 | * default. */ | |
3410 | ovn_lflow_add(lflows, od, S_SWITCH_IN_LB, 0, "1", "next;"); | |
3411 | ovn_lflow_add(lflows, od, S_SWITCH_OUT_LB, 0, "1", "next;"); | |
3412 | ||
3413 | if (od->nbs->load_balancer) { | |
3414 | /* Ingress and Egress LB Table (Priority 65535). | |
3415 | * | |
3416 | * Send established traffic through conntrack for just NAT. */ | |
3417 | ovn_lflow_add(lflows, od, S_SWITCH_IN_LB, UINT16_MAX, | |
3418 | "ct.est && !ct.rel && !ct.new && !ct.inv", | |
3419 | REGBIT_CONNTRACK_NAT" = 1; next;"); | |
3420 | ovn_lflow_add(lflows, od, S_SWITCH_OUT_LB, UINT16_MAX, | |
3421 | "ct.est && !ct.rel && !ct.new && !ct.inv", | |
3422 | REGBIT_CONNTRACK_NAT" = 1; next;"); | |
3423 | } | |
3424 | } | |
3425 | ||
fa313a8c GS |
3426 | static void |
3427 | build_stateful(struct ovn_datapath *od, struct hmap *lflows) | |
3428 | { | |
3429 | /* Ingress and Egress stateful Table (Priority 0): Packets are | |
3430 | * allowed by default. */ | |
3431 | ovn_lflow_add(lflows, od, S_SWITCH_IN_STATEFUL, 0, "1", "next;"); | |
3432 | ovn_lflow_add(lflows, od, S_SWITCH_OUT_STATEFUL, 0, "1", "next;"); | |
3433 | ||
3434 | /* If REGBIT_CONNTRACK_COMMIT is set as 1, then the packets should be | |
b73db61d | 3435 | * committed to conntrack. We always set ct_label.blocked to 0 here as |
cc58e1f2 RB |
3436 | * any packet that makes it this far is part of a connection we |
3437 | * want to allow to continue. */ | |
fa313a8c | 3438 | ovn_lflow_add(lflows, od, S_SWITCH_IN_STATEFUL, 100, |
cc58e1f2 | 3439 | REGBIT_CONNTRACK_COMMIT" == 1", "ct_commit(ct_label=0/1); next;"); |
fa313a8c | 3440 | ovn_lflow_add(lflows, od, S_SWITCH_OUT_STATEFUL, 100, |
cc58e1f2 | 3441 | REGBIT_CONNTRACK_COMMIT" == 1", "ct_commit(ct_label=0/1); next;"); |
7a15be69 GS |
3442 | |
3443 | /* If REGBIT_CONNTRACK_NAT is set as 1, then packets should just be sent | |
3444 | * through nat (without committing). | |
3445 | * | |
3446 | * REGBIT_CONNTRACK_COMMIT is set for new connections and | |
3447 | * REGBIT_CONNTRACK_NAT is set for established connections. So they | |
3448 | * don't overlap. | |
3449 | */ | |
3450 | ovn_lflow_add(lflows, od, S_SWITCH_IN_STATEFUL, 100, | |
3451 | REGBIT_CONNTRACK_NAT" == 1", "ct_lb;"); | |
3452 | ovn_lflow_add(lflows, od, S_SWITCH_OUT_STATEFUL, 100, | |
3453 | REGBIT_CONNTRACK_NAT" == 1", "ct_lb;"); | |
3454 | ||
3455 | /* Load balancing rules for new connections get committed to conntrack | |
3456 | * table. So even if REGBIT_CONNTRACK_COMMIT is set in a previous table | |
3457 | * a higher priority rule for load balancing below also commits the | |
3458 | * connection, so it is okay if we do not hit the above match on | |
3459 | * REGBIT_CONNTRACK_COMMIT. */ | |
61591ad9 GS |
3460 | for (int i = 0; i < od->nbs->n_load_balancer; i++) { |
3461 | struct nbrec_load_balancer *lb = od->nbs->load_balancer[i]; | |
7a15be69 GS |
3462 | struct smap *vips = &lb->vips; |
3463 | struct smap_node *node; | |
3464 | ||
3465 | SMAP_FOR_EACH (node, vips) { | |
3466 | uint16_t port = 0; | |
485d373b | 3467 | int addr_family; |
7a15be69 GS |
3468 | |
3469 | /* node->key contains IP:port or just IP. */ | |
3470 | char *ip_address = NULL; | |
485d373b MM |
3471 | ip_address_and_port_from_lb_key(node->key, &ip_address, &port, |
3472 | &addr_family); | |
7a15be69 GS |
3473 | if (!ip_address) { |
3474 | continue; | |
3475 | } | |
3476 | ||
3477 | /* New connections in Ingress table. */ | |
3478 | char *action = xasprintf("ct_lb(%s);", node->value); | |
3479 | struct ds match = DS_EMPTY_INITIALIZER; | |
485d373b MM |
3480 | if (addr_family == AF_INET) { |
3481 | ds_put_format(&match, "ct.new && ip4.dst == %s", ip_address); | |
3482 | } else { | |
3483 | ds_put_format(&match, "ct.new && ip6.dst == %s", ip_address); | |
3484 | } | |
7a15be69 GS |
3485 | if (port) { |
3486 | if (lb->protocol && !strcmp(lb->protocol, "udp")) { | |
9784ffaf | 3487 | ds_put_format(&match, " && udp.dst == %d", port); |
7a15be69 | 3488 | } else { |
9784ffaf | 3489 | ds_put_format(&match, " && tcp.dst == %d", port); |
7a15be69 GS |
3490 | } |
3491 | ovn_lflow_add(lflows, od, S_SWITCH_IN_STATEFUL, | |
3492 | 120, ds_cstr(&match), action); | |
3493 | } else { | |
3494 | ovn_lflow_add(lflows, od, S_SWITCH_IN_STATEFUL, | |
3495 | 110, ds_cstr(&match), action); | |
3496 | } | |
3497 | ||
7443e4ec | 3498 | free(ip_address); |
7a15be69 GS |
3499 | ds_destroy(&match); |
3500 | free(action); | |
3501 | } | |
3502 | } | |
fa313a8c GS |
3503 | } |
3504 | ||
bd39395f | 3505 | static void |
9975d7be BP |
3506 | build_lswitch_flows(struct hmap *datapaths, struct hmap *ports, |
3507 | struct hmap *lflows, struct hmap *mcgroups) | |
bd39395f | 3508 | { |
5cff6b99 BP |
3509 | /* This flow table structure is documented in ovn-northd(8), so please |
3510 | * update ovn-northd.8.xml if you change anything. */ | |
3511 | ||
09b39248 JP |
3512 | struct ds match = DS_EMPTY_INITIALIZER; |
3513 | struct ds actions = DS_EMPTY_INITIALIZER; | |
3514 | ||
9975d7be | 3515 | /* Build pre-ACL and ACL tables for both ingress and egress. |
1a03fc7d | 3516 | * Ingress tables 3 through 9. Egress tables 0 through 6. */ |
5868eb24 BP |
3517 | struct ovn_datapath *od; |
3518 | HMAP_FOR_EACH (od, key_node, datapaths) { | |
9975d7be BP |
3519 | if (!od->nbs) { |
3520 | continue; | |
3521 | } | |
3522 | ||
9ab989b7 | 3523 | build_pre_acls(od, lflows); |
7a15be69 | 3524 | build_pre_lb(od, lflows); |
facf8652 | 3525 | build_pre_stateful(od, lflows); |
2d018f9b | 3526 | build_acls(od, lflows); |
1a03fc7d | 3527 | build_qos(od, lflows); |
7a15be69 | 3528 | build_lb(od, lflows); |
fa313a8c | 3529 | build_stateful(od, lflows); |
9975d7be BP |
3530 | } |
3531 | ||
3532 | /* Logical switch ingress table 0: Admission control framework (priority | |
3533 | * 100). */ | |
3534 | HMAP_FOR_EACH (od, key_node, datapaths) { | |
3535 | if (!od->nbs) { | |
3536 | continue; | |
3537 | } | |
3538 | ||
bd39395f | 3539 | /* Logical VLANs not supported. */ |
685f4dfe | 3540 | ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC_L2, 100, "vlan.present", |
091e3af9 | 3541 | "drop;"); |
bd39395f BP |
3542 | |
3543 | /* Broadcast/multicast source address is invalid. */ | |
685f4dfe | 3544 | ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC_L2, 100, "eth.src[40]", |
091e3af9 | 3545 | "drop;"); |
bd39395f | 3546 | |
35060cdc BP |
3547 | /* Port security flows have priority 50 (see below) and will continue |
3548 | * to the next table if packet source is acceptable. */ | |
bd39395f BP |
3549 | } |
3550 | ||
685f4dfe NS |
3551 | /* Logical switch ingress table 0: Ingress port security - L2 |
3552 | * (priority 50). | |
3553 | * Ingress table 1: Ingress port security - IP (priority 90 and 80) | |
3554 | * Ingress table 2: Ingress port security - ND (priority 90 and 80) | |
3555 | */ | |
5868eb24 BP |
3556 | struct ovn_port *op; |
3557 | HMAP_FOR_EACH (op, key_node, ports) { | |
0ee00741 | 3558 | if (!op->nbsp) { |
9975d7be BP |
3559 | continue; |
3560 | } | |
3561 | ||
0ee00741 | 3562 | if (!lsp_is_enabled(op->nbsp)) { |
96af668a BP |
3563 | /* Drop packets from disabled logical ports (since logical flow |
3564 | * tables are default-drop). */ | |
3565 | continue; | |
3566 | } | |
3567 | ||
09b39248 | 3568 | ds_clear(&match); |
a6095f81 | 3569 | ds_clear(&actions); |
9975d7be | 3570 | ds_put_format(&match, "inport == %s", op->json_key); |
e93b43d6 JP |
3571 | build_port_security_l2("eth.src", op->ps_addrs, op->n_ps_addrs, |
3572 | &match); | |
a6095f81 BS |
3573 | |
3574 | const char *queue_id = smap_get(&op->sb->options, "qdisc_queue_id"); | |
3575 | if (queue_id) { | |
3576 | ds_put_format(&actions, "set_queue(%s); ", queue_id); | |
3577 | } | |
3578 | ds_put_cstr(&actions, "next;"); | |
685f4dfe | 3579 | ovn_lflow_add(lflows, op->od, S_SWITCH_IN_PORT_SEC_L2, 50, |
a6095f81 | 3580 | ds_cstr(&match), ds_cstr(&actions)); |
685f4dfe | 3581 | |
0ee00741 | 3582 | if (op->nbsp->n_port_security) { |
685f4dfe NS |
3583 | build_port_security_ip(P_IN, op, lflows); |
3584 | build_port_security_nd(op, lflows); | |
3585 | } | |
3586 | } | |
3587 | ||
3588 | /* Ingress table 1 and 2: Port security - IP and ND, by default goto next. | |
3589 | * (priority 0)*/ | |
3590 | HMAP_FOR_EACH (od, key_node, datapaths) { | |
3591 | if (!od->nbs) { | |
3592 | continue; | |
3593 | } | |
3594 | ||
3595 | ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC_ND, 0, "1", "next;"); | |
3596 | ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC_IP, 0, "1", "next;"); | |
5868eb24 | 3597 | } |
445a266a | 3598 | |
1a03fc7d | 3599 | /* Ingress table 10: ARP/ND responder, skip requests coming from localnet |
0b077281 RR |
3600 | * and vtep ports. (priority 100); see ovn-northd.8.xml for the |
3601 | * rationale. */ | |
fa128126 | 3602 | HMAP_FOR_EACH (op, key_node, ports) { |
0ee00741 | 3603 | if (!op->nbsp) { |
fa128126 HZ |
3604 | continue; |
3605 | } | |
3606 | ||
0b077281 RR |
3607 | if ((!strcmp(op->nbsp->type, "localnet")) || |
3608 | (!strcmp(op->nbsp->type, "vtep"))) { | |
09b39248 JP |
3609 | ds_clear(&match); |
3610 | ds_put_format(&match, "inport == %s", op->json_key); | |
e75451fe | 3611 | ovn_lflow_add(lflows, op->od, S_SWITCH_IN_ARP_ND_RSP, 100, |
09b39248 | 3612 | ds_cstr(&match), "next;"); |
fa128126 HZ |
3613 | } |
3614 | } | |
3615 | ||
1a03fc7d | 3616 | /* Ingress table 10: ARP/ND responder, reply for known IPs. |
fa128126 | 3617 | * (priority 50). */ |
57d143eb | 3618 | HMAP_FOR_EACH (op, key_node, ports) { |
0ee00741 | 3619 | if (!op->nbsp) { |
57d143eb HZ |
3620 | continue; |
3621 | } | |
3622 | ||
4c7bf534 | 3623 | /* |
e75451fe | 3624 | * Add ARP/ND reply flows if either the |
4c7bf534 | 3625 | * - port is up or |
2a38ef45 DA |
3626 | * - port type is router or |
3627 | * - port type is localport | |
4c7bf534 | 3628 | */ |
2a38ef45 DA |
3629 | if (!lsp_is_up(op->nbsp) && strcmp(op->nbsp->type, "router") && |
3630 | strcmp(op->nbsp->type, "localport")) { | |
4c7bf534 NS |
3631 | continue; |
3632 | } | |
3633 | ||
e93b43d6 JP |
3634 | for (size_t i = 0; i < op->n_lsp_addrs; i++) { |
3635 | for (size_t j = 0; j < op->lsp_addrs[i].n_ipv4_addrs; j++) { | |
09b39248 | 3636 | ds_clear(&match); |
e93b43d6 JP |
3637 | ds_put_format(&match, "arp.tpa == %s && arp.op == 1", |
3638 | op->lsp_addrs[i].ipv4_addrs[j].addr_s); | |
09b39248 JP |
3639 | ds_clear(&actions); |
3640 | ds_put_format(&actions, | |
57d143eb | 3641 | "eth.dst = eth.src; " |
e93b43d6 | 3642 | "eth.src = %s; " |
57d143eb HZ |
3643 | "arp.op = 2; /* ARP reply */ " |
3644 | "arp.tha = arp.sha; " | |
e93b43d6 | 3645 | "arp.sha = %s; " |
57d143eb | 3646 | "arp.tpa = arp.spa; " |
e93b43d6 | 3647 | "arp.spa = %s; " |
57d143eb | 3648 | "outport = inport; " |
bf143492 | 3649 | "flags.loopback = 1; " |
57d143eb | 3650 | "output;", |
e93b43d6 JP |
3651 | op->lsp_addrs[i].ea_s, op->lsp_addrs[i].ea_s, |
3652 | op->lsp_addrs[i].ipv4_addrs[j].addr_s); | |
e75451fe | 3653 | ovn_lflow_add(lflows, op->od, S_SWITCH_IN_ARP_ND_RSP, 50, |
09b39248 | 3654 | ds_cstr(&match), ds_cstr(&actions)); |
9fcb6a18 BP |
3655 | |
3656 | /* Do not reply to an ARP request from the port that owns the | |
3657 | * address (otherwise a DHCP client that ARPs to check for a | |
3658 | * duplicate address will fail). Instead, forward it the usual | |
3659 | * way. | |
3660 | * | |
3661 | * (Another alternative would be to simply drop the packet. If | |
3662 | * everything is working as it is configured, then this would | |
3663 | * produce equivalent results, since no one should reply to the | |
3664 | * request. But ARPing for one's own IP address is intended to | |
3665 | * detect situations where the network is not working as | |
3666 | * configured, so dropping the request would frustrate that | |
3667 | * intent.) */ | |
3668 | ds_put_format(&match, " && inport == %s", op->json_key); | |
3669 | ovn_lflow_add(lflows, op->od, S_SWITCH_IN_ARP_ND_RSP, 100, | |
3670 | ds_cstr(&match), "next;"); | |
57d143eb | 3671 | } |
7dc88496 | 3672 | |
6fdb7cd6 JP |
3673 | /* For ND solicitations, we need to listen for both the |
3674 | * unicast IPv6 address and its all-nodes multicast address, | |
3675 | * but always respond with the unicast IPv6 address. */ | |
3676 | for (size_t j = 0; j < op->lsp_addrs[i].n_ipv6_addrs; j++) { | |
09b39248 | 3677 | ds_clear(&match); |
6fdb7cd6 JP |
3678 | ds_put_format(&match, |
3679 | "nd_ns && ip6.dst == {%s, %s} && nd.target == %s", | |
3680 | op->lsp_addrs[i].ipv6_addrs[j].addr_s, | |
3681 | op->lsp_addrs[i].ipv6_addrs[j].sn_addr_s, | |
3682 | op->lsp_addrs[i].ipv6_addrs[j].addr_s); | |
3683 | ||
09b39248 JP |
3684 | ds_clear(&actions); |
3685 | ds_put_format(&actions, | |
6fdb7cd6 JP |
3686 | "nd_na { " |
3687 | "eth.src = %s; " | |
3688 | "ip6.src = %s; " | |
3689 | "nd.target = %s; " | |
3690 | "nd.tll = %s; " | |
3691 | "outport = inport; " | |
bf143492 | 3692 | "flags.loopback = 1; " |
6fdb7cd6 JP |
3693 | "output; " |
3694 | "};", | |
3695 | op->lsp_addrs[i].ea_s, | |
3696 | op->lsp_addrs[i].ipv6_addrs[j].addr_s, | |
3697 | op->lsp_addrs[i].ipv6_addrs[j].addr_s, | |
3698 | op->lsp_addrs[i].ea_s); | |
e75451fe | 3699 | ovn_lflow_add(lflows, op->od, S_SWITCH_IN_ARP_ND_RSP, 50, |
09b39248 | 3700 | ds_cstr(&match), ds_cstr(&actions)); |
9fcb6a18 BP |
3701 | |
3702 | /* Do not reply to a solicitation from the port that owns the | |
3703 | * address (otherwise DAD detection will fail). */ | |
3704 | ds_put_format(&match, " && inport == %s", op->json_key); | |
3705 | ovn_lflow_add(lflows, op->od, S_SWITCH_IN_ARP_ND_RSP, 100, | |
3706 | ds_cstr(&match), "next;"); | |
e75451fe | 3707 | } |
57d143eb HZ |
3708 | } |
3709 | } | |
3710 | ||
1a03fc7d | 3711 | /* Ingress table 10: ARP/ND responder, by default goto next. |
fa128126 HZ |
3712 | * (priority 0)*/ |
3713 | HMAP_FOR_EACH (od, key_node, datapaths) { | |
3714 | if (!od->nbs) { | |
3715 | continue; | |
3716 | } | |
3717 | ||
e75451fe | 3718 | ovn_lflow_add(lflows, od, S_SWITCH_IN_ARP_ND_RSP, 0, "1", "next;"); |
fa128126 HZ |
3719 | } |
3720 | ||
1a03fc7d | 3721 | /* Logical switch ingress table 11 and 12: DHCP options and response |
281977f7 NS |
3722 | * priority 100 flows. */ |
3723 | HMAP_FOR_EACH (op, key_node, ports) { | |
3724 | if (!op->nbsp) { | |
3725 | continue; | |
3726 | } | |
3727 | ||
3728 | if (!lsp_is_enabled(op->nbsp) || !strcmp(op->nbsp->type, "router")) { | |
3729 | /* Don't add the DHCP flows if the port is not enabled or if the | |
3730 | * port is a router port. */ | |
3731 | continue; | |
3732 | } | |
3733 | ||
33ac3c83 NS |
3734 | if (!op->nbsp->dhcpv4_options && !op->nbsp->dhcpv6_options) { |
3735 | /* CMS has disabled both native DHCPv4 and DHCPv6 for this lport. | |
3736 | */ | |
281977f7 NS |
3737 | continue; |
3738 | } | |
3739 | ||
3740 | for (size_t i = 0; i < op->n_lsp_addrs; i++) { | |
3741 | for (size_t j = 0; j < op->lsp_addrs[i].n_ipv4_addrs; j++) { | |
3742 | struct ds options_action = DS_EMPTY_INITIALIZER; | |
3743 | struct ds response_action = DS_EMPTY_INITIALIZER; | |
213615b3 | 3744 | struct ds ipv4_addr_match = DS_EMPTY_INITIALIZER; |
281977f7 NS |
3745 | if (build_dhcpv4_action( |
3746 | op, op->lsp_addrs[i].ipv4_addrs[j].addr, | |
213615b3 | 3747 | &options_action, &response_action, &ipv4_addr_match)) { |
71f21279 | 3748 | ds_clear(&match); |
281977f7 NS |
3749 | ds_put_format( |
3750 | &match, "inport == %s && eth.src == %s && " | |
3751 | "ip4.src == 0.0.0.0 && ip4.dst == 255.255.255.255 && " | |
3752 | "udp.src == 68 && udp.dst == 67", op->json_key, | |
3753 | op->lsp_addrs[i].ea_s); | |
3754 | ||
3755 | ovn_lflow_add(lflows, op->od, S_SWITCH_IN_DHCP_OPTIONS, | |
3756 | 100, ds_cstr(&match), | |
3757 | ds_cstr(&options_action)); | |
213615b3 NS |
3758 | ds_clear(&match); |
3759 | /* Allow ip4.src = OFFER_IP and | |
3760 | * ip4.dst = {SERVER_IP, 255.255.255.255} for the below | |
3761 | * cases | |
3762 | * - When the client wants to renew the IP by sending | |
3763 | * the DHCPREQUEST to the server ip. | |
3764 | * - When the client wants to renew the IP by | |
3765 | * broadcasting the DHCPREQUEST. | |
3766 | */ | |
3767 | ds_put_format( | |
3768 | &match, "inport == %s && eth.src == %s && " | |
3769 | "%s && udp.src == 68 && udp.dst == 67", op->json_key, | |
3770 | op->lsp_addrs[i].ea_s, ds_cstr(&ipv4_addr_match)); | |
3771 | ||
3772 | ovn_lflow_add(lflows, op->od, S_SWITCH_IN_DHCP_OPTIONS, | |
3773 | 100, ds_cstr(&match), | |
3774 | ds_cstr(&options_action)); | |
3775 | ds_clear(&match); | |
3776 | ||
281977f7 | 3777 | /* If REGBIT_DHCP_OPTS_RESULT is set, it means the |
213615b3 NS |
3778 | * put_dhcp_opts action is successful. */ |
3779 | ds_put_format( | |
3780 | &match, "inport == %s && eth.src == %s && " | |
3781 | "ip4 && udp.src == 68 && udp.dst == 67" | |
3782 | " && "REGBIT_DHCP_OPTS_RESULT, op->json_key, | |
3783 | op->lsp_addrs[i].ea_s); | |
281977f7 NS |
3784 | ovn_lflow_add(lflows, op->od, S_SWITCH_IN_DHCP_RESPONSE, |
3785 | 100, ds_cstr(&match), | |
3786 | ds_cstr(&response_action)); | |
281977f7 NS |
3787 | ds_destroy(&options_action); |
3788 | ds_destroy(&response_action); | |
213615b3 | 3789 | ds_destroy(&ipv4_addr_match); |
281977f7 NS |
3790 | break; |
3791 | } | |
3792 | } | |
33ac3c83 NS |
3793 | |
3794 | for (size_t j = 0; j < op->lsp_addrs[i].n_ipv6_addrs; j++) { | |
3795 | struct ds options_action = DS_EMPTY_INITIALIZER; | |
3796 | struct ds response_action = DS_EMPTY_INITIALIZER; | |
3797 | if (build_dhcpv6_action( | |
3798 | op, &op->lsp_addrs[i].ipv6_addrs[j].addr, | |
3799 | &options_action, &response_action)) { | |
71f21279 | 3800 | ds_clear(&match); |
33ac3c83 NS |
3801 | ds_put_format( |
3802 | &match, "inport == %s && eth.src == %s" | |
3803 | " && ip6.dst == ff02::1:2 && udp.src == 546 &&" | |
3804 | " udp.dst == 547", op->json_key, | |
3805 | op->lsp_addrs[i].ea_s); | |
3806 | ||
3807 | ovn_lflow_add(lflows, op->od, S_SWITCH_IN_DHCP_OPTIONS, 100, | |
3808 | ds_cstr(&match), ds_cstr(&options_action)); | |
3809 | ||
3810 | /* If REGBIT_DHCP_OPTS_RESULT is set to 1, it means the | |
3811 | * put_dhcpv6_opts action is successful */ | |
3812 | ds_put_cstr(&match, " && "REGBIT_DHCP_OPTS_RESULT); | |
3813 | ovn_lflow_add(lflows, op->od, S_SWITCH_IN_DHCP_RESPONSE, 100, | |
3814 | ds_cstr(&match), ds_cstr(&response_action)); | |
33ac3c83 NS |
3815 | ds_destroy(&options_action); |
3816 | ds_destroy(&response_action); | |
3817 | break; | |
3818 | } | |
3819 | } | |
281977f7 NS |
3820 | } |
3821 | } | |
3822 | ||
302eda27 NS |
3823 | /* Logical switch ingress table 13 and 14: DNS lookup and response |
3824 | * priority 100 flows. | |
3825 | */ | |
3826 | HMAP_FOR_EACH (od, key_node, datapaths) { | |
3827 | if (!od->nbs || !ls_has_dns_records(od->nbs)) { | |
3828 | continue; | |
3829 | } | |
3830 | ||
71f21279 BP |
3831 | struct ds action = DS_EMPTY_INITIALIZER; |
3832 | ||
3833 | ds_clear(&match); | |
302eda27 NS |
3834 | ds_put_cstr(&match, "udp.dst == 53"); |
3835 | ds_put_format(&action, | |
3836 | REGBIT_DNS_LOOKUP_RESULT" = dns_lookup(); next;"); | |
3837 | ovn_lflow_add(lflows, od, S_SWITCH_IN_DNS_LOOKUP, 100, | |
3838 | ds_cstr(&match), ds_cstr(&action)); | |
3839 | ds_clear(&action); | |
3840 | ds_put_cstr(&match, " && "REGBIT_DNS_LOOKUP_RESULT); | |
3841 | ds_put_format(&action, "eth.dst <-> eth.src; ip4.src <-> ip4.dst; " | |
3842 | "udp.dst = udp.src; udp.src = 53; outport = inport; " | |
3843 | "flags.loopback = 1; output;"); | |
3844 | ovn_lflow_add(lflows, od, S_SWITCH_IN_DNS_RESPONSE, 100, | |
3845 | ds_cstr(&match), ds_cstr(&action)); | |
3846 | ds_clear(&action); | |
3847 | ds_put_format(&action, "eth.dst <-> eth.src; ip6.src <-> ip6.dst; " | |
3848 | "udp.dst = udp.src; udp.src = 53; outport = inport; " | |
3849 | "flags.loopback = 1; output;"); | |
3850 | ovn_lflow_add(lflows, od, S_SWITCH_IN_DNS_RESPONSE, 100, | |
3851 | ds_cstr(&match), ds_cstr(&action)); | |
302eda27 NS |
3852 | ds_destroy(&action); |
3853 | } | |
3854 | ||
1a03fc7d | 3855 | /* Ingress table 11 and 12: DHCP options and response, by default goto next. |
302eda27 NS |
3856 | * (priority 0). |
3857 | * Ingress table 13 and 14: DNS lookup and response, by default goto next. | |
3858 | * (priority 0).*/ | |
281977f7 NS |
3859 | |
3860 | HMAP_FOR_EACH (od, key_node, datapaths) { | |
3861 | if (!od->nbs) { | |
3862 | continue; | |
3863 | } | |
3864 | ||
3865 | ovn_lflow_add(lflows, od, S_SWITCH_IN_DHCP_OPTIONS, 0, "1", "next;"); | |
3866 | ovn_lflow_add(lflows, od, S_SWITCH_IN_DHCP_RESPONSE, 0, "1", "next;"); | |
302eda27 NS |
3867 | ovn_lflow_add(lflows, od, S_SWITCH_IN_DNS_LOOKUP, 0, "1", "next;"); |
3868 | ovn_lflow_add(lflows, od, S_SWITCH_IN_DNS_RESPONSE, 0, "1", "next;"); | |
281977f7 NS |
3869 | } |
3870 | ||
302eda27 | 3871 | /* Ingress table 15: Destination lookup, broadcast and multicast handling |
5868eb24 BP |
3872 | * (priority 100). */ |
3873 | HMAP_FOR_EACH (op, key_node, ports) { | |
0ee00741 | 3874 | if (!op->nbsp) { |
9975d7be BP |
3875 | continue; |
3876 | } | |
3877 | ||
0ee00741 | 3878 | if (lsp_is_enabled(op->nbsp)) { |
9975d7be | 3879 | ovn_multicast_add(mcgroups, &mc_flood, op); |
445a266a | 3880 | } |
5868eb24 BP |
3881 | } |
3882 | HMAP_FOR_EACH (od, key_node, datapaths) { | |
9975d7be BP |
3883 | if (!od->nbs) { |
3884 | continue; | |
3885 | } | |
3886 | ||
3887 | ovn_lflow_add(lflows, od, S_SWITCH_IN_L2_LKUP, 100, "eth.mcast", | |
5868eb24 | 3888 | "outport = \""MC_FLOOD"\"; output;"); |
bd39395f | 3889 | } |
bd39395f | 3890 | |
1a03fc7d | 3891 | /* Ingress table 13: Destination lookup, unicast handling (priority 50), */ |
5868eb24 | 3892 | HMAP_FOR_EACH (op, key_node, ports) { |
0ee00741 | 3893 | if (!op->nbsp) { |
9975d7be BP |
3894 | continue; |
3895 | } | |
3896 | ||
0ee00741 | 3897 | for (size_t i = 0; i < op->nbsp->n_addresses; i++) { |
10c3fcdf | 3898 | /* Addresses are owned by the logical port. |
3899 | * Ethernet address followed by zero or more IPv4 | |
3900 | * or IPv6 addresses (or both). */ | |
74ff3298 | 3901 | struct eth_addr mac; |
10c3fcdf | 3902 | if (ovs_scan(op->nbsp->addresses[i], |
3903 | ETH_ADDR_SCAN_FMT, ETH_ADDR_SCAN_ARGS(mac))) { | |
09b39248 | 3904 | ds_clear(&match); |
9975d7be BP |
3905 | ds_put_format(&match, "eth.dst == "ETH_ADDR_FMT, |
3906 | ETH_ADDR_ARGS(mac)); | |
5868eb24 | 3907 | |
09b39248 | 3908 | ds_clear(&actions); |
9975d7be BP |
3909 | ds_put_format(&actions, "outport = %s; output;", op->json_key); |
3910 | ovn_lflow_add(lflows, op->od, S_SWITCH_IN_L2_LKUP, 50, | |
5868eb24 | 3911 | ds_cstr(&match), ds_cstr(&actions)); |
0ee00741 HK |
3912 | } else if (!strcmp(op->nbsp->addresses[i], "unknown")) { |
3913 | if (lsp_is_enabled(op->nbsp)) { | |
9975d7be | 3914 | ovn_multicast_add(mcgroups, &mc_unknown, op); |
96af668a BP |
3915 | op->od->has_unknown = true; |
3916 | } | |
6374d518 | 3917 | } else if (is_dynamic_lsp_address(op->nbsp->addresses[i])) { |
8639f9be | 3918 | if (!op->nbsp->dynamic_addresses |
10c3fcdf | 3919 | || !ovs_scan(op->nbsp->dynamic_addresses, |
3920 | ETH_ADDR_SCAN_FMT, ETH_ADDR_SCAN_ARGS(mac))) { | |
8639f9be ND |
3921 | continue; |
3922 | } | |
3923 | ds_clear(&match); | |
3924 | ds_put_format(&match, "eth.dst == "ETH_ADDR_FMT, | |
3925 | ETH_ADDR_ARGS(mac)); | |
3926 | ||
3927 | ds_clear(&actions); | |
3928 | ds_put_format(&actions, "outport = %s; output;", op->json_key); | |
3929 | ovn_lflow_add(lflows, op->od, S_SWITCH_IN_L2_LKUP, 50, | |
20418099 MS |
3930 | ds_cstr(&match), ds_cstr(&actions)); |
3931 | } else if (!strcmp(op->nbsp->addresses[i], "router")) { | |
3932 | if (!op->peer || !op->peer->nbrp | |
3933 | || !ovs_scan(op->peer->nbrp->mac, | |
3934 | ETH_ADDR_SCAN_FMT, ETH_ADDR_SCAN_ARGS(mac))) { | |
3935 | continue; | |
3936 | } | |
3937 | ds_clear(&match); | |
3938 | ds_put_format(&match, "eth.dst == "ETH_ADDR_FMT, | |
3939 | ETH_ADDR_ARGS(mac)); | |
41a15b71 MS |
3940 | if (op->peer->od->l3dgw_port |
3941 | && op->peer == op->peer->od->l3dgw_port | |
3942 | && op->peer->od->l3redirect_port) { | |
3943 | /* The destination lookup flow for the router's | |
3944 | * distributed gateway port MAC address should only be | |
3945 | * programmed on the "redirect-chassis". */ | |
3946 | ds_put_format(&match, " && is_chassis_resident(%s)", | |
3947 | op->peer->od->l3redirect_port->json_key); | |
3948 | } | |
20418099 MS |
3949 | |
3950 | ds_clear(&actions); | |
3951 | ds_put_format(&actions, "outport = %s; output;", op->json_key); | |
3952 | ovn_lflow_add(lflows, op->od, S_SWITCH_IN_L2_LKUP, 50, | |
8639f9be | 3953 | ds_cstr(&match), ds_cstr(&actions)); |
06a26dd2 MS |
3954 | |
3955 | /* Add ethernet addresses specified in NAT rules on | |
3956 | * distributed logical routers. */ | |
3957 | if (op->peer->od->l3dgw_port | |
3958 | && op->peer == op->peer->od->l3dgw_port) { | |
71f21279 | 3959 | for (int j = 0; j < op->peer->od->nbr->n_nat; j++) { |
06a26dd2 | 3960 | const struct nbrec_nat *nat |
71f21279 | 3961 | = op->peer->od->nbr->nat[j]; |
06a26dd2 MS |
3962 | if (!strcmp(nat->type, "dnat_and_snat") |
3963 | && nat->logical_port && nat->external_mac | |
3964 | && eth_addr_from_string(nat->external_mac, &mac)) { | |
3965 | ||
3966 | ds_clear(&match); | |
3967 | ds_put_format(&match, "eth.dst == "ETH_ADDR_FMT | |
3968 | " && is_chassis_resident(\"%s\")", | |
3969 | ETH_ADDR_ARGS(mac), | |
3970 | nat->logical_port); | |
3971 | ||
3972 | ds_clear(&actions); | |
3973 | ds_put_format(&actions, "outport = %s; output;", | |
3974 | op->json_key); | |
3975 | ovn_lflow_add(lflows, op->od, S_SWITCH_IN_L2_LKUP, | |
3976 | 50, ds_cstr(&match), | |
3977 | ds_cstr(&actions)); | |
3978 | } | |
3979 | } | |
3980 | } | |
5868eb24 BP |
3981 | } else { |
3982 | static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1); | |
445a266a | 3983 | |
2fa326a3 BP |
3984 | VLOG_INFO_RL(&rl, |
3985 | "%s: invalid syntax '%s' in addresses column", | |
0ee00741 | 3986 | op->nbsp->name, op->nbsp->addresses[i]); |
445a266a BP |
3987 | } |
3988 | } | |
bd39395f BP |
3989 | } |
3990 | ||
1a03fc7d | 3991 | /* Ingress table 13: Destination lookup for unknown MACs (priority 0). */ |
5868eb24 | 3992 | HMAP_FOR_EACH (od, key_node, datapaths) { |
9975d7be BP |
3993 | if (!od->nbs) { |
3994 | continue; | |
3995 | } | |
3996 | ||
5868eb24 | 3997 | if (od->has_unknown) { |
9975d7be | 3998 | ovn_lflow_add(lflows, od, S_SWITCH_IN_L2_LKUP, 0, "1", |
5868eb24 | 3999 | "outport = \""MC_UNKNOWN"\"; output;"); |
445a266a | 4000 | } |
bd39395f BP |
4001 | } |
4002 | ||
94300e09 JP |
4003 | /* Egress tables 6: Egress port security - IP (priority 0) |
4004 | * Egress table 7: Egress port security L2 - multicast/broadcast | |
4005 | * (priority 100). */ | |
5868eb24 | 4006 | HMAP_FOR_EACH (od, key_node, datapaths) { |
9975d7be BP |
4007 | if (!od->nbs) { |
4008 | continue; | |
4009 | } | |
4010 | ||
685f4dfe NS |
4011 | ovn_lflow_add(lflows, od, S_SWITCH_OUT_PORT_SEC_IP, 0, "1", "next;"); |
4012 | ovn_lflow_add(lflows, od, S_SWITCH_OUT_PORT_SEC_L2, 100, "eth.mcast", | |
091e3af9 | 4013 | "output;"); |
48f42f3a RB |
4014 | } |
4015 | ||
94300e09 | 4016 | /* Egress table 6: Egress port security - IP (priorities 90 and 80) |
685f4dfe NS |
4017 | * if port security enabled. |
4018 | * | |
94300e09 | 4019 | * Egress table 7: Egress port security - L2 (priorities 50 and 150). |
d770a830 BP |
4020 | * |
4021 | * Priority 50 rules implement port security for enabled logical port. | |
4022 | * | |
4023 | * Priority 150 rules drop packets to disabled logical ports, so that they | |
4024 | * don't even receive multicast or broadcast packets. */ | |
5868eb24 | 4025 | HMAP_FOR_EACH (op, key_node, ports) { |
0ee00741 | 4026 | if (!op->nbsp) { |
9975d7be BP |
4027 | continue; |
4028 | } | |
4029 | ||
09b39248 | 4030 | ds_clear(&match); |
9975d7be | 4031 | ds_put_format(&match, "outport == %s", op->json_key); |
0ee00741 | 4032 | if (lsp_is_enabled(op->nbsp)) { |
e93b43d6 JP |
4033 | build_port_security_l2("eth.dst", op->ps_addrs, op->n_ps_addrs, |
4034 | &match); | |
685f4dfe | 4035 | ovn_lflow_add(lflows, op->od, S_SWITCH_OUT_PORT_SEC_L2, 50, |
d770a830 BP |
4036 | ds_cstr(&match), "output;"); |
4037 | } else { | |
685f4dfe | 4038 | ovn_lflow_add(lflows, op->od, S_SWITCH_OUT_PORT_SEC_L2, 150, |
d770a830 BP |
4039 | ds_cstr(&match), "drop;"); |
4040 | } | |
eb00399e | 4041 | |
0ee00741 | 4042 | if (op->nbsp->n_port_security) { |
685f4dfe NS |
4043 | build_port_security_ip(P_OUT, op, lflows); |
4044 | } | |
eb00399e | 4045 | } |
09b39248 JP |
4046 | |
4047 | ds_destroy(&match); | |
4048 | ds_destroy(&actions); | |
9975d7be | 4049 | } |
eb00399e | 4050 | |
9975d7be BP |
4051 | static bool |
4052 | lrport_is_enabled(const struct nbrec_logical_router_port *lrport) | |
4053 | { | |
4054 | return !lrport->enabled || *lrport->enabled; | |
4055 | } | |
4056 | ||
4685e523 JP |
4057 | /* Returns a string of the IP address of the router port 'op' that |
4058 | * overlaps with 'ip_s". If one is not found, returns NULL. | |
4059 | * | |
4060 | * The caller must not free the returned string. */ | |
4061 | static const char * | |
4062 | find_lrp_member_ip(const struct ovn_port *op, const char *ip_s) | |
4063 | { | |
6fdb7cd6 | 4064 | bool is_ipv4 = strchr(ip_s, '.') ? true : false; |
4685e523 | 4065 | |
6fdb7cd6 JP |
4066 | if (is_ipv4) { |
4067 | ovs_be32 ip; | |
4685e523 | 4068 | |
6fdb7cd6 JP |
4069 | if (!ip_parse(ip_s, &ip)) { |
4070 | static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1); | |
4071 | VLOG_WARN_RL(&rl, "bad ip address %s", ip_s); | |
4072 | return NULL; | |
4073 | } | |
4685e523 | 4074 | |
6fdb7cd6 JP |
4075 | for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) { |
4076 | const struct ipv4_netaddr *na = &op->lrp_networks.ipv4_addrs[i]; | |
4077 | ||
4078 | if (!((na->network ^ ip) & na->mask)) { | |
4079 | /* There should be only 1 interface that matches the | |
4080 | * supplied IP. Otherwise, it's a configuration error, | |
4081 | * because subnets of a router's interfaces should NOT | |
4082 | * overlap. */ | |
4083 | return na->addr_s; | |
4084 | } | |
4085 | } | |
4086 | } else { | |
4087 | struct in6_addr ip6; | |
4088 | ||
4089 | if (!ipv6_parse(ip_s, &ip6)) { | |
4090 | static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1); | |
4091 | VLOG_WARN_RL(&rl, "bad ipv6 address %s", ip_s); | |
4092 | return NULL; | |
4093 | } | |
4094 | ||
4095 | for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) { | |
4096 | const struct ipv6_netaddr *na = &op->lrp_networks.ipv6_addrs[i]; | |
4097 | struct in6_addr xor_addr = ipv6_addr_bitxor(&na->network, &ip6); | |
4098 | struct in6_addr and_addr = ipv6_addr_bitand(&xor_addr, &na->mask); | |
4099 | ||
4100 | if (ipv6_is_zero(&and_addr)) { | |
4101 | /* There should be only 1 interface that matches the | |
4102 | * supplied IP. Otherwise, it's a configuration error, | |
4103 | * because subnets of a router's interfaces should NOT | |
4104 | * overlap. */ | |
4105 | return na->addr_s; | |
4106 | } | |
4685e523 JP |
4107 | } |
4108 | } | |
4109 | ||
4110 | return NULL; | |
4111 | } | |
4112 | ||
/* Emits into 'lflows' one logical flow in the router IP routing stage that
 * implements a single route out of router port 'op'.
 *
 *   - 'network_s'/'plen': the prefix to match.  IPv4 vs. IPv6 is inferred
 *     from whether 'network_s' contains a '.'.
 *   - 'policy': "src-ip" matches on the packet's source address; any other
 *     value (including NULL) matches on the destination address.
 *   - 'gateway': next-hop IP; when NULL the packet's own destination is
 *     used as the next hop (directly attached network).
 *   - 'lrp_addr_s': the router port IP stored into (xx)reg1 by the action.
 *
 * The action decrements TTL, records the next hop in (xx)reg0 and the
 * source IP in (xx)reg1, rewrites eth.src, sets the outport, and continues
 * to the next stage. */
static void
add_route(struct hmap *lflows, const struct ovn_port *op,
          const char *lrp_addr_s, const char *network_s, int plen,
          const char *gateway, const char *policy)
{
    bool is_ipv4 = strchr(network_s, '.') ? true : false;
    struct ds match = DS_EMPTY_INITIALIZER;
    const char *dir;
    uint16_t priority;

    /* Encode longest-prefix-match into the flow priority: a longer prefix
     * always wins, and for equal prefix lengths a dst-ip route (2*plen + 1)
     * beats a src-ip route (2*plen). */
    if (policy && !strcmp(policy, "src-ip")) {
        dir = "src";
        priority = plen * 2;
    } else {
        dir = "dst";
        priority = (plen * 2) + 1;
    }

    /* IPv6 link-local addresses must be scoped to the local router port. */
    if (!is_ipv4) {
        struct in6_addr network;
        /* NOTE(review): relies on ovs_assert() evaluating its argument in
         * all build configurations, since ipv6_parse() fills 'network'. */
        ovs_assert(ipv6_parse(network_s, &network));
        if (in6_is_lla(&network)) {
            ds_put_format(&match, "inport == %s && ", op->json_key);
        }
    }
    ds_put_format(&match, "ip%s.%s == %s/%d", is_ipv4 ? "4" : "6", dir,
                  network_s, plen);

    struct ds actions = DS_EMPTY_INITIALIZER;
    /* IPv6 values go in the wide "xx" registers. */
    ds_put_format(&actions, "ip.ttl--; %sreg0 = ", is_ipv4 ? "" : "xx");

    if (gateway) {
        ds_put_cstr(&actions, gateway);
    } else {
        /* Directly attached network: the next hop is the destination. */
        ds_put_format(&actions, "ip%s.dst", is_ipv4 ? "4" : "6");
    }
    ds_put_format(&actions, "; "
                  "%sreg1 = %s; "
                  "eth.src = %s; "
                  "outport = %s; "
                  "flags.loopback = 1; "
                  "next;",
                  is_ipv4 ? "" : "xx",
                  lrp_addr_s,
                  op->lrp_networks.ea_s,
                  op->json_key);

    /* The priority here is calculated to implement longest-prefix-match
     * routing. */
    ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_ROUTING, priority,
                  ds_cstr(&match), ds_cstr(&actions));
    ds_destroy(&match);
    ds_destroy(&actions);
}
4168 | ||
28dc3fe9 SR |
/* Validates one northbound static 'route' on router 'od' and, if valid,
 * installs the corresponding routing flow via add_route().
 *
 * Validation steps:
 *   1. 'route->nexthop' must be a host address (/32 IPv4 or /128 IPv6);
 *      this also determines the route's address family.
 *   2. 'route->ip_prefix' must parse in the same family; it is normalized
 *      to its network address ('prefix_s') and length ('plen' is reused
 *      here for the prefix length after the next-hop check).
 *   3. An output port is resolved, either from 'route->output_port' or by
 *      scanning the router's ports for one whose subnet contains the next
 *      hop.
 *
 * On any failure a rate-limited warning is logged and no flow is added. */
static void
build_static_route_flow(struct hmap *lflows, struct ovn_datapath *od,
                        struct hmap *ports,
                        const struct nbrec_logical_router_static_route *route)
{
    ovs_be32 nexthop;
    const char *lrp_addr_s = NULL;
    unsigned int plen;
    bool is_ipv4;

    /* Verify that the next hop is an IP address with an all-ones mask. */
    char *error = ip_parse_cidr(route->nexthop, &nexthop, &plen);
    if (!error) {
        if (plen != 32) {
            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
            VLOG_WARN_RL(&rl, "bad next hop mask %s", route->nexthop);
            return;
        }
        is_ipv4 = true;
    } else {
        free(error);

        /* Not IPv4; try parsing the next hop as an IPv6 host address. */
        struct in6_addr ip6;
        error = ipv6_parse_cidr(route->nexthop, &ip6, &plen);
        if (!error) {
            if (plen != 128) {
                static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
                VLOG_WARN_RL(&rl, "bad next hop mask %s", route->nexthop);
                return;
            }
            is_ipv4 = false;
        } else {
            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
            VLOG_WARN_RL(&rl, "bad next hop ip address %s", route->nexthop);
            free(error);
            return;
        }
    }

    /* Parse and normalize the route prefix; 'prefix_s' is heap-allocated
     * and freed at 'free_prefix_s' below. */
    char *prefix_s;
    if (is_ipv4) {
        ovs_be32 prefix;
        /* Verify that ip prefix is a valid IPv4 address. */
        error = ip_parse_cidr(route->ip_prefix, &prefix, &plen);
        if (error) {
            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
            VLOG_WARN_RL(&rl, "bad 'ip_prefix' in static routes %s",
                         route->ip_prefix);
            free(error);
            return;
        }
        /* Mask off host bits so the match uses the network address. */
        prefix_s = xasprintf(IP_FMT, IP_ARGS(prefix & be32_prefix_mask(plen)));
    } else {
        /* Verify that ip prefix is a valid IPv6 address. */
        struct in6_addr prefix;
        error = ipv6_parse_cidr(route->ip_prefix, &prefix, &plen);
        if (error) {
            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
            VLOG_WARN_RL(&rl, "bad 'ip_prefix' in static routes %s",
                         route->ip_prefix);
            free(error);
            return;
        }
        struct in6_addr mask = ipv6_create_mask(plen);
        struct in6_addr network = ipv6_addr_bitand(&prefix, &mask);
        prefix_s = xmalloc(INET6_ADDRSTRLEN);
        inet_ntop(AF_INET6, &network, prefix_s, INET6_ADDRSTRLEN);
    }

    /* Find the outgoing port. */
    struct ovn_port *out_port = NULL;
    if (route->output_port) {
        out_port = ovn_port_find(ports, route->output_port);
        if (!out_port) {
            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
            VLOG_WARN_RL(&rl, "Bad out port %s for static route %s",
                         route->output_port, route->ip_prefix);
            goto free_prefix_s;
        }
        lrp_addr_s = find_lrp_member_ip(out_port, route->nexthop);
        if (!lrp_addr_s) {
            /* There are no IP networks configured on the router's port via
             * which 'route->nexthop' is theoretically reachable.  But since
             * 'out_port' has been specified, we honor it by trying to reach
             * 'route->nexthop' via the first IP address of 'out_port'.
             * (There are cases, e.g in GCE, where each VM gets a /32 IP
             * address and the default gateway is still reachable from it.) */
            if (is_ipv4) {
                if (out_port->lrp_networks.n_ipv4_addrs) {
                    lrp_addr_s = out_port->lrp_networks.ipv4_addrs[0].addr_s;
                }
            } else {
                if (out_port->lrp_networks.n_ipv6_addrs) {
                    lrp_addr_s = out_port->lrp_networks.ipv6_addrs[0].addr_s;
                }
            }
        }
    } else {
        /* output_port is not specified, find the
         * router port matching the next hop. */
        int i;
        for (i = 0; i < od->nbr->n_ports; i++) {
            struct nbrec_logical_router_port *lrp = od->nbr->ports[i];
            out_port = ovn_port_find(ports, lrp->name);
            if (!out_port) {
                /* This should not happen. */
                continue;
            }

            lrp_addr_s = find_lrp_member_ip(out_port, route->nexthop);
            if (lrp_addr_s) {
                break;
            }
        }
    }

    if (!out_port || !lrp_addr_s) {
        /* There is no matched out port. */
        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
        VLOG_WARN_RL(&rl, "No path for static route %s; next hop %s",
                     route->ip_prefix, route->nexthop);
        goto free_prefix_s;
    }

    /* An unset policy defaults to destination-based routing. */
    char *policy = route->policy ? route->policy : "dst-ip";
    add_route(lflows, out_port, lrp_addr_s, prefix_s, plen, route->nexthop,
              policy);

free_prefix_s:
    free(prefix_s);
}
4300 | ||
4685e523 | 4301 | static void |
6fdb7cd6 | 4302 | op_put_v4_networks(struct ds *ds, const struct ovn_port *op, bool add_bcast) |
4685e523 JP |
4303 | { |
4304 | if (!add_bcast && op->lrp_networks.n_ipv4_addrs == 1) { | |
4305 | ds_put_format(ds, "%s", op->lrp_networks.ipv4_addrs[0].addr_s); | |
4306 | return; | |
4307 | } | |
4308 | ||
4309 | ds_put_cstr(ds, "{"); | |
4310 | for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) { | |
4311 | ds_put_format(ds, "%s, ", op->lrp_networks.ipv4_addrs[i].addr_s); | |
4312 | if (add_bcast) { | |
4313 | ds_put_format(ds, "%s, ", op->lrp_networks.ipv4_addrs[i].bcast_s); | |
4314 | } | |
4315 | } | |
4316 | ds_chomp(ds, ' '); | |
4317 | ds_chomp(ds, ','); | |
4318 | ds_put_cstr(ds, "}"); | |
4319 | } | |
4320 | ||
6fdb7cd6 JP |
4321 | static void |
4322 | op_put_v6_networks(struct ds *ds, const struct ovn_port *op) | |
4323 | { | |
4324 | if (op->lrp_networks.n_ipv6_addrs == 1) { | |
4325 | ds_put_format(ds, "%s", op->lrp_networks.ipv6_addrs[0].addr_s); | |
4326 | return; | |
4327 | } | |
4328 | ||
4329 | ds_put_cstr(ds, "{"); | |
4330 | for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) { | |
4331 | ds_put_format(ds, "%s, ", op->lrp_networks.ipv6_addrs[i].addr_s); | |
4332 | } | |
4333 | ds_chomp(ds, ' '); | |
4334 | ds_chomp(ds, ','); | |
4335 | ds_put_cstr(ds, "}"); | |
4336 | } | |
4337 | ||
65d8810c GS |
4338 | static const char * |
4339 | get_force_snat_ip(struct ovn_datapath *od, const char *key_type, ovs_be32 *ip) | |
4340 | { | |
4341 | char *key = xasprintf("%s_force_snat_ip", key_type); | |
4342 | const char *ip_address = smap_get(&od->nbr->options, key); | |
4343 | free(key); | |
4344 | ||
4345 | if (ip_address) { | |
4346 | ovs_be32 mask; | |
4347 | char *error = ip_parse_masked(ip_address, ip, &mask); | |
4348 | if (error || mask != OVS_BE32_MAX) { | |
4349 | static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1); | |
4350 | VLOG_WARN_RL(&rl, "bad ip %s in options of router "UUID_FMT"", | |
4351 | ip_address, UUID_ARGS(&od->key)); | |
4352 | free(error); | |
4353 | *ip = 0; | |
4354 | return NULL; | |
4355 | } | |
4356 | return ip_address; | |
4357 | } | |
4358 | ||
4359 | *ip = 0; | |
4360 | return NULL; | |
4361 | } | |
4362 | ||
4363 | static void | |
4364 | add_router_lb_flow(struct hmap *lflows, struct ovn_datapath *od, | |
4365 | struct ds *match, struct ds *actions, int priority, | |
6f39e18d | 4366 | const char *lb_force_snat_ip, char *backend_ips, |
485d373b | 4367 | bool is_udp, int addr_family) |
65d8810c GS |
4368 | { |
4369 | /* A match and actions for new connections. */ | |
4370 | char *new_match = xasprintf("ct.new && %s", ds_cstr(match)); | |
4371 | if (lb_force_snat_ip) { | |
4372 | char *new_actions = xasprintf("flags.force_snat_for_lb = 1; %s", | |
4373 | ds_cstr(actions)); | |
4374 | ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, priority, new_match, | |
4375 | new_actions); | |
4376 | free(new_actions); | |
4377 | } else { | |
4378 | ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, priority, new_match, | |
4379 | ds_cstr(actions)); | |
4380 | } | |
4381 | ||
4382 | /* A match and actions for established connections. */ | |
4383 | char *est_match = xasprintf("ct.est && %s", ds_cstr(match)); | |
4384 | if (lb_force_snat_ip) { | |
4385 | ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, priority, est_match, | |
4386 | "flags.force_snat_for_lb = 1; ct_dnat;"); | |
4387 | } else { | |
4388 | ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, priority, est_match, | |
4389 | "ct_dnat;"); | |
4390 | } | |
4391 | ||
4392 | free(new_match); | |
4393 | free(est_match); | |
6f39e18d | 4394 | |
485d373b MM |
4395 | if (!od->l3dgw_port || !od->l3redirect_port || !backend_ips |
4396 | || addr_family != AF_INET) { | |
6f39e18d NS |
4397 | return; |
4398 | } | |
4399 | ||
4400 | /* Add logical flows to UNDNAT the load balanced reverse traffic in | |
4401 | * the router egress pipleine stage - S_ROUTER_OUT_UNDNAT if the logical | |
4402 | * router has a gateway router port associated. | |
4403 | */ | |
4404 | struct ds undnat_match = DS_EMPTY_INITIALIZER; | |
4405 | ds_put_cstr(&undnat_match, "ip4 && ("); | |
4406 | char *start, *next, *ip_str; | |
4407 | start = next = xstrdup(backend_ips); | |
4408 | ip_str = strsep(&next, ","); | |
4409 | bool backend_ips_found = false; | |
4410 | while (ip_str && ip_str[0]) { | |
4411 | char *ip_address = NULL; | |
4412 | uint16_t port = 0; | |
485d373b MM |
4413 | int addr_family; |
4414 | ip_address_and_port_from_lb_key(ip_str, &ip_address, &port, | |
4415 | &addr_family); | |
6f39e18d NS |
4416 | if (!ip_address) { |
4417 | break; | |
4418 | } | |
4419 | ||
4420 | ds_put_format(&undnat_match, "(ip4.src == %s", ip_address); | |
4421 | free(ip_address); | |
4422 | if (port) { | |
4423 | ds_put_format(&undnat_match, " && %s.src == %d) || ", | |
4424 | is_udp ? "udp" : "tcp", port); | |
4425 | } else { | |
4426 | ds_put_cstr(&undnat_match, ") || "); | |
4427 | } | |
4428 | ip_str = strsep(&next, ","); | |
4429 | backend_ips_found = true; | |
4430 | } | |
4431 | ||
4432 | free(start); | |
4433 | if (!backend_ips_found) { | |
4434 | ds_destroy(&undnat_match); | |
4435 | return; | |
4436 | } | |
4437 | ds_chomp(&undnat_match, ' '); | |
4438 | ds_chomp(&undnat_match, '|'); | |
4439 | ds_chomp(&undnat_match, '|'); | |
4440 | ds_chomp(&undnat_match, ' '); | |
4441 | ds_put_format(&undnat_match, ") && outport == %s && " | |
4442 | "is_chassis_resident(%s)", od->l3dgw_port->json_key, | |
4443 | od->l3redirect_port->json_key); | |
4444 | if (lb_force_snat_ip) { | |
4445 | ovn_lflow_add(lflows, od, S_ROUTER_OUT_UNDNAT, 120, | |
4446 | ds_cstr(&undnat_match), | |
4447 | "flags.force_snat_for_lb = 1; ct_dnat;"); | |
4448 | } else { | |
4449 | ovn_lflow_add(lflows, od, S_ROUTER_OUT_UNDNAT, 120, | |
4450 | ds_cstr(&undnat_match), "ct_dnat;"); | |
4451 | } | |
4452 | ||
4453 | ds_destroy(&undnat_match); | |
65d8810c GS |
4454 | } |
4455 | ||
9975d7be BP |
4456 | static void |
4457 | build_lrouter_flows(struct hmap *datapaths, struct hmap *ports, | |
4458 | struct hmap *lflows) | |
4459 | { | |
4460 | /* This flow table structure is documented in ovn-northd(8), so please | |
4461 | * update ovn-northd.8.xml if you change anything. */ | |
4462 | ||
09b39248 JP |
4463 | struct ds match = DS_EMPTY_INITIALIZER; |
4464 | struct ds actions = DS_EMPTY_INITIALIZER; | |
4465 | ||
9975d7be BP |
4466 | /* Logical router ingress table 0: Admission control framework. */ |
4467 | struct ovn_datapath *od; | |
4468 | HMAP_FOR_EACH (od, key_node, datapaths) { | |
4469 | if (!od->nbr) { | |
4470 | continue; | |
4471 | } | |
4472 | ||
4473 | /* Logical VLANs not supported. | |
4474 | * Broadcast/multicast source address is invalid. */ | |
4475 | ovn_lflow_add(lflows, od, S_ROUTER_IN_ADMISSION, 100, | |
4476 | "vlan.present || eth.src[40]", "drop;"); | |
4477 | } | |
4478 | ||
4479 | /* Logical router ingress table 0: match (priority 50). */ | |
4480 | struct ovn_port *op; | |
4481 | HMAP_FOR_EACH (op, key_node, ports) { | |
0ee00741 | 4482 | if (!op->nbrp) { |
9975d7be BP |
4483 | continue; |
4484 | } | |
4485 | ||
0ee00741 | 4486 | if (!lrport_is_enabled(op->nbrp)) { |
9975d7be BP |
4487 | /* Drop packets from disabled logical ports (since logical flow |
4488 | * tables are default-drop). */ | |
4489 | continue; | |
4490 | } | |
4491 | ||
41a15b71 MS |
4492 | if (op->derived) { |
4493 | /* No ingress packets should be received on a chassisredirect | |
4494 | * port. */ | |
4495 | continue; | |
4496 | } | |
4497 | ||
4498 | ds_clear(&match); | |
4499 | ds_put_format(&match, "eth.mcast && inport == %s", op->json_key); | |
4500 | ovn_lflow_add(lflows, op->od, S_ROUTER_IN_ADMISSION, 50, | |
4501 | ds_cstr(&match), "next;"); | |
4502 | ||
09b39248 | 4503 | ds_clear(&match); |
41a15b71 | 4504 | ds_put_format(&match, "eth.dst == %s && inport == %s", |
4685e523 | 4505 | op->lrp_networks.ea_s, op->json_key); |
41a15b71 MS |
4506 | if (op->od->l3dgw_port && op == op->od->l3dgw_port |
4507 | && op->od->l3redirect_port) { | |
4508 | /* Traffic with eth.dst = l3dgw_port->lrp_networks.ea_s | |
4509 | * should only be received on the "redirect-chassis". */ | |
4510 | ds_put_format(&match, " && is_chassis_resident(%s)", | |
4511 | op->od->l3redirect_port->json_key); | |
4512 | } | |
9975d7be | 4513 | ovn_lflow_add(lflows, op->od, S_ROUTER_IN_ADMISSION, 50, |
09b39248 | 4514 | ds_cstr(&match), "next;"); |
9975d7be BP |
4515 | } |
4516 | ||
4517 | /* Logical router ingress table 1: IP Input. */ | |
78aab811 | 4518 | HMAP_FOR_EACH (od, key_node, datapaths) { |
9975d7be BP |
4519 | if (!od->nbr) { |
4520 | continue; | |
4521 | } | |
4522 | ||
4523 | /* L3 admission control: drop multicast and broadcast source, localhost | |
4524 | * source or destination, and zero network source or destination | |
4525 | * (priority 100). */ | |
4526 | ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 100, | |
4527 | "ip4.mcast || " | |
4528 | "ip4.src == 255.255.255.255 || " | |
4529 | "ip4.src == 127.0.0.0/8 || " | |
4530 | "ip4.dst == 127.0.0.0/8 || " | |
4531 | "ip4.src == 0.0.0.0/8 || " | |
4532 | "ip4.dst == 0.0.0.0/8", | |
4533 | "drop;"); | |
4534 | ||
0bac7164 BP |
4535 | /* ARP reply handling. Use ARP replies to populate the logical |
4536 | * router's ARP table. */ | |
4537 | ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 90, "arp.op == 2", | |
4538 | "put_arp(inport, arp.spa, arp.sha);"); | |
4539 | ||
9975d7be BP |
4540 | /* Drop Ethernet local broadcast. By definition this traffic should | |
4541 | * not be forwarded. */ | |
4542 | ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 50, | |
4543 | "eth.bcast", "drop;"); | |
4544 | ||
9975d7be BP |
4545 | /* TTL discard. |
4546 | * | |
4547 | * XXX Need to send ICMP time exceeded if !ip.later_frag. */ | |
09b39248 JP |
4548 | ds_clear(&match); |
4549 | ds_put_cstr(&match, "ip4 && ip.ttl == {0, 1}"); | |
4550 | ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 30, | |
4551 | ds_cstr(&match), "drop;"); | |
9975d7be | 4552 | |
c34a87b6 JP |
4553 | /* ND advertisement handling. Use advertisements to populate |
4554 | * the logical router's ARP/ND table. */ | |
4555 | ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 90, "nd_na", | |
4556 | "put_nd(inport, nd.target, nd.tll);"); | |
4557 | ||
4558 | /* Learn from neighbor solicitations that were not directed at | |
4559 | * us. (A priority-90 flow will respond to requests to us and | |
4560 | * learn the sender's mac address.) */ | |
4561 | ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 80, "nd_ns", | |
4562 | "put_nd(inport, ip6.src, nd.sll);"); | |
4563 | ||
9975d7be BP |
4564 | /* Pass other traffic not already handled to the next table for |
4565 | * routing. */ | |
4566 | ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 0, "1", "next;"); | |
78aab811 JP |
4567 | } |
4568 | ||
6fdb7cd6 | 4569 | /* Logical router ingress table 1: IP Input for IPv4. */ |
9975d7be | 4570 | HMAP_FOR_EACH (op, key_node, ports) { |
0ee00741 | 4571 | if (!op->nbrp) { |
9975d7be BP |
4572 | continue; |
4573 | } | |
4574 | ||
41a15b71 MS |
4575 | if (op->derived) { |
4576 | /* No ingress packets are accepted on a chassisredirect | |
4577 | * port, so no need to program flows for that port. */ | |
4578 | continue; | |
4579 | } | |
9975d7be | 4580 | |
6fdb7cd6 JP |
4581 | if (op->lrp_networks.n_ipv4_addrs) { |
4582 | /* L3 admission control: drop packets that originate from an | |
4583 | * IPv4 address owned by the router or a broadcast address | |
4584 | * known to the router (priority 100). */ | |
4585 | ds_clear(&match); | |
4586 | ds_put_cstr(&match, "ip4.src == "); | |
4587 | op_put_v4_networks(&match, op, true); | |
06a26dd2 | 4588 | ds_put_cstr(&match, " && "REGBIT_EGRESS_LOOPBACK" == 0"); |
6fdb7cd6 JP |
4589 | ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 100, |
4590 | ds_cstr(&match), "drop;"); | |
4591 | ||
4592 | /* ICMP echo reply. These flows reply to ICMP echo requests | |
4593 | * received for the router's IP address. Since packets only | |
4594 | * get here as part of the logical router datapath, the inport | |
4595 | * (i.e. the incoming locally attached net) does not matter. | |
4596 | * The ip.ttl also does not matter (RFC1812 section 4.2.2.9) */ | |
4597 | ds_clear(&match); | |
4598 | ds_put_cstr(&match, "ip4.dst == "); | |
4599 | op_put_v4_networks(&match, op, false); | |
4600 | ds_put_cstr(&match, " && icmp4.type == 8 && icmp4.code == 0"); | |
4601 | ||
4602 | ds_clear(&actions); | |
4603 | ds_put_format(&actions, | |
4604 | "ip4.dst <-> ip4.src; " | |
4605 | "ip.ttl = 255; " | |
4606 | "icmp4.type = 0; " | |
bf143492 | 4607 | "flags.loopback = 1; " |
6fdb7cd6 JP |
4608 | "next; "); |
4609 | ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90, | |
4610 | ds_cstr(&match), ds_cstr(&actions)); | |
4611 | } | |
dd7652e6 | 4612 | |
9975d7be BP |
4613 | /* ARP reply. These flows reply to ARP requests for the router's own |
4614 | * IP address. */ | |
4685e523 JP |
4615 | for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) { |
4616 | ds_clear(&match); | |
4617 | ds_put_format(&match, | |
4618 | "inport == %s && arp.tpa == %s && arp.op == 1", | |
4619 | op->json_key, op->lrp_networks.ipv4_addrs[i].addr_s); | |
41a15b71 MS |
4620 | if (op->od->l3dgw_port && op == op->od->l3dgw_port |
4621 | && op->od->l3redirect_port) { | |
4622 | /* Traffic with eth.src = l3dgw_port->lrp_networks.ea_s | |
4623 | * should only be sent from the "redirect-chassis", so that | |
4624 | * upstream MAC learning points to the "redirect-chassis". | |
4625 | * Also need to avoid generation of multiple ARP responses | |
4626 | * from different chassis. */ | |
4627 | ds_put_format(&match, " && is_chassis_resident(%s)", | |
4628 | op->od->l3redirect_port->json_key); | |
4629 | } | |
4685e523 JP |
4630 | |
4631 | ds_clear(&actions); | |
4632 | ds_put_format(&actions, | |
4633 | "eth.dst = eth.src; " | |
4634 | "eth.src = %s; " | |
4635 | "arp.op = 2; /* ARP reply */ " | |
4636 | "arp.tha = arp.sha; " | |
4637 | "arp.sha = %s; " | |
4638 | "arp.tpa = arp.spa; " | |
4639 | "arp.spa = %s; " | |
4640 | "outport = %s; " | |
bf143492 | 4641 | "flags.loopback = 1; " |
4685e523 JP |
4642 | "output;", |
4643 | op->lrp_networks.ea_s, | |
4644 | op->lrp_networks.ea_s, | |
4645 | op->lrp_networks.ipv4_addrs[i].addr_s, | |
4646 | op->json_key); | |
4647 | ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90, | |
4648 | ds_cstr(&match), ds_cstr(&actions)); | |
4649 | } | |
9975d7be | 4650 | |
cc4583aa GS |
4651 | /* A set to hold all load-balancer vips that need ARP responses. */ |
4652 | struct sset all_ips = SSET_INITIALIZER(&all_ips); | |
485d373b MM |
4653 | int addr_family; |
4654 | get_router_load_balancer_ips(op->od, &all_ips, &addr_family); | |
cc4583aa GS |
4655 | |
4656 | const char *ip_address; | |
4657 | SSET_FOR_EACH(ip_address, &all_ips) { | |
cc4583aa | 4658 | ds_clear(&match); |
485d373b MM |
4659 | if (addr_family == AF_INET) { |
4660 | ds_put_format(&match, | |
4661 | "inport == %s && arp.tpa == %s && arp.op == 1", | |
4662 | op->json_key, ip_address); | |
4663 | } else { | |
4664 | ds_put_format(&match, | |
4665 | "inport == %s && nd_ns && nd.target == %s", | |
4666 | op->json_key, ip_address); | |
4667 | } | |
cc4583aa GS |
4668 | |
4669 | ds_clear(&actions); | |
485d373b MM |
4670 | if (addr_family == AF_INET) { |
4671 | ds_put_format(&actions, | |
cc4583aa GS |
4672 | "eth.dst = eth.src; " |
4673 | "eth.src = %s; " | |
4674 | "arp.op = 2; /* ARP reply */ " | |
4675 | "arp.tha = arp.sha; " | |
4676 | "arp.sha = %s; " | |
4677 | "arp.tpa = arp.spa; " | |
485d373b | 4678 | "arp.spa = %s; " |
cc4583aa GS |
4679 | "outport = %s; " |
4680 | "flags.loopback = 1; " | |
4681 | "output;", | |
4682 | op->lrp_networks.ea_s, | |
4683 | op->lrp_networks.ea_s, | |
485d373b | 4684 | ip_address, |
cc4583aa | 4685 | op->json_key); |
485d373b MM |
4686 | } else { |
4687 | ds_put_format(&actions, | |
4688 | "nd_na { " | |
4689 | "eth.src = %s; " | |
4690 | "ip6.src = %s; " | |
4691 | "nd.target = %s; " | |
4692 | "nd.tll = %s; " | |
4693 | "outport = inport; " | |
4694 | "flags.loopback = 1; " | |
4695 | "output; " | |
4696 | "};", | |
4697 | op->lrp_networks.ea_s, | |
4698 | ip_address, | |
4699 | ip_address, | |
4700 | op->lrp_networks.ea_s); | |
4701 | } | |
cc4583aa GS |
4702 | ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90, |
4703 | ds_cstr(&match), ds_cstr(&actions)); | |
4704 | } | |
4705 | ||
4706 | sset_destroy(&all_ips); | |
4707 | ||
65d8810c GS |
4708 | /* A gateway router can have 2 SNAT IP addresses to force DNATed and |
4709 | * LBed traffic respectively to be SNATed. In addition, there can be | |
4710 | * a number of SNAT rules in the NAT table. */ | |
4711 | ovs_be32 *snat_ips = xmalloc(sizeof *snat_ips * | |
4712 | (op->od->nbr->n_nat + 2)); | |
dde5ea7b | 4713 | size_t n_snat_ips = 0; |
65d8810c GS |
4714 | |
4715 | ovs_be32 snat_ip; | |
4716 | const char *dnat_force_snat_ip = get_force_snat_ip(op->od, "dnat", | |
4717 | &snat_ip); | |
4718 | if (dnat_force_snat_ip) { | |
4719 | snat_ips[n_snat_ips++] = snat_ip; | |
4720 | } | |
4721 | ||
4722 | const char *lb_force_snat_ip = get_force_snat_ip(op->od, "lb", | |
4723 | &snat_ip); | |
4724 | if (lb_force_snat_ip) { | |
4725 | snat_ips[n_snat_ips++] = snat_ip; | |
4726 | } | |
4727 | ||
de297547 GS |
4728 | for (int i = 0; i < op->od->nbr->n_nat; i++) { |
4729 | const struct nbrec_nat *nat; | |
4730 | ||
4731 | nat = op->od->nbr->nat[i]; | |
4732 | ||
de297547 GS |
4733 | ovs_be32 ip; |
4734 | if (!ip_parse(nat->external_ip, &ip) || !ip) { | |
4735 | static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1); | |
dde5ea7b | 4736 | VLOG_WARN_RL(&rl, "bad ip address %s in nat configuration " |
de297547 GS |
4737 | "for router %s", nat->external_ip, op->key); |
4738 | continue; | |
4739 | } | |
4740 | ||
dde5ea7b GS |
4741 | if (!strcmp(nat->type, "snat")) { |
4742 | snat_ips[n_snat_ips++] = ip; | |
4743 | continue; | |
4744 | } | |
4745 | ||
4746 | /* ARP handling for external IP addresses. | |
4747 | * | |
4748 | * DNAT IP addresses are external IP addresses that need ARP | |
4749 | * handling. */ | |
09b39248 JP |
4750 | ds_clear(&match); |
4751 | ds_put_format(&match, | |
4752 | "inport == %s && arp.tpa == "IP_FMT" && arp.op == 1", | |
4753 | op->json_key, IP_ARGS(ip)); | |
4685e523 | 4754 | |
09b39248 JP |
4755 | ds_clear(&actions); |
4756 | ds_put_format(&actions, | |
de297547 | 4757 | "eth.dst = eth.src; " |
de297547 | 4758 | "arp.op = 2; /* ARP reply */ " |
06a26dd2 MS |
4759 | "arp.tha = arp.sha; "); |
4760 | ||
4761 | if (op->od->l3dgw_port && op == op->od->l3dgw_port) { | |
4762 | struct eth_addr mac; | |
4763 | if (nat->external_mac && | |
4764 | eth_addr_from_string(nat->external_mac, &mac) | |
4765 | && nat->logical_port) { | |
4766 | /* distributed NAT case, use nat->external_mac */ | |
4767 | ds_put_format(&actions, | |
4768 | "eth.src = "ETH_ADDR_FMT"; " | |
4769 | "arp.sha = "ETH_ADDR_FMT"; ", | |
4770 | ETH_ADDR_ARGS(mac), | |
4771 | ETH_ADDR_ARGS(mac)); | |
4772 | /* Traffic with eth.src = nat->external_mac should only be | |
4773 | * sent from the chassis where nat->logical_port is | |
4774 | * resident, so that upstream MAC learning points to the | |
4775 | * correct chassis. Also need to avoid generation of | |
4776 | * multiple ARP responses from different chassis. */ | |
4777 | ds_put_format(&match, " && is_chassis_resident(\"%s\")", | |
4778 | nat->logical_port); | |
4779 | } else { | |
4780 | ds_put_format(&actions, | |
4781 | "eth.src = %s; " | |
4782 | "arp.sha = %s; ", | |
4783 | op->lrp_networks.ea_s, | |
4784 | op->lrp_networks.ea_s); | |
4785 | /* Traffic with eth.src = l3dgw_port->lrp_networks.ea_s | |
4786 | * should only be sent from the "redirect-chassis", so that | |
4787 | * upstream MAC learning points to the "redirect-chassis". | |
4788 | * Also need to avoid generation of multiple ARP responses | |
4789 | * from different chassis. */ | |
4790 | if (op->od->l3redirect_port) { | |
4791 | ds_put_format(&match, " && is_chassis_resident(%s)", | |
4792 | op->od->l3redirect_port->json_key); | |
4793 | } | |
4794 | } | |
4795 | } else { | |
4796 | ds_put_format(&actions, | |
4797 | "eth.src = %s; " | |
4798 | "arp.sha = %s; ", | |
4799 | op->lrp_networks.ea_s, | |
4800 | op->lrp_networks.ea_s); | |
4801 | } | |
4802 | ds_put_format(&actions, | |
de297547 GS |
4803 | "arp.tpa = arp.spa; " |
4804 | "arp.spa = "IP_FMT"; " | |
4805 | "outport = %s; " | |
bf143492 | 4806 | "flags.loopback = 1; " |
de297547 | 4807 | "output;", |
de297547 GS |
4808 | IP_ARGS(ip), |
4809 | op->json_key); | |
4810 | ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90, | |
09b39248 | 4811 | ds_cstr(&match), ds_cstr(&actions)); |
de297547 GS |
4812 | } |
4813 | ||
4685e523 JP |
4814 | ds_clear(&match); |
4815 | ds_put_cstr(&match, "ip4.dst == {"); | |
4816 | bool has_drop_ips = false; | |
4817 | for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) { | |
49da9ec0 | 4818 | bool snat_ip_is_router_ip = false; |
dde5ea7b GS |
4819 | for (int j = 0; j < n_snat_ips; j++) { |
4820 | /* Packets to SNAT IPs should not be dropped. */ | |
4821 | if (op->lrp_networks.ipv4_addrs[i].addr == snat_ips[j]) { | |
49da9ec0 CSV |
4822 | snat_ip_is_router_ip = true; |
4823 | break; | |
4685e523 | 4824 | } |
4ef48e9d | 4825 | } |
49da9ec0 CSV |
4826 | if (snat_ip_is_router_ip) { |
4827 | continue; | |
4828 | } | |
4685e523 JP |
4829 | ds_put_format(&match, "%s, ", |
4830 | op->lrp_networks.ipv4_addrs[i].addr_s); | |
4831 | has_drop_ips = true; | |
4ef48e9d | 4832 | } |
4685e523 JP |
4833 | ds_chomp(&match, ' '); |
4834 | ds_chomp(&match, ','); | |
4835 | ds_put_cstr(&match, "}"); | |
4ef48e9d | 4836 | |
4685e523 JP |
4837 | if (has_drop_ips) { |
4838 | /* Drop IP traffic to this router. */ | |
09b39248 JP |
4839 | ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 60, |
4840 | ds_cstr(&match), "drop;"); | |
4ef48e9d | 4841 | } |
4685e523 | 4842 | |
dde5ea7b | 4843 | free(snat_ips); |
9975d7be BP |
4844 | } |
4845 | ||
6fdb7cd6 JP |
4846 | /* Logical router ingress table 1: IP Input for IPv6. */ |
4847 | HMAP_FOR_EACH (op, key_node, ports) { | |
4848 | if (!op->nbrp) { | |
4849 | continue; | |
4850 | } | |
4851 | ||
41a15b71 MS |
4852 | if (op->derived) { |
4853 | /* No ingress packets are accepted on a chassisredirect | |
4854 | * port, so no need to program flows for that port. */ | |
4855 | continue; | |
4856 | } | |
4857 | ||
6fdb7cd6 JP |
4858 | if (op->lrp_networks.n_ipv6_addrs) { |
4859 | /* L3 admission control: drop packets that originate from an | |
4860 | * IPv6 address owned by the router (priority 100). */ | |
4861 | ds_clear(&match); | |
4862 | ds_put_cstr(&match, "ip6.src == "); | |
4863 | op_put_v6_networks(&match, op); | |
4864 | ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 100, | |
4865 | ds_cstr(&match), "drop;"); | |
4866 | ||
4867 | /* ICMPv6 echo reply. These flows reply to echo requests | |
4868 | * received for the router's IP address. */ | |
4869 | ds_clear(&match); | |
4870 | ds_put_cstr(&match, "ip6.dst == "); | |
4871 | op_put_v6_networks(&match, op); | |
4872 | ds_put_cstr(&match, " && icmp6.type == 128 && icmp6.code == 0"); | |
4873 | ||
4874 | ds_clear(&actions); | |
4875 | ds_put_cstr(&actions, | |
4876 | "ip6.dst <-> ip6.src; " | |
4877 | "ip.ttl = 255; " | |
4878 | "icmp6.type = 129; " | |
bf143492 | 4879 | "flags.loopback = 1; " |
6fdb7cd6 JP |
4880 | "next; "); |
4881 | ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90, | |
4882 | ds_cstr(&match), ds_cstr(&actions)); | |
4883 | ||
4884 | /* Drop IPv6 traffic to this router. */ | |
4885 | ds_clear(&match); | |
4886 | ds_put_cstr(&match, "ip6.dst == "); | |
4887 | op_put_v6_networks(&match, op); | |
4888 | ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 60, | |
4889 | ds_cstr(&match), "drop;"); | |
4890 | } | |
4891 | ||
4892 | /* ND reply. These flows reply to ND solicitations for the | |
4893 | * router's own IP address. */ | |
4894 | for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) { | |
4895 | ds_clear(&match); | |
4896 | ds_put_format(&match, | |
4897 | "inport == %s && nd_ns && ip6.dst == {%s, %s} " | |
4898 | "&& nd.target == %s", | |
4899 | op->json_key, | |
4900 | op->lrp_networks.ipv6_addrs[i].addr_s, | |
4901 | op->lrp_networks.ipv6_addrs[i].sn_addr_s, | |
4902 | op->lrp_networks.ipv6_addrs[i].addr_s); | |
41a15b71 MS |
4903 | if (op->od->l3dgw_port && op == op->od->l3dgw_port |
4904 | && op->od->l3redirect_port) { | |
4905 | /* Traffic with eth.src = l3dgw_port->lrp_networks.ea_s | |
4906 | * should only be sent from the "redirect-chassis", so that | |
4907 | * upstream MAC learning points to the "redirect-chassis". | |
4908 | * Also need to avoid generation of multiple ND replies | |
4909 | * from different chassis. */ | |
4910 | ds_put_format(&match, " && is_chassis_resident(%s)", | |
4911 | op->od->l3redirect_port->json_key); | |
4912 | } | |
6fdb7cd6 JP |
4913 | |
4914 | ds_clear(&actions); | |
4915 | ds_put_format(&actions, | |
c34a87b6 | 4916 | "put_nd(inport, ip6.src, nd.sll); " |
6fdb7cd6 JP |
4917 | "nd_na { " |
4918 | "eth.src = %s; " | |
4919 | "ip6.src = %s; " | |
4920 | "nd.target = %s; " | |
4921 | "nd.tll = %s; " | |
4922 | "outport = inport; " | |
bf143492 | 4923 | "flags.loopback = 1; " |
6fdb7cd6 JP |
4924 | "output; " |
4925 | "};", | |
4926 | op->lrp_networks.ea_s, | |
4927 | op->lrp_networks.ipv6_addrs[i].addr_s, | |
4928 | op->lrp_networks.ipv6_addrs[i].addr_s, | |
4929 | op->lrp_networks.ea_s); | |
4930 | ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90, | |
4931 | ds_cstr(&match), ds_cstr(&actions)); | |
4932 | } | |
4933 | } | |
4934 | ||
06a26dd2 | 4935 | /* NAT, Defrag and load balancing. */ |
de297547 GS |
4936 | HMAP_FOR_EACH (od, key_node, datapaths) { |
4937 | if (!od->nbr) { | |
4938 | continue; | |
4939 | } | |
4940 | ||
4941 | /* Packets are allowed by default. */ | |
cc4583aa | 4942 | ovn_lflow_add(lflows, od, S_ROUTER_IN_DEFRAG, 0, "1", "next;"); |
de297547 GS |
4943 | ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 0, "1", "next;"); |
4944 | ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT, 0, "1", "next;"); | |
4945 | ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 0, "1", "next;"); | |
06a26dd2 MS |
4946 | ovn_lflow_add(lflows, od, S_ROUTER_OUT_UNDNAT, 0, "1", "next;"); |
4947 | ovn_lflow_add(lflows, od, S_ROUTER_OUT_EGR_LOOP, 0, "1", "next;"); | |
de297547 | 4948 | |
06a26dd2 MS |
4949 | /* NAT rules are only valid on Gateway routers and routers with |
4950 | * l3dgw_port (router has a port with "redirect-chassis" | |
4951 | * specified). */ | |
4952 | if (!smap_get(&od->nbr->options, "chassis") && !od->l3dgw_port) { | |
de297547 GS |
4953 | continue; |
4954 | } | |
4955 | ||
65d8810c GS |
4956 | ovs_be32 snat_ip; |
4957 | const char *dnat_force_snat_ip = get_force_snat_ip(od, "dnat", | |
4958 | &snat_ip); | |
4959 | const char *lb_force_snat_ip = get_force_snat_ip(od, "lb", | |
4960 | &snat_ip); | |
4961 | ||
de297547 GS |
4962 | for (int i = 0; i < od->nbr->n_nat; i++) { |
4963 | const struct nbrec_nat *nat; | |
4964 | ||
4965 | nat = od->nbr->nat[i]; | |
4966 | ||
4967 | ovs_be32 ip, mask; | |
4968 | ||
4969 | char *error = ip_parse_masked(nat->external_ip, &ip, &mask); | |
4970 | if (error || mask != OVS_BE32_MAX) { | |
4971 | static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1); | |
4972 | VLOG_WARN_RL(&rl, "bad external ip %s for nat", | |
4973 | nat->external_ip); | |
4974 | free(error); | |
4975 | continue; | |
4976 | } | |
4977 | ||
4978 | /* Check the validity of nat->logical_ip. 'logical_ip' can | |
4979 | * be a subnet when the type is "snat". */ | |
4980 | error = ip_parse_masked(nat->logical_ip, &ip, &mask); | |
4981 | if (!strcmp(nat->type, "snat")) { | |
4982 | if (error) { | |
4983 | static struct vlog_rate_limit rl = | |
4984 | VLOG_RATE_LIMIT_INIT(5, 1); | |
4985 | VLOG_WARN_RL(&rl, "bad ip network or ip %s for snat " | |
4986 | "in router "UUID_FMT"", | |
4987 | nat->logical_ip, UUID_ARGS(&od->key)); | |
4988 | free(error); | |
4989 | continue; | |
4990 | } | |
4991 | } else { | |
4992 | if (error || mask != OVS_BE32_MAX) { | |
4993 | static struct vlog_rate_limit rl = | |
4994 | VLOG_RATE_LIMIT_INIT(5, 1); | |
4995 | VLOG_WARN_RL(&rl, "bad ip %s for dnat in router " | |
4996 | ""UUID_FMT"", nat->logical_ip, UUID_ARGS(&od->key)); | |
4997 | free(error); | |
4998 | continue; | |
4999 | } | |
5000 | } | |
5001 | ||
06a26dd2 MS |
5002 | /* For distributed router NAT, determine whether this NAT rule |
5003 | * satisfies the conditions for distributed NAT processing. */ | |
5004 | bool distributed = false; | |
5005 | struct eth_addr mac; | |
5006 | if (od->l3dgw_port && !strcmp(nat->type, "dnat_and_snat") && | |
5007 | nat->logical_port && nat->external_mac) { | |
5008 | if (eth_addr_from_string(nat->external_mac, &mac)) { | |
5009 | distributed = true; | |
5010 | } else { | |
5011 | static struct vlog_rate_limit rl = | |
5012 | VLOG_RATE_LIMIT_INIT(5, 1); | |
5013 | VLOG_WARN_RL(&rl, "bad mac %s for dnat in router " | |
5014 | ""UUID_FMT"", nat->external_mac, UUID_ARGS(&od->key)); | |
5015 | continue; | |
5016 | } | |
5017 | } | |
5018 | ||
de297547 GS |
5019 | /* Ingress UNSNAT table: It is for already established connections' |
5020 | * reverse traffic. i.e., SNAT has already been done in egress | |
5021 | * pipeline and now the packet has entered the ingress pipeline as | |
5022 | * part of a reply. We undo the SNAT here. | |
5023 | * | |
5024 | * Undoing SNAT has to happen before DNAT processing. This is | |
5025 | * because when the packet was DNATed in ingress pipeline, it did | |
5026 | * not know about the possibility of eventual additional SNAT in | |
5027 | * egress pipeline. */ | |
5028 | if (!strcmp(nat->type, "snat") | |
5029 | || !strcmp(nat->type, "dnat_and_snat")) { | |
06a26dd2 MS |
5030 | if (!od->l3dgw_port) { |
5031 | /* Gateway router. */ | |
5032 | ds_clear(&match); | |
5033 | ds_put_format(&match, "ip && ip4.dst == %s", | |
5034 | nat->external_ip); | |
5035 | ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 90, | |
5036 | ds_cstr(&match), "ct_snat; next;"); | |
5037 | } else { | |
5038 | /* Distributed router. */ | |
5039 | ||
5040 | /* Traffic received on l3dgw_port is subject to NAT. */ | |
5041 | ds_clear(&match); | |
5042 | ds_put_format(&match, "ip && ip4.dst == %s" | |
5043 | " && inport == %s", | |
5044 | nat->external_ip, | |
5045 | od->l3dgw_port->json_key); | |
5046 | if (!distributed && od->l3redirect_port) { | |
5047 | /* Flows for NAT rules that are centralized are only | |
5048 | * programmed on the "redirect-chassis". */ | |
5049 | ds_put_format(&match, " && is_chassis_resident(%s)", | |
5050 | od->l3redirect_port->json_key); | |
5051 | } | |
5052 | ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 100, | |
5053 | ds_cstr(&match), "ct_snat;"); | |
5054 | ||
5055 | /* Traffic received on other router ports must be | |
5056 | * redirected to the central instance of the l3dgw_port | |
5057 | * for NAT processing. */ | |
5058 | ds_clear(&match); | |
5059 | ds_put_format(&match, "ip && ip4.dst == %s", | |
5060 | nat->external_ip); | |
5061 | ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 50, | |
5062 | ds_cstr(&match), | |
5063 | REGBIT_NAT_REDIRECT" = 1; next;"); | |
5064 | } | |
de297547 GS |
5065 | } |
5066 | ||
5067 | /* Ingress DNAT table: Packets enter the pipeline with destination | |
5068 | * IP address that needs to be DNATted from a external IP address | |
5069 | * to a logical IP address. */ | |
5070 | if (!strcmp(nat->type, "dnat") | |
5071 | || !strcmp(nat->type, "dnat_and_snat")) { | |
06a26dd2 MS |
5072 | if (!od->l3dgw_port) { |
5073 | /* Gateway router. */ | |
5074 | /* Packet when it goes from the initiator to destination. | |
5075 | * We need to set flags.loopback because the router can | |
5076 | * send the packet back through the same interface. */ | |
5077 | ds_clear(&match); | |
5078 | ds_put_format(&match, "ip && ip4.dst == %s", | |
5079 | nat->external_ip); | |
5080 | ds_clear(&actions); | |
5081 | if (dnat_force_snat_ip) { | |
5082 | /* Indicate to the future tables that a DNAT has taken | |
5083 | * place and a force SNAT needs to be done in the | |
5084 | * Egress SNAT table. */ | |
5085 | ds_put_format(&actions, | |
5086 | "flags.force_snat_for_dnat = 1; "); | |
5087 | } | |
5088 | ds_put_format(&actions, "flags.loopback = 1; ct_dnat(%s);", | |
5089 | nat->logical_ip); | |
5090 | ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 100, | |
5091 | ds_cstr(&match), ds_cstr(&actions)); | |
5092 | } else { | |
5093 | /* Distributed router. */ | |
5094 | ||
5095 | /* Traffic received on l3dgw_port is subject to NAT. */ | |
5096 | ds_clear(&match); | |
5097 | ds_put_format(&match, "ip && ip4.dst == %s" | |
5098 | " && inport == %s", | |
5099 | nat->external_ip, | |
5100 | od->l3dgw_port->json_key); | |
5101 | if (!distributed && od->l3redirect_port) { | |
5102 | /* Flows for NAT rules that are centralized are only | |
5103 | * programmed on the "redirect-chassis". */ | |
5104 | ds_put_format(&match, " && is_chassis_resident(%s)", | |
5105 | od->l3redirect_port->json_key); | |
5106 | } | |
5107 | ds_clear(&actions); | |
5108 | ds_put_format(&actions, "ct_dnat(%s);", | |
5109 | nat->logical_ip); | |
5110 | ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 100, | |
5111 | ds_cstr(&match), ds_cstr(&actions)); | |
5112 | ||
5113 | /* Traffic received on other router ports must be | |
5114 | * redirected to the central instance of the l3dgw_port | |
5115 | * for NAT processing. */ | |
5116 | ds_clear(&match); | |
5117 | ds_put_format(&match, "ip && ip4.dst == %s", | |
5118 | nat->external_ip); | |
5119 | ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 50, | |
5120 | ds_cstr(&match), | |
5121 | REGBIT_NAT_REDIRECT" = 1; next;"); | |
5122 | } | |
5123 | } | |
5124 | ||
5125 | /* Egress UNDNAT table: It is for already established connections' | |
5126 | * reverse traffic. i.e., DNAT has already been done in ingress | |
5127 | * pipeline and now the packet has entered the egress pipeline as | |
5128 | * part of a reply. We undo the DNAT here. | |
5129 | * | |
5130 | * Note that this only applies for NAT on a distributed router. | |
5131 | * Undo DNAT on a gateway router is done in the ingress DNAT | |
5132 | * pipeline stage. */ | |
5133 | if (od->l3dgw_port && (!strcmp(nat->type, "dnat") | |
5134 | || !strcmp(nat->type, "dnat_and_snat"))) { | |
09b39248 | 5135 | ds_clear(&match); |
06a26dd2 MS |
5136 | ds_put_format(&match, "ip && ip4.src == %s" |
5137 | " && outport == %s", | |
5138 | nat->logical_ip, | |
5139 | od->l3dgw_port->json_key); | |
5140 | if (!distributed && od->l3redirect_port) { | |
5141 | /* Flows for NAT rules that are centralized are only | |
5142 | * programmed on the "redirect-chassis". */ | |
5143 | ds_put_format(&match, " && is_chassis_resident(%s)", | |
5144 | od->l3redirect_port->json_key); | |
5145 | } | |
09b39248 | 5146 | ds_clear(&actions); |
06a26dd2 MS |
5147 | if (distributed) { |
5148 | ds_put_format(&actions, "eth.src = "ETH_ADDR_FMT"; ", | |
5149 | ETH_ADDR_ARGS(mac)); | |
65d8810c | 5150 | } |
06a26dd2 MS |
5151 | ds_put_format(&actions, "ct_dnat;"); |
5152 | ovn_lflow_add(lflows, od, S_ROUTER_OUT_UNDNAT, 100, | |
09b39248 | 5153 | ds_cstr(&match), ds_cstr(&actions)); |
de297547 GS |
5154 | } |
5155 | ||
5156 | /* Egress SNAT table: Packets enter the egress pipeline with | |
5157 | * source ip address that needs to be SNATted to a external ip | |
5158 | * address. */ | |
5159 | if (!strcmp(nat->type, "snat") | |
5160 | || !strcmp(nat->type, "dnat_and_snat")) { | |
06a26dd2 MS |
5161 | if (!od->l3dgw_port) { |
5162 | /* Gateway router. */ | |
5163 | ds_clear(&match); | |
5164 | ds_put_format(&match, "ip && ip4.src == %s", | |
5165 | nat->logical_ip); | |
5166 | ds_clear(&actions); | |
5167 | ds_put_format(&actions, "ct_snat(%s);", nat->external_ip); | |
5168 | ||
5169 | /* The priority here is calculated such that the | |
5170 | * nat->logical_ip with the longest mask gets a higher | |
5171 | * priority. */ | |
5172 | ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT, | |
5173 | count_1bits(ntohl(mask)) + 1, | |
5174 | ds_cstr(&match), ds_cstr(&actions)); | |
5175 | } else { | |
5176 | /* Distributed router. */ | |
5177 | ds_clear(&match); | |
5178 | ds_put_format(&match, "ip && ip4.src == %s" | |
5179 | " && outport == %s", | |
5180 | nat->logical_ip, | |
5181 | od->l3dgw_port->json_key); | |
5182 | if (!distributed && od->l3redirect_port) { | |
5183 | /* Flows for NAT rules that are centralized are only | |
5184 | * programmed on the "redirect-chassis". */ | |
5185 | ds_put_format(&match, " && is_chassis_resident(%s)", | |
5186 | od->l3redirect_port->json_key); | |
5187 | } | |
5188 | ds_clear(&actions); | |
5189 | if (distributed) { | |
5190 | ds_put_format(&actions, "eth.src = "ETH_ADDR_FMT"; ", | |
5191 | ETH_ADDR_ARGS(mac)); | |
5192 | } | |
5193 | ds_put_format(&actions, "ct_snat(%s);", nat->external_ip); | |
5194 | ||
5195 | /* The priority here is calculated such that the | |
5196 | * nat->logical_ip with the longest mask gets a higher | |
5197 | * priority. */ | |
5198 | ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT, | |
5199 | count_1bits(ntohl(mask)) + 1, | |
5200 | ds_cstr(&match), ds_cstr(&actions)); | |
5201 | } | |
5202 | } | |
5203 | ||
5204 | /* Logical router ingress table 0: | |
5205 | * For NAT on a distributed router, add rules allowing | |
5206 | * ingress traffic with eth.dst matching nat->external_mac | |
5207 | * on the l3dgw_port instance where nat->logical_port is | |
5208 | * resident. */ | |
5209 | if (distributed) { | |
09b39248 | 5210 | ds_clear(&match); |
06a26dd2 MS |
5211 | ds_put_format(&match, |
5212 | "eth.dst == "ETH_ADDR_FMT" && inport == %s" | |
5213 | " && is_chassis_resident(\"%s\")", | |
5214 | ETH_ADDR_ARGS(mac), | |
5215 | od->l3dgw_port->json_key, | |
5216 | nat->logical_port); | |
5217 | ovn_lflow_add(lflows, od, S_ROUTER_IN_ADMISSION, 50, | |
5218 | ds_cstr(&match), "next;"); | |
5219 | } | |
5220 | ||
5221 | /* Ingress Gateway Redirect Table: For NAT on a distributed | |
5222 | * router, add flows that are specific to a NAT rule. These | |
5223 | * flows indicate the presence of an applicable NAT rule that | |
5224 | * can be applied in a distributed manner. */ | |
5225 | if (distributed) { | |
5226 | ds_clear(&match); | |
5227 | ds_put_format(&match, "ip4.src == %s && outport == %s", | |
5228 | nat->logical_ip, | |
5229 | od->l3dgw_port->json_key); | |
5230 | ovn_lflow_add(lflows, od, S_ROUTER_IN_GW_REDIRECT, 100, | |
5231 | ds_cstr(&match), "next;"); | |
5232 | } | |
de297547 | 5233 | |
06a26dd2 MS |
5234 | /* Egress Loopback table: For NAT on a distributed router. |
5235 | * If packets in the egress pipeline on the distributed | |
5236 | * gateway port have ip.dst matching a NAT external IP, then | |
5237 | * loop a clone of the packet back to the beginning of the | |
5238 | * ingress pipeline with inport = outport. */ | |
5239 | if (od->l3dgw_port) { | |
5240 | /* Distributed router. */ | |
5241 | ds_clear(&match); | |
5242 | ds_put_format(&match, "ip4.dst == %s && outport == %s", | |
5243 | nat->external_ip, | |
5244 | od->l3dgw_port->json_key); | |
5245 | ds_clear(&actions); | |
5246 | ds_put_format(&actions, | |
5247 | "clone { ct_clear; " | |
5248 | "inport = outport; outport = \"\"; " | |
5249 | "flags = 0; flags.loopback = 1; "); | |
71f21279 BP |
5250 | for (int j = 0; j < MFF_N_LOG_REGS; j++) { |
5251 | ds_put_format(&actions, "reg%d = 0; ", j); | |
06a26dd2 MS |
5252 | } |
5253 | ds_put_format(&actions, REGBIT_EGRESS_LOOPBACK" = 1; " | |
5254 | "next(pipeline=ingress, table=0); };"); | |
5255 | ovn_lflow_add(lflows, od, S_ROUTER_OUT_EGR_LOOP, 100, | |
09b39248 | 5256 | ds_cstr(&match), ds_cstr(&actions)); |
de297547 GS |
5257 | } |
5258 | } | |
5259 | ||
65d8810c | 5260 | /* Handle force SNAT options set in the gateway router. */ |
06a26dd2 | 5261 | if (dnat_force_snat_ip && !od->l3dgw_port) { |
65d8810c GS |
5262 | /* If a packet with destination IP address as that of the |
5263 | * gateway router (as set in options:dnat_force_snat_ip) is seen, | |
5264 | * UNSNAT it. */ | |
5265 | ds_clear(&match); | |
5266 | ds_put_format(&match, "ip && ip4.dst == %s", dnat_force_snat_ip); | |
5267 | ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 110, | |
5268 | ds_cstr(&match), "ct_snat; next;"); | |
5269 | ||
5270 | /* Higher priority rules to force SNAT with the IP addresses | |
5271 | * configured in the Gateway router. This only takes effect | |
5272 | * when the packet has already been DNATed once. */ | |
5273 | ds_clear(&match); | |
5274 | ds_put_format(&match, "flags.force_snat_for_dnat == 1 && ip"); | |
5275 | ds_clear(&actions); | |
5276 | ds_put_format(&actions, "ct_snat(%s);", dnat_force_snat_ip); | |
5277 | ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT, 100, | |
5278 | ds_cstr(&match), ds_cstr(&actions)); | |
5279 | } | |
06a26dd2 | 5280 | if (lb_force_snat_ip && !od->l3dgw_port) { |
65d8810c GS |
5281 | /* If a packet with destination IP address as that of the |
5282 | * gateway router (as set in options:lb_force_snat_ip) is seen, | |
5283 | * UNSNAT it. */ | |
5284 | ds_clear(&match); | |
5285 | ds_put_format(&match, "ip && ip4.dst == %s", lb_force_snat_ip); | |
5286 | ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 100, | |
5287 | ds_cstr(&match), "ct_snat; next;"); | |
5288 | ||
5289 | /* Load balanced traffic will have flags.force_snat_for_lb set. | |
5290 | * Force SNAT it. */ | |
5291 | ds_clear(&match); | |
5292 | ds_put_format(&match, "flags.force_snat_for_lb == 1 && ip"); | |
5293 | ds_clear(&actions); | |
5294 | ds_put_format(&actions, "ct_snat(%s);", lb_force_snat_ip); | |
5295 | ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT, 100, | |
5296 | ds_cstr(&match), ds_cstr(&actions)); | |
5297 | } | |
5298 | ||
06a26dd2 MS |
5299 | if (!od->l3dgw_port) { |
5300 | /* For gateway router, re-circulate every packet through | |
5301 | * the DNAT zone. This helps with two things. | |
5302 | * | |
5303 | * 1. Any packet that needs to be unDNATed in the reverse | |
5304 | * direction gets unDNATed. Ideally this could be done in | |
5305 | * the egress pipeline. But since the gateway router | |
5306 | * does not have any feature that depends on the source | |
5307 | * ip address being external IP address for IP routing, | |
5308 | * we can do it here, saving a future re-circulation. | |
5309 | * | |
5310 | * 2. Any packet that was sent through SNAT zone in the | |
5311 | * previous table automatically gets re-circulated to get | |
5312 | * back the new destination IP address that is needed for | |
5313 | * routing in the openflow pipeline. */ | |
5314 | ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 50, | |
5315 | "ip", "flags.loopback = 1; ct_dnat;"); | |
5316 | } else { | |
5317 | /* For NAT on a distributed router, add flows to Ingress | |
5318 | * IP Routing table, Ingress ARP Resolution table, and | |
5319 | * Ingress Gateway Redirect Table that are not specific to a | |
5320 | * NAT rule. */ | |
5321 | ||
5322 | /* The highest priority IN_IP_ROUTING rule matches packets | |
5323 | * with REGBIT_NAT_REDIRECT (set in DNAT or UNSNAT stages), | |
5324 | * with action "ip.ttl--; next;". The IN_GW_REDIRECT table | |
5325 | * will take care of setting the outport. */ | |
5326 | ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 300, | |
5327 | REGBIT_NAT_REDIRECT" == 1", "ip.ttl--; next;"); | |
5328 | ||
5329 | /* The highest priority IN_ARP_RESOLVE rule matches packets | |
5330 | * with REGBIT_NAT_REDIRECT (set in DNAT or UNSNAT stages), | |
5331 | * then sets eth.dst to the distributed gateway port's | |
5332 | * ethernet address. */ | |
5333 | ds_clear(&actions); | |
5334 | ds_put_format(&actions, "eth.dst = %s; next;", | |
5335 | od->l3dgw_port->lrp_networks.ea_s); | |
5336 | ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_RESOLVE, 200, | |
5337 | REGBIT_NAT_REDIRECT" == 1", ds_cstr(&actions)); | |
5338 | ||
5339 | /* The highest priority IN_GW_REDIRECT rule redirects packets | |
5340 | * with REGBIT_NAT_REDIRECT (set in DNAT or UNSNAT stages) to | |
5341 | * the central instance of the l3dgw_port for NAT processing. */ | |
5342 | ds_clear(&actions); | |
5343 | ds_put_format(&actions, "outport = %s; next;", | |
5344 | od->l3redirect_port->json_key); | |
5345 | ovn_lflow_add(lflows, od, S_ROUTER_IN_GW_REDIRECT, 200, | |
5346 | REGBIT_NAT_REDIRECT" == 1", ds_cstr(&actions)); | |
5347 | } | |
5348 | ||
5349 | /* Load balancing and packet defrag are only valid on | |
6f39e18d NS |
5350 | * Gateway routers or router with gateway port. */ |
5351 | if (!smap_get(&od->nbr->options, "chassis") && !od->l3dgw_port) { | |
06a26dd2 MS |
5352 | continue; |
5353 | } | |
8697d426 MS |
5354 | |
5355 | /* A set to hold all ips that need defragmentation and tracking. */ | |
5356 | struct sset all_ips = SSET_INITIALIZER(&all_ips); | |
5357 | ||
5358 | for (int i = 0; i < od->nbr->n_load_balancer; i++) { | |
5359 | struct nbrec_load_balancer *lb = od->nbr->load_balancer[i]; | |
5360 | struct smap *vips = &lb->vips; | |
5361 | struct smap_node *node; | |
5362 | ||
5363 | SMAP_FOR_EACH (node, vips) { | |
5364 | uint16_t port = 0; | |
485d373b | 5365 | int addr_family; |
8697d426 MS |
5366 | |
5367 | /* node->key contains IP:port or just IP. */ | |
5368 | char *ip_address = NULL; | |
485d373b MM |
5369 | ip_address_and_port_from_lb_key(node->key, &ip_address, &port, |
5370 | &addr_family); | |
8697d426 MS |
5371 | if (!ip_address) { |
5372 | continue; | |
5373 | } | |
5374 | ||
5375 | if (!sset_contains(&all_ips, ip_address)) { | |
5376 | sset_add(&all_ips, ip_address); | |
485d373b MM |
5377 | /* If there are any load balancing rules, we should send |
5378 | * the packet to conntrack for defragmentation and | |
5379 | * tracking. This helps with two things. | |
5380 | * | |
5381 | * 1. With tracking, we can send only new connections to | |
5382 | * pick a DNAT ip address from a group. | |
5383 | * 2. If there are L4 ports in load balancing rules, we | |
5384 | * need the defragmentation to match on L4 ports. */ | |
5385 | ds_clear(&match); | |
5386 | if (addr_family == AF_INET) { | |
5387 | ds_put_format(&match, "ip && ip4.dst == %s", | |
5388 | ip_address); | |
5389 | } else { | |
5390 | ds_put_format(&match, "ip && ip6.dst == %s", | |
5391 | ip_address); | |
5392 | } | |
5393 | ovn_lflow_add(lflows, od, S_ROUTER_IN_DEFRAG, | |
5394 | 100, ds_cstr(&match), "ct_next;"); | |
8697d426 MS |
5395 | } |
5396 | ||
5397 | /* Higher priority rules are added for load-balancing in DNAT | |
5398 | * table. For every match (on a VIP[:port]), we add two flows | |
5399 | * via add_router_lb_flow(). One flow is for specific matching | |
5400 | * on ct.new with an action of "ct_lb($targets);". The other | |
5401 | * flow is for ct.est with an action of "ct_dnat;". */ | |
5402 | ds_clear(&actions); | |
5403 | ds_put_format(&actions, "ct_lb(%s);", node->value); | |
5404 | ||
5405 | ds_clear(&match); | |
485d373b MM |
5406 | if (addr_family == AF_INET) { |
5407 | ds_put_format(&match, "ip && ip4.dst == %s", | |
5408 | ip_address); | |
5409 | } else { | |
5410 | ds_put_format(&match, "ip && ip6.dst == %s", | |
5411 | ip_address); | |
5412 | } | |
8697d426 MS |
5413 | free(ip_address); |
5414 | ||
6f39e18d NS |
5415 | int prio = 110; |
5416 | bool is_udp = lb->protocol && !strcmp(lb->protocol, "udp") ? | |
5417 | true : false; | |
8697d426 | 5418 | if (port) { |
6f39e18d | 5419 | if (is_udp) { |
8697d426 MS |
5420 | ds_put_format(&match, " && udp && udp.dst == %d", |
5421 | port); | |
5422 | } else { | |
5423 | ds_put_format(&match, " && tcp && tcp.dst == %d", | |
5424 | port); | |
5425 | } | |
6f39e18d NS |
5426 | prio = 120; |
5427 | } | |
5428 | ||
5429 | if (od->l3redirect_port) { | |
5430 | ds_put_format(&match, " && is_chassis_resident(%s)", | |
5431 | od->l3redirect_port->json_key); | |
8697d426 | 5432 | } |
6f39e18d | 5433 | add_router_lb_flow(lflows, od, &match, &actions, prio, |
485d373b MM |
5434 | lb_force_snat_ip, node->value, is_udp, |
5435 | addr_family); | |
8697d426 MS |
5436 | } |
5437 | } | |
8697d426 | 5438 | sset_destroy(&all_ips); |
de297547 GS |
5439 | } |
5440 | ||
4364646c ZKL |
5441 | /* Logical router ingress table 5 and 6: IPv6 Router Adv (RA) options and |
5442 | * response. */ | |
5443 | HMAP_FOR_EACH (op, key_node, ports) { | |
5444 | if (!op->nbrp || op->nbrp->peer || !op->peer) { | |
5445 | continue; | |
5446 | } | |
5447 | ||
5448 | if (!op->lrp_networks.n_ipv6_addrs) { | |
5449 | continue; | |
5450 | } | |
5451 | ||
5452 | const char *address_mode = smap_get( | |
5453 | &op->nbrp->ipv6_ra_configs, "address_mode"); | |
5454 | ||
5455 | if (!address_mode) { | |
5456 | continue; | |
5457 | } | |
5458 | if (strcmp(address_mode, "slaac") && | |
5459 | strcmp(address_mode, "dhcpv6_stateful") && | |
5460 | strcmp(address_mode, "dhcpv6_stateless")) { | |
5461 | static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); | |
5462 | VLOG_WARN_RL(&rl, "Invalid address mode [%s] defined", | |
5463 | address_mode); | |
5464 | continue; | |
5465 | } | |
5466 | ||
5467 | ds_clear(&match); | |
5468 | ds_put_format(&match, "inport == %s && ip6.dst == ff02::2 && nd_rs", | |
5469 | op->json_key); | |
5470 | ds_clear(&actions); | |
5471 | ||
5472 | const char *mtu_s = smap_get( | |
5473 | &op->nbrp->ipv6_ra_configs, "mtu"); | |
5474 | ||
5475 | /* As per RFC 2460, 1280 is minimum IPv6 MTU. */ | |
5476 | uint32_t mtu = (mtu_s && atoi(mtu_s) >= 1280) ? atoi(mtu_s) : 0; | |
5477 | ||
5478 | ds_put_format(&actions, REGBIT_ND_RA_OPTS_RESULT" = put_nd_ra_opts(" | |
5479 | "addr_mode = \"%s\", slla = %s", | |
5480 | address_mode, op->lrp_networks.ea_s); | |
5481 | if (mtu > 0) { | |
5482 | ds_put_format(&actions, ", mtu = %u", mtu); | |
5483 | } | |
5484 | ||
5485 | bool add_rs_response_flow = false; | |
5486 | ||
5487 | for (size_t i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) { | |
5488 | if (in6_is_lla(&op->lrp_networks.ipv6_addrs[i].network)) { | |
5489 | continue; | |
5490 | } | |
5491 | ||
5492 | /* Add the prefix option if the address mode is slaac or | |
5493 | * dhcpv6_stateless. */ | |
5494 | if (strcmp(address_mode, "dhcpv6_stateful")) { | |
5495 | ds_put_format(&actions, ", prefix = %s/%u", | |
5496 | op->lrp_networks.ipv6_addrs[i].network_s, | |
5497 | op->lrp_networks.ipv6_addrs[i].plen); | |
5498 | } | |
5499 | add_rs_response_flow = true; | |
5500 | } | |
5501 | ||
5502 | if (add_rs_response_flow) { | |
5503 | ds_put_cstr(&actions, "); next;"); | |
5504 | ovn_lflow_add(lflows, op->od, S_ROUTER_IN_ND_RA_OPTIONS, 50, | |
5505 | ds_cstr(&match), ds_cstr(&actions)); | |
5506 | ds_clear(&actions); | |
5507 | ds_clear(&match); | |
5508 | ds_put_format(&match, "inport == %s && ip6.dst == ff02::2 && " | |
5509 | "nd_ra && "REGBIT_ND_RA_OPTS_RESULT, op->json_key); | |
5510 | ||
5511 | char ip6_str[INET6_ADDRSTRLEN + 1]; | |
5512 | struct in6_addr lla; | |
5513 | in6_generate_lla(op->lrp_networks.ea, &lla); | |
5514 | memset(ip6_str, 0, sizeof(ip6_str)); | |
5515 | ipv6_string_mapped(ip6_str, &lla); | |
5516 | ds_put_format(&actions, "eth.dst = eth.src; eth.src = %s; " | |
5517 | "ip6.dst = ip6.src; ip6.src = %s; " | |
5518 | "outport = inport; flags.loopback = 1; " | |
5519 | "output;", | |
5520 | op->lrp_networks.ea_s, ip6_str); | |
5521 | ovn_lflow_add(lflows, op->od, S_ROUTER_IN_ND_RA_RESPONSE, 50, | |
5522 | ds_cstr(&match), ds_cstr(&actions)); | |
5523 | } | |
5524 | } | |
5525 | ||
5526 | /* Logical router ingress table 5, 6: RS responder, by default goto next. | |
5527 | * (priority 0)*/ | |
5528 | HMAP_FOR_EACH (od, key_node, datapaths) { | |
5529 | if (!od->nbr) { | |
5530 | continue; | |
5531 | } | |
5532 | ||
5533 | ovn_lflow_add(lflows, od, S_ROUTER_IN_ND_RA_OPTIONS, 0, "1", "next;"); | |
5534 | ovn_lflow_add(lflows, od, S_ROUTER_IN_ND_RA_RESPONSE, 0, "1", "next;"); | |
5535 | } | |
5536 | ||
5537 | /* Logical router ingress table 7: IP Routing. | |
9975d7be BP |
5538 | * |
5539 | * A packet that arrives at this table is an IP packet that should be | |
6fdb7cd6 JP |
5540 | * routed to the address in 'ip[46].dst'. This table sets outport to |
5541 | * the correct output port, eth.src to the output port's MAC | |
5542 | * address, and '[xx]reg0' to the next-hop IP address (leaving | |
5543 | * 'ip[46].dst', the packet’s final destination, unchanged), and | |
5544 | * advances to the next table for ARP/ND resolution. */ | |
9975d7be | 5545 | HMAP_FOR_EACH (op, key_node, ports) { |
0ee00741 | 5546 | if (!op->nbrp) { |
9975d7be BP |
5547 | continue; |
5548 | } | |
5549 | ||
4685e523 JP |
5550 | for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) { |
5551 | add_route(lflows, op, op->lrp_networks.ipv4_addrs[i].addr_s, | |
5552 | op->lrp_networks.ipv4_addrs[i].network_s, | |
440a9f4b | 5553 | op->lrp_networks.ipv4_addrs[i].plen, NULL, NULL); |
4685e523 | 5554 | } |
6fdb7cd6 JP |
5555 | |
5556 | for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) { | |
5557 | add_route(lflows, op, op->lrp_networks.ipv6_addrs[i].addr_s, | |
5558 | op->lrp_networks.ipv6_addrs[i].network_s, | |
440a9f4b | 5559 | op->lrp_networks.ipv6_addrs[i].plen, NULL, NULL); |
6fdb7cd6 | 5560 | } |
9975d7be | 5561 | } |
4685e523 | 5562 | |
6fdb7cd6 | 5563 | /* Convert the static routes to flows. */ |
9975d7be BP |
5564 | HMAP_FOR_EACH (od, key_node, datapaths) { |
5565 | if (!od->nbr) { | |
5566 | continue; | |
5567 | } | |
5568 | ||
28dc3fe9 SR |
5569 | for (int i = 0; i < od->nbr->n_static_routes; i++) { |
5570 | const struct nbrec_logical_router_static_route *route; | |
5571 | ||
5572 | route = od->nbr->static_routes[i]; | |
5573 | build_static_route_flow(lflows, od, ports, route); | |
5574 | } | |
9975d7be | 5575 | } |
6fdb7cd6 | 5576 | |
9975d7be BP |
5577 | /* XXX destination unreachable */ |
5578 | ||
4364646c | 5579 | /* Local router ingress table 8: ARP Resolution. |
9975d7be BP |
5580 | * |
5581 | * Any packet that reaches this table is an IP packet whose next-hop IP | |
5582 | * address is in reg0. (ip4.dst is the final destination.) This table | |
5583 | * resolves the IP address in reg0 into an output port in outport and an | |
5584 | * Ethernet address in eth.dst. */ | |
5585 | HMAP_FOR_EACH (op, key_node, ports) { | |
7ebfcd3d NS |
5586 | if (op->nbsp && !lsp_is_enabled(op->nbsp)) { |
5587 | continue; | |
5588 | } | |
5589 | ||
0ee00741 | 5590 | if (op->nbrp) { |
6fdb7cd6 JP |
5591 | /* This is a logical router port. If next-hop IP address in |
5592 | * '[xx]reg0' matches IP address of this router port, then | |
5593 | * the packet is intended to eventually be sent to this | |
5594 | * logical port. Set the destination mac address using this | |
5595 | * port's mac address. | |
509afdc3 GS |
5596 | * |
5597 | * The packet is still in peer's logical pipeline. So the match | |
5598 | * should be on peer's outport. */ | |
6fdb7cd6 JP |
5599 | if (op->peer && op->nbrp->peer) { |
5600 | if (op->lrp_networks.n_ipv4_addrs) { | |
5601 | ds_clear(&match); | |
5602 | ds_put_format(&match, "outport == %s && reg0 == ", | |
5603 | op->peer->json_key); | |
5604 | op_put_v4_networks(&match, op, false); | |
5605 | ||
5606 | ds_clear(&actions); | |
5607 | ds_put_format(&actions, "eth.dst = %s; next;", | |
5608 | op->lrp_networks.ea_s); | |
5609 | ovn_lflow_add(lflows, op->peer->od, S_ROUTER_IN_ARP_RESOLVE, | |
5610 | 100, ds_cstr(&match), ds_cstr(&actions)); | |
5611 | } | |
4685e523 | 5612 | |
6fdb7cd6 JP |
5613 | if (op->lrp_networks.n_ipv6_addrs) { |
5614 | ds_clear(&match); | |
5615 | ds_put_format(&match, "outport == %s && xxreg0 == ", | |
5616 | op->peer->json_key); | |
5617 | op_put_v6_networks(&match, op); | |
5618 | ||
5619 | ds_clear(&actions); | |
5620 | ds_put_format(&actions, "eth.dst = %s; next;", | |
5621 | op->lrp_networks.ea_s); | |
5622 | ovn_lflow_add(lflows, op->peer->od, S_ROUTER_IN_ARP_RESOLVE, | |
5623 | 100, ds_cstr(&match), ds_cstr(&actions)); | |
5624 | } | |
509afdc3 | 5625 | } |
0ee00741 | 5626 | } else if (op->od->n_router_ports && strcmp(op->nbsp->type, "router")) { |
75cf9d2b GS |
5627 | /* This is a logical switch port that backs a VM or a container. |
5628 | * Extract its addresses. For each of the address, go through all | |
5629 | * the router ports attached to the switch (to which this port | |
5630 | * connects) and if the address in question is reachable from the | |
6fdb7cd6 | 5631 | * router port, add an ARP/ND entry in that router's pipeline. */ |
75cf9d2b | 5632 | |
e93b43d6 | 5633 | for (size_t i = 0; i < op->n_lsp_addrs; i++) { |
4685e523 | 5634 | const char *ea_s = op->lsp_addrs[i].ea_s; |
e93b43d6 | 5635 | for (size_t j = 0; j < op->lsp_addrs[i].n_ipv4_addrs; j++) { |
4685e523 | 5636 | const char *ip_s = op->lsp_addrs[i].ipv4_addrs[j].addr_s; |
e93b43d6 | 5637 | for (size_t k = 0; k < op->od->n_router_ports; k++) { |
80f408f4 JP |
5638 | /* Get the Logical_Router_Port that the |
5639 | * Logical_Switch_Port is connected to, as | |
5640 | * 'peer'. */ | |
86e98048 | 5641 | const char *peer_name = smap_get( |
0ee00741 | 5642 | &op->od->router_ports[k]->nbsp->options, |
86e98048 BP |
5643 | "router-port"); |
5644 | if (!peer_name) { | |
5645 | continue; | |
5646 | } | |
5647 | ||
e93b43d6 | 5648 | struct ovn_port *peer = ovn_port_find(ports, peer_name); |
0ee00741 | 5649 | if (!peer || !peer->nbrp) { |
86e98048 BP |
5650 | continue; |
5651 | } | |
5652 | ||
4685e523 | 5653 | if (!find_lrp_member_ip(peer, ip_s)) { |
86e98048 BP |
5654 | continue; |
5655 | } | |
5656 | ||
09b39248 | 5657 | ds_clear(&match); |
e93b43d6 | 5658 | ds_put_format(&match, "outport == %s && reg0 == %s", |
4685e523 JP |
5659 | peer->json_key, ip_s); |
5660 | ||
09b39248 | 5661 | ds_clear(&actions); |
4685e523 | 5662 | ds_put_format(&actions, "eth.dst = %s; next;", ea_s); |
86e98048 | 5663 | ovn_lflow_add(lflows, peer->od, |
09b39248 JP |
5664 | S_ROUTER_IN_ARP_RESOLVE, 100, |
5665 | ds_cstr(&match), ds_cstr(&actions)); | |
86e98048 | 5666 | } |
9975d7be | 5667 | } |
6fdb7cd6 JP |
5668 | |
5669 | for (size_t j = 0; j < op->lsp_addrs[i].n_ipv6_addrs; j++) { | |
5670 | const char *ip_s = op->lsp_addrs[i].ipv6_addrs[j].addr_s; | |
5671 | for (size_t k = 0; k < op->od->n_router_ports; k++) { | |
5672 | /* Get the Logical_Router_Port that the | |
5673 | * Logical_Switch_Port is connected to, as | |
5674 | * 'peer'. */ | |
5675 | const char *peer_name = smap_get( | |
5676 | &op->od->router_ports[k]->nbsp->options, | |
5677 | "router-port"); | |
5678 | if (!peer_name) { | |
5679 | continue; | |
5680 | } | |
5681 | ||
5682 | struct ovn_port *peer = ovn_port_find(ports, peer_name); | |
5683 | if (!peer || !peer->nbrp) { | |
5684 | continue; | |
5685 | } | |
5686 | ||
5687 | if (!find_lrp_member_ip(peer, ip_s)) { | |
5688 | continue; | |
5689 | } | |
5690 | ||
5691 | ds_clear(&match); | |
5692 | ds_put_format(&match, "outport == %s && xxreg0 == %s", | |
5693 | peer->json_key, ip_s); | |
5694 | ||
5695 | ds_clear(&actions); | |
5696 | ds_put_format(&actions, "eth.dst = %s; next;", ea_s); | |
5697 | ovn_lflow_add(lflows, peer->od, | |
5698 | S_ROUTER_IN_ARP_RESOLVE, 100, | |
5699 | ds_cstr(&match), ds_cstr(&actions)); | |
5700 | } | |
5701 | } | |
9975d7be | 5702 | } |
0ee00741 | 5703 | } else if (!strcmp(op->nbsp->type, "router")) { |
75cf9d2b GS |
5704 | /* This is a logical switch port that connects to a router. */ |
5705 | ||
5706 | /* The peer of this switch port is the router port for which | |
5707 | * we need to add logical flows such that it can resolve | |
5708 | * ARP entries for all the other router ports connected to | |
5709 | * the switch in question. */ | |
5710 | ||
0ee00741 | 5711 | const char *peer_name = smap_get(&op->nbsp->options, |
75cf9d2b GS |
5712 | "router-port"); |
5713 | if (!peer_name) { | |
5714 | continue; | |
5715 | } | |
5716 | ||
5717 | struct ovn_port *peer = ovn_port_find(ports, peer_name); | |
0ee00741 | 5718 | if (!peer || !peer->nbrp) { |
75cf9d2b GS |
5719 | continue; |
5720 | } | |
5721 | ||
4685e523 | 5722 | for (size_t i = 0; i < op->od->n_router_ports; i++) { |
75cf9d2b | 5723 | const char *router_port_name = smap_get( |
0ee00741 | 5724 | &op->od->router_ports[i]->nbsp->options, |
75cf9d2b GS |
5725 | "router-port"); |
5726 | struct ovn_port *router_port = ovn_port_find(ports, | |
5727 | router_port_name); | |
0ee00741 | 5728 | if (!router_port || !router_port->nbrp) { |
75cf9d2b GS |
5729 | continue; |
5730 | } | |
5731 | ||
5732 | /* Skip the router port under consideration. */ | |
5733 | if (router_port == peer) { | |
5734 | continue; | |
5735 | } | |
5736 | ||
6fdb7cd6 JP |
5737 | if (router_port->lrp_networks.n_ipv4_addrs) { |
5738 | ds_clear(&match); | |
5739 | ds_put_format(&match, "outport == %s && reg0 == ", | |
5740 | peer->json_key); | |
5741 | op_put_v4_networks(&match, router_port, false); | |
5742 | ||
5743 | ds_clear(&actions); | |
5744 | ds_put_format(&actions, "eth.dst = %s; next;", | |
5745 | router_port->lrp_networks.ea_s); | |
5746 | ovn_lflow_add(lflows, peer->od, S_ROUTER_IN_ARP_RESOLVE, | |
5747 | 100, ds_cstr(&match), ds_cstr(&actions)); | |
5748 | } | |
4685e523 | 5749 | |
6fdb7cd6 JP |
5750 | if (router_port->lrp_networks.n_ipv6_addrs) { |
5751 | ds_clear(&match); | |
5752 | ds_put_format(&match, "outport == %s && xxreg0 == ", | |
5753 | peer->json_key); | |
5754 | op_put_v6_networks(&match, router_port); | |
5755 | ||
5756 | ds_clear(&actions); | |
5757 | ds_put_format(&actions, "eth.dst = %s; next;", | |
5758 | router_port->lrp_networks.ea_s); | |
5759 | ovn_lflow_add(lflows, peer->od, S_ROUTER_IN_ARP_RESOLVE, | |
5760 | 100, ds_cstr(&match), ds_cstr(&actions)); | |
5761 | } | |
75cf9d2b | 5762 | } |
9975d7be BP |
5763 | } |
5764 | } | |
75cf9d2b | 5765 | |
0bac7164 BP |
5766 | HMAP_FOR_EACH (od, key_node, datapaths) { |
5767 | if (!od->nbr) { | |
5768 | continue; | |
5769 | } | |
5770 | ||
c34a87b6 JP |
5771 | ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_RESOLVE, 0, "ip4", |
5772 | "get_arp(outport, reg0); next;"); | |
5773 | ||
5774 | ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_RESOLVE, 0, "ip6", | |
5775 | "get_nd(outport, xxreg0); next;"); | |
0bac7164 BP |
5776 | } |
5777 | ||
4364646c | 5778 | /* Logical router ingress table 9: Gateway redirect. |
41a15b71 MS |
5779 | * |
5780 | * For traffic with outport equal to the l3dgw_port | |
5781 | * on a distributed router, this table redirects a subset | |
5782 | * of the traffic to the l3redirect_port which represents | |
5783 | * the central instance of the l3dgw_port. | |
5784 | */ | |
5785 | HMAP_FOR_EACH (od, key_node, datapaths) { | |
5786 | if (!od->nbr) { | |
5787 | continue; | |
5788 | } | |
5789 | if (od->l3dgw_port && od->l3redirect_port) { | |
5790 | /* For traffic with outport == l3dgw_port, if the | |
5791 | * packet did not match any higher priority redirect | |
5792 | * rule, then the traffic is redirected to the central | |
5793 | * instance of the l3dgw_port. */ | |
5794 | ds_clear(&match); | |
5795 | ds_put_format(&match, "outport == %s", | |
5796 | od->l3dgw_port->json_key); | |
5797 | ds_clear(&actions); | |
5798 | ds_put_format(&actions, "outport = %s; next;", | |
5799 | od->l3redirect_port->json_key); | |
5800 | ovn_lflow_add(lflows, od, S_ROUTER_IN_GW_REDIRECT, 50, | |
5801 | ds_cstr(&match), ds_cstr(&actions)); | |
5802 | ||
5803 | /* If the Ethernet destination has not been resolved, | |
5804 | * redirect to the central instance of the l3dgw_port. | |
5805 | * Such traffic will be replaced by an ARP request or ND | |
5806 | * Neighbor Solicitation in the ARP request ingress | |
5807 | * table, before being redirected to the central instance. | |
5808 | */ | |
5809 | ds_put_format(&match, " && eth.dst == 00:00:00:00:00:00"); | |
5810 | ovn_lflow_add(lflows, od, S_ROUTER_IN_GW_REDIRECT, 150, | |
5811 | ds_cstr(&match), ds_cstr(&actions)); | |
5812 | } | |
5813 | ||
5814 | /* Packets are allowed by default. */ | |
5815 | ovn_lflow_add(lflows, od, S_ROUTER_IN_GW_REDIRECT, 0, "1", "next;"); | |
5816 | } | |
5817 | ||
4364646c | 5818 | /* Local router ingress table 10: ARP request. |
0bac7164 BP |
5819 | * |
5820 | * In the common case where the Ethernet destination has been resolved, | |
94300e09 | 5821 | * this table outputs the packet (priority 0). Otherwise, it composes |
b1a3a6a4 | 5822 | * and sends an ARP/IPv6 NA request (priority 100). */ |
0bac7164 BP |
5823 | HMAP_FOR_EACH (od, key_node, datapaths) { |
5824 | if (!od->nbr) { | |
5825 | continue; | |
5826 | } | |
5827 | ||
5828 | ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_REQUEST, 100, | |
5829 | "eth.dst == 00:00:00:00:00:00", | |
5830 | "arp { " | |
5831 | "eth.dst = ff:ff:ff:ff:ff:ff; " | |
5832 | "arp.spa = reg1; " | |
47021598 | 5833 | "arp.tpa = reg0; " |
0bac7164 BP |
5834 | "arp.op = 1; " /* ARP request */ |
5835 | "output; " | |
5836 | "};"); | |
b1a3a6a4 NS |
5837 | ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_REQUEST, 100, |
5838 | "eth.dst == 00:00:00:00:00:00", | |
5839 | "nd_ns { " | |
5840 | "nd.target = xxreg0; " | |
5841 | "output; " | |
5842 | "};"); | |
0bac7164 BP |
5843 | ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_REQUEST, 0, "1", "output;"); |
5844 | } | |
9975d7be | 5845 | |
de297547 | 5846 | /* Logical router egress table 1: Delivery (priority 100). |
9975d7be BP |
5847 | * |
5848 | * Priority 100 rules deliver packets to enabled logical ports. */ | |
5849 | HMAP_FOR_EACH (op, key_node, ports) { | |
0ee00741 | 5850 | if (!op->nbrp) { |
9975d7be BP |
5851 | continue; |
5852 | } | |
5853 | ||
0ee00741 | 5854 | if (!lrport_is_enabled(op->nbrp)) { |
9975d7be BP |
5855 | /* Drop packets to disabled logical ports (since logical flow |
5856 | * tables are default-drop). */ | |
5857 | continue; | |
5858 | } | |
5859 | ||
41a15b71 MS |
5860 | if (op->derived) { |
5861 | /* No egress packets should be processed in the context of | |
5862 | * a chassisredirect port. The chassisredirect port should | |
5863 | * be replaced by the l3dgw port in the local output | |
5864 | * pipeline stage before egress processing. */ | |
5865 | continue; | |
5866 | } | |
5867 | ||
09b39248 JP |
5868 | ds_clear(&match); |
5869 | ds_put_format(&match, "outport == %s", op->json_key); | |
9975d7be | 5870 | ovn_lflow_add(lflows, op->od, S_ROUTER_OUT_DELIVERY, 100, |
09b39248 | 5871 | ds_cstr(&match), "output;"); |
9975d7be | 5872 | } |
09b39248 JP |
5873 | |
5874 | ds_destroy(&match); | |
5875 | ds_destroy(&actions); | |
9975d7be BP |
5876 | } |
5877 | ||
5878 | /* Updates the Logical_Flow and Multicast_Group tables in the OVN_SB database, | |
5879 | * constructing their contents based on the OVN_NB database. */ | |
5880 | static void | |
5881 | build_lflows(struct northd_context *ctx, struct hmap *datapaths, | |
5882 | struct hmap *ports) | |
5883 | { | |
5884 | struct hmap lflows = HMAP_INITIALIZER(&lflows); | |
5885 | struct hmap mcgroups = HMAP_INITIALIZER(&mcgroups); | |
5886 | ||
5887 | build_lswitch_flows(datapaths, ports, &lflows, &mcgroups); | |
5888 | build_lrouter_flows(datapaths, ports, &lflows); | |
5889 | ||
5868eb24 BP |
5890 | /* Push changes to the Logical_Flow table to database. */ |
5891 | const struct sbrec_logical_flow *sbflow, *next_sbflow; | |
5892 | SBREC_LOGICAL_FLOW_FOR_EACH_SAFE (sbflow, next_sbflow, ctx->ovnsb_idl) { | |
5893 | struct ovn_datapath *od | |
5894 | = ovn_datapath_from_sbrec(datapaths, sbflow->logical_datapath); | |
5895 | if (!od) { | |
5896 | sbrec_logical_flow_delete(sbflow); | |
5897 | continue; | |
eb00399e | 5898 | } |
eb00399e | 5899 | |
9975d7be | 5900 | enum ovn_datapath_type dp_type = od->nbs ? DP_SWITCH : DP_ROUTER; |
880fcd14 BP |
5901 | enum ovn_pipeline pipeline |
5902 | = !strcmp(sbflow->pipeline, "ingress") ? P_IN : P_OUT; | |
5868eb24 | 5903 | struct ovn_lflow *lflow = ovn_lflow_find( |
880fcd14 BP |
5904 | &lflows, od, ovn_stage_build(dp_type, pipeline, sbflow->table_id), |
5905 | sbflow->priority, sbflow->match, sbflow->actions); | |
5868eb24 BP |
5906 | if (lflow) { |
5907 | ovn_lflow_destroy(&lflows, lflow); | |
5908 | } else { | |
5909 | sbrec_logical_flow_delete(sbflow); | |
4edcdcf4 RB |
5910 | } |
5911 | } | |
5868eb24 BP |
5912 | struct ovn_lflow *lflow, *next_lflow; |
5913 | HMAP_FOR_EACH_SAFE (lflow, next_lflow, hmap_node, &lflows) { | |
880fcd14 BP |
5914 | enum ovn_pipeline pipeline = ovn_stage_get_pipeline(lflow->stage); |
5915 | uint8_t table = ovn_stage_get_table(lflow->stage); | |
5916 | ||
5868eb24 BP |
5917 | sbflow = sbrec_logical_flow_insert(ctx->ovnsb_txn); |
5918 | sbrec_logical_flow_set_logical_datapath(sbflow, lflow->od->sb); | |
9975d7be BP |
5919 | sbrec_logical_flow_set_pipeline( |
5920 | sbflow, pipeline == P_IN ? "ingress" : "egress"); | |
880fcd14 | 5921 | sbrec_logical_flow_set_table_id(sbflow, table); |
5868eb24 BP |
5922 | sbrec_logical_flow_set_priority(sbflow, lflow->priority); |
5923 | sbrec_logical_flow_set_match(sbflow, lflow->match); | |
5924 | sbrec_logical_flow_set_actions(sbflow, lflow->actions); | |
091e3af9 | 5925 | |
d8026bbf BP |
5926 | /* Trim the source locator lflow->where, which looks something like |
5927 | * "ovn/northd/ovn-northd.c:1234", down to just the part following the | |
5928 | * last slash, e.g. "ovn-northd.c:1234". */ | |
5929 | const char *slash = strrchr(lflow->where, '/'); | |
5930 | #if _WIN32 | |
5931 | const char *backslash = strrchr(lflow->where, '\\'); | |
5932 | if (!slash || backslash > slash) { | |
5933 | slash = backslash; | |
5934 | } | |
5935 | #endif | |
5936 | const char *where = slash ? slash + 1 : lflow->where; | |
5937 | ||
17bfa2aa HZ |
5938 | struct smap ids = SMAP_INITIALIZER(&ids); |
5939 | smap_add(&ids, "stage-name", ovn_stage_to_str(lflow->stage)); | |
5940 | smap_add(&ids, "source", where); | |
5941 | if (lflow->stage_hint) { | |
5942 | smap_add(&ids, "stage-hint", lflow->stage_hint); | |
5943 | } | |
aaf881c6 | 5944 | sbrec_logical_flow_set_external_ids(sbflow, &ids); |
17bfa2aa | 5945 | smap_destroy(&ids); |
091e3af9 | 5946 | |
5868eb24 | 5947 | ovn_lflow_destroy(&lflows, lflow); |
eb00399e | 5948 | } |
5868eb24 BP |
5949 | hmap_destroy(&lflows); |
5950 | ||
5951 | /* Push changes to the Multicast_Group table to database. */ | |
5952 | const struct sbrec_multicast_group *sbmc, *next_sbmc; | |
5953 | SBREC_MULTICAST_GROUP_FOR_EACH_SAFE (sbmc, next_sbmc, ctx->ovnsb_idl) { | |
5954 | struct ovn_datapath *od = ovn_datapath_from_sbrec(datapaths, | |
5955 | sbmc->datapath); | |
5956 | if (!od) { | |
5957 | sbrec_multicast_group_delete(sbmc); | |
5958 | continue; | |
5959 | } | |
eb00399e | 5960 | |
5868eb24 BP |
5961 | struct multicast_group group = { .name = sbmc->name, |
5962 | .key = sbmc->tunnel_key }; | |
5963 | struct ovn_multicast *mc = ovn_multicast_find(&mcgroups, od, &group); | |
5964 | if (mc) { | |
5965 | ovn_multicast_update_sbrec(mc, sbmc); | |
5966 | ovn_multicast_destroy(&mcgroups, mc); | |
5967 | } else { | |
5968 | sbrec_multicast_group_delete(sbmc); | |
5969 | } | |
5970 | } | |
5971 | struct ovn_multicast *mc, *next_mc; | |
5972 | HMAP_FOR_EACH_SAFE (mc, next_mc, hmap_node, &mcgroups) { | |
5973 | sbmc = sbrec_multicast_group_insert(ctx->ovnsb_txn); | |
5974 | sbrec_multicast_group_set_datapath(sbmc, mc->datapath->sb); | |
5975 | sbrec_multicast_group_set_name(sbmc, mc->group->name); | |
5976 | sbrec_multicast_group_set_tunnel_key(sbmc, mc->group->key); | |
5977 | ovn_multicast_update_sbrec(mc, sbmc); | |
5978 | ovn_multicast_destroy(&mcgroups, mc); | |
4edcdcf4 | 5979 | } |
5868eb24 | 5980 | hmap_destroy(&mcgroups); |
4edcdcf4 | 5981 | } |
ea382567 RB |
5982 | |
5983 | /* OVN_Northbound and OVN_Southbound have an identical Address_Set table. | |
5984 | * We always update OVN_Southbound to match the current data in | |
5985 | * OVN_Northbound, so that the address sets used in Logical_Flows in | |
5986 | * OVN_Southbound is checked against the proper set.*/ | |
5987 | static void | |
5988 | sync_address_sets(struct northd_context *ctx) | |
5989 | { | |
5990 | struct shash sb_address_sets = SHASH_INITIALIZER(&sb_address_sets); | |
5991 | ||
5992 | const struct sbrec_address_set *sb_address_set; | |
5993 | SBREC_ADDRESS_SET_FOR_EACH (sb_address_set, ctx->ovnsb_idl) { | |
5994 | shash_add(&sb_address_sets, sb_address_set->name, sb_address_set); | |
5995 | } | |
5996 | ||
5997 | const struct nbrec_address_set *nb_address_set; | |
5998 | NBREC_ADDRESS_SET_FOR_EACH (nb_address_set, ctx->ovnnb_idl) { | |
5999 | sb_address_set = shash_find_and_delete(&sb_address_sets, | |
6000 | nb_address_set->name); | |
6001 | if (!sb_address_set) { | |
6002 | sb_address_set = sbrec_address_set_insert(ctx->ovnsb_txn); | |
6003 | sbrec_address_set_set_name(sb_address_set, nb_address_set->name); | |
6004 | } | |
6005 | ||
6006 | sbrec_address_set_set_addresses(sb_address_set, | |
6007 | /* "char **" is not compatible with "const char **" */ | |
6008 | (const char **) nb_address_set->addresses, | |
6009 | nb_address_set->n_addresses); | |
6010 | } | |
6011 | ||
6012 | struct shash_node *node, *next; | |
6013 | SHASH_FOR_EACH_SAFE (node, next, &sb_address_sets) { | |
6014 | sbrec_address_set_delete(node->data); | |
6015 | shash_delete(&sb_address_sets, node); | |
6016 | } | |
6017 | shash_destroy(&sb_address_sets); | |
6018 | } | |
302eda27 NS |
6019 | |
/*
 * struct 'dns_info' is used to sync the DNS records between OVN Northbound db
 * and Southbound db.
 */
struct dns_info {
    struct hmap_node hmap_node;     /* Node in the sync map; hashed on the
                                     * northbound record's UUID (see
                                     * get_dns_info_from_hmap()). */
    const struct nbrec_dns *nb_dns; /* DNS record in the Northbound db. */
    const struct sbrec_dns *sb_dns; /* DNS record in the Southbound db. */

    /* Datapaths to which the DNS entry is associated with it.
     * 'sbs' is a heap array of 'n_sbs' datapath bindings; presumably the
     * logical switches referencing this DNS record — confirm in
     * sync_dns_entries(), which grows it with xrealloc(). */
    const struct sbrec_datapath_binding **sbs;
    size_t n_sbs;
};
6033 | ||
6034 | static inline struct dns_info * | |
6035 | get_dns_info_from_hmap(struct hmap *dns_map, struct uuid *uuid) | |
6036 | { | |
6037 | struct dns_info *dns_info; | |
6038 | size_t hash = uuid_hash(uuid); | |
6039 | HMAP_FOR_EACH_WITH_HASH (dns_info, hmap_node, hash, dns_map) { | |
6040 | if (uuid_equals(&dns_info->nb_dns->header_.uuid, uuid)) { | |
6041 | return dns_info; | |
6042 | } | |
6043 | } | |
6044 | ||
6045 | return NULL; | |
6046 | } | |
6047 | ||
6048 | static void | |
6049 | sync_dns_entries(struct northd_context *ctx, struct hmap *datapaths) | |
6050 | { | |
6051 | struct hmap dns_map = HMAP_INITIALIZER(&dns_map); | |
6052 | struct ovn_datapath *od; | |
6053 | HMAP_FOR_EACH (od, key_node, datapaths) { | |
6054 | if (!od->nbs || !od->nbs->n_dns_records) { | |
6055 | continue; | |
6056 | } | |
6057 | ||
6058 | for (size_t i = 0; i < od->nbs->n_dns_records; i++) { | |
6059 | struct dns_info *dns_info = get_dns_info_from_hmap( | |
6060 | &dns_map, &od->nbs->dns_records[i]->header_.uuid); | |
6061 | if (!dns_info) { | |
6062 | size_t hash = uuid_hash( | |
6063 | &od->nbs->dns_records[i]->header_.uuid); | |
6064 | dns_info = xzalloc(sizeof *dns_info);; | |
6065 | dns_info->nb_dns = od->nbs->dns_records[i]; | |
6066 | hmap_insert(&dns_map, &dns_info->hmap_node, hash); | |
6067 | } | |
6068 | ||
6069 | dns_info->n_sbs++; | |
6070 | dns_info->sbs = xrealloc(dns_info->sbs, | |
6071 | dns_info->n_sbs * sizeof *dns_info->sbs); | |
6072 | dns_info->sbs[dns_info->n_sbs - 1] = od->sb; | |
6073 | } | |
6074 | } | |
6075 | ||
6076 | const struct sbrec_dns *sbrec_dns, *next; | |
6077 | SBREC_DNS_FOR_EACH_SAFE (sbrec_dns, next, ctx->ovnsb_idl) { | |
6078 | const char *nb_dns_uuid = smap_get(&sbrec_dns->external_ids, "dns_id"); | |
6079 | struct uuid dns_uuid; | |
6080 | if (!nb_dns_uuid || !uuid_from_string(&dns_uuid, nb_dns_uuid)) { | |
6081 | sbrec_dns_delete(sbrec_dns); | |
6082 | continue; | |
6083 | } | |
6084 | ||
6085 | struct dns_info *dns_info = | |
6086 | get_dns_info_from_hmap(&dns_map, &dns_uuid); | |
6087 | if (dns_info) { | |
6088 | dns_info->sb_dns = sbrec_dns; | |
6089 | } else { | |
6090 | sbrec_dns_delete(sbrec_dns); | |
6091 | } | |
6092 | } | |
6093 | ||
6094 | struct dns_info *dns_info; | |
6095 | HMAP_FOR_EACH_POP (dns_info, hmap_node, &dns_map) { | |
6096 | if (!dns_info->sb_dns) { | |
71f21279 | 6097 | sbrec_dns = sbrec_dns_insert(ctx->ovnsb_txn); |
302eda27 NS |
6098 | dns_info->sb_dns = sbrec_dns; |
6099 | char *dns_id = xasprintf( | |
6100 | UUID_FMT, UUID_ARGS(&dns_info->nb_dns->header_.uuid)); | |
6101 | const struct smap external_ids = | |
6102 | SMAP_CONST1(&external_ids, "dns_id", dns_id); | |
6103 | sbrec_dns_set_external_ids(sbrec_dns, &external_ids); | |
6104 | free(dns_id); | |
6105 | } | |
6106 | ||
6107 | /* Set the datapaths and records. If nothing has changed, then | |
6108 | * this will be a no-op. | |
6109 | */ | |
6110 | sbrec_dns_set_datapaths( | |
6111 | dns_info->sb_dns, | |
6112 | (struct sbrec_datapath_binding **)dns_info->sbs, | |
6113 | dns_info->n_sbs); | |
6114 | sbrec_dns_set_records(dns_info->sb_dns, &dns_info->nb_dns->records); | |
6115 | free(dns_info->sbs); | |
6116 | free(dns_info); | |
6117 | } | |
6118 | hmap_destroy(&dns_map); | |
6119 | } | |
6120 | ||
5868eb24 | 6121 | \f |
/* Recomputes the southbound database contents from the northbound database:
 * datapaths, ports, IPAM assignments, and logical flows, then syncs the
 * address sets and DNS records.  Finally copies nb_cfg south and arranges
 * for sb_cfg to be updated when the southbound transaction commits.
 * Requires open transactions on both databases; returns early otherwise. */
static void
ovnnb_db_run(struct northd_context *ctx, struct chassis_index *chassis_index,
             struct ovsdb_idl_loop *sb_loop)
{
    if (!ctx->ovnsb_txn || !ctx->ovnnb_txn) {
        return;
    }
    /* Build order matters: ports need datapaths, IPAM needs ports, and the
     * logical flows need all of the above. */
    struct hmap datapaths, ports;
    build_datapaths(ctx, &datapaths);
    build_ports(ctx, &datapaths, chassis_index, &ports);
    build_ipam(&datapaths, &ports);
    build_lflows(ctx, &datapaths, &ports);

    sync_address_sets(ctx);
    sync_dns_entries(ctx, &datapaths);

    /* Tear down the temporary indexes built above. */
    struct ovn_datapath *dp, *next_dp;
    HMAP_FOR_EACH_SAFE (dp, next_dp, key_node, &datapaths) {
        ovn_datapath_destroy(&datapaths, dp);
    }
    hmap_destroy(&datapaths);

    struct ovn_port *port, *next_port;
    HMAP_FOR_EACH_SAFE (port, next_port, key_node, &ports) {
        ovn_port_destroy(&ports, port);
    }
    hmap_destroy(&ports);

    /* Copy nb_cfg from northbound to southbound database.
     *
     * Also set up to update sb_cfg once our southbound transaction commits. */
    const struct nbrec_nb_global *nb = nbrec_nb_global_first(ctx->ovnnb_idl);
    if (!nb) {
        nb = nbrec_nb_global_insert(ctx->ovnnb_txn);
    }
    const struct sbrec_sb_global *sb = sbrec_sb_global_first(ctx->ovnsb_idl);
    if (!sb) {
        sb = sbrec_sb_global_insert(ctx->ovnsb_txn);
    }
    sbrec_sb_global_set_nb_cfg(sb, nb->nb_cfg);
    sb_loop->next_cfg = nb->nb_cfg;

    /* Release the MAC-address-management pool built during IPAM. */
    cleanup_macam(&macam);
}
6166 | ||
fa183acc BP |
6167 | /* Handle changes to the 'chassis' column of the 'Port_Binding' table. When |
6168 | * this column is not empty, it means we need to set the corresponding logical | |
6169 | * port as 'up' in the northbound DB. */ | |
ac0630a2 | 6170 | static void |
fa183acc | 6171 | update_logical_port_status(struct northd_context *ctx) |
ac0630a2 | 6172 | { |
fc3113bc | 6173 | struct hmap lports_hmap; |
5868eb24 | 6174 | const struct sbrec_port_binding *sb; |
0ee00741 | 6175 | const struct nbrec_logical_switch_port *nbsp; |
fc3113bc RB |
6176 | |
6177 | struct lport_hash_node { | |
6178 | struct hmap_node node; | |
0ee00741 | 6179 | const struct nbrec_logical_switch_port *nbsp; |
4ec3d7c7 | 6180 | } *hash_node; |
f93818dd | 6181 | |
fc3113bc | 6182 | hmap_init(&lports_hmap); |
f93818dd | 6183 | |
0ee00741 | 6184 | NBREC_LOGICAL_SWITCH_PORT_FOR_EACH(nbsp, ctx->ovnnb_idl) { |
fc3113bc | 6185 | hash_node = xzalloc(sizeof *hash_node); |
0ee00741 HK |
6186 | hash_node->nbsp = nbsp; |
6187 | hmap_insert(&lports_hmap, &hash_node->node, hash_string(nbsp->name, 0)); | |
fc3113bc RB |
6188 | } |
6189 | ||
5868eb24 | 6190 | SBREC_PORT_BINDING_FOR_EACH(sb, ctx->ovnsb_idl) { |
0ee00741 | 6191 | nbsp = NULL; |
fc3113bc | 6192 | HMAP_FOR_EACH_WITH_HASH(hash_node, node, |
5868eb24 BP |
6193 | hash_string(sb->logical_port, 0), |
6194 | &lports_hmap) { | |
0ee00741 HK |
6195 | if (!strcmp(sb->logical_port, hash_node->nbsp->name)) { |
6196 | nbsp = hash_node->nbsp; | |
fc3113bc RB |
6197 | break; |
6198 | } | |
f93818dd RB |
6199 | } |
6200 | ||
0ee00741 | 6201 | if (!nbsp) { |
dcda6e0d | 6202 | /* The logical port doesn't exist for this port binding. This can |
2e2762d4 | 6203 | * happen under normal circumstances when ovn-northd hasn't gotten |
dcda6e0d | 6204 | * around to pruning the Port_Binding yet. */ |
f93818dd RB |
6205 | continue; |
6206 | } | |
6207 | ||
0ee00741 | 6208 | if (sb->chassis && (!nbsp->up || !*nbsp->up)) { |
f93818dd | 6209 | bool up = true; |
0ee00741 HK |
6210 | nbrec_logical_switch_port_set_up(nbsp, &up, 1); |
6211 | } else if (!sb->chassis && (!nbsp->up || *nbsp->up)) { | |
f93818dd | 6212 | bool up = false; |
0ee00741 | 6213 | nbrec_logical_switch_port_set_up(nbsp, &up, 1); |
f93818dd RB |
6214 | } |
6215 | } | |
fc3113bc | 6216 | |
4ec3d7c7 | 6217 | HMAP_FOR_EACH_POP(hash_node, node, &lports_hmap) { |
fc3113bc RB |
6218 | free(hash_node); |
6219 | } | |
6220 | hmap_destroy(&lports_hmap); | |
ac0630a2 | 6221 | } |
45f98d4c | 6222 | |
/* DHCPv4 options that ovn-northd supports.  The macros (declared in
 * ovn-l7.h) expand to gen_opts_map initializers giving each option's name,
 * code, and type.  check_and_add_supported_dhcp_opts_to_sb_db() mirrors this
 * table into the southbound DHCP_Options table. */
static struct gen_opts_map supported_dhcp_opts[] = {
    OFFERIP,
    DHCP_OPT_NETMASK,
    DHCP_OPT_ROUTER,
    DHCP_OPT_DNS_SERVER,
    DHCP_OPT_LOG_SERVER,
    DHCP_OPT_LPR_SERVER,
    DHCP_OPT_SWAP_SERVER,
    DHCP_OPT_POLICY_FILTER,
    DHCP_OPT_ROUTER_SOLICITATION,
    DHCP_OPT_NIS_SERVER,
    DHCP_OPT_NTP_SERVER,
    DHCP_OPT_SERVER_ID,
    DHCP_OPT_TFTP_SERVER,
    DHCP_OPT_CLASSLESS_STATIC_ROUTE,
    DHCP_OPT_MS_CLASSLESS_STATIC_ROUTE,
    DHCP_OPT_IP_FORWARD_ENABLE,
    DHCP_OPT_ROUTER_DISCOVERY,
    DHCP_OPT_ETHERNET_ENCAP,
    DHCP_OPT_DEFAULT_TTL,
    DHCP_OPT_TCP_TTL,
    DHCP_OPT_MTU,
    DHCP_OPT_LEASE_TIME,
    DHCP_OPT_T1,
    DHCP_OPT_T2
};
6249 | ||
/* DHCPv6 options that ovn-northd supports; mirrored into the southbound
 * DHCPv6_Options table by check_and_add_supported_dhcpv6_opts_to_sb_db(). */
static struct gen_opts_map supported_dhcpv6_opts[] = {
    DHCPV6_OPT_IA_ADDR,
    DHCPV6_OPT_SERVER_ID,
    DHCPV6_OPT_DOMAIN_SEARCH,
    DHCPV6_OPT_DNS_SERVER
};
6256 | ||
281977f7 NS |
6257 | static void |
6258 | check_and_add_supported_dhcp_opts_to_sb_db(struct northd_context *ctx) | |
6259 | { | |
6260 | struct hmap dhcp_opts_to_add = HMAP_INITIALIZER(&dhcp_opts_to_add); | |
6261 | for (size_t i = 0; (i < sizeof(supported_dhcp_opts) / | |
6262 | sizeof(supported_dhcp_opts[0])); i++) { | |
6263 | hmap_insert(&dhcp_opts_to_add, &supported_dhcp_opts[i].hmap_node, | |
6264 | dhcp_opt_hash(supported_dhcp_opts[i].name)); | |
6265 | } | |
6266 | ||
6267 | const struct sbrec_dhcp_options *opt_row, *opt_row_next; | |
6268 | SBREC_DHCP_OPTIONS_FOR_EACH_SAFE(opt_row, opt_row_next, ctx->ovnsb_idl) { | |
16936e4d | 6269 | struct gen_opts_map *dhcp_opt = |
281977f7 NS |
6270 | dhcp_opts_find(&dhcp_opts_to_add, opt_row->name); |
6271 | if (dhcp_opt) { | |
6272 | hmap_remove(&dhcp_opts_to_add, &dhcp_opt->hmap_node); | |
6273 | } else { | |
6274 | sbrec_dhcp_options_delete(opt_row); | |
6275 | } | |
6276 | } | |
6277 | ||
16936e4d | 6278 | struct gen_opts_map *opt; |
281977f7 NS |
6279 | HMAP_FOR_EACH (opt, hmap_node, &dhcp_opts_to_add) { |
6280 | struct sbrec_dhcp_options *sbrec_dhcp_option = | |
6281 | sbrec_dhcp_options_insert(ctx->ovnsb_txn); | |
6282 | sbrec_dhcp_options_set_name(sbrec_dhcp_option, opt->name); | |
6283 | sbrec_dhcp_options_set_code(sbrec_dhcp_option, opt->code); | |
6284 | sbrec_dhcp_options_set_type(sbrec_dhcp_option, opt->type); | |
6285 | } | |
6286 | ||
6287 | hmap_destroy(&dhcp_opts_to_add); | |
6288 | } | |
6289 | ||
33ac3c83 NS |
6290 | static void |
6291 | check_and_add_supported_dhcpv6_opts_to_sb_db(struct northd_context *ctx) | |
6292 | { | |
6293 | struct hmap dhcpv6_opts_to_add = HMAP_INITIALIZER(&dhcpv6_opts_to_add); | |
6294 | for (size_t i = 0; (i < sizeof(supported_dhcpv6_opts) / | |
6295 | sizeof(supported_dhcpv6_opts[0])); i++) { | |
6296 | hmap_insert(&dhcpv6_opts_to_add, &supported_dhcpv6_opts[i].hmap_node, | |
6297 | dhcp_opt_hash(supported_dhcpv6_opts[i].name)); | |
6298 | } | |
6299 | ||
6300 | const struct sbrec_dhcpv6_options *opt_row, *opt_row_next; | |
6301 | SBREC_DHCPV6_OPTIONS_FOR_EACH_SAFE(opt_row, opt_row_next, ctx->ovnsb_idl) { | |
16936e4d | 6302 | struct gen_opts_map *dhcp_opt = |
33ac3c83 NS |
6303 | dhcp_opts_find(&dhcpv6_opts_to_add, opt_row->name); |
6304 | if (dhcp_opt) { | |
6305 | hmap_remove(&dhcpv6_opts_to_add, &dhcp_opt->hmap_node); | |
6306 | } else { | |
6307 | sbrec_dhcpv6_options_delete(opt_row); | |
6308 | } | |
6309 | } | |
6310 | ||
16936e4d | 6311 | struct gen_opts_map *opt; |
33ac3c83 NS |
6312 | HMAP_FOR_EACH(opt, hmap_node, &dhcpv6_opts_to_add) { |
6313 | struct sbrec_dhcpv6_options *sbrec_dhcpv6_option = | |
6314 | sbrec_dhcpv6_options_insert(ctx->ovnsb_txn); | |
6315 | sbrec_dhcpv6_options_set_name(sbrec_dhcpv6_option, opt->name); | |
6316 | sbrec_dhcpv6_options_set_code(sbrec_dhcpv6_option, opt->code); | |
6317 | sbrec_dhcpv6_options_set_type(sbrec_dhcpv6_option, opt->type); | |
6318 | } | |
6319 | ||
6320 | hmap_destroy(&dhcpv6_opts_to_add); | |
6321 | } | |
6322 | ||
75ddb5f4 LR |
/* Per-table RBAC configuration for the "ovn-controller" role: for each
 * southbound table an ovn-controller may write, the columns it uses to
 * authorize itself ("auth") and the columns it may update. */
static const char *rbac_chassis_auth[] =
    {"name"};
static const char *rbac_chassis_update[] =
    {"nb_cfg", "external_ids", "encaps", "vtep_logical_switches"};

static const char *rbac_encap_auth[] =
    {"chassis_name"};
static const char *rbac_encap_update[] =
    {"type", "options", "ip"};

/* An empty authorization string means any client with the role may write
 * the listed columns. */
static const char *rbac_port_binding_auth[] =
    {""};
static const char *rbac_port_binding_update[] =
    {"chassis"};

static const char *rbac_mac_binding_auth[] =
    {""};
static const char *rbac_mac_binding_update[] =
    {"logical_port", "ip", "mac", "datapath"};

/* The expected contents of the southbound RBAC_Permission table, one entry
 * per table, terminated by an all-NULL sentinel.  'row' caches the matching
 * database row during check_and_update_rbac(). */
static struct rbac_perm_cfg {
    const char *table;      /* Southbound table name. */
    const char **auth;      /* Authorization columns. */
    int n_auth;
    bool insdel;            /* May the role insert/delete rows? */
    const char **update;    /* Columns the role may update. */
    int n_update;
    const struct sbrec_rbac_permission *row;  /* Matching SB row, if any;
                                               * reset each reconciliation. */
} rbac_perm_cfg[] = {
    {
        .table = "Chassis",
        .auth = rbac_chassis_auth,
        .n_auth = ARRAY_SIZE(rbac_chassis_auth),
        .insdel = true,
        .update = rbac_chassis_update,
        .n_update = ARRAY_SIZE(rbac_chassis_update),
        .row = NULL
    },{
        .table = "Encap",
        .auth = rbac_encap_auth,
        .n_auth = ARRAY_SIZE(rbac_encap_auth),
        .insdel = true,
        .update = rbac_encap_update,
        .n_update = ARRAY_SIZE(rbac_encap_update),
        .row = NULL
    },{
        .table = "Port_Binding",
        .auth = rbac_port_binding_auth,
        .n_auth = ARRAY_SIZE(rbac_port_binding_auth),
        .insdel = false,
        .update = rbac_port_binding_update,
        .n_update = ARRAY_SIZE(rbac_port_binding_update),
        .row = NULL
    },{
        .table = "MAC_Binding",
        .auth = rbac_mac_binding_auth,
        .n_auth = ARRAY_SIZE(rbac_mac_binding_auth),
        .insdel = true,
        .update = rbac_mac_binding_update,
        .n_update = ARRAY_SIZE(rbac_mac_binding_update),
        .row = NULL
    },{
        /* Sentinel terminating the array. */
        .table = NULL,
        .auth = NULL,
        .n_auth = 0,
        .insdel = false,
        .update = NULL,
        .n_update = 0,
        .row = NULL
    }
};
6394 | ||
6395 | static bool | |
6396 | ovn_rbac_validate_perm(const struct sbrec_rbac_permission *perm) | |
6397 | { | |
6398 | struct rbac_perm_cfg *pcfg; | |
6399 | int i, j, n_found; | |
6400 | ||
6401 | for (pcfg = rbac_perm_cfg; pcfg->table; pcfg++) { | |
6402 | if (!strcmp(perm->table, pcfg->table)) { | |
6403 | break; | |
6404 | } | |
6405 | } | |
6406 | if (!pcfg->table) { | |
6407 | return false; | |
6408 | } | |
6409 | if (perm->n_authorization != pcfg->n_auth || | |
6410 | perm->n_update != pcfg->n_update) { | |
6411 | return false; | |
6412 | } | |
6413 | if (perm->insert_delete != pcfg->insdel) { | |
6414 | return false; | |
6415 | } | |
6416 | /* verify perm->authorization vs. pcfg->auth */ | |
6417 | n_found = 0; | |
6418 | for (i = 0; i < pcfg->n_auth; i++) { | |
6419 | for (j = 0; j < perm->n_authorization; j++) { | |
6420 | if (!strcmp(pcfg->auth[i], perm->authorization[j])) { | |
6421 | n_found++; | |
6422 | break; | |
6423 | } | |
6424 | } | |
6425 | } | |
6426 | if (n_found != pcfg->n_auth) { | |
6427 | return false; | |
6428 | } | |
6429 | ||
6430 | /* verify perm->update vs. pcfg->update */ | |
6431 | n_found = 0; | |
6432 | for (i = 0; i < pcfg->n_update; i++) { | |
6433 | for (j = 0; j < perm->n_update; j++) { | |
6434 | if (!strcmp(pcfg->update[i], perm->update[j])) { | |
6435 | n_found++; | |
6436 | break; | |
6437 | } | |
6438 | } | |
6439 | } | |
6440 | if (n_found != pcfg->n_update) { | |
6441 | return false; | |
6442 | } | |
6443 | ||
6444 | /* Success, db state matches expected state */ | |
6445 | pcfg->row = perm; | |
6446 | return true; | |
6447 | } | |
6448 | ||
6449 | static void | |
6450 | ovn_rbac_create_perm(struct rbac_perm_cfg *pcfg, | |
6451 | struct northd_context *ctx, | |
6452 | const struct sbrec_rbac_role *rbac_role) | |
6453 | { | |
6454 | struct sbrec_rbac_permission *rbac_perm; | |
6455 | ||
6456 | rbac_perm = sbrec_rbac_permission_insert(ctx->ovnsb_txn); | |
6457 | sbrec_rbac_permission_set_table(rbac_perm, pcfg->table); | |
6458 | sbrec_rbac_permission_set_authorization(rbac_perm, | |
6459 | pcfg->auth, | |
6460 | pcfg->n_auth); | |
6461 | sbrec_rbac_permission_set_insert_delete(rbac_perm, pcfg->insdel); | |
6462 | sbrec_rbac_permission_set_update(rbac_perm, | |
6463 | pcfg->update, | |
6464 | pcfg->n_update); | |
6465 | sbrec_rbac_role_update_permissions_setkey(rbac_role, pcfg->table, | |
6466 | rbac_perm); | |
6467 | } | |
6468 | ||
6469 | static void | |
6470 | check_and_update_rbac(struct northd_context *ctx) | |
6471 | { | |
6472 | const struct sbrec_rbac_role *rbac_role = NULL; | |
6473 | const struct sbrec_rbac_permission *perm_row, *perm_next; | |
6474 | const struct sbrec_rbac_role *role_row, *role_row_next; | |
6475 | struct rbac_perm_cfg *pcfg; | |
6476 | ||
6477 | for (pcfg = rbac_perm_cfg; pcfg->table; pcfg++) { | |
6478 | pcfg->row = NULL; | |
6479 | } | |
6480 | ||
6481 | SBREC_RBAC_PERMISSION_FOR_EACH_SAFE (perm_row, perm_next, ctx->ovnsb_idl) { | |
6482 | if (!ovn_rbac_validate_perm(perm_row)) { | |
6483 | sbrec_rbac_permission_delete(perm_row); | |
6484 | } | |
6485 | } | |
6486 | SBREC_RBAC_ROLE_FOR_EACH_SAFE (role_row, role_row_next, ctx->ovnsb_idl) { | |
6487 | if (strcmp(role_row->name, "ovn-controller")) { | |
6488 | sbrec_rbac_role_delete(role_row); | |
6489 | } else { | |
6490 | rbac_role = role_row; | |
6491 | } | |
6492 | } | |
6493 | ||
6494 | if (!rbac_role) { | |
6495 | rbac_role = sbrec_rbac_role_insert(ctx->ovnsb_txn); | |
6496 | sbrec_rbac_role_set_name(rbac_role, "ovn-controller"); | |
6497 | } | |
6498 | ||
6499 | for (pcfg = rbac_perm_cfg; pcfg->table; pcfg++) { | |
6500 | if (!pcfg->row) { | |
6501 | ovn_rbac_create_perm(pcfg, ctx, rbac_role); | |
6502 | } | |
6503 | } | |
6504 | } | |
6505 | ||
fa183acc BP |
6506 | /* Updates the sb_cfg and hv_cfg columns in the northbound NB_Global table. */ |
6507 | static void | |
6508 | update_northbound_cfg(struct northd_context *ctx, | |
6509 | struct ovsdb_idl_loop *sb_loop) | |
6510 | { | |
6511 | /* Update northbound sb_cfg if appropriate. */ | |
6512 | const struct nbrec_nb_global *nbg = nbrec_nb_global_first(ctx->ovnnb_idl); | |
6513 | int64_t sb_cfg = sb_loop->cur_cfg; | |
6514 | if (nbg && sb_cfg && nbg->sb_cfg != sb_cfg) { | |
6515 | nbrec_nb_global_set_sb_cfg(nbg, sb_cfg); | |
6516 | } | |
6517 | ||
6518 | /* Update northbound hv_cfg if appropriate. */ | |
6519 | if (nbg) { | |
6520 | /* Find minimum nb_cfg among all chassis. */ | |
6521 | const struct sbrec_chassis *chassis; | |
6522 | int64_t hv_cfg = nbg->nb_cfg; | |
6523 | SBREC_CHASSIS_FOR_EACH (chassis, ctx->ovnsb_idl) { | |
6524 | if (chassis->nb_cfg < hv_cfg) { | |
6525 | hv_cfg = chassis->nb_cfg; | |
6526 | } | |
6527 | } | |
6528 | ||
6529 | /* Update hv_cfg. */ | |
6530 | if (nbg->hv_cfg != hv_cfg) { | |
6531 | nbrec_nb_global_set_hv_cfg(nbg, hv_cfg); | |
6532 | } | |
6533 | } | |
6534 | } | |
6535 | ||
/* Handle a fairly small set of changes in the southbound database. */
static void
ovnsb_db_run(struct northd_context *ctx, struct ovsdb_idl_loop *sb_loop)
{
    /* Nothing to do unless we can write to the northbound database and have
     * connected to the southbound database at least once. */
    if (!ctx->ovnnb_txn || !ovsdb_idl_has_ever_connected(ctx->ovnsb_idl)) {
        return;
    }

    update_logical_port_status(ctx);
    update_northbound_cfg(ctx, sb_loop);
}
6547 | \f | |
ac0630a2 RB |
/* Parses ovn-northd's command-line options, setting the file-scope
 * 'ovnsb_db' and 'ovnnb_db' connection strings (falling back to the library
 * defaults when not given) and handling the shared daemon/vlog/SSL option
 * groups.  Exits the process directly for --help, --options and --version. */
static void
parse_options(int argc OVS_UNUSED, char *argv[] OVS_UNUSED)
{
    enum {
        DAEMON_OPTION_ENUMS,
        VLOG_OPTION_ENUMS,
        SSL_OPTION_ENUMS,
    };
    static const struct option long_options[] = {
        {"ovnsb-db", required_argument, NULL, 'd'},
        {"ovnnb-db", required_argument, NULL, 'D'},
        {"help", no_argument, NULL, 'h'},
        {"options", no_argument, NULL, 'o'},
        {"version", no_argument, NULL, 'V'},
        DAEMON_LONG_OPTIONS,
        VLOG_LONG_OPTIONS,
        STREAM_SSL_LONG_OPTIONS,
        {NULL, 0, NULL, 0},
    };
    char *short_options = ovs_cmdl_long_options_to_short_options(long_options);

    for (;;) {
        int c;

        c = getopt_long(argc, argv, short_options, long_options, NULL);
        if (c == -1) {
            break;
        }

        switch (c) {
        /* Option groups handled by the daemon, vlog and SSL libraries. */
        DAEMON_OPTION_HANDLERS;
        VLOG_OPTION_HANDLERS;
        STREAM_SSL_OPTION_HANDLERS;

        case 'd':
            ovnsb_db = optarg;
            break;

        case 'D':
            ovnnb_db = optarg;
            break;

        case 'h':
            usage();
            exit(EXIT_SUCCESS);

        case 'o':
            ovs_cmdl_print_options(long_options);
            exit(EXIT_SUCCESS);

        case 'V':
            ovs_print_version(0, 0);
            exit(EXIT_SUCCESS);

        default:
            break;
        }
    }

    /* Fall back to the standard database locations when not specified. */
    if (!ovnsb_db) {
        ovnsb_db = default_sb_db();
    }

    if (!ovnnb_db) {
        ovnnb_db = default_nb_db();
    }

    free(short_options);
}
6617 | ||
5868eb24 BP |
/* Monitors 'column' in the IDL but suppresses change alerts for it: these
 * are columns that ovn-northd itself writes, so being woken up when they
 * change would only cause useless recomputation. */
static void
add_column_noalert(struct ovsdb_idl *idl,
                   const struct ovsdb_idl_column *column)
{
    ovsdb_idl_add_column(idl, column);
    ovsdb_idl_omit_alert(idl, column);
}
6625 | ||
ac0630a2 RB |
int
main(int argc, char *argv[])
{
    int res = EXIT_SUCCESS;
    struct unixctl_server *unixctl;
    int retval;
    bool exiting;

    fatal_ignore_sigpipe();
    ovs_cmdl_proctitle_init(argc, argv);
    set_program_name(argv[0]);
    service_start(&argc, &argv);
    parse_options(argc, argv);

    daemonize_start(false);

    retval = unixctl_server_create(NULL, &unixctl);
    if (retval) {
        exit(EXIT_FAILURE);
    }
    unixctl_command_register("exit", "", 0, 0, ovn_northd_exit, &exiting);

    daemonize_complete();

    /* We want to detect (almost) all changes to the ovn-nb db. */
    struct ovsdb_idl_loop ovnnb_idl_loop = OVSDB_IDL_LOOP_INITIALIZER(
        ovsdb_idl_create(ovnnb_db, &nbrec_idl_class, true, true));
    /* ...but not to the status columns we write ourselves. */
    ovsdb_idl_omit_alert(ovnnb_idl_loop.idl, &nbrec_nb_global_col_sb_cfg);
    ovsdb_idl_omit_alert(ovnnb_idl_loop.idl, &nbrec_nb_global_col_hv_cfg);

    /* We want to detect only selected changes to the ovn-sb db.
     * Tables/columns not registered below are not monitored at all;
     * columns added via add_column_noalert() are replicated locally but do
     * not wake the main loop when they change. */
    struct ovsdb_idl_loop ovnsb_idl_loop = OVSDB_IDL_LOOP_INITIALIZER(
        ovsdb_idl_create(ovnsb_db, &sbrec_idl_class, false, true));

    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_sb_global);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_sb_global_col_nb_cfg);

    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_logical_flow);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_logical_flow_col_logical_datapath);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_pipeline);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_table_id);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_priority);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_match);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_actions);

    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_multicast_group);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_multicast_group_col_datapath);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_multicast_group_col_tunnel_key);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_multicast_group_col_name);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_multicast_group_col_ports);

    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_datapath_binding);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_datapath_binding_col_tunnel_key);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_datapath_binding_col_external_ids);

    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_port_binding);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_datapath);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_port_binding_col_logical_port);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_port_binding_col_tunnel_key);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_port_binding_col_parent_port);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_tag);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_type);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_options);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_mac);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_port_binding_col_nat_addresses);
    /* 'chassis' and the gateway-chassis columns are written by
     * ovn-controller, so we DO want to be alerted when they change. */
    ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_port_binding_col_chassis);
    ovsdb_idl_add_column(ovnsb_idl_loop.idl,
                         &sbrec_port_binding_col_gateway_chassis);
    ovsdb_idl_add_column(ovnsb_idl_loop.idl,
                         &sbrec_gateway_chassis_col_chassis);
    ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_gateway_chassis_col_name);
    ovsdb_idl_add_column(ovnsb_idl_loop.idl,
                         &sbrec_gateway_chassis_col_priority);
    ovsdb_idl_add_column(ovnsb_idl_loop.idl,
                         &sbrec_gateway_chassis_col_external_ids);
    ovsdb_idl_add_column(ovnsb_idl_loop.idl,
                         &sbrec_gateway_chassis_col_options);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_port_binding_col_external_ids);
    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_mac_binding);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_mac_binding_col_datapath);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_mac_binding_col_ip);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_mac_binding_col_mac);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_mac_binding_col_logical_port);
    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_dhcp_options);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dhcp_options_col_code);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dhcp_options_col_type);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dhcp_options_col_name);
    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_dhcpv6_options);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dhcpv6_options_col_code);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dhcpv6_options_col_type);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dhcpv6_options_col_name);
    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_address_set);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_address_set_col_name);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_address_set_col_addresses);

    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_dns);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dns_col_datapaths);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dns_col_records);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dns_col_external_ids);

    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_rbac_role);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_rbac_role_col_name);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_rbac_role_col_permissions);

    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_rbac_permission);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_rbac_permission_col_table);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_rbac_permission_col_authorization);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_rbac_permission_col_insert_delete);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_rbac_permission_col_update);

    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_chassis);
    ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_chassis_col_nb_cfg);
    ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_chassis_col_name);

    /* Ensure that only a single ovn-northd is active in the deployment by
     * acquiring a lock called "ovn_northd" on the southbound database
     * and then only performing DB transactions if the lock is held. */
    ovsdb_idl_set_lock(ovnsb_idl_loop.idl, "ovn_northd");
    bool had_lock = false;

    /* Main loop. */
    exiting = false;
    while (!exiting) {
        struct northd_context ctx = {
            .ovnnb_idl = ovnnb_idl_loop.idl,
            .ovnnb_txn = ovsdb_idl_loop_run(&ovnnb_idl_loop),
            .ovnsb_idl = ovnsb_idl_loop.idl,
            .ovnsb_txn = ovsdb_idl_loop_run(&ovnsb_idl_loop),
        };

        /* Log active/standby transitions. */
        if (!had_lock && ovsdb_idl_has_lock(ovnsb_idl_loop.idl)) {
            VLOG_INFO("ovn-northd lock acquired. "
                      "This ovn-northd instance is now active.");
            had_lock = true;
        } else if (had_lock && !ovsdb_idl_has_lock(ovnsb_idl_loop.idl)) {
            VLOG_INFO("ovn-northd lock lost. "
                      "This ovn-northd instance is now on standby.");
            had_lock = false;
        }

        /* Only the active instance (lock holder) processes the databases. */
        struct chassis_index chassis_index;
        bool destroy_chassis_index = false;
        if (ovsdb_idl_has_lock(ovnsb_idl_loop.idl)) {
            chassis_index_init(&chassis_index, ctx.ovnsb_idl);
            destroy_chassis_index = true;

            ovnnb_db_run(&ctx, &chassis_index, &ovnsb_idl_loop);
            ovnsb_db_run(&ctx, &ovnsb_idl_loop);
            if (ctx.ovnsb_txn) {
                check_and_add_supported_dhcp_opts_to_sb_db(&ctx);
                check_and_add_supported_dhcpv6_opts_to_sb_db(&ctx);
                check_and_update_rbac(&ctx);
            }
        }

        unixctl_server_run(unixctl);
        unixctl_server_wait(unixctl);
        if (exiting) {
            poll_immediate_wake();
        }
        ovsdb_idl_loop_commit_and_wait(&ovnnb_idl_loop);
        ovsdb_idl_loop_commit_and_wait(&ovnsb_idl_loop);

        poll_block();
        if (should_service_stop()) {
            exiting = true;
        }

        if (destroy_chassis_index) {
            chassis_index_destroy(&chassis_index);
        }
    }

    unixctl_server_destroy(unixctl);
    ovsdb_idl_loop_destroy(&ovnnb_idl_loop);
    ovsdb_idl_loop_destroy(&ovnsb_idl_loop);
    service_stop();

    exit(res);
}
7b303ff9 AW |
6820 | |
/* unixctl "exit" command handler: sets the main loop's exit flag (passed in
 * via 'exiting_') so the daemon shuts down cleanly on its next iteration. */
static void
ovn_northd_exit(struct unixctl_conn *conn, int argc OVS_UNUSED,
                const char *argv[] OVS_UNUSED, void *exiting_)
{
    bool *exiting = exiting_;
    *exiting = true;

    unixctl_command_reply(conn, NULL);
}