]>
Commit | Line | Data |
---|---|---|
ac0630a2 RB |
1 | /* |
2 | * Licensed under the Apache License, Version 2.0 (the "License"); | |
3 | * you may not use this file except in compliance with the License. | |
4 | * You may obtain a copy of the License at: | |
5 | * | |
6 | * http://www.apache.org/licenses/LICENSE-2.0 | |
7 | * | |
8 | * Unless required by applicable law or agreed to in writing, software | |
9 | * distributed under the License is distributed on an "AS IS" BASIS, | |
10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
11 | * See the License for the specific language governing permissions and | |
12 | * limitations under the License. | |
13 | */ | |
14 | ||
15 | #include <config.h> | |
16 | ||
17 | #include <getopt.h> | |
18 | #include <stdlib.h> | |
19 | #include <stdio.h> | |
20 | ||
b511690b | 21 | #include "bitmap.h" |
ac0630a2 | 22 | #include "command-line.h" |
67d9b930 | 23 | #include "daemon.h" |
ac0630a2 | 24 | #include "dirs.h" |
3e8a2ad1 | 25 | #include "openvswitch/dynamic-string.h" |
ac0630a2 | 26 | #include "fatal-signal.h" |
4edcdcf4 | 27 | #include "hash.h" |
ee89ea7b TW |
28 | #include "openvswitch/hmap.h" |
29 | #include "openvswitch/json.h" | |
8b2ed684 | 30 | #include "ovn/lex.h" |
06a26dd2 | 31 | #include "ovn/lib/logical-fields.h" |
281977f7 | 32 | #include "ovn/lib/ovn-dhcp.h" |
e3df8838 BP |
33 | #include "ovn/lib/ovn-nb-idl.h" |
34 | #include "ovn/lib/ovn-sb-idl.h" | |
218351dd | 35 | #include "ovn/lib/ovn-util.h" |
a6095f81 | 36 | #include "ovn/actions.h" |
064d7f84 | 37 | #include "packets.h" |
ac0630a2 | 38 | #include "poll-loop.h" |
5868eb24 | 39 | #include "smap.h" |
7a15be69 | 40 | #include "sset.h" |
ac0630a2 RB |
41 | #include "stream.h" |
42 | #include "stream-ssl.h" | |
7b303ff9 | 43 | #include "unixctl.h" |
ac0630a2 | 44 | #include "util.h" |
4edcdcf4 | 45 | #include "uuid.h" |
ac0630a2 RB |
46 | #include "openvswitch/vlog.h" |
47 | ||
2e2762d4 | 48 | VLOG_DEFINE_THIS_MODULE(ovn_northd); |
ac0630a2 | 49 | |
7b303ff9 AW |
50 | static unixctl_cb_func ovn_northd_exit; |
51 | ||
2e2762d4 | 52 | struct northd_context { |
f93818dd | 53 | struct ovsdb_idl *ovnnb_idl; |
ec78987f | 54 | struct ovsdb_idl *ovnsb_idl; |
f93818dd | 55 | struct ovsdb_idl_txn *ovnnb_txn; |
3c78b3ca | 56 | struct ovsdb_idl_txn *ovnsb_txn; |
f93818dd RB |
57 | }; |
58 | ||
ac0630a2 | 59 | static const char *ovnnb_db; |
ec78987f | 60 | static const char *ovnsb_db; |
ac0630a2 | 61 | |
8639f9be ND |
62 | #define MAC_ADDR_PREFIX 0x0A0000000000ULL |
63 | #define MAC_ADDR_SPACE 0xffffff | |
64 | ||
65 | /* MAC address management (macam) table of "struct eth_addr"s, that holds the | |
66 | * MAC addresses allocated by the OVN ipam module. */ | |
67 | static struct hmap macam = HMAP_INITIALIZER(&macam); | |
b511690b GS |
68 | |
69 | #define MAX_OVN_TAGS 4096 | |
880fcd14 BP |
70 | \f |
71 | /* Pipeline stages. */ | |
ac0630a2 | 72 | |
880fcd14 BP |
73 | /* The two pipelines in an OVN logical flow table. */ |
74 | enum ovn_pipeline { | |
75 | P_IN, /* Ingress pipeline. */ | |
76 | P_OUT /* Egress pipeline. */ | |
77 | }; | |
091e3af9 | 78 | |
880fcd14 BP |
79 | /* The two purposes for which ovn-northd uses OVN logical datapaths. */ |
80 | enum ovn_datapath_type { | |
81 | DP_SWITCH, /* OVN logical switch. */ | |
82 | DP_ROUTER /* OVN logical router. */ | |
091e3af9 JP |
83 | }; |
84 | ||
880fcd14 BP |
85 | /* Returns an "enum ovn_stage" built from the arguments. |
86 | * | |
87 | * (It's better to use ovn_stage_build() for type-safety reasons, but inline | |
88 | * functions can't be used in enums or switch cases.) */ | |
89 | #define OVN_STAGE_BUILD(DP_TYPE, PIPELINE, TABLE) \ | |
90 | (((DP_TYPE) << 9) | ((PIPELINE) << 8) | (TABLE)) | |
91 | ||
92 | /* A stage within an OVN logical switch or router. | |
091e3af9 | 93 | * |
880fcd14 BP |
94 | * An "enum ovn_stage" indicates whether the stage is part of a logical switch |
95 | * or router, whether the stage is part of the ingress or egress pipeline, and | |
96 | * the table within that pipeline. The first three components are combined to | |
685f4dfe | 97 | * form the stage's full name, e.g. S_SWITCH_IN_PORT_SEC_L2, |
880fcd14 BP |
98 | * S_ROUTER_OUT_DELIVERY. */ |
99 | enum ovn_stage { | |
1a03fc7d BS |
100 | #define PIPELINE_STAGES \ |
101 | /* Logical switch ingress stages. */ \ | |
102 | PIPELINE_STAGE(SWITCH, IN, PORT_SEC_L2, 0, "ls_in_port_sec_l2") \ | |
103 | PIPELINE_STAGE(SWITCH, IN, PORT_SEC_IP, 1, "ls_in_port_sec_ip") \ | |
104 | PIPELINE_STAGE(SWITCH, IN, PORT_SEC_ND, 2, "ls_in_port_sec_nd") \ | |
105 | PIPELINE_STAGE(SWITCH, IN, PRE_ACL, 3, "ls_in_pre_acl") \ | |
106 | PIPELINE_STAGE(SWITCH, IN, PRE_LB, 4, "ls_in_pre_lb") \ | |
107 | PIPELINE_STAGE(SWITCH, IN, PRE_STATEFUL, 5, "ls_in_pre_stateful") \ | |
108 | PIPELINE_STAGE(SWITCH, IN, ACL, 6, "ls_in_acl") \ | |
109 | PIPELINE_STAGE(SWITCH, IN, QOS_MARK, 7, "ls_in_qos_mark") \ | |
110 | PIPELINE_STAGE(SWITCH, IN, LB, 8, "ls_in_lb") \ | |
111 | PIPELINE_STAGE(SWITCH, IN, STATEFUL, 9, "ls_in_stateful") \ | |
112 | PIPELINE_STAGE(SWITCH, IN, ARP_ND_RSP, 10, "ls_in_arp_rsp") \ | |
113 | PIPELINE_STAGE(SWITCH, IN, DHCP_OPTIONS, 11, "ls_in_dhcp_options") \ | |
114 | PIPELINE_STAGE(SWITCH, IN, DHCP_RESPONSE, 12, "ls_in_dhcp_response") \ | |
115 | PIPELINE_STAGE(SWITCH, IN, L2_LKUP, 13, "ls_in_l2_lkup") \ | |
e0c9e58b JP |
116 | \ |
117 | /* Logical switch egress stages. */ \ | |
7a15be69 GS |
118 | PIPELINE_STAGE(SWITCH, OUT, PRE_LB, 0, "ls_out_pre_lb") \ |
119 | PIPELINE_STAGE(SWITCH, OUT, PRE_ACL, 1, "ls_out_pre_acl") \ | |
120 | PIPELINE_STAGE(SWITCH, OUT, PRE_STATEFUL, 2, "ls_out_pre_stateful") \ | |
121 | PIPELINE_STAGE(SWITCH, OUT, LB, 3, "ls_out_lb") \ | |
122 | PIPELINE_STAGE(SWITCH, OUT, ACL, 4, "ls_out_acl") \ | |
1a03fc7d BS |
123 | PIPELINE_STAGE(SWITCH, OUT, QOS_MARK, 5, "ls_out_qos_mark") \ |
124 | PIPELINE_STAGE(SWITCH, OUT, STATEFUL, 6, "ls_out_stateful") \ | |
125 | PIPELINE_STAGE(SWITCH, OUT, PORT_SEC_IP, 7, "ls_out_port_sec_ip") \ | |
126 | PIPELINE_STAGE(SWITCH, OUT, PORT_SEC_L2, 8, "ls_out_port_sec_l2") \ | |
e0c9e58b JP |
127 | \ |
128 | /* Logical router ingress stages. */ \ | |
129 | PIPELINE_STAGE(ROUTER, IN, ADMISSION, 0, "lr_in_admission") \ | |
130 | PIPELINE_STAGE(ROUTER, IN, IP_INPUT, 1, "lr_in_ip_input") \ | |
cc4583aa GS |
131 | PIPELINE_STAGE(ROUTER, IN, DEFRAG, 2, "lr_in_defrag") \ |
132 | PIPELINE_STAGE(ROUTER, IN, UNSNAT, 3, "lr_in_unsnat") \ | |
133 | PIPELINE_STAGE(ROUTER, IN, DNAT, 4, "lr_in_dnat") \ | |
134 | PIPELINE_STAGE(ROUTER, IN, IP_ROUTING, 5, "lr_in_ip_routing") \ | |
135 | PIPELINE_STAGE(ROUTER, IN, ARP_RESOLVE, 6, "lr_in_arp_resolve") \ | |
41a15b71 MS |
136 | PIPELINE_STAGE(ROUTER, IN, GW_REDIRECT, 7, "lr_in_gw_redirect") \ |
137 | PIPELINE_STAGE(ROUTER, IN, ARP_REQUEST, 8, "lr_in_arp_request") \ | |
e0c9e58b JP |
138 | \ |
139 | /* Logical router egress stages. */ \ | |
06a26dd2 MS |
140 | PIPELINE_STAGE(ROUTER, OUT, UNDNAT, 0, "lr_out_undnat") \ |
141 | PIPELINE_STAGE(ROUTER, OUT, SNAT, 1, "lr_out_snat") \ | |
142 | PIPELINE_STAGE(ROUTER, OUT, EGR_LOOP, 2, "lr_out_egr_loop") \ | |
143 | PIPELINE_STAGE(ROUTER, OUT, DELIVERY, 3, "lr_out_delivery") | |
880fcd14 BP |
144 | |
145 | #define PIPELINE_STAGE(DP_TYPE, PIPELINE, STAGE, TABLE, NAME) \ | |
146 | S_##DP_TYPE##_##PIPELINE##_##STAGE \ | |
147 | = OVN_STAGE_BUILD(DP_##DP_TYPE, P_##PIPELINE, TABLE), | |
148 | PIPELINE_STAGES | |
149 | #undef PIPELINE_STAGE | |
091e3af9 JP |
150 | }; |
151 | ||
6bb4a18e JP |
152 | /* Due to various hard-coded priorities need to implement ACLs, the |
153 | * northbound database supports a smaller range of ACL priorities than | |
154 | * are available to logical flows. This value is added to an ACL | |
155 | * priority to determine the ACL's logical flow priority. */ | |
156 | #define OVN_ACL_PRI_OFFSET 1000 | |
157 | ||
06a26dd2 | 158 | /* Register definitions specific to switches. */ |
facf8652 | 159 | #define REGBIT_CONNTRACK_DEFRAG "reg0[0]" |
fa313a8c | 160 | #define REGBIT_CONNTRACK_COMMIT "reg0[1]" |
7a15be69 | 161 | #define REGBIT_CONNTRACK_NAT "reg0[2]" |
281977f7 | 162 | #define REGBIT_DHCP_OPTS_RESULT "reg0[3]" |
facf8652 | 163 | |
06a26dd2 MS |
164 | /* Register definitions for switches and routers. */ |
165 | #define REGBIT_NAT_REDIRECT "reg9[0]" | |
166 | /* Indicate that this packet has been recirculated using egress | |
167 | * loopback. This allows certain checks to be bypassed, such as a | |
168 | * logical router dropping packets with source IP address equals | |
169 | * one of the logical router's own IP addresses. */ | |
170 | #define REGBIT_EGRESS_LOOPBACK "reg9[1]" | |
171 | ||
880fcd14 BP |
/* Returns an "enum ovn_stage" built from the arguments.
 *
 * Type-safe wrapper around the OVN_STAGE_BUILD() macro; prefer it wherever a
 * constant expression is not required. */
static enum ovn_stage
ovn_stage_build(enum ovn_datapath_type dp_type, enum ovn_pipeline pipeline,
                uint8_t table)
{
    return OVN_STAGE_BUILD(dp_type, pipeline, table);
}
179 | ||
180 | /* Returns the pipeline to which 'stage' belongs. */ | |
181 | static enum ovn_pipeline | |
182 | ovn_stage_get_pipeline(enum ovn_stage stage) | |
183 | { | |
184 | return (stage >> 8) & 1; | |
185 | } | |
186 | ||
187 | /* Returns the table to which 'stage' belongs. */ | |
188 | static uint8_t | |
189 | ovn_stage_get_table(enum ovn_stage stage) | |
190 | { | |
191 | return stage & 0xff; | |
192 | } | |
193 | ||
/* Returns a string name for 'stage' (e.g. "ls_in_acl"), generated from the
 * PIPELINE_STAGES X-macro table, or "<unknown>" if 'stage' is not one of the
 * enumerated stages. */
static const char *
ovn_stage_to_str(enum ovn_stage stage)
{
    switch (stage) {
#define PIPELINE_STAGE(DP_TYPE, PIPELINE, STAGE, TABLE, NAME) \
        case S_##DP_TYPE##_##PIPELINE##_##STAGE: return NAME;
    PIPELINE_STAGES
#undef PIPELINE_STAGE
        default: return "<unknown>";
    }
}
9a9961d2 BP |
206 | |
/* Returns the type of the datapath to which a flow with the given 'stage' may
 * be added.
 *
 * Unlike ovn_stage_to_str(), an unknown stage here is a programming error and
 * aborts via OVS_NOT_REACHED(). */
static enum ovn_datapath_type
ovn_stage_to_datapath_type(enum ovn_stage stage)
{
    switch (stage) {
#define PIPELINE_STAGE(DP_TYPE, PIPELINE, STAGE, TABLE, NAME) \
        case S_##DP_TYPE##_##PIPELINE##_##STAGE: return DP_##DP_TYPE;
    PIPELINE_STAGES
#undef PIPELINE_STAGE
        default: OVS_NOT_REACHED();
    }
}
880fcd14 | 220 | \f |
ac0630a2 RB |
/* Prints ovn-northd's command-line usage message to stdout, including the
 * shared daemon/vlog/stream option summaries. */
static void
usage(void)
{
    printf("\
%s: OVN northbound management daemon\n\
usage: %s [OPTIONS]\n\
\n\
Options:\n\
  --ovnnb-db=DATABASE       connect to ovn-nb database at DATABASE\n\
                            (default: %s)\n\
  --ovnsb-db=DATABASE       connect to ovn-sb database at DATABASE\n\
                            (default: %s)\n\
  -h, --help                display this help message\n\
  -o, --options             list available options\n\
  -V, --version             display version information\n\
", program_name, program_name, default_nb_db(), default_sb_db());
    daemon_usage();
    vlog_usage();
    stream_usage("database", true, true, false);
}
241 | \f | |
5868eb24 BP |
/* One allocated tunnel ID, stored in a "tunnel ID set" hmap hashed on
 * hash_int(tnlid, 0) (see add_tnlid()). */
struct tnlid_node {
    struct hmap_node hmap_node; /* In a tunnel ID set hmap. */
    uint32_t tnlid;             /* The allocated tunnel key. */
};
246 | ||
247 | static void | |
248 | destroy_tnlids(struct hmap *tnlids) | |
4edcdcf4 | 249 | { |
4ec3d7c7 DDP |
250 | struct tnlid_node *node; |
251 | HMAP_FOR_EACH_POP (node, hmap_node, tnlids) { | |
5868eb24 BP |
252 | free(node); |
253 | } | |
254 | hmap_destroy(tnlids); | |
255 | } | |
256 | ||
257 | static void | |
258 | add_tnlid(struct hmap *set, uint32_t tnlid) | |
259 | { | |
260 | struct tnlid_node *node = xmalloc(sizeof *node); | |
261 | hmap_insert(set, &node->hmap_node, hash_int(tnlid, 0)); | |
262 | node->tnlid = tnlid; | |
4edcdcf4 RB |
263 | } |
264 | ||
4edcdcf4 | 265 | static bool |
5868eb24 | 266 | tnlid_in_use(const struct hmap *set, uint32_t tnlid) |
4edcdcf4 | 267 | { |
5868eb24 BP |
268 | const struct tnlid_node *node; |
269 | HMAP_FOR_EACH_IN_BUCKET (node, hmap_node, hash_int(tnlid, 0), set) { | |
270 | if (node->tnlid == tnlid) { | |
271 | return true; | |
272 | } | |
273 | } | |
274 | return false; | |
275 | } | |
4edcdcf4 | 276 | |
5868eb24 BP |
/* Allocates a tunnel ID in the range [1, max] that is not already in 'set',
 * scanning round-robin starting just after '*hint'.  On success, records the
 * ID in 'set', updates '*hint', and returns the ID.  If every ID in the range
 * is in use, logs a rate-limited warning that names the 'name' ID space and
 * returns 0 (never a valid tunnel ID). */
static uint32_t
allocate_tnlid(struct hmap *set, const char *name, uint32_t max,
               uint32_t *hint)
{
    /* Walk the full cycle [*hint + 1 .. max, 1 .. *hint], wrapping from 'max'
     * back to 1, and stop if we come all the way around to '*hint'. */
    for (uint32_t tnlid = *hint + 1; tnlid != *hint;
         tnlid = tnlid + 1 <= max ? tnlid + 1 : 1) {
        if (!tnlid_in_use(set, tnlid)) {
            add_tnlid(set, tnlid);
            *hint = tnlid;
            return tnlid;
        }
    }

    static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
    VLOG_WARN_RL(&rl, "all %s tunnel ids exhausted", name);
    return 0;
}
294 | \f | |
a6095f81 BS |
/* One qdisc queue ID allocated on a particular chassis, stored in an hmap
 * hashed on the chassis UUID (see add_chassis_queue()). */
struct ovn_chassis_qdisc_queues {
    struct hmap_node key_node;  /* Hashed on 'chassis_uuid'. */
    uint32_t queue_id;          /* Allocated queue ID on that chassis. */
    struct uuid chassis_uuid;   /* UUID of the owning chassis. */
};
300 | ||
301 | static void | |
302 | destroy_chassis_queues(struct hmap *set) | |
303 | { | |
304 | struct ovn_chassis_qdisc_queues *node; | |
305 | HMAP_FOR_EACH_POP (node, key_node, set) { | |
306 | free(node); | |
307 | } | |
308 | hmap_destroy(set); | |
309 | } | |
310 | ||
311 | static void | |
312 | add_chassis_queue(struct hmap *set, struct uuid *chassis_uuid, | |
313 | uint32_t queue_id) | |
314 | { | |
315 | struct ovn_chassis_qdisc_queues *node = xmalloc(sizeof *node); | |
316 | node->queue_id = queue_id; | |
317 | memcpy(&node->chassis_uuid, chassis_uuid, sizeof node->chassis_uuid); | |
318 | hmap_insert(set, &node->key_node, uuid_hash(chassis_uuid)); | |
319 | } | |
320 | ||
321 | static bool | |
322 | chassis_queueid_in_use(const struct hmap *set, struct uuid *chassis_uuid, | |
323 | uint32_t queue_id) | |
324 | { | |
325 | const struct ovn_chassis_qdisc_queues *node; | |
326 | HMAP_FOR_EACH_WITH_HASH (node, key_node, uuid_hash(chassis_uuid), set) { | |
327 | if (uuid_equals(chassis_uuid, &node->chassis_uuid) | |
328 | && node->queue_id == queue_id) { | |
329 | return true; | |
330 | } | |
331 | } | |
332 | return false; | |
333 | } | |
334 | ||
335 | static uint32_t | |
336 | allocate_chassis_queueid(struct hmap *set, struct sbrec_chassis *chassis) | |
337 | { | |
338 | for (uint32_t queue_id = QDISC_MIN_QUEUE_ID + 1; | |
339 | queue_id <= QDISC_MAX_QUEUE_ID; | |
340 | queue_id++) { | |
341 | if (!chassis_queueid_in_use(set, &chassis->header_.uuid, queue_id)) { | |
342 | add_chassis_queue(set, &chassis->header_.uuid, queue_id); | |
343 | return queue_id; | |
344 | } | |
345 | } | |
346 | ||
347 | static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1); | |
348 | VLOG_WARN_RL(&rl, "all %s queue ids exhausted", chassis->name); | |
349 | return 0; | |
350 | } | |
351 | ||
352 | static void | |
353 | free_chassis_queueid(struct hmap *set, struct sbrec_chassis *chassis, | |
354 | uint32_t queue_id) | |
355 | { | |
356 | struct ovn_chassis_qdisc_queues *node; | |
357 | HMAP_FOR_EACH_WITH_HASH (node, key_node, | |
358 | uuid_hash(&chassis->header_.uuid), | |
359 | set) { | |
360 | if (uuid_equals(&chassis->header_.uuid, &node->chassis_uuid) | |
361 | && node->queue_id == queue_id) { | |
362 | hmap_remove(set, &node->key_node); | |
363 | break; | |
364 | } | |
365 | } | |
366 | } | |
367 | ||
/* Returns true if 'opts' configures any QoS parameter that requires a qdisc
 * queue, i.e. "qos_max_rate" or "qos_burst". */
static inline bool
port_has_qos_params(const struct smap *opts)
{
    return (smap_get(opts, "qos_max_rate") != NULL
            || smap_get(opts, "qos_burst") != NULL);
}
374 | \f | |
161ea2c8 NS |
375 | |
/* Per-logical-switch IPAM state, built from other_config:subnet and
 * other_config:exclude_ips by init_ipam_info_for_datapath(). */
struct ipam_info {
    uint32_t start_ipv4;            /* First allocatable IPv4, host order. */
    size_t total_ipv4s;             /* Number of addresses in the subnet. */
    unsigned long *allocated_ipv4s; /* A bitmap of allocated IPv4s */
};
381 | ||
9975d7be BP |
/* An OVN logical datapath: a logical switch (nbs) or logical router (nbr)
 * joined with its southbound Datapath_Binding row (sb).
 *
 * The 'key' comes from nbs->header_.uuid or nbr->header_.uuid or
 * sb->external_ids:logical-switch. */
struct ovn_datapath {
    struct hmap_node key_node;  /* Index on 'key'. */
    struct uuid key;            /* (nbs/nbr)->header_.uuid. */

    const struct nbrec_logical_switch *nbs;  /* May be NULL. */
    const struct nbrec_logical_router *nbr;  /* May be NULL. */
    const struct sbrec_datapath_binding *sb; /* May be NULL. */

    struct ovs_list list;       /* In list of similar records. */

    /* Logical switch data. */
    struct ovn_port **router_ports; /* Ports that peer with router ports. */
    size_t n_router_ports;

    struct hmap port_tnlids;    /* Port tunnel keys in use (tnlid_node). */
    uint32_t port_key_hint;     /* Round-robin hint for allocate_tnlid(). */

    bool has_unknown;           /* NOTE(review): set elsewhere in this file;
                                 * presumably true if some port has an
                                 * "unknown" address -- confirm. */

    /* IPAM data. */
    struct ipam_info *ipam_info;

    /* OVN northd only needs to know about the logical router gateway port for
     * NAT on a distributed router.  This "distributed gateway port" is
     * populated only when there is a "redirect-chassis" specified for one of
     * the ports on the logical router.  Otherwise this will be NULL. */
    struct ovn_port *l3dgw_port;
    /* The "derived" OVN port representing the instance of l3dgw_port on
     * the "redirect-chassis". */
    struct ovn_port *l3redirect_port;
};
415 | ||
/* One MAC address handed out by the OVN IPAM module, stored in the global
 * "macam" hmap. */
struct macam_node {
    struct hmap_node hmap_node;
    struct eth_addr mac_addr; /* Allocated MAC address. */
};
420 | ||
8639f9be ND |
421 | static void |
422 | cleanup_macam(struct hmap *macam) | |
423 | { | |
424 | struct macam_node *node; | |
425 | HMAP_FOR_EACH_POP (node, hmap_node, macam) { | |
426 | free(node); | |
427 | } | |
428 | } | |
429 | ||
5868eb24 BP |
430 | static struct ovn_datapath * |
431 | ovn_datapath_create(struct hmap *datapaths, const struct uuid *key, | |
9975d7be BP |
432 | const struct nbrec_logical_switch *nbs, |
433 | const struct nbrec_logical_router *nbr, | |
5868eb24 BP |
434 | const struct sbrec_datapath_binding *sb) |
435 | { | |
436 | struct ovn_datapath *od = xzalloc(sizeof *od); | |
437 | od->key = *key; | |
438 | od->sb = sb; | |
9975d7be BP |
439 | od->nbs = nbs; |
440 | od->nbr = nbr; | |
5868eb24 BP |
441 | hmap_init(&od->port_tnlids); |
442 | od->port_key_hint = 0; | |
443 | hmap_insert(datapaths, &od->key_node, uuid_hash(&od->key)); | |
444 | return od; | |
445 | } | |
446 | ||
447 | static void | |
448 | ovn_datapath_destroy(struct hmap *datapaths, struct ovn_datapath *od) | |
449 | { | |
450 | if (od) { | |
451 | /* Don't remove od->list. It is used within build_datapaths() as a | |
452 | * private list and once we've exited that function it is not safe to | |
453 | * use it. */ | |
454 | hmap_remove(datapaths, &od->key_node); | |
455 | destroy_tnlids(&od->port_tnlids); | |
161ea2c8 NS |
456 | if (od->ipam_info) { |
457 | bitmap_free(od->ipam_info->allocated_ipv4s); | |
458 | free(od->ipam_info); | |
459 | } | |
86e98048 | 460 | free(od->router_ports); |
5868eb24 BP |
461 | free(od); |
462 | } | |
463 | } | |
464 | ||
9a9961d2 BP |
465 | /* Returns 'od''s datapath type. */ |
466 | static enum ovn_datapath_type | |
467 | ovn_datapath_get_type(const struct ovn_datapath *od) | |
468 | { | |
469 | return od->nbs ? DP_SWITCH : DP_ROUTER; | |
470 | } | |
471 | ||
5868eb24 BP |
472 | static struct ovn_datapath * |
473 | ovn_datapath_find(struct hmap *datapaths, const struct uuid *uuid) | |
474 | { | |
475 | struct ovn_datapath *od; | |
476 | ||
477 | HMAP_FOR_EACH_WITH_HASH (od, key_node, uuid_hash(uuid), datapaths) { | |
478 | if (uuid_equals(uuid, &od->key)) { | |
479 | return od; | |
480 | } | |
481 | } | |
482 | return NULL; | |
483 | } | |
484 | ||
485 | static struct ovn_datapath * | |
486 | ovn_datapath_from_sbrec(struct hmap *datapaths, | |
487 | const struct sbrec_datapath_binding *sb) | |
488 | { | |
489 | struct uuid key; | |
490 | ||
9975d7be BP |
491 | if (!smap_get_uuid(&sb->external_ids, "logical-switch", &key) && |
492 | !smap_get_uuid(&sb->external_ids, "logical-router", &key)) { | |
5868eb24 BP |
493 | return NULL; |
494 | } | |
495 | return ovn_datapath_find(datapaths, &key); | |
496 | } | |
497 | ||
5412db30 J |
498 | static bool |
499 | lrouter_is_enabled(const struct nbrec_logical_router *lrouter) | |
500 | { | |
501 | return !lrouter->enabled || *lrouter->enabled; | |
502 | } | |
503 | ||
161ea2c8 NS |
/* Initializes od->ipam_info from the logical switch's
 * other_config:subnet and other_config:exclude_ips.
 *
 * Does nothing for routers, for switches without a "subnet" key, or when the
 * subnet fails to parse as an IPv4 CIDR (a rate-limited warning is logged in
 * that case and od->ipam_info stays NULL). */
static void
init_ipam_info_for_datapath(struct ovn_datapath *od)
{
    if (!od->nbs) {
        return;
    }

    const char *subnet_str = smap_get(&od->nbs->other_config, "subnet");
    if (!subnet_str) {
        return;
    }

    ovs_be32 subnet, mask;
    char *error = ip_parse_masked(subnet_str, &subnet, &mask);
    /* A /32 (mask == OVS_BE32_MAX) has no allocatable hosts, so reject it
     * along with parse errors and non-CIDR masks. */
    if (error || mask == OVS_BE32_MAX || !ip_is_cidr(mask)) {
        static struct vlog_rate_limit rl
            = VLOG_RATE_LIMIT_INIT(5, 1);
        VLOG_WARN_RL(&rl, "bad 'subnet' %s", subnet_str);
        free(error);
        return;
    }

    /* Allocatable range starts one past the network address; ~mask gives the
     * host count (which includes the broadcast address -- TODO confirm
     * whether the broadcast address is excluded elsewhere). */
    od->ipam_info = xzalloc(sizeof *od->ipam_info);
    od->ipam_info->start_ipv4 = ntohl(subnet) + 1;
    od->ipam_info->total_ipv4s = ~ntohl(mask);
    od->ipam_info->allocated_ipv4s =
        bitmap_allocate(od->ipam_info->total_ipv4s);

    /* Mark first IP as taken */
    bitmap_set1(od->ipam_info->allocated_ipv4s, 0);

    /* Check if there are any reserved IPs (list) to be excluded from IPAM */
    const char *exclude_ip_list = smap_get(&od->nbs->other_config,
                                           "exclude_ips");
    if (!exclude_ip_list) {
        return;
    }

    struct lexer lexer;
    lexer_init(&lexer, exclude_ip_list);
    /* exclude_ip_list could be in the format -
     * "10.0.0.4 10.0.0.10 10.0.0.20..10.0.0.50 10.0.0.100..10.0.0.110".
     */
    lexer_get(&lexer);
    while (lexer.token.type != LEX_T_END) {
        if (lexer.token.type != LEX_T_INTEGER) {
            lexer_syntax_error(&lexer, "expecting address");
            break;
        }
        uint32_t start = ntohl(lexer.token.value.ipv4);
        lexer_get(&lexer);

        /* A bare address is treated as the one-element range
         * [start, start + 1); ".." extends it. */
        uint32_t end = start + 1;
        if (lexer_match(&lexer, LEX_T_ELLIPSIS)) {
            if (lexer.token.type != LEX_T_INTEGER) {
                lexer_syntax_error(&lexer, "expecting address range");
                break;
            }
            end = ntohl(lexer.token.value.ipv4) + 1;
            lexer_get(&lexer);
        }

        /* Clamp start...end to fit the subnet. */
        start = MAX(od->ipam_info->start_ipv4, start);
        end = MIN(od->ipam_info->start_ipv4 + od->ipam_info->total_ipv4s,
                  end);
        if (end > start) {
            bitmap_set_multiple(od->ipam_info->allocated_ipv4s,
                                start - od->ipam_info->start_ipv4,
                                end - start, 1);
        } else {
            lexer_error(&lexer, "excluded addresses not in subnet");
        }
    }
    if (lexer.error) {
        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
        VLOG_WARN_RL(&rl, "logical switch "UUID_FMT": bad exclude_ips (%s)",
                     UUID_ARGS(&od->key), lexer.error);
    }
    lexer_destroy(&lexer);
}
584 | ||
5868eb24 BP |
/* Pairs up southbound Datapath_Binding rows with northbound logical switches
 * and routers by UUID.  On return:
 *
 *     - 'datapaths' contains an ovn_datapath for every logical switch,
 *       every enabled logical router, and every surviving southbound row.
 *
 *     - 'sb_only' lists datapaths that have only a southbound row.
 *
 *     - 'nb_only' lists datapaths that have only a northbound row.
 *
 *     - 'both' lists datapaths present in both databases.
 *
 * Southbound rows lacking a usable external-ids key, or duplicating another
 * row's key, are deleted outright. */
static void
join_datapaths(struct northd_context *ctx, struct hmap *datapaths,
               struct ovs_list *sb_only, struct ovs_list *nb_only,
               struct ovs_list *both)
{
    hmap_init(datapaths);
    ovs_list_init(sb_only);
    ovs_list_init(nb_only);
    ovs_list_init(both);

    /* Pass 1: index every valid southbound row; start them all in
     * 'sb_only'. */
    const struct sbrec_datapath_binding *sb, *sb_next;
    SBREC_DATAPATH_BINDING_FOR_EACH_SAFE (sb, sb_next, ctx->ovnsb_idl) {
        struct uuid key;
        if (!smap_get_uuid(&sb->external_ids, "logical-switch", &key) &&
            !smap_get_uuid(&sb->external_ids, "logical-router", &key)) {
            ovsdb_idl_txn_add_comment(
                ctx->ovnsb_txn,
                "deleting Datapath_Binding "UUID_FMT" that lacks "
                "external-ids:logical-switch and "
                "external-ids:logical-router",
                UUID_ARGS(&sb->header_.uuid));
            sbrec_datapath_binding_delete(sb);
            continue;
        }

        if (ovn_datapath_find(datapaths, &key)) {
            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
            VLOG_INFO_RL(
                &rl, "deleting Datapath_Binding "UUID_FMT" with "
                "duplicate external-ids:logical-switch/router "UUID_FMT,
                UUID_ARGS(&sb->header_.uuid), UUID_ARGS(&key));
            sbrec_datapath_binding_delete(sb);
            continue;
        }

        struct ovn_datapath *od = ovn_datapath_create(datapaths, &key,
                                                      NULL, NULL, sb);
        ovs_list_push_back(sb_only, &od->list);
    }

    /* Pass 2: join northbound logical switches; matched datapaths move from
     * 'sb_only' to 'both'. */
    const struct nbrec_logical_switch *nbs;
    NBREC_LOGICAL_SWITCH_FOR_EACH (nbs, ctx->ovnnb_idl) {
        struct ovn_datapath *od = ovn_datapath_find(datapaths,
                                                    &nbs->header_.uuid);
        if (od) {
            od->nbs = nbs;
            ovs_list_remove(&od->list);
            ovs_list_push_back(both, &od->list);
        } else {
            od = ovn_datapath_create(datapaths, &nbs->header_.uuid,
                                     nbs, NULL, NULL);
            ovs_list_push_back(nb_only, &od->list);
        }

        init_ipam_info_for_datapath(od);
    }

    /* Pass 3: join northbound logical routers the same way; disabled routers
     * are skipped entirely. */
    const struct nbrec_logical_router *nbr;
    NBREC_LOGICAL_ROUTER_FOR_EACH (nbr, ctx->ovnnb_idl) {
        if (!lrouter_is_enabled(nbr)) {
            continue;
        }

        struct ovn_datapath *od = ovn_datapath_find(datapaths,
                                                    &nbr->header_.uuid);
        if (od) {
            if (!od->nbs) {
                od->nbr = nbr;
                ovs_list_remove(&od->list);
                ovs_list_push_back(both, &od->list);
            } else {
                /* Can't happen!  A switch and a router would have to share a
                 * row UUID in OVN_Northbound. */
                static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
                VLOG_WARN_RL(&rl,
                             "duplicate UUID "UUID_FMT" in OVN_Northbound",
                             UUID_ARGS(&nbr->header_.uuid));
                continue;
            }
        } else {
            od = ovn_datapath_create(datapaths, &nbr->header_.uuid,
                                     NULL, nbr, NULL);
            ovs_list_push_back(nb_only, &od->list);
        }
    }
}
670 | ||
/* Allocates an unused datapath tunnel key.  Datapath keys range over
 * [1, (1u << 24) - 1]; returns 0 if they are exhausted. */
static uint32_t
ovn_datapath_allocate_key(struct hmap *dp_tnlids)
{
    /* Static hint so successive calls hand out keys round-robin. */
    static uint32_t hint;
    return allocate_tnlid(dp_tnlids, "datapath", (1u << 24) - 1, &hint);
}
677 | ||
0bac7164 BP |
678 | /* Updates the southbound Datapath_Binding table so that it contains the |
679 | * logical switches and routers specified by the northbound database. | |
680 | * | |
681 | * Initializes 'datapaths' to contain a "struct ovn_datapath" for every logical | |
682 | * switch and router. */ | |
5868eb24 BP |
683 | static void |
684 | build_datapaths(struct northd_context *ctx, struct hmap *datapaths) | |
685 | { | |
686 | struct ovs_list sb_only, nb_only, both; | |
687 | ||
688 | join_datapaths(ctx, datapaths, &sb_only, &nb_only, &both); | |
689 | ||
417e7e66 | 690 | if (!ovs_list_is_empty(&nb_only)) { |
5868eb24 BP |
691 | /* First index the in-use datapath tunnel IDs. */ |
692 | struct hmap dp_tnlids = HMAP_INITIALIZER(&dp_tnlids); | |
693 | struct ovn_datapath *od; | |
694 | LIST_FOR_EACH (od, list, &both) { | |
695 | add_tnlid(&dp_tnlids, od->sb->tunnel_key); | |
696 | } | |
697 | ||
698 | /* Add southbound record for each unmatched northbound record. */ | |
699 | LIST_FOR_EACH (od, list, &nb_only) { | |
700 | uint16_t tunnel_key = ovn_datapath_allocate_key(&dp_tnlids); | |
701 | if (!tunnel_key) { | |
702 | break; | |
703 | } | |
704 | ||
705 | od->sb = sbrec_datapath_binding_insert(ctx->ovnsb_txn); | |
706 | ||
0f8e9c12 BP |
707 | /* Get the logical-switch or logical-router UUID to set in |
708 | * external-ids. */ | |
5868eb24 | 709 | char uuid_s[UUID_LEN + 1]; |
9975d7be BP |
710 | sprintf(uuid_s, UUID_FMT, UUID_ARGS(&od->key)); |
711 | const char *key = od->nbs ? "logical-switch" : "logical-router"; | |
0f8e9c12 BP |
712 | |
713 | /* Get name to set in external-ids. */ | |
714 | const char *name = od->nbs ? od->nbs->name : od->nbr->name; | |
715 | ||
716 | /* Set external-ids. */ | |
717 | struct smap ids = SMAP_INITIALIZER(&ids); | |
718 | smap_add(&ids, key, uuid_s); | |
719 | if (*name) { | |
720 | smap_add(&ids, "name", name); | |
721 | } | |
722 | sbrec_datapath_binding_set_external_ids(od->sb, &ids); | |
723 | smap_destroy(&ids); | |
5868eb24 BP |
724 | |
725 | sbrec_datapath_binding_set_tunnel_key(od->sb, tunnel_key); | |
726 | } | |
727 | destroy_tnlids(&dp_tnlids); | |
728 | } | |
729 | ||
730 | /* Delete southbound records without northbound matches. */ | |
731 | struct ovn_datapath *od, *next; | |
732 | LIST_FOR_EACH_SAFE (od, next, list, &sb_only) { | |
417e7e66 | 733 | ovs_list_remove(&od->list); |
5868eb24 BP |
734 | sbrec_datapath_binding_delete(od->sb); |
735 | ovn_datapath_destroy(datapaths, od); | |
736 | } | |
737 | } | |
738 | \f | |
/* A logical port: a logical switch port (nbsp) or logical router port (nbrp)
 * joined with its southbound Port_Binding row (sb).  Any of the row pointers
 * may be NULL while the databases are out of sync. */
struct ovn_port {
    struct hmap_node key_node;  /* Index on 'key'. */
    char *key;                  /* nbs->name, nbr->name, sb->logical_port. */
    char *json_key;             /* 'key', quoted for use in JSON. */

    const struct sbrec_port_binding *sb;         /* May be NULL. */

    /* Logical switch port data. */
    const struct nbrec_logical_switch_port *nbsp; /* May be NULL. */

    struct lport_addresses *lsp_addrs;  /* Logical switch port addresses. */
    unsigned int n_lsp_addrs;

    struct lport_addresses *ps_addrs;   /* Port security addresses. */
    unsigned int n_ps_addrs;

    /* Logical router port data. */
    const struct nbrec_logical_router_port *nbrp; /* May be NULL. */

    struct lport_addresses lrp_networks;

    bool derived; /* Indicates whether this is an additional port
                   * derived from nbsp or nbrp. */

    /* The port's peer:
     *
     *     - A switch port S of type "router" has a router port R as a peer,
     *       and R in turn has S has its peer.
     *
     *     - Two connected logical router ports have each other as peer. */
    struct ovn_port *peer;

    struct ovn_datapath *od;    /* Datapath this port belongs to. */

    struct ovs_list list;       /* In list of similar records. */
};
775 | ||
776 | static struct ovn_port * | |
777 | ovn_port_create(struct hmap *ports, const char *key, | |
0ee00741 HK |
778 | const struct nbrec_logical_switch_port *nbsp, |
779 | const struct nbrec_logical_router_port *nbrp, | |
5868eb24 BP |
780 | const struct sbrec_port_binding *sb) |
781 | { | |
782 | struct ovn_port *op = xzalloc(sizeof *op); | |
9975d7be BP |
783 | |
784 | struct ds json_key = DS_EMPTY_INITIALIZER; | |
785 | json_string_escape(key, &json_key); | |
786 | op->json_key = ds_steal_cstr(&json_key); | |
787 | ||
788 | op->key = xstrdup(key); | |
5868eb24 | 789 | op->sb = sb; |
0ee00741 HK |
790 | op->nbsp = nbsp; |
791 | op->nbrp = nbrp; | |
41a15b71 | 792 | op->derived = false; |
5868eb24 BP |
793 | hmap_insert(ports, &op->key_node, hash_string(op->key, 0)); |
794 | return op; | |
795 | } | |
796 | ||
797 | static void | |
798 | ovn_port_destroy(struct hmap *ports, struct ovn_port *port) | |
799 | { | |
800 | if (port) { | |
801 | /* Don't remove port->list. It is used within build_ports() as a | |
802 | * private list and once we've exited that function it is not safe to | |
803 | * use it. */ | |
804 | hmap_remove(ports, &port->key_node); | |
e93b43d6 JP |
805 | |
806 | for (int i = 0; i < port->n_lsp_addrs; i++) { | |
807 | destroy_lport_addresses(&port->lsp_addrs[i]); | |
808 | } | |
809 | free(port->lsp_addrs); | |
810 | ||
811 | for (int i = 0; i < port->n_ps_addrs; i++) { | |
812 | destroy_lport_addresses(&port->ps_addrs[i]); | |
813 | } | |
814 | free(port->ps_addrs); | |
815 | ||
4685e523 | 816 | destroy_lport_addresses(&port->lrp_networks); |
9975d7be BP |
817 | free(port->json_key); |
818 | free(port->key); | |
5868eb24 BP |
819 | free(port); |
820 | } | |
821 | } | |
822 | ||
823 | static struct ovn_port * | |
824 | ovn_port_find(struct hmap *ports, const char *name) | |
825 | { | |
826 | struct ovn_port *op; | |
827 | ||
828 | HMAP_FOR_EACH_WITH_HASH (op, key_node, hash_string(name, 0), ports) { | |
829 | if (!strcmp(op->key, name)) { | |
830 | return op; | |
831 | } | |
832 | } | |
833 | return NULL; | |
834 | } | |
835 | ||
/* Allocates and returns an unused tunnel key (in the range 1 to 2**15 - 1)
 * for a port on datapath 'od', using 'od->port_key_hint' to speed up the
 * search.  Returns 0 on exhaustion (allocate_tnlid's failure value). */
static uint32_t
ovn_port_allocate_key(struct ovn_datapath *od)
{
    return allocate_tnlid(&od->port_tnlids, "port",
                          (1u << 15) - 1, &od->port_key_hint);
}
842 | ||
41a15b71 MS |
/* Returns the name of the "chassisredirect" port derived from logical
 * router port 'port_name' ("cr-<port_name>").  Caller must free the
 * returned string. */
static char *
chassis_redirect_name(const char *port_name)
{
    return xasprintf("cr-%s", port_name);
}
848 | ||
8639f9be ND |
849 | static bool |
850 | ipam_is_duplicate_mac(struct eth_addr *ea, uint64_t mac64, bool warn) | |
851 | { | |
852 | struct macam_node *macam_node; | |
853 | HMAP_FOR_EACH_WITH_HASH (macam_node, hmap_node, hash_uint64(mac64), | |
854 | &macam) { | |
855 | if (eth_addr_equals(*ea, macam_node->mac_addr)) { | |
856 | if (warn) { | |
857 | static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1); | |
858 | VLOG_WARN_RL(&rl, "Duplicate MAC set: "ETH_ADDR_FMT, | |
859 | ETH_ADDR_ARGS(macam_node->mac_addr)); | |
860 | } | |
861 | return true; | |
862 | } | |
863 | } | |
864 | return false; | |
865 | } | |
866 | ||
8639f9be ND |
867 | static void |
868 | ipam_insert_mac(struct eth_addr *ea, bool check) | |
869 | { | |
870 | if (!ea) { | |
871 | return; | |
872 | } | |
873 | ||
874 | uint64_t mac64 = eth_addr_to_uint64(*ea); | |
875 | /* If the new MAC was not assigned by this address management system or | |
876 | * check is true and the new MAC is a duplicate, do not insert it into the | |
877 | * macam hmap. */ | |
878 | if (((mac64 ^ MAC_ADDR_PREFIX) >> 24) | |
879 | || (check && ipam_is_duplicate_mac(ea, mac64, true))) { | |
880 | return; | |
881 | } | |
882 | ||
883 | struct macam_node *new_macam_node = xmalloc(sizeof *new_macam_node); | |
884 | new_macam_node->mac_addr = *ea; | |
885 | hmap_insert(&macam, &new_macam_node->hmap_node, hash_uint64(mac64)); | |
886 | } | |
887 | ||
888 | static void | |
161ea2c8 | 889 | ipam_insert_ip(struct ovn_datapath *od, uint32_t ip) |
8639f9be | 890 | { |
161ea2c8 | 891 | if (!od || !od->ipam_info || !od->ipam_info->allocated_ipv4s) { |
8639f9be ND |
892 | return; |
893 | } | |
894 | ||
161ea2c8 NS |
895 | if (ip >= od->ipam_info->start_ipv4 && |
896 | ip < (od->ipam_info->start_ipv4 + od->ipam_info->total_ipv4s)) { | |
897 | bitmap_set1(od->ipam_info->allocated_ipv4s, | |
898 | ip - od->ipam_info->start_ipv4); | |
8639f9be | 899 | } |
8639f9be ND |
900 | } |
901 | ||
902 | static void | |
903 | ipam_insert_lsp_addresses(struct ovn_datapath *od, struct ovn_port *op, | |
904 | char *address) | |
905 | { | |
906 | if (!od || !op || !address || !strcmp(address, "unknown") | |
20418099 | 907 | || !strcmp(address, "router") || is_dynamic_lsp_address(address)) { |
8639f9be ND |
908 | return; |
909 | } | |
910 | ||
911 | struct lport_addresses laddrs; | |
912 | if (!extract_lsp_addresses(address, &laddrs)) { | |
913 | static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1); | |
914 | VLOG_WARN_RL(&rl, "Extract addresses failed."); | |
915 | return; | |
916 | } | |
917 | ipam_insert_mac(&laddrs.ea, true); | |
918 | ||
919 | /* IP is only added to IPAM if the switch's subnet option | |
920 | * is set, whereas MAC is always added to MACAM. */ | |
161ea2c8 | 921 | if (!od->ipam_info || !od->ipam_info->allocated_ipv4s) { |
8639f9be ND |
922 | destroy_lport_addresses(&laddrs); |
923 | return; | |
924 | } | |
925 | ||
926 | for (size_t j = 0; j < laddrs.n_ipv4_addrs; j++) { | |
927 | uint32_t ip = ntohl(laddrs.ipv4_addrs[j].addr); | |
161ea2c8 | 928 | ipam_insert_ip(od, ip); |
8639f9be ND |
929 | } |
930 | ||
931 | destroy_lport_addresses(&laddrs); | |
932 | } | |
933 | ||
934 | static void | |
935 | ipam_add_port_addresses(struct ovn_datapath *od, struct ovn_port *op) | |
936 | { | |
937 | if (!od || !op) { | |
938 | return; | |
939 | } | |
940 | ||
941 | if (op->nbsp) { | |
942 | /* Add all the port's addresses to address data structures. */ | |
943 | for (size_t i = 0; i < op->nbsp->n_addresses; i++) { | |
944 | ipam_insert_lsp_addresses(od, op, op->nbsp->addresses[i]); | |
945 | } | |
946 | if (op->nbsp->dynamic_addresses) { | |
947 | ipam_insert_lsp_addresses(od, op, op->nbsp->dynamic_addresses); | |
948 | } | |
949 | } else if (op->nbrp) { | |
950 | struct lport_addresses lrp_networks; | |
951 | if (!extract_lrp_networks(op->nbrp, &lrp_networks)) { | |
952 | static struct vlog_rate_limit rl | |
953 | = VLOG_RATE_LIMIT_INIT(1, 1); | |
954 | VLOG_WARN_RL(&rl, "Extract addresses failed."); | |
955 | return; | |
956 | } | |
957 | ipam_insert_mac(&lrp_networks.ea, true); | |
958 | ||
959 | if (!op->peer || !op->peer->nbsp || !op->peer->od || !op->peer->od->nbs | |
960 | || !smap_get(&op->peer->od->nbs->other_config, "subnet")) { | |
961 | destroy_lport_addresses(&lrp_networks); | |
962 | return; | |
963 | } | |
964 | ||
965 | for (size_t i = 0; i < lrp_networks.n_ipv4_addrs; i++) { | |
966 | uint32_t ip = ntohl(lrp_networks.ipv4_addrs[i].addr); | |
161ea2c8 | 967 | ipam_insert_ip(op->peer->od, ip); |
8639f9be ND |
968 | } |
969 | ||
970 | destroy_lport_addresses(&lrp_networks); | |
971 | } | |
972 | } | |
973 | ||
974 | static uint64_t | |
975 | ipam_get_unused_mac(void) | |
976 | { | |
977 | /* Stores the suffix of the most recently ipam-allocated MAC address. */ | |
978 | static uint32_t last_mac; | |
979 | ||
980 | uint64_t mac64; | |
981 | struct eth_addr mac; | |
982 | uint32_t mac_addr_suffix, i; | |
983 | for (i = 0; i < MAC_ADDR_SPACE - 1; i++) { | |
984 | /* The tentative MAC's suffix will be in the interval (1, 0xfffffe). */ | |
985 | mac_addr_suffix = ((last_mac + i) % (MAC_ADDR_SPACE - 1)) + 1; | |
986 | mac64 = MAC_ADDR_PREFIX | mac_addr_suffix; | |
987 | eth_addr_from_uint64(mac64, &mac); | |
988 | if (!ipam_is_duplicate_mac(&mac, mac64, false)) { | |
989 | last_mac = mac_addr_suffix; | |
990 | break; | |
991 | } | |
992 | } | |
993 | ||
994 | if (i == MAC_ADDR_SPACE) { | |
995 | static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1); | |
996 | VLOG_WARN_RL(&rl, "MAC address space exhausted."); | |
997 | mac64 = 0; | |
998 | } | |
999 | ||
1000 | return mac64; | |
1001 | } | |
1002 | ||
1003 | static uint32_t | |
161ea2c8 | 1004 | ipam_get_unused_ip(struct ovn_datapath *od) |
8639f9be | 1005 | { |
161ea2c8 | 1006 | if (!od || !od->ipam_info || !od->ipam_info->allocated_ipv4s) { |
8639f9be ND |
1007 | return 0; |
1008 | } | |
1009 | ||
161ea2c8 NS |
1010 | size_t new_ip_index = bitmap_scan(od->ipam_info->allocated_ipv4s, 0, 0, |
1011 | od->ipam_info->total_ipv4s - 1); | |
1012 | if (new_ip_index == od->ipam_info->total_ipv4s - 1) { | |
8639f9be ND |
1013 | static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1); |
1014 | VLOG_WARN_RL( &rl, "Subnet address space has been exhausted."); | |
161ea2c8 | 1015 | return 0; |
8639f9be ND |
1016 | } |
1017 | ||
161ea2c8 | 1018 | return od->ipam_info->start_ipv4 + new_ip_index; |
8639f9be ND |
1019 | } |
1020 | ||
/* Allocates a dynamic MAC/IPv4 pair for switch port 'op' on datapath 'od'
 * according to 'addrspec' and writes the result into the port's
 * dynamic_addresses column.  Returns true on success, false if any
 * resource (IP or MAC) could not be allocated. */
static bool
ipam_allocate_addresses(struct ovn_datapath *od, struct ovn_port *op,
                        const char *addrspec)
{
    if (!od || !op || !op->nbsp) {
        return false;
    }

    uint32_t ip = ipam_get_unused_ip(od);
    if (!ip) {
        return false;
    }

    struct eth_addr mac;
    bool check_mac;
    int n = 0;

    /* Two accepted forms of 'addrspec':
     *   "<MAC> dynamic" -- keep the caller-supplied MAC, but verify it is
     *                      not a duplicate before inserting (check_mac).
     *   "dynamic"       -- allocate an unused MAC ourselves (no duplicate
     *                      check needed; ipam_get_unused_mac guarantees it).
     */
    if (ovs_scan(addrspec, ETH_ADDR_SCAN_FMT" dynamic%n",
                 ETH_ADDR_SCAN_ARGS(mac), &n)
        && addrspec[n] == '\0') {
        check_mac = true;
    } else {
        uint64_t mac64 = ipam_get_unused_mac();
        if (!mac64) {
            return false;
        }
        eth_addr_from_uint64(mac64, &mac);
        check_mac = false;
    }

    /* Add MAC to MACAM and IP to IPAM bitmap if both addresses were allocated
     * successfully. */
    ipam_insert_ip(od, ip);
    ipam_insert_mac(&mac, check_mac);

    /* Persist "<MAC> <IP>" in the northbound database; build_ipam() reads
     * this back via extract_lsp_addresses(). */
    char *new_addr = xasprintf(ETH_ADDR_FMT" "IP_FMT,
                               ETH_ADDR_ARGS(mac), IP_ARGS(htonl(ip)));
    nbrec_logical_switch_port_set_dynamic_addresses(op->nbsp, new_addr);
    free(new_addr);

    return true;
}
1063 | ||
static void
build_ipam(struct hmap *datapaths, struct hmap *ports)
{
    /* IPAM generally stands for IP address management. In non-virtualized
     * world, MAC addresses come with the hardware. But, with virtualized
     * workloads, they need to be assigned and managed. This function
     * does both IP address management (ipam) and MAC address management
     * (macam). */

    /* If the switch's other_config:subnet is set, allocate new addresses for
     * ports that have the "dynamic" keyword in their addresses column. */
    struct ovn_datapath *od;
    HMAP_FOR_EACH (od, key_node, datapaths) {
        /* Only logical switches with an IPAM bitmap participate. */
        if (!od->nbs || !od->ipam_info || !od->ipam_info->allocated_ipv4s) {
            continue;
        }

        struct ovn_port *op;
        for (size_t i = 0; i < od->nbs->n_ports; i++) {
            const struct nbrec_logical_switch_port *nbsp =
                od->nbs->ports[i];

            if (!nbsp) {
                continue;
            }

            op = ovn_port_find(ports, nbsp->name);
            if (!op || (op->nbsp && op->peer)) {
                /* Do not allocate addresses for logical switch ports that
                 * have a peer. */
                continue;
            }

            /* At most one "dynamic" entry per port is honored (note the
             * break below).  ipam_allocate_addresses() writes the new
             * "<MAC> <IP>" string into dynamic_addresses, which is then
             * parsed straight back into op->lsp_addrs. */
            for (size_t j = 0; j < nbsp->n_addresses; j++) {
                if (is_dynamic_lsp_address(nbsp->addresses[j])
                    && !nbsp->dynamic_addresses) {
                    if (!ipam_allocate_addresses(od, op, nbsp->addresses[j])
                        || !extract_lsp_addresses(nbsp->dynamic_addresses,
                                        &op->lsp_addrs[op->n_lsp_addrs])) {
                        static struct vlog_rate_limit rl
                            = VLOG_RATE_LIMIT_INIT(1, 1);
                        VLOG_INFO_RL(&rl, "Failed to allocate address.");
                    } else {
                        op->n_lsp_addrs++;
                    }
                    break;
                }
            }

            /* A port with no addresses at all has no use for a stale
             * dynamic_addresses value; clear it. */
            if (!nbsp->n_addresses && nbsp->dynamic_addresses) {
                nbrec_logical_switch_port_set_dynamic_addresses(op->nbsp,
                                                                NULL);
            }
        }
    }
}
1120 | \f | |
b511690b GS |
/* Tag allocation for nested containers.
 *
 * For a logical switch port with 'parent_name' and a request to allocate tags,
 * keeps a track of all allocated tags. */
struct tag_alloc_node {
    struct hmap_node hmap_node; /* In a tag_alloc_table hmap, hashed on
                                 * parent_name. */
    char *parent_name;          /* Owned; freed by tag_alloc_destroy(). */
    unsigned long *allocated_tags;  /* A bitmap to track allocated tags. */
};
1130 | ||
1131 | static void | |
1132 | tag_alloc_destroy(struct hmap *tag_alloc_table) | |
1133 | { | |
1134 | struct tag_alloc_node *node; | |
1135 | HMAP_FOR_EACH_POP (node, hmap_node, tag_alloc_table) { | |
1136 | bitmap_free(node->allocated_tags); | |
1137 | free(node->parent_name); | |
1138 | free(node); | |
1139 | } | |
1140 | hmap_destroy(tag_alloc_table); | |
1141 | } | |
1142 | ||
1143 | static struct tag_alloc_node * | |
1144 | tag_alloc_get_node(struct hmap *tag_alloc_table, const char *parent_name) | |
1145 | { | |
1146 | /* If a node for the 'parent_name' exists, return it. */ | |
1147 | struct tag_alloc_node *tag_alloc_node; | |
1148 | HMAP_FOR_EACH_WITH_HASH (tag_alloc_node, hmap_node, | |
1149 | hash_string(parent_name, 0), | |
1150 | tag_alloc_table) { | |
1151 | if (!strcmp(tag_alloc_node->parent_name, parent_name)) { | |
1152 | return tag_alloc_node; | |
1153 | } | |
1154 | } | |
1155 | ||
1156 | /* Create a new node. */ | |
1157 | tag_alloc_node = xmalloc(sizeof *tag_alloc_node); | |
1158 | tag_alloc_node->parent_name = xstrdup(parent_name); | |
1159 | tag_alloc_node->allocated_tags = bitmap_allocate(MAX_OVN_TAGS); | |
1160 | /* Tag 0 is invalid for nested containers. */ | |
1161 | bitmap_set1(tag_alloc_node->allocated_tags, 0); | |
1162 | hmap_insert(tag_alloc_table, &tag_alloc_node->hmap_node, | |
1163 | hash_string(parent_name, 0)); | |
1164 | ||
1165 | return tag_alloc_node; | |
1166 | } | |
1167 | ||
1168 | static void | |
1169 | tag_alloc_add_existing_tags(struct hmap *tag_alloc_table, | |
1170 | const struct nbrec_logical_switch_port *nbsp) | |
1171 | { | |
1172 | /* Add the tags of already existing nested containers. If there is no | |
1173 | * 'nbsp->parent_name' or no 'nbsp->tag' set, there is nothing to do. */ | |
1174 | if (!nbsp->parent_name || !nbsp->parent_name[0] || !nbsp->tag) { | |
1175 | return; | |
1176 | } | |
1177 | ||
1178 | struct tag_alloc_node *tag_alloc_node; | |
1179 | tag_alloc_node = tag_alloc_get_node(tag_alloc_table, nbsp->parent_name); | |
1180 | bitmap_set1(tag_alloc_node->allocated_tags, *nbsp->tag); | |
1181 | } | |
1182 | ||
1183 | static void | |
1184 | tag_alloc_create_new_tag(struct hmap *tag_alloc_table, | |
1185 | const struct nbrec_logical_switch_port *nbsp) | |
1186 | { | |
1187 | if (!nbsp->tag_request) { | |
1188 | return; | |
1189 | } | |
1190 | ||
1191 | if (nbsp->parent_name && nbsp->parent_name[0] | |
1192 | && *nbsp->tag_request == 0) { | |
1193 | /* For nested containers that need allocation, do the allocation. */ | |
1194 | ||
1195 | if (nbsp->tag) { | |
1196 | /* This has already been allocated. */ | |
1197 | return; | |
1198 | } | |
1199 | ||
1200 | struct tag_alloc_node *tag_alloc_node; | |
1201 | int64_t tag; | |
1202 | tag_alloc_node = tag_alloc_get_node(tag_alloc_table, | |
1203 | nbsp->parent_name); | |
1204 | tag = bitmap_scan(tag_alloc_node->allocated_tags, 0, 1, MAX_OVN_TAGS); | |
1205 | if (tag == MAX_OVN_TAGS) { | |
1206 | static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1); | |
1207 | VLOG_ERR_RL(&rl, "out of vlans for logical switch ports with " | |
1208 | "parent %s", nbsp->parent_name); | |
1209 | return; | |
1210 | } | |
1211 | bitmap_set1(tag_alloc_node->allocated_tags, tag); | |
1212 | nbrec_logical_switch_port_set_tag(nbsp, &tag, 1); | |
1213 | } else if (*nbsp->tag_request != 0) { | |
1214 | /* For everything else, copy the contents of 'tag_request' to 'tag'. */ | |
1215 | nbrec_logical_switch_port_set_tag(nbsp, nbsp->tag_request, 1); | |
1216 | } | |
1217 | } | |
1218 | \f | |
8639f9be | 1219 | |
6c4f7a8a NS |
1220 | /* |
1221 | * This function checks if the MAC in "address" parameter (if present) is | |
1222 | * different from the one stored in Logical_Switch_Port.dynamic_addresses | |
1223 | * and updates it. | |
1224 | */ | |
1225 | static void | |
1226 | check_and_update_mac_in_dynamic_addresses( | |
1227 | const char *address, | |
1228 | const struct nbrec_logical_switch_port *nbsp) | |
1229 | { | |
1230 | if (!nbsp->dynamic_addresses) { | |
1231 | return; | |
1232 | } | |
1233 | int buf_index = 0; | |
1234 | struct eth_addr ea; | |
1235 | if (!ovs_scan_len(address, &buf_index, | |
1236 | ETH_ADDR_SCAN_FMT, ETH_ADDR_SCAN_ARGS(ea))) { | |
1237 | return; | |
1238 | } | |
1239 | ||
1240 | struct eth_addr present_ea; | |
1241 | buf_index = 0; | |
1242 | if (ovs_scan_len(nbsp->dynamic_addresses, &buf_index, | |
1243 | ETH_ADDR_SCAN_FMT, ETH_ADDR_SCAN_ARGS(present_ea)) | |
1244 | && !eth_addr_equals(ea, present_ea)) { | |
1245 | /* MAC address has changed. Update it */ | |
1246 | char *new_addr = xasprintf( | |
1247 | ETH_ADDR_FMT"%s", ETH_ADDR_ARGS(ea), | |
1248 |  ->dynamic_addresses[buf_index]); | |
1249 | nbrec_logical_switch_port_set_dynamic_addresses( | |
1250 | nbsp, new_addr); | |
1251 | free(new_addr); | |
1252 | } | |
1253 | } | |
1254 | ||
5868eb24 BP |
/* Pairs southbound Port_Binding records with northbound logical switch and
 * router ports by name, creating one ovn_port per logical port.  On return:
 *   - 'sb_only' lists ports that exist only in the SB database,
 *   - 'nb_only' lists ports that exist only in the NB database,
 *   - 'both' lists ports present in both.
 * Also creates derived "chassisredirect" ports for router ports carrying a
 * redirect-chassis option, and links router ports and switch ports of type
 * "router" to their peers. */
static void
join_logical_ports(struct northd_context *ctx,
                   struct hmap *datapaths, struct hmap *ports,
                   struct hmap *chassis_qdisc_queues,
                   struct hmap *tag_alloc_table, struct ovs_list *sb_only,
                   struct ovs_list *nb_only, struct ovs_list *both)
{
    hmap_init(ports);
    ovs_list_init(sb_only);
    ovs_list_init(nb_only);
    ovs_list_init(both);

    /* Start by assuming every SB binding is SB-only; ports found in the NB
     * database below are moved onto 'both'. */
    const struct sbrec_port_binding *sb;
    SBREC_PORT_BINDING_FOR_EACH (sb, ctx->ovnsb_idl) {
        struct ovn_port *op = ovn_port_create(ports, sb->logical_port,
                                              NULL, NULL, sb);
        ovs_list_push_back(sb_only, &op->list);
    }

    struct ovn_datapath *od;
    HMAP_FOR_EACH (od, key_node, datapaths) {
        if (od->nbs) {
            /* Logical switch: join its ports. */
            for (size_t i = 0; i < od->nbs->n_ports; i++) {
                const struct nbrec_logical_switch_port *nbsp
                    = od->nbs->ports[i];
                struct ovn_port *op = ovn_port_find(ports, nbsp->name);
                if (op) {
                    if (op->nbsp || op->nbrp) {
                        static struct vlog_rate_limit rl
                            = VLOG_RATE_LIMIT_INIT(5, 1);
                        VLOG_WARN_RL(&rl, "duplicate logical port %s",
                                     nbsp->name);
                        continue;
                    }
                    op->nbsp = nbsp;
                    ovs_list_remove(&op->list);

                    /* Preserve any QoS queue already bound on a chassis. */
                    uint32_t queue_id = smap_get_int(&op->sb->options,
                                                     "qdisc_queue_id", 0);
                    if (queue_id && op->sb->chassis) {
                        add_chassis_queue(
                            chassis_qdisc_queues,
                            &op->sb->chassis->header_.uuid,
                            queue_id);
                    }

                    ovs_list_push_back(both, &op->list);

                    /* This port exists due to a SB binding, but should
                     * not have been initialized fully. */
                    ovs_assert(!op->n_lsp_addrs && !op->n_ps_addrs);
                } else {
                    op = ovn_port_create(ports, nbsp->name, nbsp, NULL, NULL);
                    ovs_list_push_back(nb_only, &op->list);
                }

                /* Parse the addresses column into op->lsp_addrs.  "unknown"
                 * and "router" entries are skipped here ("router" is filled
                 * in from the peer in the final loop below); "dynamic"
                 * entries are read from dynamic_addresses when present. */
                op->lsp_addrs
                    = xmalloc(sizeof *op->lsp_addrs * nbsp->n_addresses);
                for (size_t j = 0; j < nbsp->n_addresses; j++) {
                    if (!strcmp(nbsp->addresses[j], "unknown")
                        || !strcmp(nbsp->addresses[j], "router")) {
                        continue;
                    }
                    if (is_dynamic_lsp_address(nbsp->addresses[j])) {
                        if (nbsp->dynamic_addresses) {
                            check_and_update_mac_in_dynamic_addresses(
                                nbsp->addresses[j], nbsp);
                            if (!extract_lsp_addresses(
                                        nbsp->dynamic_addresses,
                                        &op->lsp_addrs[op->n_lsp_addrs])) {
                                static struct vlog_rate_limit rl
                                    = VLOG_RATE_LIMIT_INIT(1, 1);
                                VLOG_INFO_RL(&rl, "invalid syntax '%s' in "
                                                  "logical switch port "
                                                  "dynamic_addresses. No "
                                                  "MAC address found",
                                             op->nbsp->dynamic_addresses);
                                continue;
                            }
                        } else {
                            continue;
                        }
                    } else if (!extract_lsp_addresses(nbsp->addresses[j],
                                       &op->lsp_addrs[op->n_lsp_addrs])) {
                        static struct vlog_rate_limit rl
                            = VLOG_RATE_LIMIT_INIT(1, 1);
                        VLOG_INFO_RL(&rl, "invalid syntax '%s' in logical "
                                          "switch port addresses. No MAC "
                                          "address found",
                                     op->nbsp->addresses[j]);
                        continue;
                    }
                    op->n_lsp_addrs++;
                }

                /* Parse port_security into op->ps_addrs. */
                op->ps_addrs
                    = xmalloc(sizeof *op->ps_addrs * nbsp->n_port_security);
                for (size_t j = 0; j < nbsp->n_port_security; j++) {
                    if (!extract_lsp_addresses(
                                nbsp->port_security[j],
                                &op->ps_addrs[op->n_ps_addrs])) {
                        static struct vlog_rate_limit rl
                            = VLOG_RATE_LIMIT_INIT(1, 1);
                        VLOG_INFO_RL(&rl, "invalid syntax '%s' in port "
                                          "security. No MAC address found",
                                     op->nbsp->port_security[j]);
                        continue;
                    }
                    op->n_ps_addrs++;
                }

                op->od = od;
                ipam_add_port_addresses(od, op);
                tag_alloc_add_existing_tags(tag_alloc_table, nbsp);
            }
        } else {
            /* Logical router: join its ports. */
            for (size_t i = 0; i < od->nbr->n_ports; i++) {
                const struct nbrec_logical_router_port *nbrp
                    = od->nbr->ports[i];

                struct lport_addresses lrp_networks;
                if (!extract_lrp_networks(nbrp, &lrp_networks)) {
                    static struct vlog_rate_limit rl
                        = VLOG_RATE_LIMIT_INIT(5, 1);
                    VLOG_WARN_RL(&rl, "bad 'mac' %s", nbrp->mac);
                    continue;
                }

                /* A router port with no networks is ignored entirely. */
                if (!lrp_networks.n_ipv4_addrs && !lrp_networks.n_ipv6_addrs) {
                    continue;
                }

                struct ovn_port *op = ovn_port_find(ports, nbrp->name);
                if (op) {
                    if (op->nbsp || op->nbrp) {
                        static struct vlog_rate_limit rl
                            = VLOG_RATE_LIMIT_INIT(5, 1);
                        VLOG_WARN_RL(&rl, "duplicate logical router port %s",
                                     nbrp->name);
                        continue;
                    }
                    op->nbrp = nbrp;
                    ovs_list_remove(&op->list);
                    ovs_list_push_back(both, &op->list);

                    /* This port exists but should not have been
                     * initialized fully. */
                    ovs_assert(!op->lrp_networks.n_ipv4_addrs
                               && !op->lrp_networks.n_ipv6_addrs);
                } else {
                    op = ovn_port_create(ports, nbrp->name, NULL, nbrp, NULL);
                    ovs_list_push_back(nb_only, &op->list);
                }

                op->lrp_networks = lrp_networks;
                op->od = od;
                ipam_add_port_addresses(op->od, op);

                const char *redirect_chassis = smap_get(&op->nbrp->options,
                                                        "redirect-chassis");
                if (redirect_chassis) {
                    /* Additional "derived" ovn_port crp represents the
                     * instance of op on the "redirect-chassis". */
                    const char *gw_chassis = smap_get(&op->od->nbr->options,
                                                      "chassis");
                    if (gw_chassis) {
                        static struct vlog_rate_limit rl
                            = VLOG_RATE_LIMIT_INIT(1, 1);
                        VLOG_WARN_RL(&rl, "Bad configuration: "
                                     "redirect-chassis configured on port %s "
                                     "on L3 gateway router", nbrp->name);
                        continue;
                    }
                    /* Only one redirect-chassis port per router. */
                    if (od->l3dgw_port || od->l3redirect_port) {
                        static struct vlog_rate_limit rl
                            = VLOG_RATE_LIMIT_INIT(1, 1);
                        VLOG_WARN_RL(&rl, "Bad configuration: multiple ports "
                                     "with redirect-chassis on same logical "
                                     "router %s", od->nbr->name);
                        continue;
                    }

                    char *redirect_name = chassis_redirect_name(nbrp->name);
                    struct ovn_port *crp = ovn_port_find(ports, redirect_name);
                    if (crp) {
                        crp->derived = true;
                        crp->nbrp = nbrp;
                        ovs_list_remove(&crp->list);
                        ovs_list_push_back(both, &crp->list);
                    } else {
                        crp = ovn_port_create(ports, redirect_name,
                                              NULL, nbrp, NULL);
                        crp->derived = true;
                        ovs_list_push_back(nb_only, &crp->list);
                    }
                    crp->od = od;
                    free(redirect_name);

                    /* Set l3dgw_port and l3redirect_port in od, for later
                     * use during flow creation. */
                    od->l3dgw_port = op;
                    od->l3redirect_port = crp;
                }
            }
        }
    }

    /* Connect logical router ports, and logical switch ports of type "router",
     * to their peers. */
    struct ovn_port *op;
    HMAP_FOR_EACH (op, key_node, ports) {
        if (op->nbsp && !strcmp(op->nbsp->type, "router") && !op->derived) {
            const char *peer_name = smap_get(&op->nbsp->options,
                                             "router-port");
            if (!peer_name) {
                continue;
            }

            struct ovn_port *peer = ovn_port_find(ports, peer_name);
            if (!peer || !peer->nbrp) {
                continue;
            }

            peer->peer = op;
            op->peer = peer;
            op->od->router_ports = xrealloc(
                op->od->router_ports,
                sizeof *op->od->router_ports * (op->od->n_router_ports + 1));
            op->od->router_ports[op->od->n_router_ports++] = op;

            /* Fill op->lsp_addrs for op->nbsp->addresses[] with
             * contents "router", which was skipped in the loop above. */
            for (size_t j = 0; j < op->nbsp->n_addresses; j++) {
                if (!strcmp(op->nbsp->addresses[j], "router")) {
                    if (extract_lrp_networks(peer->nbrp,
                                        &op->lsp_addrs[op->n_lsp_addrs])) {
                        op->n_lsp_addrs++;
                    }
                    break;
                }
            }
        } else if (op->nbrp && op->nbrp->peer && !op->derived) {
            struct ovn_port *peer = ovn_port_find(ports, op->nbrp->peer);
            if (peer) {
                if (peer->nbrp) {
                    op->peer = peer;
                } else if (peer->nbsp) {
                    /* An ovn_port for a switch port of type "router" does have
                     * a router port as its peer (see the case above for
                     * "router" ports), but this is set via options:router-port
                     * in Logical_Switch_Port and does not involve the
                     * Logical_Router_Port's 'peer' column. */
                    static struct vlog_rate_limit rl =
                        VLOG_RATE_LIMIT_INIT(5, 1);
                    VLOG_WARN_RL(&rl, "Bad configuration: The peer of router "
                                 "port %s is a switch port", op->key);
                }
            }
        }
    }
}
1512 | ||
e914fb54 MS |
1513 | static void |
1514 | ip_address_and_port_from_lb_key(const char *key, char **ip_address, | |
1515 | uint16_t *port); | |
1516 | ||
1517 | static void | |
1518 | get_router_load_balancer_ips(const struct ovn_datapath *od, | |
1519 | struct sset *all_ips) | |
1520 | { | |
1521 | if (!od->nbr) { | |
1522 | return; | |
1523 | } | |
1524 | ||
1525 | for (int i = 0; i < od->nbr->n_load_balancer; i++) { | |
1526 | struct nbrec_load_balancer *lb = od->nbr->load_balancer[i]; | |
1527 | struct smap *vips = &lb->vips; | |
1528 | struct smap_node *node; | |
1529 | ||
1530 | SMAP_FOR_EACH (node, vips) { | |
1531 | /* node->key contains IP:port or just IP. */ | |
1532 | char *ip_address = NULL; | |
1533 | uint16_t port; | |
1534 | ||
1535 | ip_address_and_port_from_lb_key(node->key, &ip_address, &port); | |
1536 | if (!ip_address) { | |
1537 | continue; | |
1538 | } | |
1539 | ||
1540 | if (!sset_contains(all_ips, ip_address)) { | |
1541 | sset_add(all_ips, ip_address); | |
1542 | } | |
1543 | ||
1544 | free(ip_address); | |
1545 | } | |
1546 | } | |
1547 | } | |
1548 | ||
f40c5588 MS |
1549 | /* Returns an array of strings, each consisting of a MAC address followed |
1550 | * by one or more IP addresses, and if the port is a distributed gateway | |
1551 | * port, followed by 'is_chassis_resident("LPORT_NAME")', where the | |
1552 | * LPORT_NAME is the name of the L3 redirect port or the name of the | |
1553 | * logical_port specified in a NAT rule. These strings include the | |
1554 | * external IP addresses of all NAT rules defined on that router, and all | |
1555 | * of the IP addresses used in load balancer VIPs defined on that router. | |
e914fb54 | 1556 | * |
f40c5588 MS |
1557 | * The caller must free each of the n returned strings with free(), |
1558 | * and must free the returned array when it is no longer needed. */ | |
1559 | static char ** | |
1560 | get_nat_addresses(const struct ovn_port *op, size_t *n) | |
e914fb54 | 1561 | { |
f40c5588 | 1562 | size_t n_nats = 0; |
e914fb54 MS |
1563 | struct eth_addr mac; |
1564 | if (!op->nbrp || !op->od || !op->od->nbr | |
1565 | || (!op->od->nbr->n_nat && !op->od->nbr->n_load_balancer) | |
1566 | || !eth_addr_from_string(op->nbrp->mac, &mac)) { | |
f40c5588 | 1567 | *n = n_nats; |
e914fb54 MS |
1568 | return NULL; |
1569 | } | |
1570 | ||
f40c5588 MS |
1571 | struct ds c_addresses = DS_EMPTY_INITIALIZER; |
1572 | ds_put_format(&c_addresses, ETH_ADDR_FMT, ETH_ADDR_ARGS(mac)); | |
1573 | bool central_ip_address = false; | |
1574 | ||
1575 | char **addresses; | |
1576 | addresses = xmalloc(sizeof *addresses * (op->od->nbr->n_nat + 1)); | |
e914fb54 MS |
1577 | |
1578 | /* Get NAT IP addresses. */ | |
f40c5588 | 1579 | for (size_t i = 0; i < op->od->nbr->n_nat; i++) { |
e914fb54 MS |
1580 | const struct nbrec_nat *nat = op->od->nbr->nat[i]; |
1581 | ovs_be32 ip, mask; | |
1582 | ||
1583 | char *error = ip_parse_masked(nat->external_ip, &ip, &mask); | |
1584 | if (error || mask != OVS_BE32_MAX) { | |
1585 | free(error); | |
1586 | continue; | |
1587 | } | |
26b9e08d MS |
1588 | |
1589 | /* Determine whether this NAT rule satisfies the conditions for | |
1590 | * distributed NAT processing. */ | |
1591 | if (op->od->l3redirect_port && !strcmp(nat->type, "dnat_and_snat") | |
1592 | && nat->logical_port && nat->external_mac) { | |
1593 | /* Distributed NAT rule. */ | |
f40c5588 MS |
1594 | if (eth_addr_from_string(nat->external_mac, &mac)) { |
1595 | struct ds address = DS_EMPTY_INITIALIZER; | |
1596 | ds_put_format(&address, ETH_ADDR_FMT, ETH_ADDR_ARGS(mac)); | |
1597 | ds_put_format(&address, " %s", nat->external_ip); | |
1598 | ds_put_format(&address, " is_chassis_resident(\"%s\")", | |
1599 | nat->logical_port); | |
1600 | addresses[n_nats++] = ds_steal_cstr(&address); | |
1601 | } | |
26b9e08d MS |
1602 | } else { |
1603 | /* Centralized NAT rule, either on gateway router or distributed | |
1604 | * router. */ | |
f40c5588 MS |
1605 | ds_put_format(&c_addresses, " %s", nat->external_ip); |
1606 | central_ip_address = true; | |
26b9e08d | 1607 | } |
e914fb54 MS |
1608 | } |
1609 | ||
1610 | /* A set to hold all load-balancer vips. */ | |
1611 | struct sset all_ips = SSET_INITIALIZER(&all_ips); | |
1612 | get_router_load_balancer_ips(op->od, &all_ips); | |
1613 | ||
1614 | const char *ip_address; | |
1615 | SSET_FOR_EACH (ip_address, &all_ips) { | |
f40c5588 MS |
1616 | ds_put_format(&c_addresses, " %s", ip_address); |
1617 | central_ip_address = true; | |
e914fb54 MS |
1618 | } |
1619 | sset_destroy(&all_ips); | |
1620 | ||
f40c5588 MS |
1621 | if (central_ip_address) { |
1622 | /* Gratuitous ARP for centralized NAT rules on distributed gateway | |
1623 | * ports should be restricted to the "redirect-chassis". */ | |
1624 | if (op->od->l3redirect_port) { | |
1625 | ds_put_format(&c_addresses, " is_chassis_resident(%s)", | |
1626 | op->od->l3redirect_port->json_key); | |
1627 | } | |
1628 | ||
1629 | addresses[n_nats++] = ds_steal_cstr(&c_addresses); | |
26b9e08d MS |
1630 | } |
1631 | ||
f40c5588 MS |
1632 | *n = n_nats; |
1633 | ||
1634 | return addresses; | |
e914fb54 MS |
1635 | } |
1636 | ||
/* Synchronizes the southbound Port_Binding row 'op->sb' with the current
 * northbound state of logical port 'op'.
 *
 * For router ports (op->nbrp set): picks the binding type
 * ("chassisredirect" for derived ports, "l3gateway" when the router is
 * pinned to a chassis, otherwise "patch") and fills the options column;
 * parent_port/tag/mac are cleared since they only apply to switch ports.
 *
 * For switch ports (op->nbsp set): non-"router" ports copy the NB options
 * through, managing a per-chassis QoS queue id in 'chassis_qdisc_queues'
 * as QoS parameters appear or disappear; "router" ports get "patch" or
 * "l3gateway" type, a "peer" option, and a nat_addresses column derived
 * either from the peer router's NAT/load-balancer state (value "router")
 * or from a manually specified "MAC IP..." string on l3gateway ports. */
static void
ovn_port_update_sbrec(const struct ovn_port *op,
                      struct hmap *chassis_qdisc_queues)
{
    sbrec_port_binding_set_datapath(op->sb, op->od->sb);
    if (op->nbrp) {
        /* If the router is for l3 gateway, it resides on a chassis
         * and its port type is "l3gateway". */
        const char *chassis = smap_get(&op->od->nbr->options, "chassis");
        if (op->derived) {
            sbrec_port_binding_set_type(op->sb, "chassisredirect");
        } else if (chassis) {
            sbrec_port_binding_set_type(op->sb, "l3gateway");
        } else {
            sbrec_port_binding_set_type(op->sb, "patch");
        }

        struct smap new;
        smap_init(&new);
        if (op->derived) {
            const char *redirect_chassis = smap_get(&op->nbrp->options,
                                                    "redirect-chassis");
            if (redirect_chassis) {
                smap_add(&new, "redirect-chassis", redirect_chassis);
            }
            smap_add(&new, "distributed-port", op->nbrp->name);
        } else {
            const char *peer = op->peer ? op->peer->key : "<error>";
            smap_add(&new, "peer", peer);
            if (chassis) {
                smap_add(&new, "l3gateway-chassis", chassis);
            }
        }
        sbrec_port_binding_set_options(op->sb, &new);
        smap_destroy(&new);

        /* These columns are only meaningful for switch ports; clear any
         * stale values left from a previous incarnation of the row. */
        sbrec_port_binding_set_parent_port(op->sb, NULL);
        sbrec_port_binding_set_tag(op->sb, NULL, 0);
        sbrec_port_binding_set_mac(op->sb, NULL, 0);
    } else {
        if (strcmp(op->nbsp->type, "router")) {
            /* Ordinary (non-router) switch port.  Keep the chassis QoS
             * queue allocation in sync with the presence of QoS options. */
            uint32_t queue_id = smap_get_int(
                &op->sb->options, "qdisc_queue_id", 0);
            bool has_qos = port_has_qos_params(&op->nbsp->options);
            struct smap options;

            if (op->sb->chassis && has_qos && !queue_id) {
                queue_id = allocate_chassis_queueid(chassis_qdisc_queues,
                                                    op->sb->chassis);
            } else if (!has_qos && queue_id) {
                free_chassis_queueid(chassis_qdisc_queues,
                                     op->sb->chassis,
                                     queue_id);
                queue_id = 0;
            }

            smap_clone(&options, &op->nbsp->options);
            if (queue_id) {
                smap_add_format(&options,
                                "qdisc_queue_id", "%d", queue_id);
            }
            sbrec_port_binding_set_options(op->sb, &options);
            smap_destroy(&options);
            sbrec_port_binding_set_type(op->sb, op->nbsp->type);
        } else {
            /* Switch port of type "router": it patches to a router port
             * named by the "router-port" option. */
            const char *chassis = NULL;
            if (op->peer && op->peer->od && op->peer->od->nbr) {
                chassis = smap_get(&op->peer->od->nbr->options, "chassis");
            }

            /* A switch port connected to a gateway router is also of
             * type "l3gateway". */
            if (chassis) {
                sbrec_port_binding_set_type(op->sb, "l3gateway");
            } else {
                sbrec_port_binding_set_type(op->sb, "patch");
            }

            const char *router_port = smap_get_def(&op->nbsp->options,
                                                   "router-port", "<error>");
            struct smap new;
            smap_init(&new);
            smap_add(&new, "peer", router_port);
            if (chassis) {
                smap_add(&new, "l3gateway-chassis", chassis);
            }
            sbrec_port_binding_set_options(op->sb, &new);
            smap_destroy(&new);

            const char *nat_addresses = smap_get(&op->nbsp->options,
                                           "nat-addresses");
            /* "router" means: derive GARP addresses from the peer router's
             * NAT rules and load-balancer VIPs. */
            if (nat_addresses && !strcmp(nat_addresses, "router")) {
                if (op->peer && op->peer->od
                    && (chassis || op->peer->od->l3redirect_port)) {
                    size_t n_nats;
                    char **nats = get_nat_addresses(op->peer, &n_nats);
                    if (n_nats) {
                        sbrec_port_binding_set_nat_addresses(op->sb,
                            (const char **) nats, n_nats);
                        for (size_t i = 0; i < n_nats; i++) {
                            free(nats[i]);
                        }
                        free(nats);
                    } else {
                        sbrec_port_binding_set_nat_addresses(op->sb, NULL, 0);
                    }
                } else {
                    sbrec_port_binding_set_nat_addresses(op->sb, NULL, 0);
                }
            /* Only accept manual specification of ethernet address
             * followed by IPv4 addresses on type "l3gateway" ports. */
            } else if (nat_addresses && chassis) {
                struct lport_addresses laddrs;
                if (!extract_lsp_addresses(nat_addresses, &laddrs)) {
                    static struct vlog_rate_limit rl =
                        VLOG_RATE_LIMIT_INIT(1, 1);
                    VLOG_WARN_RL(&rl, "Error extracting nat-addresses.");
                    sbrec_port_binding_set_nat_addresses(op->sb, NULL, 0);
                } else {
                    sbrec_port_binding_set_nat_addresses(op->sb,
                                                         &nat_addresses, 1);
                    destroy_lport_addresses(&laddrs);
                }
            } else {
                sbrec_port_binding_set_nat_addresses(op->sb, NULL, 0);
            }
        }
        sbrec_port_binding_set_parent_port(op->sb, op->nbsp->parent_name);
        sbrec_port_binding_set_tag(op->sb, op->nbsp->tag, op->nbsp->n_tag);
        sbrec_port_binding_set_mac(op->sb, (const char **) op->nbsp->addresses,
                                   op->nbsp->n_addresses);
    }
}
1770 | ||
6e31816f CSV |
1771 | /* Remove mac_binding entries that refer to logical_ports which are |
1772 | * deleted. */ | |
1773 | static void | |
1774 | cleanup_mac_bindings(struct northd_context *ctx, struct hmap *ports) | |
1775 | { | |
1776 | const struct sbrec_mac_binding *b, *n; | |
1777 | SBREC_MAC_BINDING_FOR_EACH_SAFE (b, n, ctx->ovnsb_idl) { | |
1778 | if (!ovn_port_find(ports, b->logical_port)) { | |
1779 | sbrec_mac_binding_delete(b); | |
1780 | } | |
1781 | } | |
1782 | } | |
1783 | ||
/* Updates the southbound Port_Binding table so that it contains the logical
 * switch ports specified by the northbound database.
 *
 * Initializes 'ports' to contain a "struct ovn_port" for every logical port,
 * using the "struct ovn_datapath"s in 'datapaths' to look up logical
 * datapaths. */
static void
build_ports(struct northd_context *ctx, struct hmap *datapaths,
            struct hmap *ports)
{
    struct ovs_list sb_only, nb_only, both;
    struct hmap tag_alloc_table = HMAP_INITIALIZER(&tag_alloc_table);
    struct hmap chassis_qdisc_queues = HMAP_INITIALIZER(&chassis_qdisc_queues);

    /* Partition logical ports into: SB-only (stale), NB-only (new), and
     * those present in both databases. */
    join_logical_ports(ctx, datapaths, ports, &chassis_qdisc_queues,
                       &tag_alloc_table, &sb_only, &nb_only, &both);

    struct ovn_port *op, *next;
    /* For logical ports that are in both databases, update the southbound
     * record based on northbound data.  Also index the in-use tunnel_keys.
     * For logical ports that are in NB database, do any tag allocation
     * needed. */
    LIST_FOR_EACH_SAFE (op, next, list, &both) {
        if (op->nbsp) {
            tag_alloc_create_new_tag(&tag_alloc_table, op->nbsp);
        }
        ovn_port_update_sbrec(op, &chassis_qdisc_queues);

        /* Record the tunnel key as in-use so new ports don't collide. */
        add_tnlid(&op->od->port_tnlids, op->sb->tunnel_key);
        if (op->sb->tunnel_key > op->od->port_key_hint) {
            op->od->port_key_hint = op->sb->tunnel_key;
        }
    }

    /* Add southbound record for each unmatched northbound record. */
    LIST_FOR_EACH_SAFE (op, next, list, &nb_only) {
        uint16_t tunnel_key = ovn_port_allocate_key(op->od);
        if (!tunnel_key) {
            /* Key space exhausted for this datapath; port stays unbound. */
            continue;
        }

        op->sb = sbrec_port_binding_insert(ctx->ovnsb_txn);
        ovn_port_update_sbrec(op, &chassis_qdisc_queues);

        sbrec_port_binding_set_logical_port(op->sb, op->key);
        sbrec_port_binding_set_tunnel_key(op->sb, tunnel_key);
    }

    /* MAC_Binding rows can only become orphaned when ports are deleted,
     * so only scan for them if we are about to delete something. */
    bool remove_mac_bindings = false;
    if (!ovs_list_is_empty(&sb_only)) {
        remove_mac_bindings = true;
    }

    /* Delete southbound records without northbound matches. */
    LIST_FOR_EACH_SAFE(op, next, list, &sb_only) {
        ovs_list_remove(&op->list);
        sbrec_port_binding_delete(op->sb);
        ovn_port_destroy(ports, op);
    }
    if (remove_mac_bindings) {
        cleanup_mac_bindings(ctx, ports);
    }

    tag_alloc_destroy(&tag_alloc_table);
    destroy_chassis_queues(&chassis_qdisc_queues);
}
1850 | \f | |
/* Multicast group tunnel keys occupy the top half of the 16-bit key space;
 * keys below OVN_MIN_MULTICAST identify ordinary (unicast) ports. */
#define OVN_MIN_MULTICAST 32768
#define OVN_MAX_MULTICAST 65535

/* A named multicast group and its fixed tunnel key. */
struct multicast_group {
    const char *name;
    uint16_t key;               /* OVN_MIN_MULTICAST...OVN_MAX_MULTICAST. */
};

#define MC_FLOOD "_MC_flood"
static const struct multicast_group mc_flood = { MC_FLOOD, 65535 };

#define MC_UNKNOWN "_MC_unknown"
static const struct multicast_group mc_unknown = { MC_UNKNOWN, 65534 };

/* Returns true if 'a' and 'b' denote the same multicast group, that is,
 * they agree on both key and name. */
static bool
multicast_group_equal(const struct multicast_group *a,
                      const struct multicast_group *b)
{
    if (a->key != b->key) {
        return false;
    }
    return strcmp(a->name, b->name) == 0;
}
1871 | ||
/* Multicast group entry: the set of ports on one logical datapath that
 * belong to one multicast group.  Built in memory while generating flows,
 * then written to the southbound Multicast_Group table. */
struct ovn_multicast {
    struct hmap_node hmap_node; /* Index on 'datapath' and 'key'. */
    struct ovn_datapath *datapath;
    const struct multicast_group *group;

    /* Member ports; 'ports' is a heap array of 'allocated_ports' slots of
     * which the first 'n_ports' are in use. */
    struct ovn_port **ports;
    size_t n_ports, allocated_ports;
};
1881 | ||
1882 | static uint32_t | |
1883 | ovn_multicast_hash(const struct ovn_datapath *datapath, | |
1884 | const struct multicast_group *group) | |
1885 | { | |
1886 | return hash_pointer(datapath, group->key); | |
1887 | } | |
1888 | ||
1889 | static struct ovn_multicast * | |
1890 | ovn_multicast_find(struct hmap *mcgroups, struct ovn_datapath *datapath, | |
1891 | const struct multicast_group *group) | |
1892 | { | |
1893 | struct ovn_multicast *mc; | |
1894 | ||
1895 | HMAP_FOR_EACH_WITH_HASH (mc, hmap_node, | |
1896 | ovn_multicast_hash(datapath, group), mcgroups) { | |
1897 | if (mc->datapath == datapath | |
1898 | && multicast_group_equal(mc->group, group)) { | |
1899 | return mc; | |
4edcdcf4 RB |
1900 | } |
1901 | } | |
5868eb24 BP |
1902 | return NULL; |
1903 | } | |
1904 | ||
1905 | static void | |
1906 | ovn_multicast_add(struct hmap *mcgroups, const struct multicast_group *group, | |
1907 | struct ovn_port *port) | |
1908 | { | |
1909 | struct ovn_datapath *od = port->od; | |
1910 | struct ovn_multicast *mc = ovn_multicast_find(mcgroups, od, group); | |
1911 | if (!mc) { | |
1912 | mc = xmalloc(sizeof *mc); | |
1913 | hmap_insert(mcgroups, &mc->hmap_node, ovn_multicast_hash(od, group)); | |
1914 | mc->datapath = od; | |
1915 | mc->group = group; | |
1916 | mc->n_ports = 0; | |
1917 | mc->allocated_ports = 4; | |
1918 | mc->ports = xmalloc(mc->allocated_ports * sizeof *mc->ports); | |
1919 | } | |
1920 | if (mc->n_ports >= mc->allocated_ports) { | |
1921 | mc->ports = x2nrealloc(mc->ports, &mc->allocated_ports, | |
1922 | sizeof *mc->ports); | |
1923 | } | |
1924 | mc->ports[mc->n_ports++] = port; | |
1925 | } | |
4edcdcf4 | 1926 | |
5868eb24 BP |
1927 | static void |
1928 | ovn_multicast_destroy(struct hmap *mcgroups, struct ovn_multicast *mc) | |
1929 | { | |
1930 | if (mc) { | |
1931 | hmap_remove(mcgroups, &mc->hmap_node); | |
1932 | free(mc->ports); | |
1933 | free(mc); | |
1934 | } | |
1935 | } | |
4edcdcf4 | 1936 | |
5868eb24 BP |
1937 | static void |
1938 | ovn_multicast_update_sbrec(const struct ovn_multicast *mc, | |
1939 | const struct sbrec_multicast_group *sb) | |
1940 | { | |
1941 | struct sbrec_port_binding **ports = xmalloc(mc->n_ports * sizeof *ports); | |
1942 | for (size_t i = 0; i < mc->n_ports; i++) { | |
1943 | ports[i] = CONST_CAST(struct sbrec_port_binding *, mc->ports[i]->sb); | |
1944 | } | |
1945 | sbrec_multicast_group_set_ports(sb, ports, mc->n_ports); | |
1946 | free(ports); | |
4edcdcf4 | 1947 | } |
bd39395f | 1948 | \f |
48605550 | 1949 | /* Logical flow generation. |
bd39395f | 1950 | * |
48605550 | 1951 | * This code generates the Logical_Flow table in the southbound database, as a |
bd39395f BP |
1952 | * function of most of the northbound database. |
1953 | */ | |
1954 | ||
5868eb24 BP |
/* One logical flow: a row to be written to the southbound Logical_Flow
 * table.  Indexed by a hash of (od, stage, priority, match, actions) so
 * duplicate flows can be detected. */
struct ovn_lflow {
    struct hmap_node hmap_node;

    struct ovn_datapath *od;
    enum ovn_stage stage;
    uint16_t priority;
    char *match;                /* Owned; freed by ovn_lflow_destroy(). */
    char *actions;              /* Owned; freed by ovn_lflow_destroy(). */
    const char *where;          /* Source location that added the flow. */
};
1965 | ||
1966 | static size_t | |
5868eb24 | 1967 | ovn_lflow_hash(const struct ovn_lflow *lflow) |
bd39395f | 1968 | { |
5868eb24 | 1969 | size_t hash = uuid_hash(&lflow->od->key); |
880fcd14 | 1970 | hash = hash_2words((lflow->stage << 16) | lflow->priority, hash); |
5868eb24 BP |
1971 | hash = hash_string(lflow->match, hash); |
1972 | return hash_string(lflow->actions, hash); | |
bd39395f BP |
1973 | } |
1974 | ||
5868eb24 BP |
1975 | static bool |
1976 | ovn_lflow_equal(const struct ovn_lflow *a, const struct ovn_lflow *b) | |
1977 | { | |
1978 | return (a->od == b->od | |
880fcd14 | 1979 | && a->stage == b->stage |
5868eb24 BP |
1980 | && a->priority == b->priority |
1981 | && !strcmp(a->match, b->match) | |
1982 | && !strcmp(a->actions, b->actions)); | |
1983 | } | |
1984 | ||
1985 | static void | |
1986 | ovn_lflow_init(struct ovn_lflow *lflow, struct ovn_datapath *od, | |
d8026bbf BP |
1987 | enum ovn_stage stage, uint16_t priority, |
1988 | char *match, char *actions, const char *where) | |
bd39395f | 1989 | { |
5868eb24 | 1990 | lflow->od = od; |
880fcd14 | 1991 | lflow->stage = stage; |
5868eb24 BP |
1992 | lflow->priority = priority; |
1993 | lflow->match = match; | |
1994 | lflow->actions = actions; | |
d8026bbf | 1995 | lflow->where = where; |
bd39395f BP |
1996 | } |
1997 | ||
48605550 | 1998 | /* Adds a row with the specified contents to the Logical_Flow table. */ |
bd39395f | 1999 | static void |
d8026bbf BP |
2000 | ovn_lflow_add_at(struct hmap *lflow_map, struct ovn_datapath *od, |
2001 | enum ovn_stage stage, uint16_t priority, | |
2002 | const char *match, const char *actions, const char *where) | |
5868eb24 | 2003 | { |
9a9961d2 BP |
2004 | ovs_assert(ovn_stage_to_datapath_type(stage) == ovn_datapath_get_type(od)); |
2005 | ||
5868eb24 | 2006 | struct ovn_lflow *lflow = xmalloc(sizeof *lflow); |
880fcd14 | 2007 | ovn_lflow_init(lflow, od, stage, priority, |
d8026bbf | 2008 | xstrdup(match), xstrdup(actions), where); |
5868eb24 BP |
2009 | hmap_insert(lflow_map, &lflow->hmap_node, ovn_lflow_hash(lflow)); |
2010 | } | |
2011 | ||
d8026bbf BP |
/* Adds a row with the specified contents to the Logical_Flow table.
 * Convenience wrapper around ovn_lflow_add_at() that records the caller's
 * file:line via OVS_SOURCE_LOCATOR for debugging duplicate flows. */
#define ovn_lflow_add(LFLOW_MAP, OD, STAGE, PRIORITY, MATCH, ACTIONS) \
    ovn_lflow_add_at(LFLOW_MAP, OD, STAGE, PRIORITY, MATCH, ACTIONS, \
                     OVS_SOURCE_LOCATOR)
2016 | ||
5868eb24 BP |
2017 | static struct ovn_lflow * |
2018 | ovn_lflow_find(struct hmap *lflows, struct ovn_datapath *od, | |
880fcd14 | 2019 | enum ovn_stage stage, uint16_t priority, |
5868eb24 BP |
2020 | const char *match, const char *actions) |
2021 | { | |
2022 | struct ovn_lflow target; | |
880fcd14 | 2023 | ovn_lflow_init(&target, od, stage, priority, |
d8026bbf BP |
2024 | CONST_CAST(char *, match), CONST_CAST(char *, actions), |
2025 | NULL); | |
5868eb24 BP |
2026 | |
2027 | struct ovn_lflow *lflow; | |
2028 | HMAP_FOR_EACH_WITH_HASH (lflow, hmap_node, ovn_lflow_hash(&target), | |
2029 | lflows) { | |
2030 | if (ovn_lflow_equal(lflow, &target)) { | |
2031 | return lflow; | |
bd39395f BP |
2032 | } |
2033 | } | |
5868eb24 BP |
2034 | return NULL; |
2035 | } | |
bd39395f | 2036 | |
5868eb24 BP |
2037 | static void |
2038 | ovn_lflow_destroy(struct hmap *lflows, struct ovn_lflow *lflow) | |
2039 | { | |
2040 | if (lflow) { | |
2041 | hmap_remove(lflows, &lflow->hmap_node); | |
2042 | free(lflow->match); | |
2043 | free(lflow->actions); | |
2044 | free(lflow); | |
2045 | } | |
bd39395f BP |
2046 | } |
2047 | ||
bd39395f | 2048 | /* Appends port security constraints on L2 address field 'eth_addr_field' |
e93b43d6 JP |
2049 | * (e.g. "eth.src" or "eth.dst") to 'match'. 'ps_addrs', with 'n_ps_addrs' |
2050 | * elements, is the collection of port_security constraints from an | |
2051 | * OVN_NB Logical_Switch_Port row generated by extract_lsp_addresses(). */ | |
bd39395f | 2052 | static void |
685f4dfe | 2053 | build_port_security_l2(const char *eth_addr_field, |
e93b43d6 JP |
2054 | struct lport_addresses *ps_addrs, |
2055 | unsigned int n_ps_addrs, | |
685f4dfe | 2056 | struct ds *match) |
bd39395f | 2057 | { |
e93b43d6 JP |
2058 | if (!n_ps_addrs) { |
2059 | return; | |
2060 | } | |
bd39395f | 2061 | |
e93b43d6 | 2062 | ds_put_format(match, " && %s == {", eth_addr_field); |
f7cb14cd | 2063 | |
e93b43d6 JP |
2064 | for (size_t i = 0; i < n_ps_addrs; i++) { |
2065 | ds_put_format(match, "%s ", ps_addrs[i].ea_s); | |
bd39395f | 2066 | } |
f7cb14cd | 2067 | ds_chomp(match, ' '); |
bd39395f | 2068 | ds_put_cstr(match, "}"); |
bd39395f BP |
2069 | } |
2070 | ||
685f4dfe NS |
/* Appends to 'match' an IPv6 ND port-security clause for a port whose
 * Ethernet address is 'ea' and whose allowed IPv6 addresses are
 * 'ipv6_addrs' (may be empty).  The clause permits ND packets whose
 * nd.sll/nd.tll is either all-zeros or 'ea', and -- when addresses are
 * given -- whose nd.target is the port's link-local address or one of
 * the allowed addresses.  Parentheses are balanced across the early
 * return and the tail below; keep them in sync if editing. */
static void
build_port_security_ipv6_nd_flow(
    struct ds *match, struct eth_addr ea, struct ipv6_netaddr *ipv6_addrs,
    int n_ipv6_addrs)
{
    ds_put_format(match, " && ip6 && nd && ((nd.sll == "ETH_ADDR_FMT" || "
                  "nd.sll == "ETH_ADDR_FMT") || ((nd.tll == "ETH_ADDR_FMT" || "
                  "nd.tll == "ETH_ADDR_FMT")", ETH_ADDR_ARGS(eth_addr_zero),
                  ETH_ADDR_ARGS(ea), ETH_ADDR_ARGS(eth_addr_zero),
                  ETH_ADDR_ARGS(ea));
    if (!n_ipv6_addrs) {
        /* No address constraint: just close the open parentheses. */
        ds_put_cstr(match, "))");
        return;
    }

    /* Always allow the EUI-64 link-local address derived from 'ea'. */
    char ip6_str[INET6_ADDRSTRLEN + 1];
    struct in6_addr lla;
    in6_generate_lla(ea, &lla);
    memset(ip6_str, 0, sizeof(ip6_str));
    ipv6_string_mapped(ip6_str, &lla);
    ds_put_format(match, " && (nd.target == %s", ip6_str);

    for(int i = 0; i < n_ipv6_addrs; i++) {
        memset(ip6_str, 0, sizeof(ip6_str));
        ipv6_string_mapped(ip6_str, &ipv6_addrs[i].addr);
        ds_put_format(match, " || nd.target == %s", ip6_str);
    }

    ds_put_format(match, ")))");
}
2101 | ||
/* Appends to 'match' an IPv6 source/destination port-security clause.
 * In the ingress pipeline it constrains ip6.src, in egress ip6.dst.
 * The allowed set always includes the EUI-64 link-local address derived
 * from 'ea', plus (egress only) the ff00::/8 multicast range, plus every
 * address in 'ipv6_addrs'. */
static void
build_port_security_ipv6_flow(
    enum ovn_pipeline pipeline, struct ds *match, struct eth_addr ea,
    struct ipv6_netaddr *ipv6_addrs, int n_ipv6_addrs)
{
    char ip6_str[INET6_ADDRSTRLEN + 1];

    ds_put_format(match, " && %s == {",
                  pipeline == P_IN ? "ip6.src" : "ip6.dst");

    /* Allow link-local address. */
    struct in6_addr lla;
    in6_generate_lla(ea, &lla);
    ipv6_string_mapped(ip6_str, &lla);
    ds_put_format(match, "%s, ", ip6_str);

    /* Allow ip6.dst=ff00::/8 for multicast packets */
    if (pipeline == P_OUT) {
        ds_put_cstr(match, "ff00::/8, ");
    }
    for(int i = 0; i < n_ipv6_addrs; i++) {
        ipv6_string_mapped(ip6_str, &ipv6_addrs[i].addr);
        ds_put_format(match, "%s, ", ip6_str);
    }
    /* Replace ", " by "}". */
    ds_chomp(match, ' ');
    ds_chomp(match, ',');
    ds_put_cstr(match, "}");
}
2131 | ||
/**
 * Build port security constraints on ARP and IPv6 ND fields
 * and add logical flows to S_SWITCH_IN_PORT_SEC_ND stage.
 *
 * For each port security of the logical port, following
 * logical flows are added
 *   - If the port security has no IP (both IPv4 and IPv6) or
 *     if it has IPv4 address(es)
 *      - Priority 90 flow to allow ARP packets for known MAC addresses
 *        in the eth.src and arp.spa fields. If the port security
 *        has IPv4 addresses, allow known IPv4 addresses in the arp.tpa field.
 *
 *   - If the port security has no IP (both IPv4 and IPv6) or
 *     if it has IPv6 address(es)
 *     - Priority 90 flow to allow IPv6 ND packets for known MAC addresses
 *       in the eth.src and nd.sll/nd.tll fields. If the port security
 *       has IPv6 addresses, allow known IPv6 addresses in the nd.target field
 *       for IPv6 Neighbor Advertisement packet.
 *
 *   - Priority 80 flow to drop ARP and IPv6 ND packets.
 */
static void
build_port_security_nd(struct ovn_port *op, struct hmap *lflows)
{
    struct ds match = DS_EMPTY_INITIALIZER;

    for (size_t i = 0; i < op->n_ps_addrs; i++) {
        struct lport_addresses *ps = &op->ps_addrs[i];

        /* A MAC-only port_security entry constrains both ARP and ND. */
        bool no_ip = !(ps->n_ipv4_addrs || ps->n_ipv6_addrs);

        ds_clear(&match);
        if (ps->n_ipv4_addrs || no_ip) {
            ds_put_format(&match,
                          "inport == %s && eth.src == %s && arp.sha == %s",
                          op->json_key, ps->ea_s, ps->ea_s);

            if (ps->n_ipv4_addrs) {
                ds_put_cstr(&match, " && arp.spa == {");
                for (size_t j = 0; j < ps->n_ipv4_addrs; j++) {
                    /* When the netmask is applied, if the host portion is
                     * non-zero, the host can only use the specified
                     * address in the arp.spa.  If zero, the host is allowed
                     * to use any address in the subnet. */
                    if (ps->ipv4_addrs[j].plen == 32
                        || ps->ipv4_addrs[j].addr & ~ps->ipv4_addrs[j].mask) {
                        ds_put_cstr(&match, ps->ipv4_addrs[j].addr_s);
                    } else {
                        ds_put_format(&match, "%s/%d",
                                      ps->ipv4_addrs[j].network_s,
                                      ps->ipv4_addrs[j].plen);
                    }
                    ds_put_cstr(&match, ", ");
                }
                /* Turn the trailing ", " into "}". */
                ds_chomp(&match, ' ');
                ds_chomp(&match, ',');
                ds_put_cstr(&match, "}");
            }
            ovn_lflow_add(lflows, op->od, S_SWITCH_IN_PORT_SEC_ND, 90,
                          ds_cstr(&match), "next;");
        }

        if (ps->n_ipv6_addrs || no_ip) {
            ds_clear(&match);
            ds_put_format(&match, "inport == %s && eth.src == %s",
                          op->json_key, ps->ea_s);
            build_port_security_ipv6_nd_flow(&match, ps->ea, ps->ipv6_addrs,
                                             ps->n_ipv6_addrs);
            ovn_lflow_add(lflows, op->od, S_SWITCH_IN_PORT_SEC_ND, 90,
                          ds_cstr(&match), "next;");
        }
    }

    /* Catch-all: drop any ARP/ND from this port not matched above. */
    ds_clear(&match);
    ds_put_format(&match, "inport == %s && (arp || nd)", op->json_key);
    ovn_lflow_add(lflows, op->od, S_SWITCH_IN_PORT_SEC_ND, 80,
                  ds_cstr(&match), "drop;");
    ds_destroy(&match);
}
2211 | ||
2212 | /** | |
2213 | * Build port security constraints on IPv4 and IPv6 src and dst fields | |
2214 | * and add logical flows to S_SWITCH_(IN/OUT)_PORT_SEC_IP stage. | |
2215 | * | |
2216 | * For each port security of the logical port, following | |
2217 | * logical flows are added | |
2218 | * - If the port security has IPv4 addresses, | |
2219 | * - Priority 90 flow to allow IPv4 packets for known IPv4 addresses | |
2220 | * | |
2221 | * - If the port security has IPv6 addresses, | |
2222 | * - Priority 90 flow to allow IPv6 packets for known IPv6 addresses | |
2223 | * | |
2224 | * - If the port security has IPv4 addresses or IPv6 addresses or both | |
2225 | * - Priority 80 flow to drop all IPv4 and IPv6 traffic | |
2226 | */ | |
2227 | static void | |
2228 | build_port_security_ip(enum ovn_pipeline pipeline, struct ovn_port *op, | |
2229 | struct hmap *lflows) | |
2230 | { | |
2231 | char *port_direction; | |
2232 | enum ovn_stage stage; | |
2233 | if (pipeline == P_IN) { | |
2234 | port_direction = "inport"; | |
2235 | stage = S_SWITCH_IN_PORT_SEC_IP; | |
2236 | } else { | |
2237 | port_direction = "outport"; | |
2238 | stage = S_SWITCH_OUT_PORT_SEC_IP; | |
2239 | } | |
2240 | ||
e93b43d6 JP |
2241 | for (size_t i = 0; i < op->n_ps_addrs; i++) { |
2242 | struct lport_addresses *ps = &op->ps_addrs[i]; | |
685f4dfe | 2243 | |
e93b43d6 | 2244 | if (!(ps->n_ipv4_addrs || ps->n_ipv6_addrs)) { |
685f4dfe NS |
2245 | continue; |
2246 | } | |
2247 | ||
e93b43d6 | 2248 | if (ps->n_ipv4_addrs) { |
685f4dfe NS |
2249 | struct ds match = DS_EMPTY_INITIALIZER; |
2250 | if (pipeline == P_IN) { | |
9e687b23 DL |
2251 | /* Permit use of the unspecified address for DHCP discovery */ |
2252 | struct ds dhcp_match = DS_EMPTY_INITIALIZER; | |
2253 | ds_put_format(&dhcp_match, "inport == %s" | |
e93b43d6 | 2254 | " && eth.src == %s" |
9e687b23 DL |
2255 | " && ip4.src == 0.0.0.0" |
2256 | " && ip4.dst == 255.255.255.255" | |
e93b43d6 JP |
2257 | " && udp.src == 68 && udp.dst == 67", |
2258 | op->json_key, ps->ea_s); | |
9e687b23 DL |
2259 | ovn_lflow_add(lflows, op->od, stage, 90, |
2260 | ds_cstr(&dhcp_match), "next;"); | |
2261 | ds_destroy(&dhcp_match); | |
e93b43d6 | 2262 | ds_put_format(&match, "inport == %s && eth.src == %s" |
9e687b23 | 2263 | " && ip4.src == {", op->json_key, |
e93b43d6 | 2264 | ps->ea_s); |
685f4dfe | 2265 | } else { |
e93b43d6 | 2266 | ds_put_format(&match, "outport == %s && eth.dst == %s" |
685f4dfe | 2267 | " && ip4.dst == {255.255.255.255, 224.0.0.0/4, ", |
e93b43d6 | 2268 | op->json_key, ps->ea_s); |
685f4dfe NS |
2269 | } |
2270 | ||
f95523c0 JP |
2271 | for (int j = 0; j < ps->n_ipv4_addrs; j++) { |
2272 | ovs_be32 mask = ps->ipv4_addrs[j].mask; | |
7d9d86ad NS |
2273 | /* When the netmask is applied, if the host portion is |
2274 | * non-zero, the host can only use the specified | |
2275 | * address. If zero, the host is allowed to use any | |
2276 | * address in the subnet. | |
e93b43d6 | 2277 | */ |
f95523c0 JP |
2278 | if (ps->ipv4_addrs[j].plen == 32 |
2279 | || ps->ipv4_addrs[j].addr & ~mask) { | |
2280 | ds_put_format(&match, "%s", ps->ipv4_addrs[j].addr_s); | |
2281 | if (pipeline == P_OUT && ps->ipv4_addrs[j].plen != 32) { | |
e93b43d6 JP |
2282 | /* Host is also allowed to receive packets to the |
2283 | * broadcast address in the specified subnet. */ | |
2284 | ds_put_format(&match, ", %s", | |
f95523c0 | 2285 | ps->ipv4_addrs[j].bcast_s); |
7d9d86ad NS |
2286 | } |
2287 | } else { | |
2288 | /* host portion is zero */ | |
f95523c0 JP |
2289 | ds_put_format(&match, "%s/%d", ps->ipv4_addrs[j].network_s, |
2290 | ps->ipv4_addrs[j].plen); | |
7d9d86ad NS |
2291 | } |
2292 | ds_put_cstr(&match, ", "); | |
685f4dfe NS |
2293 | } |
2294 | ||
2295 | /* Replace ", " by "}". */ | |
2296 | ds_chomp(&match, ' '); | |
2297 | ds_chomp(&match, ','); | |
2298 | ds_put_cstr(&match, "}"); | |
2299 | ovn_lflow_add(lflows, op->od, stage, 90, ds_cstr(&match), "next;"); | |
2300 | ds_destroy(&match); | |
685f4dfe NS |
2301 | } |
2302 | ||
e93b43d6 | 2303 | if (ps->n_ipv6_addrs) { |
685f4dfe | 2304 | struct ds match = DS_EMPTY_INITIALIZER; |
9e687b23 DL |
2305 | if (pipeline == P_IN) { |
2306 | /* Permit use of unspecified address for duplicate address | |
2307 | * detection */ | |
2308 | struct ds dad_match = DS_EMPTY_INITIALIZER; | |
2309 | ds_put_format(&dad_match, "inport == %s" | |
e93b43d6 | 2310 | " && eth.src == %s" |
9e687b23 DL |
2311 | " && ip6.src == ::" |
2312 | " && ip6.dst == ff02::/16" | |
2313 | " && icmp6.type == {131, 135, 143}", op->json_key, | |
e93b43d6 | 2314 | ps->ea_s); |
9e687b23 DL |
2315 | ovn_lflow_add(lflows, op->od, stage, 90, |
2316 | ds_cstr(&dad_match), "next;"); | |
2317 | ds_destroy(&dad_match); | |
2318 | } | |
e93b43d6 | 2319 | ds_put_format(&match, "%s == %s && %s == %s", |
685f4dfe | 2320 | port_direction, op->json_key, |
e93b43d6 JP |
2321 | pipeline == P_IN ? "eth.src" : "eth.dst", ps->ea_s); |
2322 | build_port_security_ipv6_flow(pipeline, &match, ps->ea, | |
2323 | ps->ipv6_addrs, ps->n_ipv6_addrs); | |
685f4dfe NS |
2324 | ovn_lflow_add(lflows, op->od, stage, 90, |
2325 | ds_cstr(&match), "next;"); | |
2326 | ds_destroy(&match); | |
685f4dfe NS |
2327 | } |
2328 | ||
e93b43d6 JP |
2329 | char *match = xasprintf("%s == %s && %s == %s && ip", |
2330 | port_direction, op->json_key, | |
2331 | pipeline == P_IN ? "eth.src" : "eth.dst", | |
2332 | ps->ea_s); | |
685f4dfe NS |
2333 | ovn_lflow_add(lflows, op->od, stage, 80, match, "drop;"); |
2334 | free(match); | |
2335 | } | |
f2a715b5 | 2336 | |
685f4dfe NS |
2337 | } |
2338 | ||
95a9a275 | 2339 | static bool |
80f408f4 | 2340 | lsp_is_enabled(const struct nbrec_logical_switch_port *lsp) |
95a9a275 | 2341 | { |
80f408f4 | 2342 | return !lsp->enabled || *lsp->enabled; |
95a9a275 RB |
2343 | } |
2344 | ||
4c7bf534 | 2345 | static bool |
80f408f4 | 2346 | lsp_is_up(const struct nbrec_logical_switch_port *lsp) |
4c7bf534 | 2347 | { |
80f408f4 | 2348 | return !lsp->up || *lsp->up; |
4c7bf534 NS |
2349 | } |
2350 | ||
/* Builds the logical-flow action strings for native DHCPv4 service on
 * logical port 'op' offering address 'offer_ip'.
 *
 * On success, returns true and appends to:
 *   - 'options_action': a put_dhcp_opts(...) action that stamps the DHCP
 *     options (offerip first, then the configured options in sorted order).
 *   - 'response_action': the action that turns the request into a reply
 *     (swap MACs, set server addresses/ports, loop the packet back out).
 *   - 'ipv4_addr_match': a match on traffic from the offered IP to the
 *     server IP or the broadcast address.
 *
 * Returns false (emitting nothing) if DHCPv4 is not configured for the
 * port, the configured cidr is invalid, 'offer_ip' is outside the cidr,
 * or a required option (server_id, server_mac, lease_time) is missing. */
static bool
build_dhcpv4_action(struct ovn_port *op, ovs_be32 offer_ip,
                    struct ds *options_action, struct ds *response_action,
                    struct ds *ipv4_addr_match)
{
    if (!op->nbsp->dhcpv4_options) {
        /* CMS has disabled native DHCPv4 for this lport. */
        return false;
    }

    ovs_be32 host_ip, mask;
    char *error = ip_parse_masked(op->nbsp->dhcpv4_options->cidr, &host_ip,
                                  &mask);
    if (error || ((offer_ip ^ host_ip) & mask)) {
        /* Either
         *  - cidr defined is invalid or
         *  - the offer ip of the logical port doesn't belong to the cidr
         *    defined in the DHCPv4 options.
         */
        free(error);
        return false;
    }

    const char *server_ip = smap_get(
        &op->nbsp->dhcpv4_options->options, "server_id");
    const char *server_mac = smap_get(
        &op->nbsp->dhcpv4_options->options, "server_mac");
    const char *lease_time = smap_get(
        &op->nbsp->dhcpv4_options->options, "lease_time");

    if (!(server_ip && server_mac && lease_time)) {
        /* "server_id", "server_mac" and "lease_time" should be
         * present in the dhcp_options. */
        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
        VLOG_WARN_RL(&rl, "Required DHCPv4 options not defined for lport - %s",
                     op->json_key);
        return false;
    }

    /* Work on a copy so the NB row's options are not modified. */
    struct smap dhcpv4_options = SMAP_INITIALIZER(&dhcpv4_options);
    smap_clone(&dhcpv4_options, &op->nbsp->dhcpv4_options->options);

    /* server_mac is not DHCPv4 option, delete it from the smap. */
    smap_remove(&dhcpv4_options, "server_mac");
    char *netmask = xasprintf(IP_FMT, IP_ARGS(mask));
    smap_add(&dhcpv4_options, "netmask", netmask);
    free(netmask);

    /* "offerip" must be the first argument to put_dhcp_opts(). */
    ds_put_format(options_action,
                  REGBIT_DHCP_OPTS_RESULT" = put_dhcp_opts(offerip = "
                  IP_FMT", ", IP_ARGS(offer_ip));

    /* We're not using SMAP_FOR_EACH because we want a consistent order of the
     * options on different architectures (big or little endian, SSE4.2) */
    const struct smap_node **sorted_opts = smap_sort(&dhcpv4_options);
    for (size_t i = 0; i < smap_count(&dhcpv4_options); i++) {
        const struct smap_node *node = sorted_opts[i];
        ds_put_format(options_action, "%s = %s, ", node->key, node->value);
    }
    free(sorted_opts);

    /* Drop the trailing ", " left by the loop before closing the call. */
    ds_chomp(options_action, ' ');
    ds_chomp(options_action, ',');
    ds_put_cstr(options_action, "); next;");

    /* Reply goes back out the port it came in on; flags.loopback permits
     * sending through the ingress port. */
    ds_put_format(response_action, "eth.dst = eth.src; eth.src = %s; "
                  "ip4.dst = "IP_FMT"; ip4.src = %s; udp.src = 67; "
                  "udp.dst = 68; outport = inport; flags.loopback = 1; "
                  "output;",
                  server_mac, IP_ARGS(offer_ip), server_ip);

    ds_put_format(ipv4_addr_match,
                  "ip4.src == "IP_FMT" && ip4.dst == {%s, 255.255.255.255}",
                  IP_ARGS(offer_ip), server_ip);
    smap_destroy(&dhcpv4_options);
    return true;
}
2428 | ||
33ac3c83 NS |
2429 | static bool |
2430 | build_dhcpv6_action(struct ovn_port *op, struct in6_addr *offer_ip, | |
2431 | struct ds *options_action, struct ds *response_action) | |
2432 | { | |
2433 | if (!op->nbsp->dhcpv6_options) { | |
2434 | /* CMS has disabled native DHCPv6 for this lport. */ | |
2435 | return false; | |
2436 | } | |
2437 | ||
2438 | struct in6_addr host_ip, mask; | |
2439 | ||
2440 | char *error = ipv6_parse_masked(op->nbsp->dhcpv6_options->cidr, &host_ip, | |
2441 | &mask); | |
2442 | if (error) { | |
2443 | free(error); | |
2444 | return false; | |
2445 | } | |
2446 | struct in6_addr ip6_mask = ipv6_addr_bitxor(offer_ip, &host_ip); | |
2447 | ip6_mask = ipv6_addr_bitand(&ip6_mask, &mask); | |
2448 | if (!ipv6_mask_is_any(&ip6_mask)) { | |
2449 | /* offer_ip doesn't belongs to the cidr defined in lport's DHCPv6 | |
2450 | * options.*/ | |
2451 | return false; | |
2452 | } | |
2453 | ||
7c76bf4e | 2454 | const struct smap *options_map = &op->nbsp->dhcpv6_options->options; |
33ac3c83 | 2455 | /* "server_id" should be the MAC address. */ |
7c76bf4e | 2456 | const char *server_mac = smap_get(options_map, "server_id"); |
33ac3c83 NS |
2457 | struct eth_addr ea; |
2458 | if (!server_mac || !eth_addr_from_string(server_mac, &ea)) { | |
2459 | /* "server_id" should be present in the dhcpv6_options. */ | |
2460 | static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1); | |
2461 | VLOG_WARN_RL(&rl, "server_id not present in the DHCPv6 options" | |
2462 | " for lport %s", op->json_key); | |
2463 | return false; | |
2464 | } | |
2465 | ||
2466 | /* Get the link local IP of the DHCPv6 server from the server MAC. */ | |
2467 | struct in6_addr lla; | |
2468 | in6_generate_lla(ea, &lla); | |
2469 | ||
2470 | char server_ip[INET6_ADDRSTRLEN + 1]; | |
2471 | ipv6_string_mapped(server_ip, &lla); | |
2472 | ||
2473 | char ia_addr[INET6_ADDRSTRLEN + 1]; | |
2474 | ipv6_string_mapped(ia_addr, offer_ip); | |
2475 | ||
2476 | ds_put_format(options_action, | |
40df4566 ZKL |
2477 | REGBIT_DHCP_OPTS_RESULT" = put_dhcpv6_opts("); |
2478 | ||
2479 | /* Check whether the dhcpv6 options should be configured as stateful. | |
2480 | * Only reply with ia_addr option for dhcpv6 stateful address mode. */ | |
7c76bf4e | 2481 | if (!smap_get_bool(options_map, "dhcpv6_stateless", false)) { |
40df4566 ZKL |
2482 | char ia_addr[INET6_ADDRSTRLEN + 1]; |
2483 | ipv6_string_mapped(ia_addr, offer_ip); | |
2484 | ||
2485 | ds_put_format(options_action, "ia_addr = %s, ", ia_addr); | |
2486 | } | |
2487 | ||
7c76bf4e DDP |
2488 | /* We're not using SMAP_FOR_EACH because we want a consistent order of the |
2489 | * options on different architectures (big or little endian, SSE4.2) */ | |
2490 | const struct smap_node **sorted_opts = smap_sort(options_map); | |
2491 | for (size_t i = 0; i < smap_count(options_map); i++) { | |
2492 | const struct smap_node *node = sorted_opts[i]; | |
40df4566 ZKL |
2493 | if (strcmp(node->key, "dhcpv6_stateless")) { |
2494 | ds_put_format(options_action, "%s = %s, ", node->key, node->value); | |
2495 | } | |
33ac3c83 | 2496 | } |
7c76bf4e DDP |
2497 | free(sorted_opts); |
2498 | ||
33ac3c83 NS |
2499 | ds_chomp(options_action, ' '); |
2500 | ds_chomp(options_action, ','); | |
2501 | ds_put_cstr(options_action, "); next;"); | |
2502 | ||
2503 | ds_put_format(response_action, "eth.dst = eth.src; eth.src = %s; " | |
2504 | "ip6.dst = ip6.src; ip6.src = %s; udp.src = 547; " | |
2505 | "udp.dst = 546; outport = inport; flags.loopback = 1; " | |
2506 | "output;", | |
2507 | server_mac, server_ip); | |
40df4566 | 2508 | |
33ac3c83 NS |
2509 | return true; |
2510 | } | |
2511 | ||
78aab811 JP |
2512 | static bool |
2513 | has_stateful_acl(struct ovn_datapath *od) | |
2514 | { | |
9975d7be BP |
2515 | for (size_t i = 0; i < od->nbs->n_acls; i++) { |
2516 | struct nbrec_acl *acl = od->nbs->acls[i]; | |
78aab811 JP |
2517 | if (!strcmp(acl->action, "allow-related")) { |
2518 | return true; | |
2519 | } | |
2520 | } | |
2521 | ||
2522 | return false; | |
2523 | } | |
2524 | ||
2525 | static void | |
9ab989b7 | 2526 | build_pre_acls(struct ovn_datapath *od, struct hmap *lflows) |
78aab811 JP |
2527 | { |
2528 | bool has_stateful = has_stateful_acl(od); | |
2529 | ||
2530 | /* Ingress and Egress Pre-ACL Table (Priority 0): Packets are | |
2531 | * allowed by default. */ | |
880fcd14 BP |
2532 | ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 0, "1", "next;"); |
2533 | ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 0, "1", "next;"); | |
78aab811 | 2534 | |
c132fca0 | 2535 | /* If there are any stateful ACL rules in this datapath, we must |
78aab811 JP |
2536 | * send all IP packets through the conntrack action, which handles |
2537 | * defragmentation, in order to match L4 headers. */ | |
2538 | if (has_stateful) { | |
9ab989b7 BP |
2539 | for (size_t i = 0; i < od->n_router_ports; i++) { |
2540 | struct ovn_port *op = od->router_ports[i]; | |
2541 | /* Can't use ct() for router ports. Consider the | |
2542 | * following configuration: lp1(10.0.0.2) on | |
2543 | * hostA--ls1--lr0--ls2--lp2(10.0.1.2) on hostB, For a | |
2544 | * ping from lp1 to lp2, First, the response will go | |
2545 | * through ct() with a zone for lp2 in the ls2 ingress | |
2546 | * pipeline on hostB. That ct zone knows about this | |
2547 | * connection. Next, it goes through ct() with the zone | |
2548 | * for the router port in the egress pipeline of ls2 on | |
2549 | * hostB. This zone does not know about the connection, | |
2550 | * as the icmp request went through the logical router | |
2551 | * on hostA, not hostB. This would only work with | |
2552 | * distributed conntrack state across all chassis. */ | |
2553 | struct ds match_in = DS_EMPTY_INITIALIZER; | |
2554 | struct ds match_out = DS_EMPTY_INITIALIZER; | |
2555 | ||
2556 | ds_put_format(&match_in, "ip && inport == %s", op->json_key); | |
2557 | ds_put_format(&match_out, "ip && outport == %s", op->json_key); | |
2558 | ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 110, | |
2559 | ds_cstr(&match_in), "next;"); | |
2560 | ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 110, | |
2561 | ds_cstr(&match_out), "next;"); | |
2562 | ||
2563 | ds_destroy(&match_in); | |
2564 | ds_destroy(&match_out); | |
48fcdb47 | 2565 | } |
2d018f9b GS |
2566 | /* Ingress and Egress Pre-ACL Table (Priority 110). |
2567 | * | |
2568 | * Not to do conntrack on ND packets. */ | |
2569 | ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 110, "nd", "next;"); | |
2570 | ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 110, "nd", "next;"); | |
48fcdb47 | 2571 | |
78aab811 JP |
2572 | /* Ingress and Egress Pre-ACL Table (Priority 100). |
2573 | * | |
2574 | * Regardless of whether the ACL is "from-lport" or "to-lport", | |
2575 | * we need rules in both the ingress and egress table, because | |
facf8652 GS |
2576 | * the return traffic needs to be followed. |
2577 | * | |
2578 | * 'REGBIT_CONNTRACK_DEFRAG' is set to let the pre-stateful table send | |
2579 | * it to conntrack for tracking and defragmentation. */ | |
2580 | ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 100, "ip", | |
2581 | REGBIT_CONNTRACK_DEFRAG" = 1; next;"); | |
2582 | ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 100, "ip", | |
2583 | REGBIT_CONNTRACK_DEFRAG" = 1; next;"); | |
2d018f9b GS |
2584 | } |
2585 | } | |
78aab811 | 2586 | |
7a15be69 GS |
2587 | /* For a 'key' of the form "IP:port" or just "IP", sets 'port' and |
2588 | * 'ip_address'. The caller must free() the memory allocated for | |
2589 | * 'ip_address'. */ | |
2590 | static void | |
2591 | ip_address_and_port_from_lb_key(const char *key, char **ip_address, | |
2592 | uint16_t *port) | |
2593 | { | |
2594 | char *ip_str, *start, *next; | |
2595 | *ip_address = NULL; | |
2596 | *port = 0; | |
2597 | ||
2598 | next = start = xstrdup(key); | |
2599 | ip_str = strsep(&next, ":"); | |
2600 | if (!ip_str || !ip_str[0]) { | |
2601 | static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1); | |
2602 | VLOG_WARN_RL(&rl, "bad ip address for load balancer key %s", key); | |
2603 | free(start); | |
2604 | return; | |
2605 | } | |
2606 | ||
2607 | ovs_be32 ip, mask; | |
2608 | char *error = ip_parse_masked(ip_str, &ip, &mask); | |
2609 | if (error || mask != OVS_BE32_MAX) { | |
2610 | static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1); | |
2611 | VLOG_WARN_RL(&rl, "bad ip address for load balancer key %s", key); | |
2612 | free(start); | |
2613 | free(error); | |
2614 | return; | |
2615 | } | |
2616 | ||
2617 | int l4_port = 0; | |
2618 | if (next && next[0]) { | |
2619 | if (!str_to_int(next, 0, &l4_port) || l4_port < 0 || l4_port > 65535) { | |
2620 | static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1); | |
2621 | VLOG_WARN_RL(&rl, "bad ip port for load balancer key %s", key); | |
2622 | free(start); | |
2623 | return; | |
2624 | } | |
2625 | } | |
2626 | ||
2627 | *port = l4_port; | |
2628 | *ip_address = strdup(ip_str); | |
2629 | free(start); | |
2630 | } | |
2631 | ||
2632 | static void | |
2633 | build_pre_lb(struct ovn_datapath *od, struct hmap *lflows) | |
2634 | { | |
2635 | /* Allow all packets to go to next tables by default. */ | |
2636 | ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_LB, 0, "1", "next;"); | |
2637 | ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_LB, 0, "1", "next;"); | |
2638 | ||
2639 | struct sset all_ips = SSET_INITIALIZER(&all_ips); | |
61591ad9 GS |
2640 | bool vip_configured = false; |
2641 | for (int i = 0; i < od->nbs->n_load_balancer; i++) { | |
2642 | struct nbrec_load_balancer *lb = od->nbs->load_balancer[i]; | |
7a15be69 GS |
2643 | struct smap *vips = &lb->vips; |
2644 | struct smap_node *node; | |
7a15be69 GS |
2645 | |
2646 | SMAP_FOR_EACH (node, vips) { | |
2647 | vip_configured = true; | |
2648 | ||
2649 | /* node->key contains IP:port or just IP. */ | |
2650 | char *ip_address = NULL; | |
2651 | uint16_t port; | |
2652 | ip_address_and_port_from_lb_key(node->key, &ip_address, &port); | |
2653 | if (!ip_address) { | |
2654 | continue; | |
2655 | } | |
2656 | ||
2657 | if (!sset_contains(&all_ips, ip_address)) { | |
2658 | sset_add(&all_ips, ip_address); | |
2659 | } | |
2660 | ||
2661 | free(ip_address); | |
2662 | ||
2663 | /* Ignore L4 port information in the key because fragmented packets | |
2664 | * may not have L4 information. The pre-stateful table will send | |
2665 | * the packet through ct() action to de-fragment. In stateful | |
2666 | * table, we will eventually look at L4 information. */ | |
2667 | } | |
61591ad9 | 2668 | } |
7a15be69 | 2669 | |
61591ad9 GS |
2670 | /* 'REGBIT_CONNTRACK_DEFRAG' is set to let the pre-stateful table send |
2671 | * packet to conntrack for defragmentation. */ | |
2672 | const char *ip_address; | |
2673 | SSET_FOR_EACH(ip_address, &all_ips) { | |
2674 | char *match = xasprintf("ip && ip4.dst == %s", ip_address); | |
2675 | ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_LB, | |
2676 | 100, match, REGBIT_CONNTRACK_DEFRAG" = 1; next;"); | |
2677 | free(match); | |
2678 | } | |
7a15be69 | 2679 | |
61591ad9 | 2680 | sset_destroy(&all_ips); |
7a15be69 | 2681 | |
61591ad9 GS |
2682 | if (vip_configured) { |
2683 | ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_LB, | |
2684 | 100, "ip", REGBIT_CONNTRACK_DEFRAG" = 1; next;"); | |
7a15be69 GS |
2685 | } |
2686 | } | |
2687 | ||
facf8652 GS |
2688 | static void |
2689 | build_pre_stateful(struct ovn_datapath *od, struct hmap *lflows) | |
2690 | { | |
2691 | /* Ingress and Egress pre-stateful Table (Priority 0): Packets are | |
2692 | * allowed by default. */ | |
2693 | ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_STATEFUL, 0, "1", "next;"); | |
2694 | ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_STATEFUL, 0, "1", "next;"); | |
2695 | ||
2696 | /* If REGBIT_CONNTRACK_DEFRAG is set as 1, then the packets should be | |
2697 | * sent to conntrack for tracking and defragmentation. */ | |
2698 | ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_STATEFUL, 100, | |
2699 | REGBIT_CONNTRACK_DEFRAG" == 1", "ct_next;"); | |
2700 | ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_STATEFUL, 100, | |
2701 | REGBIT_CONNTRACK_DEFRAG" == 1", "ct_next;"); | |
2702 | } | |
2703 | ||
/* Populates the ingress and egress ACL stages for datapath 'od'.
 *
 * Priority layout (highest wins):
 *   65535 - conntrack hygiene: drop invalid/blocked, pass established
 *           reply and related traffic, skip conntrack for ND.
 *   34000 - always allow DHCPv4/v6 replies from ovn-controller.
 *   user ACLs + OVN_ACL_PRI_OFFSET - the CMS-configured policies.
 *   1     - (stateful only) commit IP traffic so return traffic is known.
 *   0     - allow by default. */
static void
build_acls(struct ovn_datapath *od, struct hmap *lflows)
{
    bool has_stateful = has_stateful_acl(od);

    /* Ingress and Egress ACL Table (Priority 0): Packets are allowed by
     * default.  A related rule at priority 1 is added below if there
     * are any stateful ACLs in this datapath. */
    ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, 0, "1", "next;");
    ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, 0, "1", "next;");

    if (has_stateful) {
        /* Ingress and Egress ACL Table (Priority 1).
         *
         * By default, traffic is allowed.  This is partially handled by
         * the Priority 0 ACL flows added earlier, but we also need to
         * commit IP flows.  This is because, while the initiater's
         * direction may not have any stateful rules, the server's may
         * and then its return traffic would not have an associated
         * conntrack entry and would return "+invalid".
         *
         * We use "ct_commit" for a connection that is not already known
         * by the connection tracker.  Once a connection is committed,
         * subsequent packets will hit the flow at priority 0 that just
         * uses "next;"
         *
         * We also check for established connections that have ct_label.blocked
         * set on them.  That's a connection that was disallowed, but is
         * now allowed by policy again since it hit this default-allow flow.
         * We need to set ct_label.blocked=0 to let the connection continue,
         * which will be done by ct_commit() in the "stateful" stage.
         * Subsequent packets will hit the flow at priority 0 that just
         * uses "next;". */
        ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, 1,
                      "ip && (!ct.est || (ct.est && ct_label.blocked == 1))",
                       REGBIT_CONNTRACK_COMMIT" = 1; next;");
        ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, 1,
                      "ip && (!ct.est || (ct.est && ct_label.blocked == 1))",
                       REGBIT_CONNTRACK_COMMIT" = 1; next;");

        /* Ingress and Egress ACL Table (Priority 65535).
         *
         * Always drop traffic that's in an invalid state.  Also drop
         * reply direction packets for connections that have been marked
         * for deletion (bit 0 of ct_label is set).
         *
         * This is enforced at a higher priority than ACLs can be defined. */
        ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX,
                      "ct.inv || (ct.est && ct.rpl && ct_label.blocked == 1)",
                      "drop;");
        ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX,
                      "ct.inv || (ct.est && ct.rpl && ct_label.blocked == 1)",
                      "drop;");

        /* Ingress and Egress ACL Table (Priority 65535).
         *
         * Allow reply traffic that is part of an established
         * conntrack entry that has not been marked for deletion
         * (bit 0 of ct_label).  We only match traffic in the
         * reply direction because we want traffic in the request
         * direction to hit the currently defined policy from ACLs.
         *
         * This is enforced at a higher priority than ACLs can be defined. */
        ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX,
                      "ct.est && !ct.rel && !ct.new && !ct.inv "
                      "&& ct.rpl && ct_label.blocked == 0",
                      "next;");
        ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX,
                      "ct.est && !ct.rel && !ct.new && !ct.inv "
                      "&& ct.rpl && ct_label.blocked == 0",
                      "next;");

        /* Ingress and Egress ACL Table (Priority 65535).
         *
         * Allow traffic that is related to an existing conntrack entry that
         * has not been marked for deletion (bit 0 of ct_label).
         *
         * This is enforced at a higher priority than ACLs can be defined.
         *
         * NOTE: This does not support related data sessions (eg,
         * a dynamically negotiated FTP data channel), but will allow
         * related traffic such as an ICMP Port Unreachable through
         * that's generated from a non-listening UDP port.  */
        ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX,
                      "!ct.est && ct.rel && !ct.new && !ct.inv "
                      "&& ct_label.blocked == 0",
                      "next;");
        ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX,
                      "!ct.est && ct.rel && !ct.new && !ct.inv "
                      "&& ct_label.blocked == 0",
                      "next;");

        /* Ingress and Egress ACL Table (Priority 65535).
         *
         * Not to do conntrack on ND packets. */
        ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX, "nd", "next;");
        ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX, "nd", "next;");
    }

    /* Ingress or Egress ACL Table (Various priorities). */
    for (size_t i = 0; i < od->nbs->n_acls; i++) {
        struct nbrec_acl *acl = od->nbs->acls[i];
        bool ingress = !strcmp(acl->direction, "from-lport") ? true : false;
        enum ovn_stage stage = ingress ? S_SWITCH_IN_ACL : S_SWITCH_OUT_ACL;

        if (!strcmp(acl->action, "allow")
            || !strcmp(acl->action, "allow-related")) {
            /* If there are any stateful flows, we must even commit "allow"
             * actions.  This is because, while the initiater's
             * direction may not have any stateful rules, the server's
             * may and then its return traffic would not have an
             * associated conntrack entry and would return "+invalid". */
            if (!has_stateful) {
                ovn_lflow_add(lflows, od, stage,
                              acl->priority + OVN_ACL_PRI_OFFSET,
                              acl->match, "next;");
            } else {
                struct ds match = DS_EMPTY_INITIALIZER;

                /* Commit the connection tracking entry if it's a new
                 * connection that matches this ACL.  After this commit,
                 * the reply traffic is allowed by a flow we create at
                 * priority 65535, defined earlier.
                 *
                 * It's also possible that a known connection was marked for
                 * deletion after a policy was deleted, but the policy was
                 * re-added while that connection is still known.  We catch
                 * that case here and un-set ct_label.blocked (which will be done
                 * by ct_commit in the "stateful" stage) to indicate that the
                 * connection should be allowed to resume.
                 */
                ds_put_format(&match, "((ct.new && !ct.est)"
                                      " || (!ct.new && ct.est && !ct.rpl "
                                      "&& ct_label.blocked == 1)) "
                                      "&& (%s)", acl->match);
                ovn_lflow_add(lflows, od, stage,
                              acl->priority + OVN_ACL_PRI_OFFSET,
                              ds_cstr(&match),
                               REGBIT_CONNTRACK_COMMIT" = 1; next;");

                /* Match on traffic in the request direction for an established
                 * connection tracking entry that has not been marked for
                 * deletion.  There is no need to commit here, so we can just
                 * proceed to the next table.  We use this to ensure that this
                 * connection is still allowed by the currently defined
                 * policy. */
                ds_clear(&match);
                ds_put_format(&match,
                              "!ct.new && ct.est && !ct.rpl"
                              " && ct_label.blocked == 0 && (%s)",
                              acl->match);
                ovn_lflow_add(lflows, od, stage,
                              acl->priority + OVN_ACL_PRI_OFFSET,
                              ds_cstr(&match), "next;");

                ds_destroy(&match);
            }
        } else if (!strcmp(acl->action, "drop")
                   || !strcmp(acl->action, "reject")) {
            struct ds match = DS_EMPTY_INITIALIZER;

            /* XXX Need to support "reject", treat it as "drop;" for now. */
            if (!strcmp(acl->action, "reject")) {
                VLOG_INFO("reject is not a supported action");
            }

            /* The implementation of "drop" differs if stateful ACLs are in
             * use for this datapath.  In that case, the actions differ
             * depending on whether the connection was previously committed
             * to the connection tracker with ct_commit. */
            if (has_stateful) {
                /* If the packet is not part of an established connection, then
                 * we can simply drop it. */
                ds_put_format(&match,
                              "(!ct.est || (ct.est && ct_label.blocked == 1)) "
                              "&& (%s)",
                              acl->match);
                ovn_lflow_add(lflows, od, stage, acl->priority +
                              OVN_ACL_PRI_OFFSET, ds_cstr(&match), "drop;");

                /* For an existing connection without ct_label set, we've
                 * encountered a policy change.  ACLs previously allowed
                 * this connection and we committed the connection tracking
                 * entry.  Current policy says that we should drop this
                 * connection.  First, we set bit 0 of ct_label to indicate
                 * that this connection is set for deletion.  By not
                 * specifying "next;", we implicitly drop the packet after
                 * updating conntrack state.  We would normally defer
                 * ct_commit() to the "stateful" stage, but since we're
                 * dropping the packet, we go ahead and do it here. */
                ds_clear(&match);
                ds_put_format(&match,
                              "ct.est && ct_label.blocked == 0 && (%s)",
                              acl->match);
                ovn_lflow_add(lflows, od, stage,
                              acl->priority + OVN_ACL_PRI_OFFSET,
                              ds_cstr(&match), "ct_commit(ct_label=1/1);");

                ds_destroy(&match);
            } else {
                /* There are no stateful ACLs in use on this datapath,
                 * so a "drop" ACL is simply the "drop" logical flow action
                 * in all cases. */
                ovn_lflow_add(lflows, od, stage,
                              acl->priority + OVN_ACL_PRI_OFFSET,
                              acl->match, "drop;");
                /* 'match' was never written to on this path; destroying the
                 * empty ds is a harmless no-op. */
                ds_destroy(&match);
            }
        }
    }

    /* Add 34000 priority flow to allow DHCP reply from ovn-controller to all
     * logical ports of the datapath if the CMS has configured DHCPv4 options*/
    for (size_t i = 0; i < od->nbs->n_ports; i++) {
        if (od->nbs->ports[i]->dhcpv4_options) {
            const char *server_id = smap_get(
                &od->nbs->ports[i]->dhcpv4_options->options, "server_id");
            const char *server_mac = smap_get(
                &od->nbs->ports[i]->dhcpv4_options->options, "server_mac");
            const char *lease_time = smap_get(
                &od->nbs->ports[i]->dhcpv4_options->options, "lease_time");
            if (server_id && server_mac && lease_time) {
                struct ds match = DS_EMPTY_INITIALIZER;
                const char *actions =
                    has_stateful ? "ct_commit; next;" : "next;";
                ds_put_format(&match, "outport == \"%s\" && eth.src == %s "
                              "&& ip4.src == %s && udp && udp.src == 67 "
                              "&& udp.dst == 68", od->nbs->ports[i]->name,
                              server_mac, server_id);
                ovn_lflow_add(
                    lflows, od, S_SWITCH_OUT_ACL, 34000, ds_cstr(&match),
                    actions);
                ds_destroy(&match);
            }
        }

        if (od->nbs->ports[i]->dhcpv6_options) {
            const char *server_mac = smap_get(
                &od->nbs->ports[i]->dhcpv6_options->options, "server_id");
            struct eth_addr ea;
            if (server_mac && eth_addr_from_string(server_mac, &ea)) {
                /* Get the link local IP of the DHCPv6 server from the
                 * server MAC. */
                struct in6_addr lla;
                in6_generate_lla(ea, &lla);

                char server_ip[INET6_ADDRSTRLEN + 1];
                ipv6_string_mapped(server_ip, &lla);

                struct ds match = DS_EMPTY_INITIALIZER;
                const char *actions = has_stateful ? "ct_commit; next;" :
                    "next;";
                ds_put_format(&match, "outport == \"%s\" && eth.src == %s "
                              "&& ip6.src == %s && udp && udp.src == 547 "
                              "&& udp.dst == 546", od->nbs->ports[i]->name,
                              server_mac, server_ip);
                ovn_lflow_add(
                    lflows, od, S_SWITCH_OUT_ACL, 34000, ds_cstr(&match),
                    actions);
                ds_destroy(&match);
            }
        }
    }
}
2968 | ||
1a03fc7d BS |
2969 | static void |
2970 | build_qos(struct ovn_datapath *od, struct hmap *lflows) { | |
2971 | ovn_lflow_add(lflows, od, S_SWITCH_IN_QOS_MARK, 0, "1", "next;"); | |
2972 | ovn_lflow_add(lflows, od, S_SWITCH_OUT_QOS_MARK, 0, "1", "next;"); | |
2973 | ||
2974 | for (size_t i = 0; i < od->nbs->n_qos_rules; i++) { | |
2975 | struct nbrec_qos *qos = od->nbs->qos_rules[i]; | |
2976 | bool ingress = !strcmp(qos->direction, "from-lport") ? true :false; | |
2977 | enum ovn_stage stage = ingress ? S_SWITCH_IN_QOS_MARK : S_SWITCH_OUT_QOS_MARK; | |
2978 | ||
2979 | if (!strcmp(qos->key_action, "dscp")) { | |
2980 | struct ds dscp_action = DS_EMPTY_INITIALIZER; | |
2981 | ||
2982 | ds_put_format(&dscp_action, "ip.dscp = %d; next;", | |
2983 | (uint8_t)qos->value_action); | |
2984 | ovn_lflow_add(lflows, od, stage, | |
2985 | qos->priority, | |
2986 | qos->match, ds_cstr(&dscp_action)); | |
2987 | ds_destroy(&dscp_action); | |
2988 | } | |
2989 | } | |
2990 | } | |
2991 | ||
7a15be69 GS |
2992 | static void |
2993 | build_lb(struct ovn_datapath *od, struct hmap *lflows) | |
2994 | { | |
2995 | /* Ingress and Egress LB Table (Priority 0): Packets are allowed by | |
2996 | * default. */ | |
2997 | ovn_lflow_add(lflows, od, S_SWITCH_IN_LB, 0, "1", "next;"); | |
2998 | ovn_lflow_add(lflows, od, S_SWITCH_OUT_LB, 0, "1", "next;"); | |
2999 | ||
3000 | if (od->nbs->load_balancer) { | |
3001 | /* Ingress and Egress LB Table (Priority 65535). | |
3002 | * | |
3003 | * Send established traffic through conntrack for just NAT. */ | |
3004 | ovn_lflow_add(lflows, od, S_SWITCH_IN_LB, UINT16_MAX, | |
3005 | "ct.est && !ct.rel && !ct.new && !ct.inv", | |
3006 | REGBIT_CONNTRACK_NAT" = 1; next;"); | |
3007 | ovn_lflow_add(lflows, od, S_SWITCH_OUT_LB, UINT16_MAX, | |
3008 | "ct.est && !ct.rel && !ct.new && !ct.inv", | |
3009 | REGBIT_CONNTRACK_NAT" = 1; next;"); | |
3010 | } | |
3011 | } | |
3012 | ||
fa313a8c GS |
3013 | static void |
3014 | build_stateful(struct ovn_datapath *od, struct hmap *lflows) | |
3015 | { | |
3016 | /* Ingress and Egress stateful Table (Priority 0): Packets are | |
3017 | * allowed by default. */ | |
3018 | ovn_lflow_add(lflows, od, S_SWITCH_IN_STATEFUL, 0, "1", "next;"); | |
3019 | ovn_lflow_add(lflows, od, S_SWITCH_OUT_STATEFUL, 0, "1", "next;"); | |
3020 | ||
3021 | /* If REGBIT_CONNTRACK_COMMIT is set as 1, then the packets should be | |
b73db61d | 3022 | * committed to conntrack. We always set ct_label.blocked to 0 here as |
cc58e1f2 RB |
3023 | * any packet that makes it this far is part of a connection we |
3024 | * want to allow to continue. */ | |
fa313a8c | 3025 | ovn_lflow_add(lflows, od, S_SWITCH_IN_STATEFUL, 100, |
cc58e1f2 | 3026 | REGBIT_CONNTRACK_COMMIT" == 1", "ct_commit(ct_label=0/1); next;"); |
fa313a8c | 3027 | ovn_lflow_add(lflows, od, S_SWITCH_OUT_STATEFUL, 100, |
cc58e1f2 | 3028 | REGBIT_CONNTRACK_COMMIT" == 1", "ct_commit(ct_label=0/1); next;"); |
7a15be69 GS |
3029 | |
3030 | /* If REGBIT_CONNTRACK_NAT is set as 1, then packets should just be sent | |
3031 | * through nat (without committing). | |
3032 | * | |
3033 | * REGBIT_CONNTRACK_COMMIT is set for new connections and | |
3034 | * REGBIT_CONNTRACK_NAT is set for established connections. So they | |
3035 | * don't overlap. | |
3036 | */ | |
3037 | ovn_lflow_add(lflows, od, S_SWITCH_IN_STATEFUL, 100, | |
3038 | REGBIT_CONNTRACK_NAT" == 1", "ct_lb;"); | |
3039 | ovn_lflow_add(lflows, od, S_SWITCH_OUT_STATEFUL, 100, | |
3040 | REGBIT_CONNTRACK_NAT" == 1", "ct_lb;"); | |
3041 | ||
3042 | /* Load balancing rules for new connections get committed to conntrack | |
3043 | * table. So even if REGBIT_CONNTRACK_COMMIT is set in a previous table | |
3044 | * a higher priority rule for load balancing below also commits the | |
3045 | * connection, so it is okay if we do not hit the above match on | |
3046 | * REGBIT_CONNTRACK_COMMIT. */ | |
61591ad9 GS |
3047 | for (int i = 0; i < od->nbs->n_load_balancer; i++) { |
3048 | struct nbrec_load_balancer *lb = od->nbs->load_balancer[i]; | |
7a15be69 GS |
3049 | struct smap *vips = &lb->vips; |
3050 | struct smap_node *node; | |
3051 | ||
3052 | SMAP_FOR_EACH (node, vips) { | |
3053 | uint16_t port = 0; | |
3054 | ||
3055 | /* node->key contains IP:port or just IP. */ | |
3056 | char *ip_address = NULL; | |
3057 | ip_address_and_port_from_lb_key(node->key, &ip_address, &port); | |
3058 | if (!ip_address) { | |
3059 | continue; | |
3060 | } | |
3061 | ||
3062 | /* New connections in Ingress table. */ | |
3063 | char *action = xasprintf("ct_lb(%s);", node->value); | |
3064 | struct ds match = DS_EMPTY_INITIALIZER; | |
9784ffaf | 3065 | ds_put_format(&match, "ct.new && ip4.dst == %s", ip_address); |
7a15be69 GS |
3066 | if (port) { |
3067 | if (lb->protocol && !strcmp(lb->protocol, "udp")) { | |
9784ffaf | 3068 | ds_put_format(&match, " && udp.dst == %d", port); |
7a15be69 | 3069 | } else { |
9784ffaf | 3070 | ds_put_format(&match, " && tcp.dst == %d", port); |
7a15be69 GS |
3071 | } |
3072 | ovn_lflow_add(lflows, od, S_SWITCH_IN_STATEFUL, | |
3073 | 120, ds_cstr(&match), action); | |
3074 | } else { | |
3075 | ovn_lflow_add(lflows, od, S_SWITCH_IN_STATEFUL, | |
3076 | 110, ds_cstr(&match), action); | |
3077 | } | |
3078 | ||
7443e4ec | 3079 | free(ip_address); |
7a15be69 GS |
3080 | ds_destroy(&match); |
3081 | free(action); | |
3082 | } | |
3083 | } | |
fa313a8c GS |
3084 | } |
3085 | ||
/* Builds all logical-switch flows that are not handled by the per-stage
 * helpers called at the top: admission control, L2/IP/ND port security
 * (ingress and egress), ARP/ND responder, DHCP options/response, and the
 * destination (L2) lookup stage with its multicast groups.
 *
 * 'datapaths' maps to struct ovn_datapath (only those with 'nbs' set, i.e.
 * logical switches, are processed), 'ports' to struct ovn_port.  Output
 * flows are added to 'lflows' and multicast groups to 'mcgroups'. */
static void
build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
                    struct hmap *lflows, struct hmap *mcgroups)
{
    /* This flow table structure is documented in ovn-northd(8), so please
     * update ovn-northd.8.xml if you change anything. */

    /* Scratch buffers reused (via ds_clear) across all the loops below and
     * destroyed once at the end. */
    struct ds match = DS_EMPTY_INITIALIZER;
    struct ds actions = DS_EMPTY_INITIALIZER;

    /* Build pre-ACL and ACL tables for both ingress and egress.
     * Ingress tables 3 through 9.  Egress tables 0 through 6. */
    struct ovn_datapath *od;
    HMAP_FOR_EACH (od, key_node, datapaths) {
        if (!od->nbs) {
            continue;
        }

        build_pre_acls(od, lflows);
        build_pre_lb(od, lflows);
        build_pre_stateful(od, lflows);
        build_acls(od, lflows);
        build_qos(od, lflows);
        build_lb(od, lflows);
        build_stateful(od, lflows);
    }

    /* Logical switch ingress table 0: Admission control framework (priority
     * 100). */
    HMAP_FOR_EACH (od, key_node, datapaths) {
        if (!od->nbs) {
            continue;
        }

        /* Logical VLANs not supported. */
        ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC_L2, 100, "vlan.present",
                      "drop;");

        /* Broadcast/multicast source address is invalid. */
        ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC_L2, 100, "eth.src[40]",
                      "drop;");

        /* Port security flows have priority 50 (see below) and will continue
         * to the next table if packet source is acceptable. */
    }

    /* Logical switch ingress table 0: Ingress port security - L2
     *  (priority 50).
     *  Ingress table 1: Ingress port security - IP (priority 90 and 80)
     *  Ingress table 2: Ingress port security - ND (priority 90 and 80)
     */
    struct ovn_port *op;
    HMAP_FOR_EACH (op, key_node, ports) {
        if (!op->nbsp) {
            continue;
        }

        if (!lsp_is_enabled(op->nbsp)) {
            /* Drop packets from disabled logical ports (since logical flow
             * tables are default-drop). */
            continue;
        }

        ds_clear(&match);
        ds_clear(&actions);
        ds_put_format(&match, "inport == %s", op->json_key);
        build_port_security_l2("eth.src", op->ps_addrs, op->n_ps_addrs,
                               &match);

        /* Optionally steer the port's traffic into a QoS queue before
         * continuing to the next table. */
        const char *queue_id = smap_get(&op->sb->options, "qdisc_queue_id");
        if (queue_id) {
            ds_put_format(&actions, "set_queue(%s); ", queue_id);
        }
        ds_put_cstr(&actions, "next;");
        ovn_lflow_add(lflows, op->od, S_SWITCH_IN_PORT_SEC_L2, 50,
                      ds_cstr(&match), ds_cstr(&actions));

        if (op->nbsp->n_port_security) {
            build_port_security_ip(P_IN, op, lflows);
            build_port_security_nd(op, lflows);
        }
    }

    /* Ingress table 1 and 2: Port security - IP and ND, by default goto next.
     * (priority 0)*/
    HMAP_FOR_EACH (od, key_node, datapaths) {
        if (!od->nbs) {
            continue;
        }

        ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC_ND, 0, "1", "next;");
        ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC_IP, 0, "1", "next;");
    }

    /* Ingress table 10: ARP/ND responder, skip requests coming from localnet
     * and vtep ports. (priority 100); see ovn-northd.8.xml for the
     * rationale. */
    HMAP_FOR_EACH (op, key_node, ports) {
        if (!op->nbsp) {
            continue;
        }

        if ((!strcmp(op->nbsp->type, "localnet")) ||
            (!strcmp(op->nbsp->type, "vtep"))) {
            ds_clear(&match);
            ds_put_format(&match, "inport == %s", op->json_key);
            ovn_lflow_add(lflows, op->od, S_SWITCH_IN_ARP_ND_RSP, 100,
                          ds_cstr(&match), "next;");
        }
    }

    /* Ingress table 10: ARP/ND responder, reply for known IPs.
     * (priority 50). */
    HMAP_FOR_EACH (op, key_node, ports) {
        if (!op->nbsp) {
            continue;
        }

        /*
         * Add ARP/ND reply flows if either the
         *  - port is up or
         *  - port type is router
         */
        if (!lsp_is_up(op->nbsp) && strcmp(op->nbsp->type, "router")) {
            continue;
        }

        for (size_t i = 0; i < op->n_lsp_addrs; i++) {
            for (size_t j = 0; j < op->lsp_addrs[i].n_ipv4_addrs; j++) {
                ds_clear(&match);
                ds_put_format(&match, "arp.tpa == %s && arp.op == 1",
                              op->lsp_addrs[i].ipv4_addrs[j].addr_s);
                ds_clear(&actions);
                /* Turn the request into a reply in place and hairpin it back
                 * out the port it arrived on. */
                ds_put_format(&actions,
                    "eth.dst = eth.src; "
                    "eth.src = %s; "
                    "arp.op = 2; /* ARP reply */ "
                    "arp.tha = arp.sha; "
                    "arp.sha = %s; "
                    "arp.tpa = arp.spa; "
                    "arp.spa = %s; "
                    "outport = inport; "
                    "flags.loopback = 1; "
                    "output;",
                    op->lsp_addrs[i].ea_s, op->lsp_addrs[i].ea_s,
                    op->lsp_addrs[i].ipv4_addrs[j].addr_s);
                ovn_lflow_add(lflows, op->od, S_SWITCH_IN_ARP_ND_RSP, 50,
                              ds_cstr(&match), ds_cstr(&actions));

                /* Do not reply to an ARP request from the port that owns the
                 * address (otherwise a DHCP client that ARPs to check for a
                 * duplicate address will fail).  Instead, forward it the usual
                 * way.
                 *
                 * (Another alternative would be to simply drop the packet.  If
                 * everything is working as it is configured, then this would
                 * produce equivalent results, since no one should reply to the
                 * request.  But ARPing for one's own IP address is intended to
                 * detect situations where the network is not working as
                 * configured, so dropping the request would frustrate that
                 * intent.) */
                ds_put_format(&match, " && inport == %s", op->json_key);
                ovn_lflow_add(lflows, op->od, S_SWITCH_IN_ARP_ND_RSP, 100,
                              ds_cstr(&match), "next;");
            }

            /* For ND solicitations, we need to listen for both the
             * unicast IPv6 address and its all-nodes multicast address,
             * but always respond with the unicast IPv6 address. */
            for (size_t j = 0; j < op->lsp_addrs[i].n_ipv6_addrs; j++) {
                ds_clear(&match);
                ds_put_format(&match,
                        "nd_ns && ip6.dst == {%s, %s} && nd.target == %s",
                        op->lsp_addrs[i].ipv6_addrs[j].addr_s,
                        op->lsp_addrs[i].ipv6_addrs[j].sn_addr_s,
                        op->lsp_addrs[i].ipv6_addrs[j].addr_s);

                ds_clear(&actions);
                ds_put_format(&actions,
                        "nd_na { "
                        "eth.src = %s; "
                        "ip6.src = %s; "
                        "nd.target = %s; "
                        "nd.tll = %s; "
                        "outport = inport; "
                        "flags.loopback = 1; "
                        "output; "
                        "};",
                        op->lsp_addrs[i].ea_s,
                        op->lsp_addrs[i].ipv6_addrs[j].addr_s,
                        op->lsp_addrs[i].ipv6_addrs[j].addr_s,
                        op->lsp_addrs[i].ea_s);
                ovn_lflow_add(lflows, op->od, S_SWITCH_IN_ARP_ND_RSP, 50,
                              ds_cstr(&match), ds_cstr(&actions));

                /* Do not reply to a solicitation from the port that owns the
                 * address (otherwise DAD detection will fail). */
                ds_put_format(&match, " && inport == %s", op->json_key);
                ovn_lflow_add(lflows, op->od, S_SWITCH_IN_ARP_ND_RSP, 100,
                              ds_cstr(&match), "next;");
            }
        }
    }

    /* Ingress table 10: ARP/ND responder, by default goto next.
     * (priority 0)*/
    HMAP_FOR_EACH (od, key_node, datapaths) {
        if (!od->nbs) {
            continue;
        }

        ovn_lflow_add(lflows, od, S_SWITCH_IN_ARP_ND_RSP, 0, "1", "next;");
    }

    /* Logical switch ingress table 11 and 12: DHCP options and response
     * priority 100 flows. */
    HMAP_FOR_EACH (op, key_node, ports) {
        if (!op->nbsp) {
            continue;
        }

        if (!lsp_is_enabled(op->nbsp) || !strcmp(op->nbsp->type, "router")) {
            /* Don't add the DHCP flows if the port is not enabled or if the
             * port is a router port. */
            continue;
        }

        if (!op->nbsp->dhcpv4_options && !op->nbsp->dhcpv6_options) {
            /* CMS has disabled both native DHCPv4 and DHCPv6 for this lport.
             */
            continue;
        }

        for (size_t i = 0; i < op->n_lsp_addrs; i++) {
            /* Only the first address set for which build_dhcpv4_action()
             * succeeds gets DHCP flows (note the 'break' below). */
            for (size_t j = 0; j < op->lsp_addrs[i].n_ipv4_addrs; j++) {
                struct ds options_action = DS_EMPTY_INITIALIZER;
                struct ds response_action = DS_EMPTY_INITIALIZER;
                struct ds ipv4_addr_match = DS_EMPTY_INITIALIZER;
                if (build_dhcpv4_action(
                        op, op->lsp_addrs[i].ipv4_addrs[j].addr,
                        &options_action, &response_action, &ipv4_addr_match)) {
                    /* Local 'match' intentionally shadows the outer scratch
                     * buffer; it is destroyed before the break. */
                    struct ds match = DS_EMPTY_INITIALIZER;
                    ds_put_format(
                        &match, "inport == %s && eth.src == %s && "
                        "ip4.src == 0.0.0.0 && ip4.dst == 255.255.255.255 && "
                        "udp.src == 68 && udp.dst == 67", op->json_key,
                        op->lsp_addrs[i].ea_s);

                    ovn_lflow_add(lflows, op->od, S_SWITCH_IN_DHCP_OPTIONS,
                                  100, ds_cstr(&match),
                                  ds_cstr(&options_action));
                    ds_clear(&match);
                    /* Allow ip4.src = OFFER_IP and
                     * ip4.dst = {SERVER_IP, 255.255.255.255} for the below
                     * cases
                     *  - When the client wants to renew the IP by sending
                     *    the DHCPREQUEST to the server ip.
                     *  - When the client wants to renew the IP by
                     *    broadcasting the DHCPREQUEST.
                     */
                    ds_put_format(
                        &match, "inport == %s && eth.src == %s && "
                        "%s && udp.src == 68 && udp.dst == 67", op->json_key,
                        op->lsp_addrs[i].ea_s, ds_cstr(&ipv4_addr_match));

                    ovn_lflow_add(lflows, op->od, S_SWITCH_IN_DHCP_OPTIONS,
                                  100, ds_cstr(&match),
                                  ds_cstr(&options_action));
                    ds_clear(&match);

                    /* If REGBIT_DHCP_OPTS_RESULT is set, it means the
                     * put_dhcp_opts action is successful. */
                    ds_put_format(
                        &match, "inport == %s && eth.src == %s && "
                        "ip4 && udp.src == 68 && udp.dst == 67"
                        " && "REGBIT_DHCP_OPTS_RESULT, op->json_key,
                        op->lsp_addrs[i].ea_s);
                    ovn_lflow_add(lflows, op->od, S_SWITCH_IN_DHCP_RESPONSE,
                                  100, ds_cstr(&match),
                                  ds_cstr(&response_action));
                    ds_destroy(&match);
                    ds_destroy(&options_action);
                    ds_destroy(&response_action);
                    ds_destroy(&ipv4_addr_match);
                    break;
                }
            }

            for (size_t j = 0; j < op->lsp_addrs[i].n_ipv6_addrs; j++) {
                struct ds options_action = DS_EMPTY_INITIALIZER;
                struct ds response_action = DS_EMPTY_INITIALIZER;
                if (build_dhcpv6_action(
                        op, &op->lsp_addrs[i].ipv6_addrs[j].addr,
                        &options_action, &response_action)) {
                    struct ds match = DS_EMPTY_INITIALIZER;
                    ds_put_format(
                        &match, "inport == %s && eth.src == %s"
                        " && ip6.dst == ff02::1:2 && udp.src == 546 &&"
                        " udp.dst == 547", op->json_key,
                        op->lsp_addrs[i].ea_s);

                    ovn_lflow_add(lflows, op->od, S_SWITCH_IN_DHCP_OPTIONS, 100,
                                  ds_cstr(&match), ds_cstr(&options_action));

                    /* If REGBIT_DHCP_OPTS_RESULT is set to 1, it means the
                     * put_dhcpv6_opts action is successful */
                    ds_put_cstr(&match, " && "REGBIT_DHCP_OPTS_RESULT);
                    ovn_lflow_add(lflows, op->od, S_SWITCH_IN_DHCP_RESPONSE, 100,
                                  ds_cstr(&match), ds_cstr(&response_action));
                    ds_destroy(&match);
                    ds_destroy(&options_action);
                    ds_destroy(&response_action);
                    break;
                }
            }
        }
    }

    /* Ingress table 11 and 12: DHCP options and response, by default goto
     * next. (priority 0). */

    HMAP_FOR_EACH (od, key_node, datapaths) {
        if (!od->nbs) {
            continue;
        }

        ovn_lflow_add(lflows, od, S_SWITCH_IN_DHCP_OPTIONS, 0, "1", "next;");
        ovn_lflow_add(lflows, od, S_SWITCH_IN_DHCP_RESPONSE, 0, "1", "next;");
    }

    /* Ingress table 13: Destination lookup, broadcast and multicast handling
     * (priority 100). */
    HMAP_FOR_EACH (op, key_node, ports) {
        if (!op->nbsp) {
            continue;
        }

        if (lsp_is_enabled(op->nbsp)) {
            ovn_multicast_add(mcgroups, &mc_flood, op);
        }
    }
    HMAP_FOR_EACH (od, key_node, datapaths) {
        if (!od->nbs) {
            continue;
        }

        ovn_lflow_add(lflows, od, S_SWITCH_IN_L2_LKUP, 100, "eth.mcast",
                      "outport = \""MC_FLOOD"\"; output;");
    }

    /* Ingress table 13: Destination lookup, unicast handling (priority 50), */
    HMAP_FOR_EACH (op, key_node, ports) {
        if (!op->nbsp) {
            continue;
        }

        for (size_t i = 0; i < op->nbsp->n_addresses; i++) {
            /* Addresses are owned by the logical port.
             * Ethernet address followed by zero or more IPv4
             * or IPv6 addresses (or both). */
            struct eth_addr mac;
            if (ovs_scan(op->nbsp->addresses[i],
                        ETH_ADDR_SCAN_FMT, ETH_ADDR_SCAN_ARGS(mac))) {
                /* Static MAC: direct unicast flow to this port. */
                ds_clear(&match);
                ds_put_format(&match, "eth.dst == "ETH_ADDR_FMT,
                              ETH_ADDR_ARGS(mac));

                ds_clear(&actions);
                ds_put_format(&actions, "outport = %s; output;", op->json_key);
                ovn_lflow_add(lflows, op->od, S_SWITCH_IN_L2_LKUP, 50,
                              ds_cstr(&match), ds_cstr(&actions));
            } else if (!strcmp(op->nbsp->addresses[i], "unknown")) {
                if (lsp_is_enabled(op->nbsp)) {
                    ovn_multicast_add(mcgroups, &mc_unknown, op);
                    op->od->has_unknown = true;
                }
            } else if (is_dynamic_lsp_address(op->nbsp->addresses[i])) {
                /* "dynamic": use the MAC allocated in dynamic_addresses, if
                 * one has been assigned yet. */
                if (!op->nbsp->dynamic_addresses
                    || !ovs_scan(op->nbsp->dynamic_addresses,
                            ETH_ADDR_SCAN_FMT, ETH_ADDR_SCAN_ARGS(mac))) {
                    continue;
                }
                ds_clear(&match);
                ds_put_format(&match, "eth.dst == "ETH_ADDR_FMT,
                              ETH_ADDR_ARGS(mac));

                ds_clear(&actions);
                ds_put_format(&actions, "outport = %s; output;", op->json_key);
                ovn_lflow_add(lflows, op->od, S_SWITCH_IN_L2_LKUP, 50,
                              ds_cstr(&match), ds_cstr(&actions));
            } else if (!strcmp(op->nbsp->addresses[i], "router")) {
                /* "router": use the MAC of the peer logical router port. */
                if (!op->peer || !op->peer->nbrp
                    || !ovs_scan(op->peer->nbrp->mac,
                            ETH_ADDR_SCAN_FMT, ETH_ADDR_SCAN_ARGS(mac))) {
                    continue;
                }
                ds_clear(&match);
                ds_put_format(&match, "eth.dst == "ETH_ADDR_FMT,
                              ETH_ADDR_ARGS(mac));
                if (op->peer->od->l3dgw_port
                    && op->peer == op->peer->od->l3dgw_port
                    && op->peer->od->l3redirect_port) {
                    /* The destination lookup flow for the router's
                     * distributed gateway port MAC address should only be
                     * programmed on the "redirect-chassis". */
                    ds_put_format(&match, " && is_chassis_resident(%s)",
                                  op->peer->od->l3redirect_port->json_key);
                }

                ds_clear(&actions);
                ds_put_format(&actions, "outport = %s; output;", op->json_key);
                ovn_lflow_add(lflows, op->od, S_SWITCH_IN_L2_LKUP, 50,
                              ds_cstr(&match), ds_cstr(&actions));

                /* Add ethernet addresses specified in NAT rules on
                 * distributed logical routers. */
                if (op->peer->od->l3dgw_port
                    && op->peer == op->peer->od->l3dgw_port) {
                    for (int i = 0; i < op->peer->od->nbr->n_nat; i++) {
                        const struct nbrec_nat *nat
                                                  = op->peer->od->nbr->nat[i];
                        if (!strcmp(nat->type, "dnat_and_snat")
                            && nat->logical_port && nat->external_mac
                            && eth_addr_from_string(nat->external_mac, &mac)) {

                            ds_clear(&match);
                            ds_put_format(&match, "eth.dst == "ETH_ADDR_FMT
                                          " && is_chassis_resident(\"%s\")",
                                          ETH_ADDR_ARGS(mac),
                                          nat->logical_port);

                            ds_clear(&actions);
                            ds_put_format(&actions, "outport = %s; output;",
                                          op->json_key);
                            ovn_lflow_add(lflows, op->od, S_SWITCH_IN_L2_LKUP,
                                          50, ds_cstr(&match),
                                          ds_cstr(&actions));
                        }
                    }
                }
            } else {
                static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);

                VLOG_INFO_RL(&rl,
                             "%s: invalid syntax '%s' in addresses column",
                             op->nbsp->name, op->nbsp->addresses[i]);
            }
        }
    }

    /* Ingress table 13: Destination lookup for unknown MACs (priority 0). */
    HMAP_FOR_EACH (od, key_node, datapaths) {
        if (!od->nbs) {
            continue;
        }

        if (od->has_unknown) {
            ovn_lflow_add(lflows, od, S_SWITCH_IN_L2_LKUP, 0, "1",
                          "outport = \""MC_UNKNOWN"\"; output;");
        }
    }

    /* Egress tables 6: Egress port security - IP (priority 0)
     * Egress table 7: Egress port security L2 - multicast/broadcast
     *                 (priority 100). */
    HMAP_FOR_EACH (od, key_node, datapaths) {
        if (!od->nbs) {
            continue;
        }

        ovn_lflow_add(lflows, od, S_SWITCH_OUT_PORT_SEC_IP, 0, "1", "next;");
        ovn_lflow_add(lflows, od, S_SWITCH_OUT_PORT_SEC_L2, 100, "eth.mcast",
                      "output;");
    }

    /* Egress table 6: Egress port security - IP (priorities 90 and 80)
     * if port security enabled.
     *
     * Egress table 7: Egress port security - L2 (priorities 50 and 150).
     *
     * Priority 50 rules implement port security for enabled logical port.
     *
     * Priority 150 rules drop packets to disabled logical ports, so that they
     * don't even receive multicast or broadcast packets. */
    HMAP_FOR_EACH (op, key_node, ports) {
        if (!op->nbsp) {
            continue;
        }

        ds_clear(&match);
        ds_put_format(&match, "outport == %s", op->json_key);
        if (lsp_is_enabled(op->nbsp)) {
            build_port_security_l2("eth.dst", op->ps_addrs, op->n_ps_addrs,
                                   &match);
            ovn_lflow_add(lflows, op->od, S_SWITCH_OUT_PORT_SEC_L2, 50,
                          ds_cstr(&match), "output;");
        } else {
            ovn_lflow_add(lflows, op->od, S_SWITCH_OUT_PORT_SEC_L2, 150,
                          ds_cstr(&match), "drop;");
        }

        if (op->nbsp->n_port_security) {
            build_port_security_ip(P_OUT, op, lflows);
        }
    }

    ds_destroy(&match);
    ds_destroy(&actions);
}
eb00399e | 3595 | |
9975d7be BP |
3596 | static bool |
3597 | lrport_is_enabled(const struct nbrec_logical_router_port *lrport) | |
3598 | { | |
3599 | return !lrport->enabled || *lrport->enabled; | |
3600 | } | |
3601 | ||
4685e523 JP |
3602 | /* Returns a string of the IP address of the router port 'op' that |
3603 | * overlaps with 'ip_s". If one is not found, returns NULL. | |
3604 | * | |
3605 | * The caller must not free the returned string. */ | |
3606 | static const char * | |
3607 | find_lrp_member_ip(const struct ovn_port *op, const char *ip_s) | |
3608 | { | |
6fdb7cd6 | 3609 | bool is_ipv4 = strchr(ip_s, '.') ? true : false; |
4685e523 | 3610 | |
6fdb7cd6 JP |
3611 | if (is_ipv4) { |
3612 | ovs_be32 ip; | |
4685e523 | 3613 | |
6fdb7cd6 JP |
3614 | if (!ip_parse(ip_s, &ip)) { |
3615 | static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1); | |
3616 | VLOG_WARN_RL(&rl, "bad ip address %s", ip_s); | |
3617 | return NULL; | |
3618 | } | |
4685e523 | 3619 | |
6fdb7cd6 JP |
3620 | for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) { |
3621 | const struct ipv4_netaddr *na = &op->lrp_networks.ipv4_addrs[i]; | |
3622 | ||
3623 | if (!((na->network ^ ip) & na->mask)) { | |
3624 | /* There should be only 1 interface that matches the | |
3625 | * supplied IP. Otherwise, it's a configuration error, | |
3626 | * because subnets of a router's interfaces should NOT | |
3627 | * overlap. */ | |
3628 | return na->addr_s; | |
3629 | } | |
3630 | } | |
3631 | } else { | |
3632 | struct in6_addr ip6; | |
3633 | ||
3634 | if (!ipv6_parse(ip_s, &ip6)) { | |
3635 | static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1); | |
3636 | VLOG_WARN_RL(&rl, "bad ipv6 address %s", ip_s); | |
3637 | return NULL; | |
3638 | } | |
3639 | ||
3640 | for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) { | |
3641 | const struct ipv6_netaddr *na = &op->lrp_networks.ipv6_addrs[i]; | |
3642 | struct in6_addr xor_addr = ipv6_addr_bitxor(&na->network, &ip6); | |
3643 | struct in6_addr and_addr = ipv6_addr_bitand(&xor_addr, &na->mask); | |
3644 | ||
3645 | if (ipv6_is_zero(&and_addr)) { | |
3646 | /* There should be only 1 interface that matches the | |
3647 | * supplied IP. Otherwise, it's a configuration error, | |
3648 | * because subnets of a router's interfaces should NOT | |
3649 | * overlap. */ | |
3650 | return na->addr_s; | |
3651 | } | |
4685e523 JP |
3652 | } |
3653 | } | |
3654 | ||
3655 | return NULL; | |
3656 | } | |
3657 | ||
/* Adds a routing flow for one route to 'lflows' in the router IP routing
 * stage of 'op''s datapath.
 *
 * 'network_s'/'plen' give the matched prefix; 'lrp_addr_s' is the router
 * port source address placed in (xx)reg1; 'gateway', if nonnull, is the
 * next-hop address, otherwise the packet's own destination is used.
 * 'policy' selects "src-ip" or (default) "dst-ip" routing. */
static void
add_route(struct hmap *lflows, const struct ovn_port *op,
          const char *lrp_addr_s, const char *network_s, int plen,
          const char *gateway, const char *policy)
{
    /* A '.' in the prefix marks it as IPv4; otherwise IPv6. */
    bool is_ipv4 = strchr(network_s, '.') ? true : false;
    struct ds match = DS_EMPTY_INITIALIZER;
    const char *dir;
    uint16_t priority;

    /* Priority doubles the prefix length so longest-prefix-match wins;
     * dst-ip routes get +1 so they beat src-ip routes of equal length. */
    if (policy && !strcmp(policy, "src-ip")) {
        dir = "src";
        priority = plen * 2;
    } else {
        dir = "dst";
        priority = (plen * 2) + 1;
    }

    /* IPv6 link-local addresses must be scoped to the local router port. */
    if (!is_ipv4) {
        struct in6_addr network;
        ovs_assert(ipv6_parse(network_s, &network));
        if (in6_is_lla(&network)) {
            ds_put_format(&match, "inport == %s && ", op->json_key);
        }
    }
    ds_put_format(&match, "ip%s.%s == %s/%d", is_ipv4 ? "4" : "6", dir,
                  network_s, plen);

    /* Decrement TTL and stash the next hop in reg0 (xxreg0 for IPv6). */
    struct ds actions = DS_EMPTY_INITIALIZER;
    ds_put_format(&actions, "ip.ttl--; %sreg0 = ", is_ipv4 ? "" : "xx");

    if (gateway) {
        ds_put_cstr(&actions, gateway);
    } else {
        ds_put_format(&actions, "ip%s.dst", is_ipv4 ? "4" : "6");
    }
    ds_put_format(&actions, "; "
                  "%sreg1 = %s; "
                  "eth.src = %s; "
                  "outport = %s; "
                  "flags.loopback = 1; "
                  "next;",
                  is_ipv4 ? "" : "xx",
                  lrp_addr_s,
                  op->lrp_networks.ea_s,
                  op->json_key);

    /* The priority here is calculated to implement longest-prefix-match
     * routing. */
    ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_ROUTING, priority,
                  ds_cstr(&match), ds_cstr(&actions));
    ds_destroy(&match);
    ds_destroy(&actions);
}
3713 | ||
28dc3fe9 SR |
3714 | static void |
3715 | build_static_route_flow(struct hmap *lflows, struct ovn_datapath *od, | |
3716 | struct hmap *ports, | |
3717 | const struct nbrec_logical_router_static_route *route) | |
3718 | { | |
6fdb7cd6 | 3719 | ovs_be32 nexthop; |
4685e523 | 3720 | const char *lrp_addr_s; |
6fdb7cd6 JP |
3721 | unsigned int plen; |
3722 | bool is_ipv4; | |
28dc3fe9 | 3723 | |
6fdb7cd6 JP |
3724 | /* Verify that the next hop is an IP address with an all-ones mask. */ |
3725 | char *error = ip_parse_cidr(route->nexthop, &nexthop, &plen); | |
3726 | if (!error) { | |
3727 | if (plen != 32) { | |
3728 | static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1); | |
3729 | VLOG_WARN_RL(&rl, "bad next hop mask %s", route->nexthop); | |
3730 | return; | |
3731 | } | |
3732 | is_ipv4 = true; | |
3733 | } else { | |
28dc3fe9 | 3734 | free(error); |
6fdb7cd6 JP |
3735 | |
3736 | struct in6_addr ip6; | |
3737 | char *error = ipv6_parse_cidr(route->nexthop, &ip6, &plen); | |
3738 | if (!error) { | |
3739 | if (plen != 128) { | |
3740 | static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1); | |
3741 | VLOG_WARN_RL(&rl, "bad next hop mask %s", route->nexthop); | |
3742 | return; | |
3743 | } | |
3744 | is_ipv4 = false; | |
3745 | } else { | |
3746 | static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1); | |
3747 | VLOG_WARN_RL(&rl, "bad next hop ip address %s", route->nexthop); | |
3748 | free(error); | |
3749 | return; | |
3750 | } | |
28dc3fe9 SR |
3751 | } |
3752 | ||
6fdb7cd6 JP |
3753 | char *prefix_s; |
3754 | if (is_ipv4) { | |
3755 | ovs_be32 prefix; | |
3756 | /* Verify that ip prefix is a valid IPv4 address. */ | |
3757 | error = ip_parse_cidr(route->ip_prefix, &prefix, &plen); | |
3758 | if (error) { | |
3759 | static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1); | |
3760 | VLOG_WARN_RL(&rl, "bad 'ip_prefix' in static routes %s", | |
3761 | route->ip_prefix); | |
3762 | free(error); | |
3763 | return; | |
3764 | } | |
3765 | prefix_s = xasprintf(IP_FMT, IP_ARGS(prefix & be32_prefix_mask(plen))); | |
3766 | } else { | |
3767 | /* Verify that ip prefix is a valid IPv6 address. */ | |
3768 | struct in6_addr prefix; | |
3769 | error = ipv6_parse_cidr(route->ip_prefix, &prefix, &plen); | |
3770 | if (error) { | |
3771 | static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1); | |
3772 | VLOG_WARN_RL(&rl, "bad 'ip_prefix' in static routes %s", | |
3773 | route->ip_prefix); | |
3774 | free(error); | |
3775 | return; | |
3776 | } | |
3777 | struct in6_addr mask = ipv6_create_mask(plen); | |
3778 | struct in6_addr network = ipv6_addr_bitand(&prefix, &mask); | |
3779 | prefix_s = xmalloc(INET6_ADDRSTRLEN); | |
3780 | inet_ntop(AF_INET6, &network, prefix_s, INET6_ADDRSTRLEN); | |
28dc3fe9 SR |
3781 | } |
3782 | ||
3783 | /* Find the outgoing port. */ | |
3784 | struct ovn_port *out_port = NULL; | |
3785 | if (route->output_port) { | |
3786 | out_port = ovn_port_find(ports, route->output_port); | |
3787 | if (!out_port) { | |
3788 | static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1); | |
3789 | VLOG_WARN_RL(&rl, "Bad out port %s for static route %s", | |
3790 | route->output_port, route->ip_prefix); | |
6fdb7cd6 | 3791 | goto free_prefix_s; |
28dc3fe9 | 3792 | } |
4685e523 | 3793 | lrp_addr_s = find_lrp_member_ip(out_port, route->nexthop); |
28dc3fe9 SR |
3794 | } else { |
3795 | /* output_port is not specified, find the | |
3796 | * router port matching the next hop. */ | |
3797 | int i; | |
3798 | for (i = 0; i < od->nbr->n_ports; i++) { | |
3799 | struct nbrec_logical_router_port *lrp = od->nbr->ports[i]; | |
3800 | out_port = ovn_port_find(ports, lrp->name); | |
3801 | if (!out_port) { | |
3802 | /* This should not happen. */ | |
3803 | continue; | |
3804 | } | |
3805 | ||
4685e523 JP |
3806 | lrp_addr_s = find_lrp_member_ip(out_port, route->nexthop); |
3807 | if (lrp_addr_s) { | |
28dc3fe9 SR |
3808 | break; |
3809 | } | |
3810 | } | |
28dc3fe9 SR |
3811 | } |
3812 | ||
4685e523 JP |
3813 | if (!lrp_addr_s) { |
3814 | /* There is no matched out port. */ | |
3815 | static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1); | |
3816 | VLOG_WARN_RL(&rl, "No path for static route %s; next hop %s", | |
3817 | route->ip_prefix, route->nexthop); | |
6fdb7cd6 | 3818 | goto free_prefix_s; |
4685e523 JP |
3819 | } |
3820 | ||
440a9f4b GS |
3821 | char *policy = route->policy ? route->policy : "dst-ip"; |
3822 | add_route(lflows, out_port, lrp_addr_s, prefix_s, plen, route->nexthop, | |
3823 | policy); | |
6fdb7cd6 JP |
3824 | |
3825 | free_prefix_s: | |
c9bdf7bd | 3826 | free(prefix_s); |
28dc3fe9 SR |
3827 | } |
3828 | ||
4685e523 | 3829 | static void |
6fdb7cd6 | 3830 | op_put_v4_networks(struct ds *ds, const struct ovn_port *op, bool add_bcast) |
4685e523 JP |
3831 | { |
3832 | if (!add_bcast && op->lrp_networks.n_ipv4_addrs == 1) { | |
3833 | ds_put_format(ds, "%s", op->lrp_networks.ipv4_addrs[0].addr_s); | |
3834 | return; | |
3835 | } | |
3836 | ||
3837 | ds_put_cstr(ds, "{"); | |
3838 | for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) { | |
3839 | ds_put_format(ds, "%s, ", op->lrp_networks.ipv4_addrs[i].addr_s); | |
3840 | if (add_bcast) { | |
3841 | ds_put_format(ds, "%s, ", op->lrp_networks.ipv4_addrs[i].bcast_s); | |
3842 | } | |
3843 | } | |
3844 | ds_chomp(ds, ' '); | |
3845 | ds_chomp(ds, ','); | |
3846 | ds_put_cstr(ds, "}"); | |
3847 | } | |
3848 | ||
6fdb7cd6 JP |
3849 | static void |
3850 | op_put_v6_networks(struct ds *ds, const struct ovn_port *op) | |
3851 | { | |
3852 | if (op->lrp_networks.n_ipv6_addrs == 1) { | |
3853 | ds_put_format(ds, "%s", op->lrp_networks.ipv6_addrs[0].addr_s); | |
3854 | return; | |
3855 | } | |
3856 | ||
3857 | ds_put_cstr(ds, "{"); | |
3858 | for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) { | |
3859 | ds_put_format(ds, "%s, ", op->lrp_networks.ipv6_addrs[i].addr_s); | |
3860 | } | |
3861 | ds_chomp(ds, ' '); | |
3862 | ds_chomp(ds, ','); | |
3863 | ds_put_cstr(ds, "}"); | |
3864 | } | |
3865 | ||
65d8810c GS |
3866 | static const char * |
3867 | get_force_snat_ip(struct ovn_datapath *od, const char *key_type, ovs_be32 *ip) | |
3868 | { | |
3869 | char *key = xasprintf("%s_force_snat_ip", key_type); | |
3870 | const char *ip_address = smap_get(&od->nbr->options, key); | |
3871 | free(key); | |
3872 | ||
3873 | if (ip_address) { | |
3874 | ovs_be32 mask; | |
3875 | char *error = ip_parse_masked(ip_address, ip, &mask); | |
3876 | if (error || mask != OVS_BE32_MAX) { | |
3877 | static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1); | |
3878 | VLOG_WARN_RL(&rl, "bad ip %s in options of router "UUID_FMT"", | |
3879 | ip_address, UUID_ARGS(&od->key)); | |
3880 | free(error); | |
3881 | *ip = 0; | |
3882 | return NULL; | |
3883 | } | |
3884 | return ip_address; | |
3885 | } | |
3886 | ||
3887 | *ip = 0; | |
3888 | return NULL; | |
3889 | } | |
3890 | ||
3891 | static void | |
3892 | add_router_lb_flow(struct hmap *lflows, struct ovn_datapath *od, | |
3893 | struct ds *match, struct ds *actions, int priority, | |
3894 | const char *lb_force_snat_ip) | |
3895 | { | |
3896 | /* A match and actions for new connections. */ | |
3897 | char *new_match = xasprintf("ct.new && %s", ds_cstr(match)); | |
3898 | if (lb_force_snat_ip) { | |
3899 | char *new_actions = xasprintf("flags.force_snat_for_lb = 1; %s", | |
3900 | ds_cstr(actions)); | |
3901 | ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, priority, new_match, | |
3902 | new_actions); | |
3903 | free(new_actions); | |
3904 | } else { | |
3905 | ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, priority, new_match, | |
3906 | ds_cstr(actions)); | |
3907 | } | |
3908 | ||
3909 | /* A match and actions for established connections. */ | |
3910 | char *est_match = xasprintf("ct.est && %s", ds_cstr(match)); | |
3911 | if (lb_force_snat_ip) { | |
3912 | ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, priority, est_match, | |
3913 | "flags.force_snat_for_lb = 1; ct_dnat;"); | |
3914 | } else { | |
3915 | ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, priority, est_match, | |
3916 | "ct_dnat;"); | |
3917 | } | |
3918 | ||
3919 | free(new_match); | |
3920 | free(est_match); | |
3921 | } | |
3922 | ||
9975d7be BP |
3923 | static void |
3924 | build_lrouter_flows(struct hmap *datapaths, struct hmap *ports, | |
3925 | struct hmap *lflows) | |
3926 | { | |
3927 | /* This flow table structure is documented in ovn-northd(8), so please | |
3928 | * update ovn-northd.8.xml if you change anything. */ | |
3929 | ||
09b39248 JP |
3930 | struct ds match = DS_EMPTY_INITIALIZER; |
3931 | struct ds actions = DS_EMPTY_INITIALIZER; | |
3932 | ||
9975d7be BP |
3933 | /* Logical router ingress table 0: Admission control framework. */ |
3934 | struct ovn_datapath *od; | |
3935 | HMAP_FOR_EACH (od, key_node, datapaths) { | |
3936 | if (!od->nbr) { | |
3937 | continue; | |
3938 | } | |
3939 | ||
3940 | /* Logical VLANs not supported. | |
3941 | * Broadcast/multicast source address is invalid. */ | |
3942 | ovn_lflow_add(lflows, od, S_ROUTER_IN_ADMISSION, 100, | |
3943 | "vlan.present || eth.src[40]", "drop;"); | |
3944 | } | |
3945 | ||
3946 | /* Logical router ingress table 0: match (priority 50). */ | |
3947 | struct ovn_port *op; | |
3948 | HMAP_FOR_EACH (op, key_node, ports) { | |
0ee00741 | 3949 | if (!op->nbrp) { |
9975d7be BP |
3950 | continue; |
3951 | } | |
3952 | ||
0ee00741 | 3953 | if (!lrport_is_enabled(op->nbrp)) { |
9975d7be BP |
3954 | /* Drop packets from disabled logical ports (since logical flow |
3955 | * tables are default-drop). */ | |
3956 | continue; | |
3957 | } | |
3958 | ||
41a15b71 MS |
3959 | if (op->derived) { |
3960 | /* No ingress packets should be received on a chassisredirect | |
3961 | * port. */ | |
3962 | continue; | |
3963 | } | |
3964 | ||
3965 | ds_clear(&match); | |
3966 | ds_put_format(&match, "eth.mcast && inport == %s", op->json_key); | |
3967 | ovn_lflow_add(lflows, op->od, S_ROUTER_IN_ADMISSION, 50, | |
3968 | ds_cstr(&match), "next;"); | |
3969 | ||
09b39248 | 3970 | ds_clear(&match); |
41a15b71 | 3971 | ds_put_format(&match, "eth.dst == %s && inport == %s", |
4685e523 | 3972 | op->lrp_networks.ea_s, op->json_key); |
41a15b71 MS |
3973 | if (op->od->l3dgw_port && op == op->od->l3dgw_port |
3974 | && op->od->l3redirect_port) { | |
3975 | /* Traffic with eth.dst = l3dgw_port->lrp_networks.ea_s | |
3976 | * should only be received on the "redirect-chassis". */ | |
3977 | ds_put_format(&match, " && is_chassis_resident(%s)", | |
3978 | op->od->l3redirect_port->json_key); | |
3979 | } | |
9975d7be | 3980 | ovn_lflow_add(lflows, op->od, S_ROUTER_IN_ADMISSION, 50, |
09b39248 | 3981 | ds_cstr(&match), "next;"); |
9975d7be BP |
3982 | } |
3983 | ||
3984 | /* Logical router ingress table 1: IP Input. */ | |
78aab811 | 3985 | HMAP_FOR_EACH (od, key_node, datapaths) { |
9975d7be BP |
3986 | if (!od->nbr) { |
3987 | continue; | |
3988 | } | |
3989 | ||
3990 | /* L3 admission control: drop multicast and broadcast source, localhost | |
3991 | * source or destination, and zero network source or destination | |
3992 | * (priority 100). */ | |
3993 | ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 100, | |
3994 | "ip4.mcast || " | |
3995 | "ip4.src == 255.255.255.255 || " | |
3996 | "ip4.src == 127.0.0.0/8 || " | |
3997 | "ip4.dst == 127.0.0.0/8 || " | |
3998 | "ip4.src == 0.0.0.0/8 || " | |
3999 | "ip4.dst == 0.0.0.0/8", | |
4000 | "drop;"); | |
4001 | ||
0bac7164 BP |
4002 | /* ARP reply handling. Use ARP replies to populate the logical |
4003 | * router's ARP table. */ | |
4004 | ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 90, "arp.op == 2", | |
4005 | "put_arp(inport, arp.spa, arp.sha);"); | |
4006 | ||
9975d7be BP |
4007 | /* Drop Ethernet local broadcast. By definition this traffic should |
4008 | * not be forwarded.*/ | |
4009 | ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 50, | |
4010 | "eth.bcast", "drop;"); | |
4011 | ||
9975d7be BP |
4012 | /* TTL discard. |
4013 | * | |
4014 | * XXX Need to send ICMP time exceeded if !ip.later_frag. */ | |
09b39248 JP |
4015 | ds_clear(&match); |
4016 | ds_put_cstr(&match, "ip4 && ip.ttl == {0, 1}"); | |
4017 | ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 30, | |
4018 | ds_cstr(&match), "drop;"); | |
9975d7be | 4019 | |
c34a87b6 JP |
4020 | /* ND advertisement handling. Use advertisements to populate |
4021 | * the logical router's ARP/ND table. */ | |
4022 | ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 90, "nd_na", | |
4023 | "put_nd(inport, nd.target, nd.tll);"); | |
4024 | ||
4025 | /* Learn from neighbor solicitations that were not directed at | |
4026 | * us. (A priority-90 flow will respond to requests to us and | |
4027 | * learn the sender's MAC address.) */ | |
4028 | ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 80, "nd_ns", | |
4029 | "put_nd(inport, ip6.src, nd.sll);"); | |
4030 | ||
9975d7be BP |
4031 | /* Pass other traffic not already handled to the next table for |
4032 | * routing. */ | |
4033 | ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 0, "1", "next;"); | |
78aab811 JP |
4034 | } |
4035 | ||
6fdb7cd6 | 4036 | /* Logical router ingress table 1: IP Input for IPv4. */ |
9975d7be | 4037 | HMAP_FOR_EACH (op, key_node, ports) { |
0ee00741 | 4038 | if (!op->nbrp) { |
9975d7be BP |
4039 | continue; |
4040 | } | |
4041 | ||
41a15b71 MS |
4042 | if (op->derived) { |
4043 | /* No ingress packets are accepted on a chassisredirect | |
4044 | * port, so no need to program flows for that port. */ | |
4045 | continue; | |
4046 | } | |
9975d7be | 4047 | |
6fdb7cd6 JP |
4048 | if (op->lrp_networks.n_ipv4_addrs) { |
4049 | /* L3 admission control: drop packets that originate from an | |
4050 | * IPv4 address owned by the router or a broadcast address | |
4051 | * known to the router (priority 100). */ | |
4052 | ds_clear(&match); | |
4053 | ds_put_cstr(&match, "ip4.src == "); | |
4054 | op_put_v4_networks(&match, op, true); | |
06a26dd2 | 4055 | ds_put_cstr(&match, " && "REGBIT_EGRESS_LOOPBACK" == 0"); |
6fdb7cd6 JP |
4056 | ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 100, |
4057 | ds_cstr(&match), "drop;"); | |
4058 | ||
4059 | /* ICMP echo reply. These flows reply to ICMP echo requests | |
4060 | * received for the router's IP address. Since packets only | |
4061 | * get here as part of the logical router datapath, the inport | |
4062 | * (i.e. the incoming locally attached net) does not matter. | |
4063 | * The ip.ttl also does not matter (RFC1812 section 4.2.2.9) */ | |
4064 | ds_clear(&match); | |
4065 | ds_put_cstr(&match, "ip4.dst == "); | |
4066 | op_put_v4_networks(&match, op, false); | |
4067 | ds_put_cstr(&match, " && icmp4.type == 8 && icmp4.code == 0"); | |
4068 | ||
4069 | ds_clear(&actions); | |
4070 | ds_put_format(&actions, | |
4071 | "ip4.dst <-> ip4.src; " | |
4072 | "ip.ttl = 255; " | |
4073 | "icmp4.type = 0; " | |
bf143492 | 4074 | "flags.loopback = 1; " |
6fdb7cd6 JP |
4075 | "next; "); |
4076 | ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90, | |
4077 | ds_cstr(&match), ds_cstr(&actions)); | |
4078 | } | |
dd7652e6 | 4079 | |
9975d7be BP |
4080 | /* ARP reply. These flows reply to ARP requests for the router's own |
4081 | * IP address. */ | |
4685e523 JP |
4082 | for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) { |
4083 | ds_clear(&match); | |
4084 | ds_put_format(&match, | |
4085 | "inport == %s && arp.tpa == %s && arp.op == 1", | |
4086 | op->json_key, op->lrp_networks.ipv4_addrs[i].addr_s); | |
41a15b71 MS |
4087 | if (op->od->l3dgw_port && op == op->od->l3dgw_port |
4088 | && op->od->l3redirect_port) { | |
4089 | /* Traffic with eth.src = l3dgw_port->lrp_networks.ea_s | |
4090 | * should only be sent from the "redirect-chassis", so that | |
4091 | * upstream MAC learning points to the "redirect-chassis". | |
4092 | * Also need to avoid generation of multiple ARP responses | |
4093 | * from different chassis. */ | |
4094 | ds_put_format(&match, " && is_chassis_resident(%s)", | |
4095 | op->od->l3redirect_port->json_key); | |
4096 | } | |
4685e523 JP |
4097 | |
4098 | ds_clear(&actions); | |
4099 | ds_put_format(&actions, | |
4100 | "eth.dst = eth.src; " | |
4101 | "eth.src = %s; " | |
4102 | "arp.op = 2; /* ARP reply */ " | |
4103 | "arp.tha = arp.sha; " | |
4104 | "arp.sha = %s; " | |
4105 | "arp.tpa = arp.spa; " | |
4106 | "arp.spa = %s; " | |
4107 | "outport = %s; " | |
bf143492 | 4108 | "flags.loopback = 1; " |
4685e523 JP |
4109 | "output;", |
4110 | op->lrp_networks.ea_s, | |
4111 | op->lrp_networks.ea_s, | |
4112 | op->lrp_networks.ipv4_addrs[i].addr_s, | |
4113 | op->json_key); | |
4114 | ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90, | |
4115 | ds_cstr(&match), ds_cstr(&actions)); | |
4116 | } | |
9975d7be | 4117 | |
cc4583aa GS |
4118 | /* A set to hold all load-balancer vips that need ARP responses. */ |
4119 | struct sset all_ips = SSET_INITIALIZER(&all_ips); | |
e914fb54 | 4120 | get_router_load_balancer_ips(op->od, &all_ips); |
cc4583aa GS |
4121 | |
4122 | const char *ip_address; | |
4123 | SSET_FOR_EACH(ip_address, &all_ips) { | |
4124 | ovs_be32 ip; | |
4125 | if (!ip_parse(ip_address, &ip) || !ip) { | |
4126 | continue; | |
4127 | } | |
4128 | ||
4129 | ds_clear(&match); | |
4130 | ds_put_format(&match, | |
4131 | "inport == %s && arp.tpa == "IP_FMT" && arp.op == 1", | |
4132 | op->json_key, IP_ARGS(ip)); | |
4133 | ||
4134 | ds_clear(&actions); | |
4135 | ds_put_format(&actions, | |
4136 | "eth.dst = eth.src; " | |
4137 | "eth.src = %s; " | |
4138 | "arp.op = 2; /* ARP reply */ " | |
4139 | "arp.tha = arp.sha; " | |
4140 | "arp.sha = %s; " | |
4141 | "arp.tpa = arp.spa; " | |
4142 | "arp.spa = "IP_FMT"; " | |
4143 | "outport = %s; " | |
4144 | "flags.loopback = 1; " | |
4145 | "output;", | |
4146 | op->lrp_networks.ea_s, | |
4147 | op->lrp_networks.ea_s, | |
4148 | IP_ARGS(ip), | |
4149 | op->json_key); | |
4150 | ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90, | |
4151 | ds_cstr(&match), ds_cstr(&actions)); | |
4152 | } | |
4153 | ||
4154 | sset_destroy(&all_ips); | |
4155 | ||
65d8810c GS |
4156 | /* A gateway router can have 2 SNAT IP addresses to force DNATed and |
4157 | * LBed traffic respectively to be SNATed. In addition, there can be | |
4158 | * a number of SNAT rules in the NAT table. */ | |
4159 | ovs_be32 *snat_ips = xmalloc(sizeof *snat_ips * | |
4160 | (op->od->nbr->n_nat + 2)); | |
dde5ea7b | 4161 | size_t n_snat_ips = 0; |
65d8810c GS |
4162 | |
4163 | ovs_be32 snat_ip; | |
4164 | const char *dnat_force_snat_ip = get_force_snat_ip(op->od, "dnat", | |
4165 | &snat_ip); | |
4166 | if (dnat_force_snat_ip) { | |
4167 | snat_ips[n_snat_ips++] = snat_ip; | |
4168 | } | |
4169 | ||
4170 | const char *lb_force_snat_ip = get_force_snat_ip(op->od, "lb", | |
4171 | &snat_ip); | |
4172 | if (lb_force_snat_ip) { | |
4173 | snat_ips[n_snat_ips++] = snat_ip; | |
4174 | } | |
4175 | ||
de297547 GS |
4176 | for (int i = 0; i < op->od->nbr->n_nat; i++) { |
4177 | const struct nbrec_nat *nat; | |
4178 | ||
4179 | nat = op->od->nbr->nat[i]; | |
4180 | ||
de297547 GS |
4181 | ovs_be32 ip; |
4182 | if (!ip_parse(nat->external_ip, &ip) || !ip) { | |
4183 | static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1); | |
dde5ea7b | 4184 | VLOG_WARN_RL(&rl, "bad ip address %s in nat configuration " |
de297547 GS |
4185 | "for router %s", nat->external_ip, op->key); |
4186 | continue; | |
4187 | } | |
4188 | ||
dde5ea7b GS |
4189 | if (!strcmp(nat->type, "snat")) { |
4190 | snat_ips[n_snat_ips++] = ip; | |
4191 | continue; | |
4192 | } | |
4193 | ||
4194 | /* ARP handling for external IP addresses. | |
4195 | * | |
4196 | * DNAT IP addresses are external IP addresses that need ARP | |
4197 | * handling. */ | |
09b39248 JP |
4198 | ds_clear(&match); |
4199 | ds_put_format(&match, | |
4200 | "inport == %s && arp.tpa == "IP_FMT" && arp.op == 1", | |
4201 | op->json_key, IP_ARGS(ip)); | |
4685e523 | 4202 | |
09b39248 JP |
4203 | ds_clear(&actions); |
4204 | ds_put_format(&actions, | |
de297547 | 4205 | "eth.dst = eth.src; " |
de297547 | 4206 | "arp.op = 2; /* ARP reply */ " |
06a26dd2 MS |
4207 | "arp.tha = arp.sha; "); |
4208 | ||
4209 | if (op->od->l3dgw_port && op == op->od->l3dgw_port) { | |
4210 | struct eth_addr mac; | |
4211 | if (nat->external_mac && | |
4212 | eth_addr_from_string(nat->external_mac, &mac) | |
4213 | && nat->logical_port) { | |
4214 | /* distributed NAT case, use nat->external_mac */ | |
4215 | ds_put_format(&actions, | |
4216 | "eth.src = "ETH_ADDR_FMT"; " | |
4217 | "arp.sha = "ETH_ADDR_FMT"; ", | |
4218 | ETH_ADDR_ARGS(mac), | |
4219 | ETH_ADDR_ARGS(mac)); | |
4220 | /* Traffic with eth.src = nat->external_mac should only be | |
4221 | * sent from the chassis where nat->logical_port is | |
4222 | * resident, so that upstream MAC learning points to the | |
4223 | * correct chassis. Also need to avoid generation of | |
4224 | * multiple ARP responses from different chassis. */ | |
4225 | ds_put_format(&match, " && is_chassis_resident(\"%s\")", | |
4226 | nat->logical_port); | |
4227 | } else { | |
4228 | ds_put_format(&actions, | |
4229 | "eth.src = %s; " | |
4230 | "arp.sha = %s; ", | |
4231 | op->lrp_networks.ea_s, | |
4232 | op->lrp_networks.ea_s); | |
4233 | /* Traffic with eth.src = l3dgw_port->lrp_networks.ea_s | |
4234 | * should only be sent from the "redirect-chassis", so that | |
4235 | * upstream MAC learning points to the "redirect-chassis". | |
4236 | * Also need to avoid generation of multiple ARP responses | |
4237 | * from different chassis. */ | |
4238 | if (op->od->l3redirect_port) { | |
4239 | ds_put_format(&match, " && is_chassis_resident(%s)", | |
4240 | op->od->l3redirect_port->json_key); | |
4241 | } | |
4242 | } | |
4243 | } else { | |
4244 | ds_put_format(&actions, | |
4245 | "eth.src = %s; " | |
4246 | "arp.sha = %s; ", | |
4247 | op->lrp_networks.ea_s, | |
4248 | op->lrp_networks.ea_s); | |
4249 | } | |
4250 | ds_put_format(&actions, | |
de297547 GS |
4251 | "arp.tpa = arp.spa; " |
4252 | "arp.spa = "IP_FMT"; " | |
4253 | "outport = %s; " | |
bf143492 | 4254 | "flags.loopback = 1; " |
de297547 | 4255 | "output;", |
de297547 GS |
4256 | IP_ARGS(ip), |
4257 | op->json_key); | |
4258 | ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90, | |
09b39248 | 4259 | ds_cstr(&match), ds_cstr(&actions)); |
de297547 GS |
4260 | } |
4261 | ||
4685e523 JP |
4262 | ds_clear(&match); |
4263 | ds_put_cstr(&match, "ip4.dst == {"); | |
4264 | bool has_drop_ips = false; | |
4265 | for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) { | |
49da9ec0 | 4266 | bool snat_ip_is_router_ip = false; |
dde5ea7b GS |
4267 | for (int j = 0; j < n_snat_ips; j++) { |
4268 | /* Packets to SNAT IPs should not be dropped. */ | |
4269 | if (op->lrp_networks.ipv4_addrs[i].addr == snat_ips[j]) { | |
49da9ec0 CSV |
4270 | snat_ip_is_router_ip = true; |
4271 | break; | |
4685e523 | 4272 | } |
4ef48e9d | 4273 | } |
49da9ec0 CSV |
4274 | if (snat_ip_is_router_ip) { |
4275 | continue; | |
4276 | } | |
4685e523 JP |
4277 | ds_put_format(&match, "%s, ", |
4278 | op->lrp_networks.ipv4_addrs[i].addr_s); | |
4279 | has_drop_ips = true; | |
4ef48e9d | 4280 | } |
4685e523 JP |
4281 | ds_chomp(&match, ' '); |
4282 | ds_chomp(&match, ','); | |
4283 | ds_put_cstr(&match, "}"); | |
4ef48e9d | 4284 | |
4685e523 JP |
4285 | if (has_drop_ips) { |
4286 | /* Drop IP traffic to this router. */ | |
09b39248 JP |
4287 | ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 60, |
4288 | ds_cstr(&match), "drop;"); | |
4ef48e9d | 4289 | } |
4685e523 | 4290 | |
dde5ea7b | 4291 | free(snat_ips); |
9975d7be BP |
4292 | } |
4293 | ||
6fdb7cd6 JP |
4294 | /* Logical router ingress table 1: IP Input for IPv6. */ |
4295 | HMAP_FOR_EACH (op, key_node, ports) { | |
4296 | if (!op->nbrp) { | |
4297 | continue; | |
4298 | } | |
4299 | ||
41a15b71 MS |
4300 | if (op->derived) { |
4301 | /* No ingress packets are accepted on a chassisredirect | |
4302 | * port, so no need to program flows for that port. */ | |
4303 | continue; | |
4304 | } | |
4305 | ||
6fdb7cd6 JP |
4306 | if (op->lrp_networks.n_ipv6_addrs) { |
4307 | /* L3 admission control: drop packets that originate from an | |
4308 | * IPv6 address owned by the router (priority 100). */ | |
4309 | ds_clear(&match); | |
4310 | ds_put_cstr(&match, "ip6.src == "); | |
4311 | op_put_v6_networks(&match, op); | |
4312 | ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 100, | |
4313 | ds_cstr(&match), "drop;"); | |
4314 | ||
4315 | /* ICMPv6 echo reply. These flows reply to echo requests | |
4316 | * received for the router's IP address. */ | |
4317 | ds_clear(&match); | |
4318 | ds_put_cstr(&match, "ip6.dst == "); | |
4319 | op_put_v6_networks(&match, op); | |
4320 | ds_put_cstr(&match, " && icmp6.type == 128 && icmp6.code == 0"); | |
4321 | ||
4322 | ds_clear(&actions); | |
4323 | ds_put_cstr(&actions, | |
4324 | "ip6.dst <-> ip6.src; " | |
4325 | "ip.ttl = 255; " | |
4326 | "icmp6.type = 129; " | |
bf143492 | 4327 | "flags.loopback = 1; " |
6fdb7cd6 JP |
4328 | "next; "); |
4329 | ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90, | |
4330 | ds_cstr(&match), ds_cstr(&actions)); | |
4331 | ||
4332 | /* Drop IPv6 traffic to this router. */ | |
4333 | ds_clear(&match); | |
4334 | ds_put_cstr(&match, "ip6.dst == "); | |
4335 | op_put_v6_networks(&match, op); | |
4336 | ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 60, | |
4337 | ds_cstr(&match), "drop;"); | |
4338 | } | |
4339 | ||
4340 | /* ND reply. These flows reply to ND solicitations for the | |
4341 | * router's own IP address. */ | |
4342 | for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) { | |
4343 | ds_clear(&match); | |
4344 | ds_put_format(&match, | |
4345 | "inport == %s && nd_ns && ip6.dst == {%s, %s} " | |
4346 | "&& nd.target == %s", | |
4347 | op->json_key, | |
4348 | op->lrp_networks.ipv6_addrs[i].addr_s, | |
4349 | op->lrp_networks.ipv6_addrs[i].sn_addr_s, | |
4350 | op->lrp_networks.ipv6_addrs[i].addr_s); | |
41a15b71 MS |
4351 | if (op->od->l3dgw_port && op == op->od->l3dgw_port |
4352 | && op->od->l3redirect_port) { | |
4353 | /* Traffic with eth.src = l3dgw_port->lrp_networks.ea_s | |
4354 | * should only be sent from the "redirect-chassis", so that | |
4355 | * upstream MAC learning points to the "redirect-chassis". | |
4356 | * Also need to avoid generation of multiple ND replies | |
4357 | * from different chassis. */ | |
4358 | ds_put_format(&match, " && is_chassis_resident(%s)", | |
4359 | op->od->l3redirect_port->json_key); | |
4360 | } | |
6fdb7cd6 JP |
4361 | |
4362 | ds_clear(&actions); | |
4363 | ds_put_format(&actions, | |
c34a87b6 | 4364 | "put_nd(inport, ip6.src, nd.sll); " |
6fdb7cd6 JP |
4365 | "nd_na { " |
4366 | "eth.src = %s; " | |
4367 | "ip6.src = %s; " | |
4368 | "nd.target = %s; " | |
4369 | "nd.tll = %s; " | |
4370 | "outport = inport; " | |
bf143492 | 4371 | "flags.loopback = 1; " |
6fdb7cd6 JP |
4372 | "output; " |
4373 | "};", | |
4374 | op->lrp_networks.ea_s, | |
4375 | op->lrp_networks.ipv6_addrs[i].addr_s, | |
4376 | op->lrp_networks.ipv6_addrs[i].addr_s, | |
4377 | op->lrp_networks.ea_s); | |
4378 | ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90, | |
4379 | ds_cstr(&match), ds_cstr(&actions)); | |
4380 | } | |
4381 | } | |
4382 | ||
06a26dd2 | 4383 | /* NAT, Defrag and load balancing. */ |
de297547 GS |
4384 | HMAP_FOR_EACH (od, key_node, datapaths) { |
4385 | if (!od->nbr) { | |
4386 | continue; | |
4387 | } | |
4388 | ||
4389 | /* Packets are allowed by default. */ | |
cc4583aa | 4390 | ovn_lflow_add(lflows, od, S_ROUTER_IN_DEFRAG, 0, "1", "next;"); |
de297547 GS |
4391 | ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 0, "1", "next;"); |
4392 | ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT, 0, "1", "next;"); | |
4393 | ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 0, "1", "next;"); | |
06a26dd2 MS |
4394 | ovn_lflow_add(lflows, od, S_ROUTER_OUT_UNDNAT, 0, "1", "next;"); |
4395 | ovn_lflow_add(lflows, od, S_ROUTER_OUT_EGR_LOOP, 0, "1", "next;"); | |
de297547 | 4396 | |
06a26dd2 MS |
4397 | /* NAT rules are only valid on Gateway routers and routers with |
4398 | * l3dgw_port (router has a port with "redirect-chassis" | |
4399 | * specified). */ | |
4400 | if (!smap_get(&od->nbr->options, "chassis") && !od->l3dgw_port) { | |
de297547 GS |
4401 | continue; |
4402 | } | |
4403 | ||
65d8810c GS |
4404 | ovs_be32 snat_ip; |
4405 | const char *dnat_force_snat_ip = get_force_snat_ip(od, "dnat", | |
4406 | &snat_ip); | |
4407 | const char *lb_force_snat_ip = get_force_snat_ip(od, "lb", | |
4408 | &snat_ip); | |
4409 | ||
de297547 GS |
4410 | for (int i = 0; i < od->nbr->n_nat; i++) { |
4411 | const struct nbrec_nat *nat; | |
4412 | ||
4413 | nat = od->nbr->nat[i]; | |
4414 | ||
4415 | ovs_be32 ip, mask; | |
4416 | ||
4417 | char *error = ip_parse_masked(nat->external_ip, &ip, &mask); | |
4418 | if (error || mask != OVS_BE32_MAX) { | |
4419 | static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1); | |
4420 | VLOG_WARN_RL(&rl, "bad external ip %s for nat", | |
4421 | nat->external_ip); | |
4422 | free(error); | |
4423 | continue; | |
4424 | } | |
4425 | ||
4426 | /* Check the validity of nat->logical_ip. 'logical_ip' can | |
4427 | * be a subnet when the type is "snat". */ | |
4428 | error = ip_parse_masked(nat->logical_ip, &ip, &mask); | |
4429 | if (!strcmp(nat->type, "snat")) { | |
4430 | if (error) { | |
4431 | static struct vlog_rate_limit rl = | |
4432 | VLOG_RATE_LIMIT_INIT(5, 1); | |
4433 | VLOG_WARN_RL(&rl, "bad ip network or ip %s for snat " | |
4434 | "in router "UUID_FMT"", | |
4435 | nat->logical_ip, UUID_ARGS(&od->key)); | |
4436 | free(error); | |
4437 | continue; | |
4438 | } | |
4439 | } else { | |
4440 | if (error || mask != OVS_BE32_MAX) { | |
4441 | static struct vlog_rate_limit rl = | |
4442 | VLOG_RATE_LIMIT_INIT(5, 1); | |
4443 | VLOG_WARN_RL(&rl, "bad ip %s for dnat in router " | |
4444 | ""UUID_FMT"", nat->logical_ip, UUID_ARGS(&od->key)); | |
4445 | free(error); | |
4446 | continue; | |
4447 | } | |
4448 | } | |
4449 | ||
06a26dd2 MS |
4450 | /* For distributed router NAT, determine whether this NAT rule |
4451 | * satisfies the conditions for distributed NAT processing. */ | |
4452 | bool distributed = false; | |
4453 | struct eth_addr mac; | |
4454 | if (od->l3dgw_port && !strcmp(nat->type, "dnat_and_snat") && | |
4455 | nat->logical_port && nat->external_mac) { | |
4456 | if (eth_addr_from_string(nat->external_mac, &mac)) { | |
4457 | distributed = true; | |
4458 | } else { | |
4459 | static struct vlog_rate_limit rl = | |
4460 | VLOG_RATE_LIMIT_INIT(5, 1); | |
4461 | VLOG_WARN_RL(&rl, "bad mac %s for dnat in router " | |
4462 | ""UUID_FMT"", nat->external_mac, UUID_ARGS(&od->key)); | |
4463 | continue; | |
4464 | } | |
4465 | } | |
4466 | ||
de297547 GS |
4467 | /* Ingress UNSNAT table: It is for already established connections' |
4468 | * reverse traffic. i.e., SNAT has already been done in egress | |
4469 | * pipeline and now the packet has entered the ingress pipeline as | |
4470 | * part of a reply. We undo the SNAT here. | |
4471 | * | |
4472 | * Undoing SNAT has to happen before DNAT processing. This is | |
4473 | * because when the packet was DNATed in ingress pipeline, it did | |
4474 | * not know about the possibility of eventual additional SNAT in | |
4475 | * egress pipeline. */ | |
4476 | if (!strcmp(nat->type, "snat") | |
4477 | || !strcmp(nat->type, "dnat_and_snat")) { | |
06a26dd2 MS |
4478 | if (!od->l3dgw_port) { |
4479 | /* Gateway router. */ | |
4480 | ds_clear(&match); | |
4481 | ds_put_format(&match, "ip && ip4.dst == %s", | |
4482 | nat->external_ip); | |
4483 | ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 90, | |
4484 | ds_cstr(&match), "ct_snat; next;"); | |
4485 | } else { | |
4486 | /* Distributed router. */ | |
4487 | ||
4488 | /* Traffic received on l3dgw_port is subject to NAT. */ | |
4489 | ds_clear(&match); | |
4490 | ds_put_format(&match, "ip && ip4.dst == %s" | |
4491 | " && inport == %s", | |
4492 | nat->external_ip, | |
4493 | od->l3dgw_port->json_key); | |
4494 | if (!distributed && od->l3redirect_port) { | |
4495 | /* Flows for NAT rules that are centralized are only | |
4496 | * programmed on the "redirect-chassis". */ | |
4497 | ds_put_format(&match, " && is_chassis_resident(%s)", | |
4498 | od->l3redirect_port->json_key); | |
4499 | } | |
4500 | ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 100, | |
4501 | ds_cstr(&match), "ct_snat;"); | |
4502 | ||
4503 | /* Traffic received on other router ports must be | |
4504 | * redirected to the central instance of the l3dgw_port | |
4505 | * for NAT processing. */ | |
4506 | ds_clear(&match); | |
4507 | ds_put_format(&match, "ip && ip4.dst == %s", | |
4508 | nat->external_ip); | |
4509 | ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 50, | |
4510 | ds_cstr(&match), | |
4511 | REGBIT_NAT_REDIRECT" = 1; next;"); | |
4512 | } | |
de297547 GS |
4513 | } |
4514 | ||
4515 | /* Ingress DNAT table: Packets enter the pipeline with destination | |
4516 | * IP address that needs to be DNATted from a external IP address | |
4517 | * to a logical IP address. */ | |
4518 | if (!strcmp(nat->type, "dnat") | |
4519 | || !strcmp(nat->type, "dnat_and_snat")) { | |
06a26dd2 MS |
4520 | if (!od->l3dgw_port) { |
4521 | /* Gateway router. */ | |
4522 | /* Packet when it goes from the initiator to destination. | |
4523 | * We need to set flags.loopback because the router can | |
4524 | * send the packet back through the same interface. */ | |
4525 | ds_clear(&match); | |
4526 | ds_put_format(&match, "ip && ip4.dst == %s", | |
4527 | nat->external_ip); | |
4528 | ds_clear(&actions); | |
4529 | if (dnat_force_snat_ip) { | |
4530 | /* Indicate to the future tables that a DNAT has taken | |
4531 | * place and a force SNAT needs to be done in the | |
4532 | * Egress SNAT table. */ | |
4533 | ds_put_format(&actions, | |
4534 | "flags.force_snat_for_dnat = 1; "); | |
4535 | } | |
4536 | ds_put_format(&actions, "flags.loopback = 1; ct_dnat(%s);", | |
4537 | nat->logical_ip); | |
4538 | ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 100, | |
4539 | ds_cstr(&match), ds_cstr(&actions)); | |
4540 | } else { | |
4541 | /* Distributed router. */ | |
4542 | ||
4543 | /* Traffic received on l3dgw_port is subject to NAT. */ | |
4544 | ds_clear(&match); | |
4545 | ds_put_format(&match, "ip && ip4.dst == %s" | |
4546 | " && inport == %s", | |
4547 | nat->external_ip, | |
4548 | od->l3dgw_port->json_key); | |
4549 | if (!distributed && od->l3redirect_port) { | |
4550 | /* Flows for NAT rules that are centralized are only | |
4551 | * programmed on the "redirect-chassis". */ | |
4552 | ds_put_format(&match, " && is_chassis_resident(%s)", | |
4553 | od->l3redirect_port->json_key); | |
4554 | } | |
4555 | ds_clear(&actions); | |
4556 | ds_put_format(&actions, "ct_dnat(%s);", | |
4557 | nat->logical_ip); | |
4558 | ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 100, | |
4559 | ds_cstr(&match), ds_cstr(&actions)); | |
4560 | ||
4561 | /* Traffic received on other router ports must be | |
4562 | * redirected to the central instance of the l3dgw_port | |
4563 | * for NAT processing. */ | |
4564 | ds_clear(&match); | |
4565 | ds_put_format(&match, "ip && ip4.dst == %s", | |
4566 | nat->external_ip); | |
4567 | ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 50, | |
4568 | ds_cstr(&match), | |
4569 | REGBIT_NAT_REDIRECT" = 1; next;"); | |
4570 | } | |
4571 | } | |
4572 | ||
4573 | /* Egress UNDNAT table: It is for already established connections' | |
4574 | * reverse traffic. i.e., DNAT has already been done in ingress | |
4575 | * pipeline and now the packet has entered the egress pipeline as | |
4576 | * part of a reply. We undo the DNAT here. | |
4577 | * | |
4578 | * Note that this only applies for NAT on a distributed router. | |
4579 | * Undo DNAT on a gateway router is done in the ingress DNAT | |
4580 | * pipeline stage. */ | |
4581 | if (od->l3dgw_port && (!strcmp(nat->type, "dnat") | |
4582 | || !strcmp(nat->type, "dnat_and_snat"))) { | |
09b39248 | 4583 | ds_clear(&match); |
06a26dd2 MS |
4584 | ds_put_format(&match, "ip && ip4.src == %s" |
4585 | " && outport == %s", | |
4586 | nat->logical_ip, | |
4587 | od->l3dgw_port->json_key); | |
4588 | if (!distributed && od->l3redirect_port) { | |
4589 | /* Flows for NAT rules that are centralized are only | |
4590 | * programmed on the "redirect-chassis". */ | |
4591 | ds_put_format(&match, " && is_chassis_resident(%s)", | |
4592 | od->l3redirect_port->json_key); | |
4593 | } | |
09b39248 | 4594 | ds_clear(&actions); |
06a26dd2 MS |
4595 | if (distributed) { |
4596 | ds_put_format(&actions, "eth.src = "ETH_ADDR_FMT"; ", | |
4597 | ETH_ADDR_ARGS(mac)); | |
65d8810c | 4598 | } |
06a26dd2 MS |
4599 | ds_put_format(&actions, "ct_dnat;"); |
4600 | ovn_lflow_add(lflows, od, S_ROUTER_OUT_UNDNAT, 100, | |
09b39248 | 4601 | ds_cstr(&match), ds_cstr(&actions)); |
de297547 GS |
4602 | } |
4603 | ||
4604 | /* Egress SNAT table: Packets enter the egress pipeline with | |
4605 | * source ip address that needs to be SNATted to a external ip | |
4606 | * address. */ | |
4607 | if (!strcmp(nat->type, "snat") | |
4608 | || !strcmp(nat->type, "dnat_and_snat")) { | |
06a26dd2 MS |
4609 | if (!od->l3dgw_port) { |
4610 | /* Gateway router. */ | |
4611 | ds_clear(&match); | |
4612 | ds_put_format(&match, "ip && ip4.src == %s", | |
4613 | nat->logical_ip); | |
4614 | ds_clear(&actions); | |
4615 | ds_put_format(&actions, "ct_snat(%s);", nat->external_ip); | |
4616 | ||
4617 | /* The priority here is calculated such that the | |
4618 | * nat->logical_ip with the longest mask gets a higher | |
4619 | * priority. */ | |
4620 | ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT, | |
4621 | count_1bits(ntohl(mask)) + 1, | |
4622 | ds_cstr(&match), ds_cstr(&actions)); | |
4623 | } else { | |
4624 | /* Distributed router. */ | |
4625 | ds_clear(&match); | |
4626 | ds_put_format(&match, "ip && ip4.src == %s" | |
4627 | " && outport == %s", | |
4628 | nat->logical_ip, | |
4629 | od->l3dgw_port->json_key); | |
4630 | if (!distributed && od->l3redirect_port) { | |
4631 | /* Flows for NAT rules that are centralized are only | |
4632 | * programmed on the "redirect-chassis". */ | |
4633 | ds_put_format(&match, " && is_chassis_resident(%s)", | |
4634 | od->l3redirect_port->json_key); | |
4635 | } | |
4636 | ds_clear(&actions); | |
4637 | if (distributed) { | |
4638 | ds_put_format(&actions, "eth.src = "ETH_ADDR_FMT"; ", | |
4639 | ETH_ADDR_ARGS(mac)); | |
4640 | } | |
4641 | ds_put_format(&actions, "ct_snat(%s);", nat->external_ip); | |
4642 | ||
4643 | /* The priority here is calculated such that the | |
4644 | * nat->logical_ip with the longest mask gets a higher | |
4645 | * priority. */ | |
4646 | ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT, | |
4647 | count_1bits(ntohl(mask)) + 1, | |
4648 | ds_cstr(&match), ds_cstr(&actions)); | |
4649 | } | |
4650 | } | |
4651 | ||
4652 | /* Logical router ingress table 0: | |
4653 | * For NAT on a distributed router, add rules allowing | |
4654 | * ingress traffic with eth.dst matching nat->external_mac | |
4655 | * on the l3dgw_port instance where nat->logical_port is | |
4656 | * resident. */ | |
4657 | if (distributed) { | |
09b39248 | 4658 | ds_clear(&match); |
06a26dd2 MS |
4659 | ds_put_format(&match, |
4660 | "eth.dst == "ETH_ADDR_FMT" && inport == %s" | |
4661 | " && is_chassis_resident(\"%s\")", | |
4662 | ETH_ADDR_ARGS(mac), | |
4663 | od->l3dgw_port->json_key, | |
4664 | nat->logical_port); | |
4665 | ovn_lflow_add(lflows, od, S_ROUTER_IN_ADMISSION, 50, | |
4666 | ds_cstr(&match), "next;"); | |
4667 | } | |
4668 | ||
4669 | /* Ingress Gateway Redirect Table: For NAT on a distributed | |
4670 | * router, add flows that are specific to a NAT rule. These | |
4671 | * flows indicate the presence of an applicable NAT rule that | |
4672 | * can be applied in a distributed manner. */ | |
4673 | if (distributed) { | |
4674 | ds_clear(&match); | |
4675 | ds_put_format(&match, "ip4.src == %s && outport == %s", | |
4676 | nat->logical_ip, | |
4677 | od->l3dgw_port->json_key); | |
4678 | ovn_lflow_add(lflows, od, S_ROUTER_IN_GW_REDIRECT, 100, | |
4679 | ds_cstr(&match), "next;"); | |
4680 | } | |
de297547 | 4681 | |
06a26dd2 MS |
4682 | /* Egress Loopback table: For NAT on a distributed router. |
4683 | * If packets in the egress pipeline on the distributed | |
4684 | * gateway port have ip.dst matching a NAT external IP, then | |
4685 | * loop a clone of the packet back to the beginning of the | |
4686 | * ingress pipeline with inport = outport. */ | |
4687 | if (od->l3dgw_port) { | |
4688 | /* Distributed router. */ | |
4689 | ds_clear(&match); | |
4690 | ds_put_format(&match, "ip4.dst == %s && outport == %s", | |
4691 | nat->external_ip, | |
4692 | od->l3dgw_port->json_key); | |
4693 | ds_clear(&actions); | |
4694 | ds_put_format(&actions, | |
4695 | "clone { ct_clear; " | |
4696 | "inport = outport; outport = \"\"; " | |
4697 | "flags = 0; flags.loopback = 1; "); | |
4698 | for (int i = 0; i < MFF_N_LOG_REGS; i++) { | |
4699 | ds_put_format(&actions, "reg%d = 0; ", i); | |
4700 | } | |
4701 | ds_put_format(&actions, REGBIT_EGRESS_LOOPBACK" = 1; " | |
4702 | "next(pipeline=ingress, table=0); };"); | |
4703 | ovn_lflow_add(lflows, od, S_ROUTER_OUT_EGR_LOOP, 100, | |
09b39248 | 4704 | ds_cstr(&match), ds_cstr(&actions)); |
de297547 GS |
4705 | } |
4706 | } | |
4707 | ||
65d8810c | 4708 | /* Handle force SNAT options set in the gateway router. */ |
06a26dd2 | 4709 | if (dnat_force_snat_ip && !od->l3dgw_port) { |
65d8810c GS |
4710 | /* If a packet with destination IP address as that of the |
4711 | * gateway router (as set in options:dnat_force_snat_ip) is seen, | |
4712 | * UNSNAT it. */ | |
4713 | ds_clear(&match); | |
4714 | ds_put_format(&match, "ip && ip4.dst == %s", dnat_force_snat_ip); | |
4715 | ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 110, | |
4716 | ds_cstr(&match), "ct_snat; next;"); | |
4717 | ||
4718 | /* Higher priority rules to force SNAT with the IP addresses | |
4719 | * configured in the Gateway router. This only takes effect | |
4720 | * when the packet has already been DNATed once. */ | |
4721 | ds_clear(&match); | |
4722 | ds_put_format(&match, "flags.force_snat_for_dnat == 1 && ip"); | |
4723 | ds_clear(&actions); | |
4724 | ds_put_format(&actions, "ct_snat(%s);", dnat_force_snat_ip); | |
4725 | ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT, 100, | |
4726 | ds_cstr(&match), ds_cstr(&actions)); | |
4727 | } | |
06a26dd2 | 4728 | if (lb_force_snat_ip && !od->l3dgw_port) { |
65d8810c GS |
4729 | /* If a packet with destination IP address as that of the |
4730 | * gateway router (as set in options:lb_force_snat_ip) is seen, | |
4731 | * UNSNAT it. */ | |
4732 | ds_clear(&match); | |
4733 | ds_put_format(&match, "ip && ip4.dst == %s", lb_force_snat_ip); | |
4734 | ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 100, | |
4735 | ds_cstr(&match), "ct_snat; next;"); | |
4736 | ||
4737 | /* Load balanced traffic will have flags.force_snat_for_lb set. | |
4738 | * Force SNAT it. */ | |
4739 | ds_clear(&match); | |
4740 | ds_put_format(&match, "flags.force_snat_for_lb == 1 && ip"); | |
4741 | ds_clear(&actions); | |
4742 | ds_put_format(&actions, "ct_snat(%s);", lb_force_snat_ip); | |
4743 | ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT, 100, | |
4744 | ds_cstr(&match), ds_cstr(&actions)); | |
4745 | } | |
4746 | ||
06a26dd2 MS |
4747 | if (!od->l3dgw_port) { |
4748 | /* For gateway router, re-circulate every packet through | |
4749 | * the DNAT zone. This helps with two things. | |
4750 | * | |
4751 | * 1. Any packet that needs to be unDNATed in the reverse | |
4752 | * direction gets unDNATed. Ideally this could be done in | |
4753 | * the egress pipeline. But since the gateway router | |
4754 | * does not have any feature that depends on the source | |
4755 | * ip address being external IP address for IP routing, | |
4756 | * we can do it here, saving a future re-circulation. | |
4757 | * | |
4758 | * 2. Any packet that was sent through SNAT zone in the | |
4759 | * previous table automatically gets re-circulated to get | |
4760 | * back the new destination IP address that is needed for | |
4761 | * routing in the openflow pipeline. */ | |
4762 | ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 50, | |
4763 | "ip", "flags.loopback = 1; ct_dnat;"); | |
4764 | } else { | |
4765 | /* For NAT on a distributed router, add flows to Ingress | |
4766 | * IP Routing table, Ingress ARP Resolution table, and | |
4767 | * Ingress Gateway Redirect Table that are not specific to a | |
4768 | * NAT rule. */ | |
4769 | ||
4770 | /* The highest priority IN_IP_ROUTING rule matches packets | |
4771 | * with REGBIT_NAT_REDIRECT (set in DNAT or UNSNAT stages), | |
4772 | * with action "ip.ttl--; next;". The IN_GW_REDIRECT table | |
4773 | * will take care of setting the outport. */ | |
4774 | ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 300, | |
4775 | REGBIT_NAT_REDIRECT" == 1", "ip.ttl--; next;"); | |
4776 | ||
4777 | /* The highest priority IN_ARP_RESOLVE rule matches packets | |
4778 | * with REGBIT_NAT_REDIRECT (set in DNAT or UNSNAT stages), | |
4779 | * then sets eth.dst to the distributed gateway port's | |
4780 | * ethernet address. */ | |
4781 | ds_clear(&actions); | |
4782 | ds_put_format(&actions, "eth.dst = %s; next;", | |
4783 | od->l3dgw_port->lrp_networks.ea_s); | |
4784 | ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_RESOLVE, 200, | |
4785 | REGBIT_NAT_REDIRECT" == 1", ds_cstr(&actions)); | |
4786 | ||
4787 | /* The highest priority IN_GW_REDIRECT rule redirects packets | |
4788 | * with REGBIT_NAT_REDIRECT (set in DNAT or UNSNAT stages) to | |
4789 | * the central instance of the l3dgw_port for NAT processing. */ | |
4790 | ds_clear(&actions); | |
4791 | ds_put_format(&actions, "outport = %s; next;", | |
4792 | od->l3redirect_port->json_key); | |
4793 | ovn_lflow_add(lflows, od, S_ROUTER_IN_GW_REDIRECT, 200, | |
4794 | REGBIT_NAT_REDIRECT" == 1", ds_cstr(&actions)); | |
4795 | } | |
4796 | ||
4797 | /* Load balancing and packet defrag are only valid on | |
4798 | * Gateway routers. */ | |
4799 | if (!smap_get(&od->nbr->options, "chassis")) { | |
4800 | continue; | |
4801 | } | |
8697d426 MS |
4802 | |
4803 | /* A set to hold all ips that need defragmentation and tracking. */ | |
4804 | struct sset all_ips = SSET_INITIALIZER(&all_ips); | |
4805 | ||
4806 | for (int i = 0; i < od->nbr->n_load_balancer; i++) { | |
4807 | struct nbrec_load_balancer *lb = od->nbr->load_balancer[i]; | |
4808 | struct smap *vips = &lb->vips; | |
4809 | struct smap_node *node; | |
4810 | ||
4811 | SMAP_FOR_EACH (node, vips) { | |
4812 | uint16_t port = 0; | |
4813 | ||
4814 | /* node->key contains IP:port or just IP. */ | |
4815 | char *ip_address = NULL; | |
4816 | ip_address_and_port_from_lb_key(node->key, &ip_address, &port); | |
4817 | if (!ip_address) { | |
4818 | continue; | |
4819 | } | |
4820 | ||
4821 | if (!sset_contains(&all_ips, ip_address)) { | |
4822 | sset_add(&all_ips, ip_address); | |
4823 | } | |
4824 | ||
4825 | /* Higher priority rules are added for load-balancing in DNAT | |
4826 | * table. For every match (on a VIP[:port]), we add two flows | |
4827 | * via add_router_lb_flow(). One flow is for specific matching | |
4828 | * on ct.new with an action of "ct_lb($targets);". The other | |
4829 | * flow is for ct.est with an action of "ct_dnat;". */ | |
4830 | ds_clear(&actions); | |
4831 | ds_put_format(&actions, "ct_lb(%s);", node->value); | |
4832 | ||
4833 | ds_clear(&match); | |
4834 | ds_put_format(&match, "ip && ip4.dst == %s", | |
4835 | ip_address); | |
4836 | free(ip_address); | |
4837 | ||
4838 | if (port) { | |
4839 | if (lb->protocol && !strcmp(lb->protocol, "udp")) { | |
4840 | ds_put_format(&match, " && udp && udp.dst == %d", | |
4841 | port); | |
4842 | } else { | |
4843 | ds_put_format(&match, " && tcp && tcp.dst == %d", | |
4844 | port); | |
4845 | } | |
4846 | add_router_lb_flow(lflows, od, &match, &actions, 120, | |
4847 | lb_force_snat_ip); | |
4848 | } else { | |
4849 | add_router_lb_flow(lflows, od, &match, &actions, 110, | |
4850 | lb_force_snat_ip); | |
4851 | } | |
4852 | } | |
4853 | } | |
4854 | ||
4855 | /* If there are any load balancing rules, we should send the | |
4856 | * packet to conntrack for defragmentation and tracking. This helps | |
4857 | * with two things. | |
4858 | * | |
4859 | * 1. With tracking, we can send only new connections to pick a | |
4860 | * DNAT ip address from a group. | |
4861 | * 2. If there are L4 ports in load balancing rules, we need the | |
4862 | * defragmentation to match on L4 ports. */ | |
4863 | const char *ip_address; | |
4864 | SSET_FOR_EACH(ip_address, &all_ips) { | |
4865 | ds_clear(&match); | |
4866 | ds_put_format(&match, "ip && ip4.dst == %s", ip_address); | |
4867 | ovn_lflow_add(lflows, od, S_ROUTER_IN_DEFRAG, | |
4868 | 100, ds_cstr(&match), "ct_next;"); | |
4869 | } | |
4870 | ||
4871 | sset_destroy(&all_ips); | |
de297547 GS |
4872 | } |
4873 | ||
4f6d33f3 | 4874 | /* Logical router ingress table 5: IP Routing. |
9975d7be BP |
4875 | * |
4876 | * A packet that arrives at this table is an IP packet that should be | |
6fdb7cd6 JP |
4877 | * routed to the address in 'ip[46].dst'. This table sets outport to |
4878 | * the correct output port, eth.src to the output port's MAC | |
4879 | * address, and '[xx]reg0' to the next-hop IP address (leaving | |
4880 | * 'ip[46].dst', the packet’s final destination, unchanged), and | |
4881 | * advances to the next table for ARP/ND resolution. */ | |
9975d7be | 4882 | HMAP_FOR_EACH (op, key_node, ports) { |
0ee00741 | 4883 | if (!op->nbrp) { |
9975d7be BP |
4884 | continue; |
4885 | } | |
4886 | ||
4685e523 JP |
4887 | for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) { |
4888 | add_route(lflows, op, op->lrp_networks.ipv4_addrs[i].addr_s, | |
4889 | op->lrp_networks.ipv4_addrs[i].network_s, | |
440a9f4b | 4890 | op->lrp_networks.ipv4_addrs[i].plen, NULL, NULL); |
4685e523 | 4891 | } |
6fdb7cd6 JP |
4892 | |
4893 | for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) { | |
4894 | add_route(lflows, op, op->lrp_networks.ipv6_addrs[i].addr_s, | |
4895 | op->lrp_networks.ipv6_addrs[i].network_s, | |
440a9f4b | 4896 | op->lrp_networks.ipv6_addrs[i].plen, NULL, NULL); |
6fdb7cd6 | 4897 | } |
9975d7be | 4898 | } |
4685e523 | 4899 | |
6fdb7cd6 | 4900 | /* Convert the static routes to flows. */ |
9975d7be BP |
4901 | HMAP_FOR_EACH (od, key_node, datapaths) { |
4902 | if (!od->nbr) { | |
4903 | continue; | |
4904 | } | |
4905 | ||
28dc3fe9 SR |
4906 | for (int i = 0; i < od->nbr->n_static_routes; i++) { |
4907 | const struct nbrec_logical_router_static_route *route; | |
4908 | ||
4909 | route = od->nbr->static_routes[i]; | |
4910 | build_static_route_flow(lflows, od, ports, route); | |
4911 | } | |
9975d7be | 4912 | } |
6fdb7cd6 | 4913 | |
9975d7be BP |
4914 | /* XXX destination unreachable */ |
4915 | ||
4f6d33f3 | 4916 | /* Local router ingress table 6: ARP Resolution. |
9975d7be BP |
4917 | * |
4918 | * Any packet that reaches this table is an IP packet whose next-hop IP | |
4919 | * address is in reg0. (ip4.dst is the final destination.) This table | |
4920 | * resolves the IP address in reg0 into an output port in outport and an | |
4921 | * Ethernet address in eth.dst. */ | |
4922 | HMAP_FOR_EACH (op, key_node, ports) { | |
0ee00741 | 4923 | if (op->nbrp) { |
6fdb7cd6 JP |
4924 | /* This is a logical router port. If next-hop IP address in |
4925 | * '[xx]reg0' matches IP address of this router port, then | |
4926 | * the packet is intended to eventually be sent to this | |
4927 | * logical port. Set the destination mac address using this | |
4928 | * port's mac address. | |
509afdc3 GS |
4929 | * |
4930 | * The packet is still in peer's logical pipeline. So the match | |
4931 | * should be on peer's outport. */ | |
6fdb7cd6 JP |
4932 | if (op->peer && op->nbrp->peer) { |
4933 | if (op->lrp_networks.n_ipv4_addrs) { | |
4934 | ds_clear(&match); | |
4935 | ds_put_format(&match, "outport == %s && reg0 == ", | |
4936 | op->peer->json_key); | |
4937 | op_put_v4_networks(&match, op, false); | |
4938 | ||
4939 | ds_clear(&actions); | |
4940 | ds_put_format(&actions, "eth.dst = %s; next;", | |
4941 | op->lrp_networks.ea_s); | |
4942 | ovn_lflow_add(lflows, op->peer->od, S_ROUTER_IN_ARP_RESOLVE, | |
4943 | 100, ds_cstr(&match), ds_cstr(&actions)); | |
4944 | } | |
4685e523 | 4945 | |
6fdb7cd6 JP |
4946 | if (op->lrp_networks.n_ipv6_addrs) { |
4947 | ds_clear(&match); | |
4948 | ds_put_format(&match, "outport == %s && xxreg0 == ", | |
4949 | op->peer->json_key); | |
4950 | op_put_v6_networks(&match, op); | |
4951 | ||
4952 | ds_clear(&actions); | |
4953 | ds_put_format(&actions, "eth.dst = %s; next;", | |
4954 | op->lrp_networks.ea_s); | |
4955 | ovn_lflow_add(lflows, op->peer->od, S_ROUTER_IN_ARP_RESOLVE, | |
4956 | 100, ds_cstr(&match), ds_cstr(&actions)); | |
4957 | } | |
509afdc3 | 4958 | } |
0ee00741 | 4959 | } else if (op->od->n_router_ports && strcmp(op->nbsp->type, "router")) { |
75cf9d2b GS |
4960 | /* This is a logical switch port that backs a VM or a container. |
4961 | * Extract its addresses. For each of the address, go through all | |
4962 | * the router ports attached to the switch (to which this port | |
4963 | * connects) and if the address in question is reachable from the | |
6fdb7cd6 | 4964 | * router port, add an ARP/ND entry in that router's pipeline. */ |
75cf9d2b | 4965 | |
e93b43d6 | 4966 | for (size_t i = 0; i < op->n_lsp_addrs; i++) { |
4685e523 | 4967 | const char *ea_s = op->lsp_addrs[i].ea_s; |
e93b43d6 | 4968 | for (size_t j = 0; j < op->lsp_addrs[i].n_ipv4_addrs; j++) { |
4685e523 | 4969 | const char *ip_s = op->lsp_addrs[i].ipv4_addrs[j].addr_s; |
e93b43d6 | 4970 | for (size_t k = 0; k < op->od->n_router_ports; k++) { |
80f408f4 JP |
4971 | /* Get the Logical_Router_Port that the |
4972 | * Logical_Switch_Port is connected to, as | |
4973 | * 'peer'. */ | |
86e98048 | 4974 | const char *peer_name = smap_get( |
0ee00741 | 4975 | &op->od->router_ports[k]->nbsp->options, |
86e98048 BP |
4976 | "router-port"); |
4977 | if (!peer_name) { | |
4978 | continue; | |
4979 | } | |
4980 | ||
e93b43d6 | 4981 | struct ovn_port *peer = ovn_port_find(ports, peer_name); |
0ee00741 | 4982 | if (!peer || !peer->nbrp) { |
86e98048 BP |
4983 | continue; |
4984 | } | |
4985 | ||
4685e523 | 4986 | if (!find_lrp_member_ip(peer, ip_s)) { |
86e98048 BP |
4987 | continue; |
4988 | } | |
4989 | ||
09b39248 | 4990 | ds_clear(&match); |
e93b43d6 | 4991 | ds_put_format(&match, "outport == %s && reg0 == %s", |
4685e523 JP |
4992 | peer->json_key, ip_s); |
4993 | ||
09b39248 | 4994 | ds_clear(&actions); |
4685e523 | 4995 | ds_put_format(&actions, "eth.dst = %s; next;", ea_s); |
86e98048 | 4996 | ovn_lflow_add(lflows, peer->od, |
09b39248 JP |
4997 | S_ROUTER_IN_ARP_RESOLVE, 100, |
4998 | ds_cstr(&match), ds_cstr(&actions)); | |
86e98048 | 4999 | } |
9975d7be | 5000 | } |
6fdb7cd6 JP |
5001 | |
5002 | for (size_t j = 0; j < op->lsp_addrs[i].n_ipv6_addrs; j++) { | |
5003 | const char *ip_s = op->lsp_addrs[i].ipv6_addrs[j].addr_s; | |
5004 | for (size_t k = 0; k < op->od->n_router_ports; k++) { | |
5005 | /* Get the Logical_Router_Port that the | |
5006 | * Logical_Switch_Port is connected to, as | |
5007 | * 'peer'. */ | |
5008 | const char *peer_name = smap_get( | |
5009 | &op->od->router_ports[k]->nbsp->options, | |
5010 | "router-port"); | |
5011 | if (!peer_name) { | |
5012 | continue; | |
5013 | } | |
5014 | ||
5015 | struct ovn_port *peer = ovn_port_find(ports, peer_name); | |
5016 | if (!peer || !peer->nbrp) { | |
5017 | continue; | |
5018 | } | |
5019 | ||
5020 | if (!find_lrp_member_ip(peer, ip_s)) { | |
5021 | continue; | |
5022 | } | |
5023 | ||
5024 | ds_clear(&match); | |
5025 | ds_put_format(&match, "outport == %s && xxreg0 == %s", | |
5026 | peer->json_key, ip_s); | |
5027 | ||
5028 | ds_clear(&actions); | |
5029 | ds_put_format(&actions, "eth.dst = %s; next;", ea_s); | |
5030 | ovn_lflow_add(lflows, peer->od, | |
5031 | S_ROUTER_IN_ARP_RESOLVE, 100, | |
5032 | ds_cstr(&match), ds_cstr(&actions)); | |
5033 | } | |
5034 | } | |
9975d7be | 5035 | } |
0ee00741 | 5036 | } else if (!strcmp(op->nbsp->type, "router")) { |
75cf9d2b GS |
5037 | /* This is a logical switch port that connects to a router. */ |
5038 | ||
5039 | /* The peer of this switch port is the router port for which | |
5040 | * we need to add logical flows such that it can resolve | |
5041 | * ARP entries for all the other router ports connected to | |
5042 | * the switch in question. */ | |
5043 | ||
0ee00741 | 5044 | const char *peer_name = smap_get(&op->nbsp->options, |
75cf9d2b GS |
5045 | "router-port"); |
5046 | if (!peer_name) { | |
5047 | continue; | |
5048 | } | |
5049 | ||
5050 | struct ovn_port *peer = ovn_port_find(ports, peer_name); | |
0ee00741 | 5051 | if (!peer || !peer->nbrp) { |
75cf9d2b GS |
5052 | continue; |
5053 | } | |
5054 | ||
4685e523 | 5055 | for (size_t i = 0; i < op->od->n_router_ports; i++) { |
75cf9d2b | 5056 | const char *router_port_name = smap_get( |
0ee00741 | 5057 | &op->od->router_ports[i]->nbsp->options, |
75cf9d2b GS |
5058 | "router-port"); |
5059 | struct ovn_port *router_port = ovn_port_find(ports, | |
5060 | router_port_name); | |
0ee00741 | 5061 | if (!router_port || !router_port->nbrp) { |
75cf9d2b GS |
5062 | continue; |
5063 | } | |
5064 | ||
5065 | /* Skip the router port under consideration. */ | |
5066 | if (router_port == peer) { | |
5067 | continue; | |
5068 | } | |
5069 | ||
6fdb7cd6 JP |
5070 | if (router_port->lrp_networks.n_ipv4_addrs) { |
5071 | ds_clear(&match); | |
5072 | ds_put_format(&match, "outport == %s && reg0 == ", | |
5073 | peer->json_key); | |
5074 | op_put_v4_networks(&match, router_port, false); | |
5075 | ||
5076 | ds_clear(&actions); | |
5077 | ds_put_format(&actions, "eth.dst = %s; next;", | |
5078 | router_port->lrp_networks.ea_s); | |
5079 | ovn_lflow_add(lflows, peer->od, S_ROUTER_IN_ARP_RESOLVE, | |
5080 | 100, ds_cstr(&match), ds_cstr(&actions)); | |
5081 | } | |
4685e523 | 5082 | |
6fdb7cd6 JP |
5083 | if (router_port->lrp_networks.n_ipv6_addrs) { |
5084 | ds_clear(&match); | |
5085 | ds_put_format(&match, "outport == %s && xxreg0 == ", | |
5086 | peer->json_key); | |
5087 | op_put_v6_networks(&match, router_port); | |
5088 | ||
5089 | ds_clear(&actions); | |
5090 | ds_put_format(&actions, "eth.dst = %s; next;", | |
5091 | router_port->lrp_networks.ea_s); | |
5092 | ovn_lflow_add(lflows, peer->od, S_ROUTER_IN_ARP_RESOLVE, | |
5093 | 100, ds_cstr(&match), ds_cstr(&actions)); | |
5094 | } | |
75cf9d2b | 5095 | } |
9975d7be BP |
5096 | } |
5097 | } | |
75cf9d2b | 5098 | |
0bac7164 BP |
5099 | HMAP_FOR_EACH (od, key_node, datapaths) { |
5100 | if (!od->nbr) { | |
5101 | continue; | |
5102 | } | |
5103 | ||
c34a87b6 JP |
5104 | ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_RESOLVE, 0, "ip4", |
5105 | "get_arp(outport, reg0); next;"); | |
5106 | ||
5107 | ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_RESOLVE, 0, "ip6", | |
5108 | "get_nd(outport, xxreg0); next;"); | |
0bac7164 BP |
5109 | } |
5110 | ||
41a15b71 MS |
5111 | /* Logical router ingress table 7: Gateway redirect. |
5112 | * | |
5113 | * For traffic with outport equal to the l3dgw_port | |
5114 | * on a distributed router, this table redirects a subset | |
5115 | * of the traffic to the l3redirect_port which represents | |
5116 | * the central instance of the l3dgw_port. | |
5117 | */ | |
5118 | HMAP_FOR_EACH (od, key_node, datapaths) { | |
5119 | if (!od->nbr) { | |
5120 | continue; | |
5121 | } | |
5122 | if (od->l3dgw_port && od->l3redirect_port) { | |
5123 | /* For traffic with outport == l3dgw_port, if the | |
5124 | * packet did not match any higher priority redirect | |
5125 | * rule, then the traffic is redirected to the central | |
5126 | * instance of the l3dgw_port. */ | |
5127 | ds_clear(&match); | |
5128 | ds_put_format(&match, "outport == %s", | |
5129 | od->l3dgw_port->json_key); | |
5130 | ds_clear(&actions); | |
5131 | ds_put_format(&actions, "outport = %s; next;", | |
5132 | od->l3redirect_port->json_key); | |
5133 | ovn_lflow_add(lflows, od, S_ROUTER_IN_GW_REDIRECT, 50, | |
5134 | ds_cstr(&match), ds_cstr(&actions)); | |
5135 | ||
5136 | /* If the Ethernet destination has not been resolved, | |
5137 | * redirect to the central instance of the l3dgw_port. | |
5138 | * Such traffic will be replaced by an ARP request or ND | |
5139 | * Neighbor Solicitation in the ARP request ingress | |
5140 | * table, before being redirected to the central instance. | |
5141 | */ | |
5142 | ds_put_format(&match, " && eth.dst == 00:00:00:00:00:00"); | |
5143 | ovn_lflow_add(lflows, od, S_ROUTER_IN_GW_REDIRECT, 150, | |
5144 | ds_cstr(&match), ds_cstr(&actions)); | |
5145 | } | |
5146 | ||
5147 | /* Packets are allowed by default. */ | |
5148 | ovn_lflow_add(lflows, od, S_ROUTER_IN_GW_REDIRECT, 0, "1", "next;"); | |
5149 | } | |
5150 | ||
5151 | /* Local router ingress table 8: ARP request. | |
0bac7164 BP |
5152 | * |
5153 | * In the common case where the Ethernet destination has been resolved, | |
94300e09 JP |
5154 | * this table outputs the packet (priority 0). Otherwise, it composes |
5155 | * and sends an ARP request (priority 100). */ | |
0bac7164 BP |
5156 | HMAP_FOR_EACH (od, key_node, datapaths) { |
5157 | if (!od->nbr) { | |
5158 | continue; | |
5159 | } | |
5160 | ||
5161 | ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_REQUEST, 100, | |
5162 | "eth.dst == 00:00:00:00:00:00", | |
5163 | "arp { " | |
5164 | "eth.dst = ff:ff:ff:ff:ff:ff; " | |
5165 | "arp.spa = reg1; " | |
47021598 | 5166 | "arp.tpa = reg0; " |
0bac7164 BP |
5167 | "arp.op = 1; " /* ARP request */ |
5168 | "output; " | |
5169 | "};"); | |
5170 | ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_REQUEST, 0, "1", "output;"); | |
5171 | } | |
9975d7be | 5172 | |
de297547 | 5173 | /* Logical router egress table 1: Delivery (priority 100). |
9975d7be BP |
5174 | * |
5175 | * Priority 100 rules deliver packets to enabled logical ports. */ | |
5176 | HMAP_FOR_EACH (op, key_node, ports) { | |
0ee00741 | 5177 | if (!op->nbrp) { |
9975d7be BP |
5178 | continue; |
5179 | } | |
5180 | ||
0ee00741 | 5181 | if (!lrport_is_enabled(op->nbrp)) { |
9975d7be BP |
5182 | /* Drop packets to disabled logical ports (since logical flow |
5183 | * tables are default-drop). */ | |
5184 | continue; | |
5185 | } | |
5186 | ||
41a15b71 MS |
5187 | if (op->derived) { |
5188 | /* No egress packets should be processed in the context of | |
5189 | * a chassisredirect port. The chassisredirect port should | |
5190 | * be replaced by the l3dgw port in the local output | |
5191 | * pipeline stage before egress processing. */ | |
5192 | continue; | |
5193 | } | |
5194 | ||
09b39248 JP |
5195 | ds_clear(&match); |
5196 | ds_put_format(&match, "outport == %s", op->json_key); | |
9975d7be | 5197 | ovn_lflow_add(lflows, op->od, S_ROUTER_OUT_DELIVERY, 100, |
09b39248 | 5198 | ds_cstr(&match), "output;"); |
9975d7be | 5199 | } |
09b39248 JP |
5200 | |
5201 | ds_destroy(&match); | |
5202 | ds_destroy(&actions); | |
9975d7be BP |
5203 | } |
5204 | ||
5205 | /* Updates the Logical_Flow and Multicast_Group tables in the OVN_SB database, | |
5206 | * constructing their contents based on the OVN_NB database. */ | |
5207 | static void | |
5208 | build_lflows(struct northd_context *ctx, struct hmap *datapaths, | |
5209 | struct hmap *ports) | |
5210 | { | |
5211 | struct hmap lflows = HMAP_INITIALIZER(&lflows); | |
5212 | struct hmap mcgroups = HMAP_INITIALIZER(&mcgroups); | |
5213 | ||
5214 | build_lswitch_flows(datapaths, ports, &lflows, &mcgroups); | |
5215 | build_lrouter_flows(datapaths, ports, &lflows); | |
5216 | ||
5868eb24 BP |
5217 | /* Push changes to the Logical_Flow table to database. */ |
5218 | const struct sbrec_logical_flow *sbflow, *next_sbflow; | |
5219 | SBREC_LOGICAL_FLOW_FOR_EACH_SAFE (sbflow, next_sbflow, ctx->ovnsb_idl) { | |
5220 | struct ovn_datapath *od | |
5221 | = ovn_datapath_from_sbrec(datapaths, sbflow->logical_datapath); | |
5222 | if (!od) { | |
5223 | sbrec_logical_flow_delete(sbflow); | |
5224 | continue; | |
eb00399e | 5225 | } |
eb00399e | 5226 | |
9975d7be | 5227 | enum ovn_datapath_type dp_type = od->nbs ? DP_SWITCH : DP_ROUTER; |
880fcd14 BP |
5228 | enum ovn_pipeline pipeline |
5229 | = !strcmp(sbflow->pipeline, "ingress") ? P_IN : P_OUT; | |
5868eb24 | 5230 | struct ovn_lflow *lflow = ovn_lflow_find( |
880fcd14 BP |
5231 | &lflows, od, ovn_stage_build(dp_type, pipeline, sbflow->table_id), |
5232 | sbflow->priority, sbflow->match, sbflow->actions); | |
5868eb24 BP |
5233 | if (lflow) { |
5234 | ovn_lflow_destroy(&lflows, lflow); | |
5235 | } else { | |
5236 | sbrec_logical_flow_delete(sbflow); | |
4edcdcf4 RB |
5237 | } |
5238 | } | |
5868eb24 BP |
5239 | struct ovn_lflow *lflow, *next_lflow; |
5240 | HMAP_FOR_EACH_SAFE (lflow, next_lflow, hmap_node, &lflows) { | |
880fcd14 BP |
5241 | enum ovn_pipeline pipeline = ovn_stage_get_pipeline(lflow->stage); |
5242 | uint8_t table = ovn_stage_get_table(lflow->stage); | |
5243 | ||
5868eb24 BP |
5244 | sbflow = sbrec_logical_flow_insert(ctx->ovnsb_txn); |
5245 | sbrec_logical_flow_set_logical_datapath(sbflow, lflow->od->sb); | |
9975d7be BP |
5246 | sbrec_logical_flow_set_pipeline( |
5247 | sbflow, pipeline == P_IN ? "ingress" : "egress"); | |
880fcd14 | 5248 | sbrec_logical_flow_set_table_id(sbflow, table); |
5868eb24 BP |
5249 | sbrec_logical_flow_set_priority(sbflow, lflow->priority); |
5250 | sbrec_logical_flow_set_match(sbflow, lflow->match); | |
5251 | sbrec_logical_flow_set_actions(sbflow, lflow->actions); | |
091e3af9 | 5252 | |
d8026bbf BP |
5253 | /* Trim the source locator lflow->where, which looks something like |
5254 | * "ovn/northd/ovn-northd.c:1234", down to just the part following the | |
5255 | * last slash, e.g. "ovn-northd.c:1234". */ | |
5256 | const char *slash = strrchr(lflow->where, '/'); | |
5257 | #if _WIN32 | |
5258 | const char *backslash = strrchr(lflow->where, '\\'); | |
5259 | if (!slash || backslash > slash) { | |
5260 | slash = backslash; | |
5261 | } | |
5262 | #endif | |
5263 | const char *where = slash ? slash + 1 : lflow->where; | |
5264 | ||
5265 | const struct smap ids = SMAP_CONST2( | |
5266 | &ids, | |
5267 | "stage-name", ovn_stage_to_str(lflow->stage), | |
5268 | "source", where); | |
aaf881c6 | 5269 | sbrec_logical_flow_set_external_ids(sbflow, &ids); |
091e3af9 | 5270 | |
5868eb24 | 5271 | ovn_lflow_destroy(&lflows, lflow); |
eb00399e | 5272 | } |
5868eb24 BP |
5273 | hmap_destroy(&lflows); |
5274 | ||
5275 | /* Push changes to the Multicast_Group table to database. */ | |
5276 | const struct sbrec_multicast_group *sbmc, *next_sbmc; | |
5277 | SBREC_MULTICAST_GROUP_FOR_EACH_SAFE (sbmc, next_sbmc, ctx->ovnsb_idl) { | |
5278 | struct ovn_datapath *od = ovn_datapath_from_sbrec(datapaths, | |
5279 | sbmc->datapath); | |
5280 | if (!od) { | |
5281 | sbrec_multicast_group_delete(sbmc); | |
5282 | continue; | |
5283 | } | |
eb00399e | 5284 | |
5868eb24 BP |
5285 | struct multicast_group group = { .name = sbmc->name, |
5286 | .key = sbmc->tunnel_key }; | |
5287 | struct ovn_multicast *mc = ovn_multicast_find(&mcgroups, od, &group); | |
5288 | if (mc) { | |
5289 | ovn_multicast_update_sbrec(mc, sbmc); | |
5290 | ovn_multicast_destroy(&mcgroups, mc); | |
5291 | } else { | |
5292 | sbrec_multicast_group_delete(sbmc); | |
5293 | } | |
5294 | } | |
5295 | struct ovn_multicast *mc, *next_mc; | |
5296 | HMAP_FOR_EACH_SAFE (mc, next_mc, hmap_node, &mcgroups) { | |
5297 | sbmc = sbrec_multicast_group_insert(ctx->ovnsb_txn); | |
5298 | sbrec_multicast_group_set_datapath(sbmc, mc->datapath->sb); | |
5299 | sbrec_multicast_group_set_name(sbmc, mc->group->name); | |
5300 | sbrec_multicast_group_set_tunnel_key(sbmc, mc->group->key); | |
5301 | ovn_multicast_update_sbrec(mc, sbmc); | |
5302 | ovn_multicast_destroy(&mcgroups, mc); | |
4edcdcf4 | 5303 | } |
5868eb24 | 5304 | hmap_destroy(&mcgroups); |
4edcdcf4 | 5305 | } |
ea382567 RB |
5306 | |
5307 | /* OVN_Northbound and OVN_Southbound have an identical Address_Set table. | |
5308 | * We always update OVN_Southbound to match the current data in | |
5309 | * OVN_Northbound, so that the address sets used in Logical_Flows in | |
5310 | * OVN_Southbound is checked against the proper set.*/ | |
5311 | static void | |
5312 | sync_address_sets(struct northd_context *ctx) | |
5313 | { | |
5314 | struct shash sb_address_sets = SHASH_INITIALIZER(&sb_address_sets); | |
5315 | ||
5316 | const struct sbrec_address_set *sb_address_set; | |
5317 | SBREC_ADDRESS_SET_FOR_EACH (sb_address_set, ctx->ovnsb_idl) { | |
5318 | shash_add(&sb_address_sets, sb_address_set->name, sb_address_set); | |
5319 | } | |
5320 | ||
5321 | const struct nbrec_address_set *nb_address_set; | |
5322 | NBREC_ADDRESS_SET_FOR_EACH (nb_address_set, ctx->ovnnb_idl) { | |
5323 | sb_address_set = shash_find_and_delete(&sb_address_sets, | |
5324 | nb_address_set->name); | |
5325 | if (!sb_address_set) { | |
5326 | sb_address_set = sbrec_address_set_insert(ctx->ovnsb_txn); | |
5327 | sbrec_address_set_set_name(sb_address_set, nb_address_set->name); | |
5328 | } | |
5329 | ||
5330 | sbrec_address_set_set_addresses(sb_address_set, | |
5331 | /* "char **" is not compatible with "const char **" */ | |
5332 | (const char **) nb_address_set->addresses, | |
5333 | nb_address_set->n_addresses); | |
5334 | } | |
5335 | ||
5336 | struct shash_node *node, *next; | |
5337 | SHASH_FOR_EACH_SAFE (node, next, &sb_address_sets) { | |
5338 | sbrec_address_set_delete(node->data); | |
5339 | shash_delete(&sb_address_sets, node); | |
5340 | } | |
5341 | shash_destroy(&sb_address_sets); | |
5342 | } | |
5868eb24 | 5343 | \f |
4edcdcf4 | 5344 | static void |
fa183acc | 5345 | ovnnb_db_run(struct northd_context *ctx, struct ovsdb_idl_loop *sb_loop) |
4edcdcf4 | 5346 | { |
b511690b | 5347 | if (!ctx->ovnsb_txn || !ctx->ovnnb_txn) { |
331e7aef NS |
5348 | return; |
5349 | } | |
5868eb24 BP |
5350 | struct hmap datapaths, ports; |
5351 | build_datapaths(ctx, &datapaths); | |
5352 | build_ports(ctx, &datapaths, &ports); | |
b511690b | 5353 | build_ipam(&datapaths, &ports); |
5868eb24 BP |
5354 | build_lflows(ctx, &datapaths, &ports); |
5355 | ||
ea382567 RB |
5356 | sync_address_sets(ctx); |
5357 | ||
5868eb24 BP |
5358 | struct ovn_datapath *dp, *next_dp; |
5359 | HMAP_FOR_EACH_SAFE (dp, next_dp, key_node, &datapaths) { | |
5360 | ovn_datapath_destroy(&datapaths, dp); | |
5361 | } | |
5362 | hmap_destroy(&datapaths); | |
5363 | ||
5364 | struct ovn_port *port, *next_port; | |
5365 | HMAP_FOR_EACH_SAFE (port, next_port, key_node, &ports) { | |
5366 | ovn_port_destroy(&ports, port); | |
5367 | } | |
5368 | hmap_destroy(&ports); | |
fa183acc BP |
5369 | |
5370 | /* Copy nb_cfg from northbound to southbound database. | |
5371 | * | |
5372 | * Also set up to update sb_cfg once our southbound transaction commits. */ | |
5373 | const struct nbrec_nb_global *nb = nbrec_nb_global_first(ctx->ovnnb_idl); | |
14338f22 GS |
5374 | if (!nb) { |
5375 | nb = nbrec_nb_global_insert(ctx->ovnnb_txn); | |
5376 | } | |
fa183acc | 5377 | const struct sbrec_sb_global *sb = sbrec_sb_global_first(ctx->ovnsb_idl); |
14338f22 GS |
5378 | if (!sb) { |
5379 | sb = sbrec_sb_global_insert(ctx->ovnsb_txn); | |
fa183acc | 5380 | } |
14338f22 GS |
5381 | sbrec_sb_global_set_nb_cfg(sb, nb->nb_cfg); |
5382 | sb_loop->next_cfg = nb->nb_cfg; | |
8639f9be ND |
5383 | |
5384 | cleanup_macam(&macam); | |
ac0630a2 RB |
5385 | } |
5386 | ||
fa183acc BP |
5387 | /* Handle changes to the 'chassis' column of the 'Port_Binding' table. When |
5388 | * this column is not empty, it means we need to set the corresponding logical | |
5389 | * port as 'up' in the northbound DB. */ | |
ac0630a2 | 5390 | static void |
fa183acc | 5391 | update_logical_port_status(struct northd_context *ctx) |
ac0630a2 | 5392 | { |
fc3113bc | 5393 | struct hmap lports_hmap; |
5868eb24 | 5394 | const struct sbrec_port_binding *sb; |
0ee00741 | 5395 | const struct nbrec_logical_switch_port *nbsp; |
fc3113bc RB |
5396 | |
5397 | struct lport_hash_node { | |
5398 | struct hmap_node node; | |
0ee00741 | 5399 | const struct nbrec_logical_switch_port *nbsp; |
4ec3d7c7 | 5400 | } *hash_node; |
f93818dd | 5401 | |
fc3113bc | 5402 | hmap_init(&lports_hmap); |
f93818dd | 5403 | |
0ee00741 | 5404 | NBREC_LOGICAL_SWITCH_PORT_FOR_EACH(nbsp, ctx->ovnnb_idl) { |
fc3113bc | 5405 | hash_node = xzalloc(sizeof *hash_node); |
0ee00741 HK |
5406 | hash_node->nbsp = nbsp; |
5407 | hmap_insert(&lports_hmap, &hash_node->node, hash_string(nbsp->name, 0)); | |
fc3113bc RB |
5408 | } |
5409 | ||
5868eb24 | 5410 | SBREC_PORT_BINDING_FOR_EACH(sb, ctx->ovnsb_idl) { |
0ee00741 | 5411 | nbsp = NULL; |
fc3113bc | 5412 | HMAP_FOR_EACH_WITH_HASH(hash_node, node, |
5868eb24 BP |
5413 | hash_string(sb->logical_port, 0), |
5414 | &lports_hmap) { | |
0ee00741 HK |
5415 | if (!strcmp(sb->logical_port, hash_node->nbsp->name)) { |
5416 | nbsp = hash_node->nbsp; | |
fc3113bc RB |
5417 | break; |
5418 | } | |
f93818dd RB |
5419 | } |
5420 | ||
0ee00741 | 5421 | if (!nbsp) { |
dcda6e0d | 5422 | /* The logical port doesn't exist for this port binding. This can |
2e2762d4 | 5423 | * happen under normal circumstances when ovn-northd hasn't gotten |
dcda6e0d | 5424 | * around to pruning the Port_Binding yet. */ |
f93818dd RB |
5425 | continue; |
5426 | } | |
5427 | ||
0ee00741 | 5428 | if (sb->chassis && (!nbsp->up || !*nbsp->up)) { |
f93818dd | 5429 | bool up = true; |
0ee00741 HK |
5430 | nbrec_logical_switch_port_set_up(nbsp, &up, 1); |
5431 | } else if (!sb->chassis && (!nbsp->up || *nbsp->up)) { | |
f93818dd | 5432 | bool up = false; |
0ee00741 | 5433 | nbrec_logical_switch_port_set_up(nbsp, &up, 1); |
f93818dd RB |
5434 | } |
5435 | } | |
fc3113bc | 5436 | |
4ec3d7c7 | 5437 | HMAP_FOR_EACH_POP(hash_node, node, &lports_hmap) { |
fc3113bc RB |
5438 | free(hash_node); |
5439 | } | |
5440 | hmap_destroy(&lports_hmap); | |
ac0630a2 | 5441 | } |
45f98d4c | 5442 | |
281977f7 NS |
/* DHCPv4 options that this ovn-northd supports.  These entries (name, code,
 * and type macros from ovn/lib/ovn-dhcp.h) are pushed into the southbound
 * DHCP_Options table by check_and_add_supported_dhcp_opts_to_sb_db(). */
static struct dhcp_opts_map supported_dhcp_opts[] = {
    OFFERIP,
    DHCP_OPT_NETMASK,
    DHCP_OPT_ROUTER,
    DHCP_OPT_DNS_SERVER,
    DHCP_OPT_LOG_SERVER,
    DHCP_OPT_LPR_SERVER,
    DHCP_OPT_SWAP_SERVER,
    DHCP_OPT_POLICY_FILTER,
    DHCP_OPT_ROUTER_SOLICITATION,
    DHCP_OPT_NIS_SERVER,
    DHCP_OPT_NTP_SERVER,
    DHCP_OPT_SERVER_ID,
    DHCP_OPT_TFTP_SERVER,
    DHCP_OPT_CLASSLESS_STATIC_ROUTE,
    DHCP_OPT_MS_CLASSLESS_STATIC_ROUTE,
    DHCP_OPT_IP_FORWARD_ENABLE,
    DHCP_OPT_ROUTER_DISCOVERY,
    DHCP_OPT_ETHERNET_ENCAP,
    DHCP_OPT_DEFAULT_TTL,
    DHCP_OPT_TCP_TTL,
    DHCP_OPT_MTU,
    DHCP_OPT_LEASE_TIME,
    DHCP_OPT_T1,
    DHCP_OPT_T2
};
5469 | ||
33ac3c83 NS |
/* DHCPv6 options that this ovn-northd supports, synced into the southbound
 * DHCPv6_Options table by check_and_add_supported_dhcpv6_opts_to_sb_db(). */
static struct dhcp_opts_map supported_dhcpv6_opts[] = {
    DHCPV6_OPT_IA_ADDR,
    DHCPV6_OPT_SERVER_ID,
    DHCPV6_OPT_DOMAIN_SEARCH,
    DHCPV6_OPT_DNS_SERVER
};
5476 | ||
281977f7 NS |
5477 | static void |
5478 | check_and_add_supported_dhcp_opts_to_sb_db(struct northd_context *ctx) | |
5479 | { | |
5480 | struct hmap dhcp_opts_to_add = HMAP_INITIALIZER(&dhcp_opts_to_add); | |
5481 | for (size_t i = 0; (i < sizeof(supported_dhcp_opts) / | |
5482 | sizeof(supported_dhcp_opts[0])); i++) { | |
5483 | hmap_insert(&dhcp_opts_to_add, &supported_dhcp_opts[i].hmap_node, | |
5484 | dhcp_opt_hash(supported_dhcp_opts[i].name)); | |
5485 | } | |
5486 | ||
5487 | const struct sbrec_dhcp_options *opt_row, *opt_row_next; | |
5488 | SBREC_DHCP_OPTIONS_FOR_EACH_SAFE(opt_row, opt_row_next, ctx->ovnsb_idl) { | |
5489 | struct dhcp_opts_map *dhcp_opt = | |
5490 | dhcp_opts_find(&dhcp_opts_to_add, opt_row->name); | |
5491 | if (dhcp_opt) { | |
5492 | hmap_remove(&dhcp_opts_to_add, &dhcp_opt->hmap_node); | |
5493 | } else { | |
5494 | sbrec_dhcp_options_delete(opt_row); | |
5495 | } | |
5496 | } | |
5497 | ||
5498 | struct dhcp_opts_map *opt; | |
5499 | HMAP_FOR_EACH (opt, hmap_node, &dhcp_opts_to_add) { | |
5500 | struct sbrec_dhcp_options *sbrec_dhcp_option = | |
5501 | sbrec_dhcp_options_insert(ctx->ovnsb_txn); | |
5502 | sbrec_dhcp_options_set_name(sbrec_dhcp_option, opt->name); | |
5503 | sbrec_dhcp_options_set_code(sbrec_dhcp_option, opt->code); | |
5504 | sbrec_dhcp_options_set_type(sbrec_dhcp_option, opt->type); | |
5505 | } | |
5506 | ||
5507 | hmap_destroy(&dhcp_opts_to_add); | |
5508 | } | |
5509 | ||
33ac3c83 NS |
5510 | static void |
5511 | check_and_add_supported_dhcpv6_opts_to_sb_db(struct northd_context *ctx) | |
5512 | { | |
5513 | struct hmap dhcpv6_opts_to_add = HMAP_INITIALIZER(&dhcpv6_opts_to_add); | |
5514 | for (size_t i = 0; (i < sizeof(supported_dhcpv6_opts) / | |
5515 | sizeof(supported_dhcpv6_opts[0])); i++) { | |
5516 | hmap_insert(&dhcpv6_opts_to_add, &supported_dhcpv6_opts[i].hmap_node, | |
5517 | dhcp_opt_hash(supported_dhcpv6_opts[i].name)); | |
5518 | } | |
5519 | ||
5520 | const struct sbrec_dhcpv6_options *opt_row, *opt_row_next; | |
5521 | SBREC_DHCPV6_OPTIONS_FOR_EACH_SAFE(opt_row, opt_row_next, ctx->ovnsb_idl) { | |
5522 | struct dhcp_opts_map *dhcp_opt = | |
5523 | dhcp_opts_find(&dhcpv6_opts_to_add, opt_row->name); | |
5524 | if (dhcp_opt) { | |
5525 | hmap_remove(&dhcpv6_opts_to_add, &dhcp_opt->hmap_node); | |
5526 | } else { | |
5527 | sbrec_dhcpv6_options_delete(opt_row); | |
5528 | } | |
5529 | } | |
5530 | ||
5531 | struct dhcp_opts_map *opt; | |
5532 | HMAP_FOR_EACH(opt, hmap_node, &dhcpv6_opts_to_add) { | |
5533 | struct sbrec_dhcpv6_options *sbrec_dhcpv6_option = | |
5534 | sbrec_dhcpv6_options_insert(ctx->ovnsb_txn); | |
5535 | sbrec_dhcpv6_options_set_name(sbrec_dhcpv6_option, opt->name); | |
5536 | sbrec_dhcpv6_options_set_code(sbrec_dhcpv6_option, opt->code); | |
5537 | sbrec_dhcpv6_options_set_type(sbrec_dhcpv6_option, opt->type); | |
5538 | } | |
5539 | ||
5540 | hmap_destroy(&dhcpv6_opts_to_add); | |
5541 | } | |
5542 | ||
fa183acc BP |
5543 | /* Updates the sb_cfg and hv_cfg columns in the northbound NB_Global table. */ |
5544 | static void | |
5545 | update_northbound_cfg(struct northd_context *ctx, | |
5546 | struct ovsdb_idl_loop *sb_loop) | |
5547 | { | |
5548 | /* Update northbound sb_cfg if appropriate. */ | |
5549 | const struct nbrec_nb_global *nbg = nbrec_nb_global_first(ctx->ovnnb_idl); | |
5550 | int64_t sb_cfg = sb_loop->cur_cfg; | |
5551 | if (nbg && sb_cfg && nbg->sb_cfg != sb_cfg) { | |
5552 | nbrec_nb_global_set_sb_cfg(nbg, sb_cfg); | |
5553 | } | |
5554 | ||
5555 | /* Update northbound hv_cfg if appropriate. */ | |
5556 | if (nbg) { | |
5557 | /* Find minimum nb_cfg among all chassis. */ | |
5558 | const struct sbrec_chassis *chassis; | |
5559 | int64_t hv_cfg = nbg->nb_cfg; | |
5560 | SBREC_CHASSIS_FOR_EACH (chassis, ctx->ovnsb_idl) { | |
5561 | if (chassis->nb_cfg < hv_cfg) { | |
5562 | hv_cfg = chassis->nb_cfg; | |
5563 | } | |
5564 | } | |
5565 | ||
5566 | /* Update hv_cfg. */ | |
5567 | if (nbg->hv_cfg != hv_cfg) { | |
5568 | nbrec_nb_global_set_hv_cfg(nbg, hv_cfg); | |
5569 | } | |
5570 | } | |
5571 | } | |
5572 | ||
5573 | /* Handle a fairly small set of changes in the southbound database. */ | |
5574 | static void | |
5575 | ovnsb_db_run(struct northd_context *ctx, struct ovsdb_idl_loop *sb_loop) | |
5576 | { | |
5577 | if (!ctx->ovnnb_txn || !ovsdb_idl_has_ever_connected(ctx->ovnsb_idl)) { | |
5578 | return; | |
5579 | } | |
5580 | ||
5581 | update_logical_port_status(ctx); | |
5582 | update_northbound_cfg(ctx, sb_loop); | |
5583 | } | |
5584 | \f | |
ac0630a2 RB |
5585 | static void |
5586 | parse_options(int argc OVS_UNUSED, char *argv[] OVS_UNUSED) | |
5587 | { | |
5588 | enum { | |
67d9b930 | 5589 | DAEMON_OPTION_ENUMS, |
ac0630a2 | 5590 | VLOG_OPTION_ENUMS, |
e18a1d08 | 5591 | SSL_OPTION_ENUMS, |
ac0630a2 RB |
5592 | }; |
5593 | static const struct option long_options[] = { | |
ec78987f | 5594 | {"ovnsb-db", required_argument, NULL, 'd'}, |
ac0630a2 RB |
5595 | {"ovnnb-db", required_argument, NULL, 'D'}, |
5596 | {"help", no_argument, NULL, 'h'}, | |
5597 | {"options", no_argument, NULL, 'o'}, | |
5598 | {"version", no_argument, NULL, 'V'}, | |
67d9b930 | 5599 | DAEMON_LONG_OPTIONS, |
ac0630a2 RB |
5600 | VLOG_LONG_OPTIONS, |
5601 | STREAM_SSL_LONG_OPTIONS, | |
5602 | {NULL, 0, NULL, 0}, | |
5603 | }; | |
5604 | char *short_options = ovs_cmdl_long_options_to_short_options(long_options); | |
5605 | ||
5606 | for (;;) { | |
5607 | int c; | |
5608 | ||
5609 | c = getopt_long(argc, argv, short_options, long_options, NULL); | |
5610 | if (c == -1) { | |
5611 | break; | |
5612 | } | |
5613 | ||
5614 | switch (c) { | |
67d9b930 | 5615 | DAEMON_OPTION_HANDLERS; |
ac0630a2 RB |
5616 | VLOG_OPTION_HANDLERS; |
5617 | STREAM_SSL_OPTION_HANDLERS; | |
5618 | ||
5619 | case 'd': | |
ec78987f | 5620 | ovnsb_db = optarg; |
ac0630a2 RB |
5621 | break; |
5622 | ||
5623 | case 'D': | |
5624 | ovnnb_db = optarg; | |
5625 | break; | |
5626 | ||
5627 | case 'h': | |
5628 | usage(); | |
5629 | exit(EXIT_SUCCESS); | |
5630 | ||
5631 | case 'o': | |
5632 | ovs_cmdl_print_options(long_options); | |
5633 | exit(EXIT_SUCCESS); | |
5634 | ||
5635 | case 'V': | |
5636 | ovs_print_version(0, 0); | |
5637 | exit(EXIT_SUCCESS); | |
5638 | ||
5639 | default: | |
5640 | break; | |
5641 | } | |
5642 | } | |
5643 | ||
ec78987f | 5644 | if (!ovnsb_db) { |
60bdd011 | 5645 | ovnsb_db = default_sb_db(); |
ac0630a2 RB |
5646 | } |
5647 | ||
5648 | if (!ovnnb_db) { | |
60bdd011 | 5649 | ovnnb_db = default_nb_db(); |
ac0630a2 RB |
5650 | } |
5651 | ||
5652 | free(short_options); | |
5653 | } | |
5654 | ||
5868eb24 BP |
/* Registers 'col' with 'idl' but suppresses change alerts for it: ovn-northd
 * is the writer of these columns and need not wake up when they change. */
static void
add_column_noalert(struct ovsdb_idl *idl,
                   const struct ovsdb_idl_column *col)
{
    ovsdb_idl_add_column(idl, col);
    ovsdb_idl_omit_alert(idl, col);
}
5662 | ||
ac0630a2 RB |
int
main(int argc, char *argv[])
{
    int res = EXIT_SUCCESS;
    struct unixctl_server *unixctl;
    int retval;
    bool exiting;

    /* Process setup: signals, title, options, daemonization. */
    fatal_ignore_sigpipe();
    ovs_cmdl_proctitle_init(argc, argv);
    set_program_name(argv[0]);
    service_start(&argc, &argv);
    parse_options(argc, argv);

    daemonize_start(false);

    retval = unixctl_server_create(NULL, &unixctl);
    if (retval) {
        exit(EXIT_FAILURE);
    }
    /* "exit" command flips 'exiting' via ovn_northd_exit(). */
    unixctl_command_register("exit", "", 0, 0, ovn_northd_exit, &exiting);

    daemonize_complete();

    /* We want to detect (almost) all changes to the ovn-nb db. */
    struct ovsdb_idl_loop ovnnb_idl_loop = OVSDB_IDL_LOOP_INITIALIZER(
        ovsdb_idl_create(ovnnb_db, &nbrec_idl_class, true, true));
    /* sb_cfg/hv_cfg are written by ovn-northd itself; no alerts needed. */
    ovsdb_idl_omit_alert(ovnnb_idl_loop.idl, &nbrec_nb_global_col_sb_cfg);
    ovsdb_idl_omit_alert(ovnnb_idl_loop.idl, &nbrec_nb_global_col_hv_cfg);

    /* We want to detect only selected changes to the ovn-sb db.
     * Each table/column that ovn-northd reads or writes must be registered
     * explicitly below; columns ovn-northd only writes are registered via
     * add_column_noalert() so their changes do not wake us up. */
    struct ovsdb_idl_loop ovnsb_idl_loop = OVSDB_IDL_LOOP_INITIALIZER(
        ovsdb_idl_create(ovnsb_db, &sbrec_idl_class, false, true));

    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_sb_global);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_sb_global_col_nb_cfg);

    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_logical_flow);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_logical_flow_col_logical_datapath);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_pipeline);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_table_id);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_priority);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_match);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_actions);

    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_multicast_group);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_multicast_group_col_datapath);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_multicast_group_col_tunnel_key);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_multicast_group_col_name);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_multicast_group_col_ports);

    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_datapath_binding);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_datapath_binding_col_tunnel_key);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_datapath_binding_col_external_ids);

    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_port_binding);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_datapath);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_port_binding_col_logical_port);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_port_binding_col_tunnel_key);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_port_binding_col_parent_port);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_tag);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_type);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_options);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_mac);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_port_binding_col_nat_addresses);
    /* 'chassis' is written by ovn-controller, so we do want alerts here. */
    ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_port_binding_col_chassis);
    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_mac_binding);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_mac_binding_col_datapath);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_mac_binding_col_ip);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_mac_binding_col_mac);
    add_column_noalert(ovnsb_idl_loop.idl,
                       &sbrec_mac_binding_col_logical_port);
    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_dhcp_options);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dhcp_options_col_code);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dhcp_options_col_type);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dhcp_options_col_name);
    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_dhcpv6_options);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dhcpv6_options_col_code);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dhcpv6_options_col_type);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dhcpv6_options_col_name);
    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_address_set);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_address_set_col_name);
    add_column_noalert(ovnsb_idl_loop.idl, &sbrec_address_set_col_addresses);

    /* 'nb_cfg' is written by each chassis; alerts wanted. */
    ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_chassis);
    ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_chassis_col_nb_cfg);

    /* Main loop. */
    exiting = false;
    while (!exiting) {
        /* Run both IDL loops; the txn members are NULL when the
         * corresponding database is not ready to commit this iteration. */
        struct northd_context ctx = {
            .ovnnb_idl = ovnnb_idl_loop.idl,
            .ovnnb_txn = ovsdb_idl_loop_run(&ovnnb_idl_loop),
            .ovnsb_idl = ovnsb_idl_loop.idl,
            .ovnsb_txn = ovsdb_idl_loop_run(&ovnsb_idl_loop),
        };

        ovnnb_db_run(&ctx, &ovnsb_idl_loop);
        ovnsb_db_run(&ctx, &ovnsb_idl_loop);
        if (ctx.ovnsb_txn) {
            check_and_add_supported_dhcp_opts_to_sb_db(&ctx);
            check_and_add_supported_dhcpv6_opts_to_sb_db(&ctx);
        }

        unixctl_server_run(unixctl);
        unixctl_server_wait(unixctl);
        if (exiting) {
            poll_immediate_wake();
        }
        ovsdb_idl_loop_commit_and_wait(&ovnnb_idl_loop);
        ovsdb_idl_loop_commit_and_wait(&ovnsb_idl_loop);

        poll_block();
        /* On Windows, the service manager may ask us to stop. */
        if (should_service_stop()) {
            exiting = true;
        }
    }

    /* Orderly shutdown. */
    unixctl_server_destroy(unixctl);
    ovsdb_idl_loop_destroy(&ovnnb_idl_loop);
    ovsdb_idl_loop_destroy(&ovnsb_idl_loop);
    service_stop();

    exit(res);
}
7b303ff9 AW |
5797 | |
5798 | static void | |
5799 | ovn_northd_exit(struct unixctl_conn *conn, int argc OVS_UNUSED, | |
5800 | const char *argv[] OVS_UNUSED, void *exiting_) | |
5801 | { | |
5802 | bool *exiting = exiting_; | |
5803 | *exiting = true; | |
5804 | ||
5805 | unixctl_command_reply(conn, NULL); | |
5806 | } |