]>
Commit | Line | Data |
---|---|---|
ac0630a2 RB |
1 | /* |
2 | * Licensed under the Apache License, Version 2.0 (the "License"); | |
3 | * you may not use this file except in compliance with the License. | |
4 | * You may obtain a copy of the License at: | |
5 | * | |
6 | * http://www.apache.org/licenses/LICENSE-2.0 | |
7 | * | |
8 | * Unless required by applicable law or agreed to in writing, software | |
9 | * distributed under the License is distributed on an "AS IS" BASIS, | |
10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
11 | * See the License for the specific language governing permissions and | |
12 | * limitations under the License. | |
13 | */ | |
14 | ||
15 | #include <config.h> | |
16 | ||
17 | #include <getopt.h> | |
18 | #include <stdlib.h> | |
19 | #include <stdio.h> | |
20 | ||
21 | #include "command-line.h" | |
67d9b930 | 22 | #include "daemon.h" |
ac0630a2 | 23 | #include "dirs.h" |
3e8a2ad1 | 24 | #include "openvswitch/dynamic-string.h" |
ac0630a2 | 25 | #include "fatal-signal.h" |
4edcdcf4 | 26 | #include "hash.h" |
ee89ea7b TW |
27 | #include "openvswitch/hmap.h" |
28 | #include "openvswitch/json.h" | |
8b2ed684 | 29 | #include "ovn/lex.h" |
281977f7 | 30 | #include "ovn/lib/ovn-dhcp.h" |
e3df8838 BP |
31 | #include "ovn/lib/ovn-nb-idl.h" |
32 | #include "ovn/lib/ovn-sb-idl.h" | |
218351dd | 33 | #include "ovn/lib/ovn-util.h" |
064d7f84 | 34 | #include "packets.h" |
ac0630a2 | 35 | #include "poll-loop.h" |
5868eb24 | 36 | #include "smap.h" |
7a15be69 | 37 | #include "sset.h" |
ac0630a2 RB |
38 | #include "stream.h" |
39 | #include "stream-ssl.h" | |
7b303ff9 | 40 | #include "unixctl.h" |
ac0630a2 | 41 | #include "util.h" |
4edcdcf4 | 42 | #include "uuid.h" |
ac0630a2 RB |
43 | #include "openvswitch/vlog.h" |
44 | ||
2e2762d4 | 45 | VLOG_DEFINE_THIS_MODULE(ovn_northd); |
ac0630a2 | 46 | |
7b303ff9 AW |
47 | static unixctl_cb_func ovn_northd_exit; |
48 | ||
/* Bundle of database handles passed to the build_*() functions: one IDL and
 * one transaction pointer for each of the two OVN databases. */
struct northd_context {
    struct ovsdb_idl *ovnnb_idl;        /* Northbound database IDL. */
    struct ovsdb_idl *ovnsb_idl;        /* Southbound database IDL. */
    struct ovsdb_idl_txn *ovnnb_txn;    /* Northbound transaction; may be
                                         * NULL (build_ipam() checks). */
    struct ovsdb_idl_txn *ovnsb_txn;    /* Southbound transaction. */
};
55 | ||
ac0630a2 | 56 | static const char *ovnnb_db; |
ec78987f | 57 | static const char *ovnsb_db; |
ac0630a2 | 58 | |
8639f9be ND |
59 | #define MAC_ADDR_PREFIX 0x0A0000000000ULL |
60 | #define MAC_ADDR_SPACE 0xffffff | |
61 | ||
62 | /* MAC address management (macam) table of "struct eth_addr"s, that holds the | |
63 | * MAC addresses allocated by the OVN ipam module. */ | |
64 | static struct hmap macam = HMAP_INITIALIZER(&macam); | |
880fcd14 BP |
65 | \f |
66 | /* Pipeline stages. */ | |
ac0630a2 | 67 | |
880fcd14 BP |
/* Pipelines of an OVN logical flow table.
 *
 * The numeric values are significant: OVN_STAGE_BUILD() shifts the pipeline
 * into bit 8 of an "enum ovn_stage". */
enum ovn_pipeline {
    P_IN,                       /* Ingress pipeline. */
    P_OUT                       /* Egress pipeline. */
};
091e3af9 | 73 | |
880fcd14 BP |
/* Kinds of OVN logical datapath that ovn-northd works with.
 *
 * The numeric values are significant: OVN_STAGE_BUILD() shifts the datapath
 * type into bit 9 of an "enum ovn_stage". */
enum ovn_datapath_type {
    DP_SWITCH,                  /* OVN logical switch. */
    DP_ROUTER                   /* OVN logical router. */
};
79 | ||
880fcd14 BP |
/* Composes an "enum ovn_stage" value: DP_TYPE is shifted into bit 9, PIPELINE
 * into bit 8, and TABLE is ORed into the low-order bits.
 *
 * (ovn_stage_build() is preferable for type safety, but an inline function
 * can't appear in an enum definition or a switch case, so this macro exists
 * for those contexts.) */
#define OVN_STAGE_BUILD(DP_TYPE, PIPELINE, TABLE) \
    (((DP_TYPE) << 9) | ((PIPELINE) << 8) | (TABLE))
86 | ||
87 | /* A stage within an OVN logical switch or router. | |
091e3af9 | 88 | * |
880fcd14 BP |
89 | * An "enum ovn_stage" indicates whether the stage is part of a logical switch |
90 | * or router, whether the stage is part of the ingress or egress pipeline, and | |
91 | * the table within that pipeline. The first three components are combined to | |
685f4dfe | 92 | * form the stage's full name, e.g. S_SWITCH_IN_PORT_SEC_L2, |
880fcd14 BP |
93 | * S_ROUTER_OUT_DELIVERY. */ |
94 | enum ovn_stage { | |
e0c9e58b JP |
95 | #define PIPELINE_STAGES \ |
96 | /* Logical switch ingress stages. */ \ | |
685f4dfe NS |
97 | PIPELINE_STAGE(SWITCH, IN, PORT_SEC_L2, 0, "ls_in_port_sec_l2") \ |
98 | PIPELINE_STAGE(SWITCH, IN, PORT_SEC_IP, 1, "ls_in_port_sec_ip") \ | |
99 | PIPELINE_STAGE(SWITCH, IN, PORT_SEC_ND, 2, "ls_in_port_sec_nd") \ | |
100 | PIPELINE_STAGE(SWITCH, IN, PRE_ACL, 3, "ls_in_pre_acl") \ | |
7a15be69 GS |
101 | PIPELINE_STAGE(SWITCH, IN, PRE_LB, 4, "ls_in_pre_lb") \ |
102 | PIPELINE_STAGE(SWITCH, IN, PRE_STATEFUL, 5, "ls_in_pre_stateful") \ | |
103 | PIPELINE_STAGE(SWITCH, IN, ACL, 6, "ls_in_acl") \ | |
104 | PIPELINE_STAGE(SWITCH, IN, LB, 7, "ls_in_lb") \ | |
105 | PIPELINE_STAGE(SWITCH, IN, STATEFUL, 8, "ls_in_stateful") \ | |
106 | PIPELINE_STAGE(SWITCH, IN, ARP_ND_RSP, 9, "ls_in_arp_rsp") \ | |
281977f7 NS |
107 | PIPELINE_STAGE(SWITCH, IN, DHCP_OPTIONS, 10, "ls_in_dhcp_options") \ |
108 | PIPELINE_STAGE(SWITCH, IN, DHCP_RESPONSE, 11, "ls_in_dhcp_response") \ | |
109 | PIPELINE_STAGE(SWITCH, IN, L2_LKUP, 12, "ls_in_l2_lkup") \ | |
e0c9e58b JP |
110 | \ |
111 | /* Logical switch egress stages. */ \ | |
7a15be69 GS |
112 | PIPELINE_STAGE(SWITCH, OUT, PRE_LB, 0, "ls_out_pre_lb") \ |
113 | PIPELINE_STAGE(SWITCH, OUT, PRE_ACL, 1, "ls_out_pre_acl") \ | |
114 | PIPELINE_STAGE(SWITCH, OUT, PRE_STATEFUL, 2, "ls_out_pre_stateful") \ | |
115 | PIPELINE_STAGE(SWITCH, OUT, LB, 3, "ls_out_lb") \ | |
116 | PIPELINE_STAGE(SWITCH, OUT, ACL, 4, "ls_out_acl") \ | |
117 | PIPELINE_STAGE(SWITCH, OUT, STATEFUL, 5, "ls_out_stateful") \ | |
118 | PIPELINE_STAGE(SWITCH, OUT, PORT_SEC_IP, 6, "ls_out_port_sec_ip") \ | |
119 | PIPELINE_STAGE(SWITCH, OUT, PORT_SEC_L2, 7, "ls_out_port_sec_l2") \ | |
e0c9e58b JP |
120 | \ |
121 | /* Logical router ingress stages. */ \ | |
122 | PIPELINE_STAGE(ROUTER, IN, ADMISSION, 0, "lr_in_admission") \ | |
123 | PIPELINE_STAGE(ROUTER, IN, IP_INPUT, 1, "lr_in_ip_input") \ | |
de297547 GS |
124 | PIPELINE_STAGE(ROUTER, IN, UNSNAT, 2, "lr_in_unsnat") \ |
125 | PIPELINE_STAGE(ROUTER, IN, DNAT, 3, "lr_in_dnat") \ | |
126 | PIPELINE_STAGE(ROUTER, IN, IP_ROUTING, 4, "lr_in_ip_routing") \ | |
127 | PIPELINE_STAGE(ROUTER, IN, ARP_RESOLVE, 5, "lr_in_arp_resolve") \ | |
128 | PIPELINE_STAGE(ROUTER, IN, ARP_REQUEST, 6, "lr_in_arp_request") \ | |
e0c9e58b JP |
129 | \ |
130 | /* Logical router egress stages. */ \ | |
de297547 GS |
131 | PIPELINE_STAGE(ROUTER, OUT, SNAT, 0, "lr_out_snat") \ |
132 | PIPELINE_STAGE(ROUTER, OUT, DELIVERY, 1, "lr_out_delivery") | |
880fcd14 BP |
133 | |
134 | #define PIPELINE_STAGE(DP_TYPE, PIPELINE, STAGE, TABLE, NAME) \ | |
135 | S_##DP_TYPE##_##PIPELINE##_##STAGE \ | |
136 | = OVN_STAGE_BUILD(DP_##DP_TYPE, P_##PIPELINE, TABLE), | |
137 | PIPELINE_STAGES | |
138 | #undef PIPELINE_STAGE | |
091e3af9 JP |
139 | }; |
140 | ||
6bb4a18e JP |
141 | /* Due to various hard-coded priorities needed to implement ACLs, the |
142 | * northbound database supports a smaller range of ACL priorities than | |
143 | * are available to logical flows. This value is added to an ACL | |
144 | * priority to determine the ACL's logical flow priority. */ | |
145 | #define OVN_ACL_PRI_OFFSET 1000 | |
146 | ||
facf8652 | 147 | #define REGBIT_CONNTRACK_DEFRAG "reg0[0]" |
fa313a8c | 148 | #define REGBIT_CONNTRACK_COMMIT "reg0[1]" |
7a15be69 | 149 | #define REGBIT_CONNTRACK_NAT "reg0[2]" |
281977f7 | 150 | #define REGBIT_DHCP_OPTS_RESULT "reg0[3]" |
facf8652 | 151 | |
880fcd14 BP |
152 | /* Returns an "enum ovn_stage" built from the arguments. */ |
153 | static enum ovn_stage | |
154 | ovn_stage_build(enum ovn_datapath_type dp_type, enum ovn_pipeline pipeline, | |
155 | uint8_t table) | |
156 | { | |
157 | return OVN_STAGE_BUILD(dp_type, pipeline, table); | |
158 | } | |
159 | ||
160 | /* Returns the pipeline to which 'stage' belongs. */ | |
161 | static enum ovn_pipeline | |
162 | ovn_stage_get_pipeline(enum ovn_stage stage) | |
163 | { | |
164 | return (stage >> 8) & 1; | |
165 | } | |
166 | ||
167 | /* Returns the table to which 'stage' belongs. */ | |
168 | static uint8_t | |
169 | ovn_stage_get_table(enum ovn_stage stage) | |
170 | { | |
171 | return stage & 0xff; | |
172 | } | |
173 | ||
174 | /* Returns a string name for 'stage'. */ | |
175 | static const char * | |
176 | ovn_stage_to_str(enum ovn_stage stage) | |
177 | { | |
178 | switch (stage) { | |
179 | #define PIPELINE_STAGE(DP_TYPE, PIPELINE, STAGE, TABLE, NAME) \ | |
180 | case S_##DP_TYPE##_##PIPELINE##_##STAGE: return NAME; | |
181 | PIPELINE_STAGES | |
182 | #undef PIPELINE_STAGE | |
183 | default: return "<unknown>"; | |
184 | } | |
185 | } | |
9a9961d2 BP |
186 | |
187 | /* Returns the type of the datapath to which a flow with the given 'stage' may | |
188 | * be added. */ | |
189 | static enum ovn_datapath_type | |
190 | ovn_stage_to_datapath_type(enum ovn_stage stage) | |
191 | { | |
192 | switch (stage) { | |
193 | #define PIPELINE_STAGE(DP_TYPE, PIPELINE, STAGE, TABLE, NAME) \ | |
194 | case S_##DP_TYPE##_##PIPELINE##_##STAGE: return DP_##DP_TYPE; | |
195 | PIPELINE_STAGES | |
196 | #undef PIPELINE_STAGE | |
197 | default: OVS_NOT_REACHED(); | |
198 | } | |
199 | } | |
880fcd14 | 200 | \f |
ac0630a2 RB |
201 | static void |
202 | usage(void) | |
203 | { | |
204 | printf("\ | |
205 | %s: OVN northbound management daemon\n\ | |
206 | usage: %s [OPTIONS]\n\ | |
207 | \n\ | |
208 | Options:\n\ | |
209 | --ovnnb-db=DATABASE connect to ovn-nb database at DATABASE\n\ | |
210 | (default: %s)\n\ | |
ec78987f | 211 | --ovnsb-db=DATABASE connect to ovn-sb database at DATABASE\n\ |
ac0630a2 RB |
212 | (default: %s)\n\ |
213 | -h, --help display this help message\n\ | |
214 | -o, --options list available options\n\ | |
215 | -V, --version display version information\n\ | |
60bdd011 | 216 | ", program_name, program_name, default_nb_db(), default_sb_db()); |
67d9b930 | 217 | daemon_usage(); |
ac0630a2 RB |
218 | vlog_usage(); |
219 | stream_usage("database", true, true, false); | |
220 | } | |
221 | \f | |
5868eb24 BP |
222 | struct tnlid_node { |
223 | struct hmap_node hmap_node; | |
224 | uint32_t tnlid; | |
225 | }; | |
226 | ||
227 | static void | |
228 | destroy_tnlids(struct hmap *tnlids) | |
4edcdcf4 | 229 | { |
4ec3d7c7 DDP |
230 | struct tnlid_node *node; |
231 | HMAP_FOR_EACH_POP (node, hmap_node, tnlids) { | |
5868eb24 BP |
232 | free(node); |
233 | } | |
234 | hmap_destroy(tnlids); | |
235 | } | |
236 | ||
237 | static void | |
238 | add_tnlid(struct hmap *set, uint32_t tnlid) | |
239 | { | |
240 | struct tnlid_node *node = xmalloc(sizeof *node); | |
241 | hmap_insert(set, &node->hmap_node, hash_int(tnlid, 0)); | |
242 | node->tnlid = tnlid; | |
4edcdcf4 RB |
243 | } |
244 | ||
4edcdcf4 | 245 | static bool |
5868eb24 | 246 | tnlid_in_use(const struct hmap *set, uint32_t tnlid) |
4edcdcf4 | 247 | { |
5868eb24 BP |
248 | const struct tnlid_node *node; |
249 | HMAP_FOR_EACH_IN_BUCKET (node, hmap_node, hash_int(tnlid, 0), set) { | |
250 | if (node->tnlid == tnlid) { | |
251 | return true; | |
252 | } | |
253 | } | |
254 | return false; | |
255 | } | |
4edcdcf4 | 256 | |
5868eb24 BP |
257 | static uint32_t |
258 | allocate_tnlid(struct hmap *set, const char *name, uint32_t max, | |
259 | uint32_t *hint) | |
260 | { | |
261 | for (uint32_t tnlid = *hint + 1; tnlid != *hint; | |
262 | tnlid = tnlid + 1 <= max ? tnlid + 1 : 1) { | |
263 | if (!tnlid_in_use(set, tnlid)) { | |
264 | add_tnlid(set, tnlid); | |
265 | *hint = tnlid; | |
266 | return tnlid; | |
267 | } | |
4edcdcf4 RB |
268 | } |
269 | ||
5868eb24 BP |
270 | static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1); |
271 | VLOG_WARN_RL(&rl, "all %s tunnel ids exhausted", name); | |
272 | return 0; | |
273 | } | |
274 | \f | |
9975d7be BP |
275 | /* The 'key' comes from nbs->header_.uuid or nbr->header_.uuid or |
276 | * sb->external_ids:logical-switch. */ | |
5868eb24 BP |
277 | struct ovn_datapath { |
278 | struct hmap_node key_node; /* Index on 'key'. */ | |
9975d7be | 279 | struct uuid key; /* (nbs/nbr)->header_.uuid. */ |
4edcdcf4 | 280 | |
9975d7be BP |
281 | const struct nbrec_logical_switch *nbs; /* May be NULL. */ |
282 | const struct nbrec_logical_router *nbr; /* May be NULL. */ | |
5868eb24 | 283 | const struct sbrec_datapath_binding *sb; /* May be NULL. */ |
4edcdcf4 | 284 | |
5868eb24 | 285 | struct ovs_list list; /* In list of similar records. */ |
4edcdcf4 | 286 | |
9975d7be | 287 | /* Logical switch data. */ |
86e98048 BP |
288 | struct ovn_port **router_ports; |
289 | size_t n_router_ports; | |
9975d7be | 290 | |
5868eb24 BP |
291 | struct hmap port_tnlids; |
292 | uint32_t port_key_hint; | |
293 | ||
294 | bool has_unknown; | |
8639f9be ND |
295 | |
296 | /* IPAM data. */ | |
297 | struct hmap ipam; | |
298 | }; | |
299 | ||
300 | struct macam_node { | |
301 | struct hmap_node hmap_node; | |
302 | struct eth_addr mac_addr; /* Allocated MAC address. */ | |
5868eb24 BP |
303 | }; |
304 | ||
8639f9be ND |
305 | static void |
306 | cleanup_macam(struct hmap *macam) | |
307 | { | |
308 | struct macam_node *node; | |
309 | HMAP_FOR_EACH_POP (node, hmap_node, macam) { | |
310 | free(node); | |
311 | } | |
312 | } | |
313 | ||
314 | struct ipam_node { | |
315 | struct hmap_node hmap_node; | |
316 | uint32_t ip_addr; /* Allocated IP address. */ | |
317 | }; | |
318 | ||
319 | static void | |
320 | destroy_ipam(struct hmap *ipam) | |
321 | { | |
322 | struct ipam_node *node; | |
323 | HMAP_FOR_EACH_POP (node, hmap_node, ipam) { | |
324 | free(node); | |
325 | } | |
326 | hmap_destroy(ipam); | |
327 | } | |
328 | ||
5868eb24 BP |
329 | static struct ovn_datapath * |
330 | ovn_datapath_create(struct hmap *datapaths, const struct uuid *key, | |
9975d7be BP |
331 | const struct nbrec_logical_switch *nbs, |
332 | const struct nbrec_logical_router *nbr, | |
5868eb24 BP |
333 | const struct sbrec_datapath_binding *sb) |
334 | { | |
335 | struct ovn_datapath *od = xzalloc(sizeof *od); | |
336 | od->key = *key; | |
337 | od->sb = sb; | |
9975d7be BP |
338 | od->nbs = nbs; |
339 | od->nbr = nbr; | |
5868eb24 | 340 | hmap_init(&od->port_tnlids); |
8639f9be | 341 | hmap_init(&od->ipam); |
5868eb24 BP |
342 | od->port_key_hint = 0; |
343 | hmap_insert(datapaths, &od->key_node, uuid_hash(&od->key)); | |
344 | return od; | |
345 | } | |
346 | ||
347 | static void | |
348 | ovn_datapath_destroy(struct hmap *datapaths, struct ovn_datapath *od) | |
349 | { | |
350 | if (od) { | |
351 | /* Don't remove od->list. It is used within build_datapaths() as a | |
352 | * private list and once we've exited that function it is not safe to | |
353 | * use it. */ | |
354 | hmap_remove(datapaths, &od->key_node); | |
355 | destroy_tnlids(&od->port_tnlids); | |
8639f9be | 356 | destroy_ipam(&od->ipam); |
86e98048 | 357 | free(od->router_ports); |
5868eb24 BP |
358 | free(od); |
359 | } | |
360 | } | |
361 | ||
9a9961d2 BP |
362 | /* Returns 'od''s datapath type. */ |
363 | static enum ovn_datapath_type | |
364 | ovn_datapath_get_type(const struct ovn_datapath *od) | |
365 | { | |
366 | return od->nbs ? DP_SWITCH : DP_ROUTER; | |
367 | } | |
368 | ||
5868eb24 BP |
369 | static struct ovn_datapath * |
370 | ovn_datapath_find(struct hmap *datapaths, const struct uuid *uuid) | |
371 | { | |
372 | struct ovn_datapath *od; | |
373 | ||
374 | HMAP_FOR_EACH_WITH_HASH (od, key_node, uuid_hash(uuid), datapaths) { | |
375 | if (uuid_equals(uuid, &od->key)) { | |
376 | return od; | |
377 | } | |
378 | } | |
379 | return NULL; | |
380 | } | |
381 | ||
382 | static struct ovn_datapath * | |
383 | ovn_datapath_from_sbrec(struct hmap *datapaths, | |
384 | const struct sbrec_datapath_binding *sb) | |
385 | { | |
386 | struct uuid key; | |
387 | ||
9975d7be BP |
388 | if (!smap_get_uuid(&sb->external_ids, "logical-switch", &key) && |
389 | !smap_get_uuid(&sb->external_ids, "logical-router", &key)) { | |
5868eb24 BP |
390 | return NULL; |
391 | } | |
392 | return ovn_datapath_find(datapaths, &key); | |
393 | } | |
394 | ||
5412db30 J |
395 | static bool |
396 | lrouter_is_enabled(const struct nbrec_logical_router *lrouter) | |
397 | { | |
398 | return !lrouter->enabled || *lrouter->enabled; | |
399 | } | |
400 | ||
5868eb24 BP |
401 | static void |
402 | join_datapaths(struct northd_context *ctx, struct hmap *datapaths, | |
403 | struct ovs_list *sb_only, struct ovs_list *nb_only, | |
404 | struct ovs_list *both) | |
405 | { | |
406 | hmap_init(datapaths); | |
417e7e66 BW |
407 | ovs_list_init(sb_only); |
408 | ovs_list_init(nb_only); | |
409 | ovs_list_init(both); | |
5868eb24 BP |
410 | |
411 | const struct sbrec_datapath_binding *sb, *sb_next; | |
412 | SBREC_DATAPATH_BINDING_FOR_EACH_SAFE (sb, sb_next, ctx->ovnsb_idl) { | |
413 | struct uuid key; | |
9975d7be BP |
414 | if (!smap_get_uuid(&sb->external_ids, "logical-switch", &key) && |
415 | !smap_get_uuid(&sb->external_ids, "logical-router", &key)) { | |
416 | ovsdb_idl_txn_add_comment( | |
417 | ctx->ovnsb_txn, | |
418 | "deleting Datapath_Binding "UUID_FMT" that lacks " | |
419 | "external-ids:logical-switch and " | |
420 | "external-ids:logical-router", | |
421 | UUID_ARGS(&sb->header_.uuid)); | |
5868eb24 BP |
422 | sbrec_datapath_binding_delete(sb); |
423 | continue; | |
424 | } | |
425 | ||
426 | if (ovn_datapath_find(datapaths, &key)) { | |
427 | static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1); | |
9975d7be BP |
428 | VLOG_INFO_RL( |
429 | &rl, "deleting Datapath_Binding "UUID_FMT" with " | |
430 | "duplicate external-ids:logical-switch/router "UUID_FMT, | |
431 | UUID_ARGS(&sb->header_.uuid), UUID_ARGS(&key)); | |
5868eb24 BP |
432 | sbrec_datapath_binding_delete(sb); |
433 | continue; | |
434 | } | |
435 | ||
436 | struct ovn_datapath *od = ovn_datapath_create(datapaths, &key, | |
9975d7be | 437 | NULL, NULL, sb); |
417e7e66 | 438 | ovs_list_push_back(sb_only, &od->list); |
5868eb24 BP |
439 | } |
440 | ||
9975d7be BP |
441 | const struct nbrec_logical_switch *nbs; |
442 | NBREC_LOGICAL_SWITCH_FOR_EACH (nbs, ctx->ovnnb_idl) { | |
5868eb24 | 443 | struct ovn_datapath *od = ovn_datapath_find(datapaths, |
9975d7be | 444 | &nbs->header_.uuid); |
5868eb24 | 445 | if (od) { |
9975d7be | 446 | od->nbs = nbs; |
417e7e66 BW |
447 | ovs_list_remove(&od->list); |
448 | ovs_list_push_back(both, &od->list); | |
5868eb24 | 449 | } else { |
9975d7be BP |
450 | od = ovn_datapath_create(datapaths, &nbs->header_.uuid, |
451 | nbs, NULL, NULL); | |
417e7e66 | 452 | ovs_list_push_back(nb_only, &od->list); |
5868eb24 BP |
453 | } |
454 | } | |
9975d7be BP |
455 | |
456 | const struct nbrec_logical_router *nbr; | |
457 | NBREC_LOGICAL_ROUTER_FOR_EACH (nbr, ctx->ovnnb_idl) { | |
5412db30 J |
458 | if (!lrouter_is_enabled(nbr)) { |
459 | continue; | |
460 | } | |
461 | ||
9975d7be BP |
462 | struct ovn_datapath *od = ovn_datapath_find(datapaths, |
463 | &nbr->header_.uuid); | |
464 | if (od) { | |
465 | if (!od->nbs) { | |
466 | od->nbr = nbr; | |
417e7e66 BW |
467 | ovs_list_remove(&od->list); |
468 | ovs_list_push_back(both, &od->list); | |
9975d7be BP |
469 | } else { |
470 | /* Can't happen! */ | |
471 | static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1); | |
472 | VLOG_WARN_RL(&rl, | |
473 | "duplicate UUID "UUID_FMT" in OVN_Northbound", | |
474 | UUID_ARGS(&nbr->header_.uuid)); | |
475 | continue; | |
476 | } | |
477 | } else { | |
478 | od = ovn_datapath_create(datapaths, &nbr->header_.uuid, | |
479 | NULL, nbr, NULL); | |
417e7e66 | 480 | ovs_list_push_back(nb_only, &od->list); |
9975d7be | 481 | } |
9975d7be | 482 | } |
5868eb24 BP |
483 | } |
484 | ||
/* Allocates a datapath tunnel key in the range [1, 2**24 - 1] that is not yet
 * in 'dp_tnlids', adds it to that set, and returns it.  Returns 0 if
 * allocate_tnlid() finds no free key.  The search position is remembered
 * across calls in a function-local static. */
static uint32_t
ovn_datapath_allocate_key(struct hmap *dp_tnlids)
{
    static uint32_t hint;
    const uint32_t max_dp_key = (1u << 24) - 1;

    return allocate_tnlid(dp_tnlids, "datapath", max_dp_key, &hint);
}
491 | ||
0bac7164 BP |
492 | /* Updates the southbound Datapath_Binding table so that it contains the |
493 | * logical switches and routers specified by the northbound database. | |
494 | * | |
495 | * Initializes 'datapaths' to contain a "struct ovn_datapath" for every logical | |
496 | * switch and router. */ | |
5868eb24 BP |
497 | static void |
498 | build_datapaths(struct northd_context *ctx, struct hmap *datapaths) | |
499 | { | |
500 | struct ovs_list sb_only, nb_only, both; | |
501 | ||
502 | join_datapaths(ctx, datapaths, &sb_only, &nb_only, &both); | |
503 | ||
417e7e66 | 504 | if (!ovs_list_is_empty(&nb_only)) { |
5868eb24 BP |
505 | /* First index the in-use datapath tunnel IDs. */ |
506 | struct hmap dp_tnlids = HMAP_INITIALIZER(&dp_tnlids); | |
507 | struct ovn_datapath *od; | |
508 | LIST_FOR_EACH (od, list, &both) { | |
509 | add_tnlid(&dp_tnlids, od->sb->tunnel_key); | |
510 | } | |
511 | ||
512 | /* Add southbound record for each unmatched northbound record. */ | |
513 | LIST_FOR_EACH (od, list, &nb_only) { | |
514 | uint16_t tunnel_key = ovn_datapath_allocate_key(&dp_tnlids); | |
515 | if (!tunnel_key) { | |
516 | break; | |
517 | } | |
518 | ||
519 | od->sb = sbrec_datapath_binding_insert(ctx->ovnsb_txn); | |
520 | ||
5868eb24 | 521 | char uuid_s[UUID_LEN + 1]; |
9975d7be BP |
522 | sprintf(uuid_s, UUID_FMT, UUID_ARGS(&od->key)); |
523 | const char *key = od->nbs ? "logical-switch" : "logical-router"; | |
524 | const struct smap id = SMAP_CONST1(&id, key, uuid_s); | |
aaf881c6 | 525 | sbrec_datapath_binding_set_external_ids(od->sb, &id); |
5868eb24 BP |
526 | |
527 | sbrec_datapath_binding_set_tunnel_key(od->sb, tunnel_key); | |
528 | } | |
529 | destroy_tnlids(&dp_tnlids); | |
530 | } | |
531 | ||
532 | /* Delete southbound records without northbound matches. */ | |
533 | struct ovn_datapath *od, *next; | |
534 | LIST_FOR_EACH_SAFE (od, next, list, &sb_only) { | |
417e7e66 | 535 | ovs_list_remove(&od->list); |
5868eb24 BP |
536 | sbrec_datapath_binding_delete(od->sb); |
537 | ovn_datapath_destroy(datapaths, od); | |
538 | } | |
539 | } | |
540 | \f | |
541 | struct ovn_port { | |
542 | struct hmap_node key_node; /* Index on 'key'. */ | |
9975d7be BP |
543 | char *key; /* nbs->name, nbr->name, sb->logical_port. */ |
544 | char *json_key; /* 'key', quoted for use in JSON. */ | |
5868eb24 | 545 | |
9975d7be BP |
546 | const struct sbrec_port_binding *sb; /* May be NULL. */ |
547 | ||
e93b43d6 | 548 | /* Logical switch port data. */ |
0ee00741 | 549 | const struct nbrec_logical_switch_port *nbsp; /* May be NULL. */ |
e93b43d6 JP |
550 | |
551 | struct lport_addresses *lsp_addrs; /* Logical switch port addresses. */ | |
552 | unsigned int n_lsp_addrs; | |
553 | ||
554 | struct lport_addresses *ps_addrs; /* Port security addresses. */ | |
555 | unsigned int n_ps_addrs; | |
556 | ||
9975d7be | 557 | /* Logical router port data. */ |
0ee00741 | 558 | const struct nbrec_logical_router_port *nbrp; /* May be NULL. */ |
e93b43d6 | 559 | |
4685e523 | 560 | struct lport_addresses lrp_networks; |
c9bdf7bd | 561 | |
ad386c3f BP |
562 | /* The port's peer: |
563 | * | |
564 | * - A switch port S of type "router" has a router port R as a peer, | |
565 | * and R in turn has S has its peer. | |
566 | * | |
567 | * - Two connected logical router ports have each other as peer. */ | |
9975d7be | 568 | struct ovn_port *peer; |
5868eb24 BP |
569 | |
570 | struct ovn_datapath *od; | |
571 | ||
572 | struct ovs_list list; /* In list of similar records. */ | |
573 | }; | |
574 | ||
575 | static struct ovn_port * | |
576 | ovn_port_create(struct hmap *ports, const char *key, | |
0ee00741 HK |
577 | const struct nbrec_logical_switch_port *nbsp, |
578 | const struct nbrec_logical_router_port *nbrp, | |
5868eb24 BP |
579 | const struct sbrec_port_binding *sb) |
580 | { | |
581 | struct ovn_port *op = xzalloc(sizeof *op); | |
9975d7be BP |
582 | |
583 | struct ds json_key = DS_EMPTY_INITIALIZER; | |
584 | json_string_escape(key, &json_key); | |
585 | op->json_key = ds_steal_cstr(&json_key); | |
586 | ||
587 | op->key = xstrdup(key); | |
5868eb24 | 588 | op->sb = sb; |
0ee00741 HK |
589 | op->nbsp = nbsp; |
590 | op->nbrp = nbrp; | |
5868eb24 BP |
591 | hmap_insert(ports, &op->key_node, hash_string(op->key, 0)); |
592 | return op; | |
593 | } | |
594 | ||
595 | static void | |
596 | ovn_port_destroy(struct hmap *ports, struct ovn_port *port) | |
597 | { | |
598 | if (port) { | |
599 | /* Don't remove port->list. It is used within build_ports() as a | |
600 | * private list and once we've exited that function it is not safe to | |
601 | * use it. */ | |
602 | hmap_remove(ports, &port->key_node); | |
e93b43d6 JP |
603 | |
604 | for (int i = 0; i < port->n_lsp_addrs; i++) { | |
605 | destroy_lport_addresses(&port->lsp_addrs[i]); | |
606 | } | |
607 | free(port->lsp_addrs); | |
608 | ||
609 | for (int i = 0; i < port->n_ps_addrs; i++) { | |
610 | destroy_lport_addresses(&port->ps_addrs[i]); | |
611 | } | |
612 | free(port->ps_addrs); | |
613 | ||
4685e523 | 614 | destroy_lport_addresses(&port->lrp_networks); |
9975d7be BP |
615 | free(port->json_key); |
616 | free(port->key); | |
5868eb24 BP |
617 | free(port); |
618 | } | |
619 | } | |
620 | ||
621 | static struct ovn_port * | |
622 | ovn_port_find(struct hmap *ports, const char *name) | |
623 | { | |
624 | struct ovn_port *op; | |
625 | ||
626 | HMAP_FOR_EACH_WITH_HASH (op, key_node, hash_string(name, 0), ports) { | |
627 | if (!strcmp(op->key, name)) { | |
628 | return op; | |
629 | } | |
630 | } | |
631 | return NULL; | |
632 | } | |
633 | ||
634 | static uint32_t | |
635 | ovn_port_allocate_key(struct ovn_datapath *od) | |
636 | { | |
637 | return allocate_tnlid(&od->port_tnlids, "port", | |
638 | (1u << 15) - 1, &od->port_key_hint); | |
639 | } | |
640 | ||
8639f9be ND |
641 | static bool |
642 | ipam_is_duplicate_mac(struct eth_addr *ea, uint64_t mac64, bool warn) | |
643 | { | |
644 | struct macam_node *macam_node; | |
645 | HMAP_FOR_EACH_WITH_HASH (macam_node, hmap_node, hash_uint64(mac64), | |
646 | &macam) { | |
647 | if (eth_addr_equals(*ea, macam_node->mac_addr)) { | |
648 | if (warn) { | |
649 | static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1); | |
650 | VLOG_WARN_RL(&rl, "Duplicate MAC set: "ETH_ADDR_FMT, | |
651 | ETH_ADDR_ARGS(macam_node->mac_addr)); | |
652 | } | |
653 | return true; | |
654 | } | |
655 | } | |
656 | return false; | |
657 | } | |
658 | ||
659 | static bool | |
660 | ipam_is_duplicate_ip(struct ovn_datapath *od, uint32_t ip, bool warn) | |
661 | { | |
662 | struct ipam_node *ipam_node; | |
663 | HMAP_FOR_EACH_WITH_HASH (ipam_node, hmap_node, hash_int(ip, 0), | |
664 | &od->ipam) { | |
665 | if (ipam_node->ip_addr == ip) { | |
666 | if (warn) { | |
667 | static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1); | |
668 | VLOG_WARN_RL(&rl, "Duplicate IP set: "IP_FMT, | |
669 | IP_ARGS(htonl(ip))); | |
670 | } | |
671 | return true; | |
672 | } | |
673 | } | |
674 | return false; | |
675 | } | |
676 | ||
677 | static void | |
678 | ipam_insert_mac(struct eth_addr *ea, bool check) | |
679 | { | |
680 | if (!ea) { | |
681 | return; | |
682 | } | |
683 | ||
684 | uint64_t mac64 = eth_addr_to_uint64(*ea); | |
685 | /* If the new MAC was not assigned by this address management system or | |
686 | * check is true and the new MAC is a duplicate, do not insert it into the | |
687 | * macam hmap. */ | |
688 | if (((mac64 ^ MAC_ADDR_PREFIX) >> 24) | |
689 | || (check && ipam_is_duplicate_mac(ea, mac64, true))) { | |
690 | return; | |
691 | } | |
692 | ||
693 | struct macam_node *new_macam_node = xmalloc(sizeof *new_macam_node); | |
694 | new_macam_node->mac_addr = *ea; | |
695 | hmap_insert(&macam, &new_macam_node->hmap_node, hash_uint64(mac64)); | |
696 | } | |
697 | ||
698 | static void | |
699 | ipam_insert_ip(struct ovn_datapath *od, uint32_t ip, bool check) | |
700 | { | |
701 | if (!od) { | |
702 | return; | |
703 | } | |
704 | ||
705 | if (check && ipam_is_duplicate_ip(od, ip, true)) { | |
706 | return; | |
707 | } | |
708 | ||
709 | struct ipam_node *new_ipam_node = xmalloc(sizeof *new_ipam_node); | |
710 | new_ipam_node->ip_addr = ip; | |
711 | hmap_insert(&od->ipam, &new_ipam_node->hmap_node, hash_int(ip, 0)); | |
712 | } | |
713 | ||
714 | static void | |
715 | ipam_insert_lsp_addresses(struct ovn_datapath *od, struct ovn_port *op, | |
716 | char *address) | |
717 | { | |
718 | if (!od || !op || !address || !strcmp(address, "unknown") | |
719 | || !strcmp(address, "dynamic")) { | |
720 | return; | |
721 | } | |
722 | ||
723 | struct lport_addresses laddrs; | |
724 | if (!extract_lsp_addresses(address, &laddrs)) { | |
725 | static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1); | |
726 | VLOG_WARN_RL(&rl, "Extract addresses failed."); | |
727 | return; | |
728 | } | |
729 | ipam_insert_mac(&laddrs.ea, true); | |
730 | ||
731 | /* IP is only added to IPAM if the switch's subnet option | |
732 | * is set, whereas MAC is always added to MACAM. */ | |
733 | if (!smap_get(&od->nbs->other_config, "subnet")) { | |
734 | destroy_lport_addresses(&laddrs); | |
735 | return; | |
736 | } | |
737 | ||
738 | for (size_t j = 0; j < laddrs.n_ipv4_addrs; j++) { | |
739 | uint32_t ip = ntohl(laddrs.ipv4_addrs[j].addr); | |
740 | ipam_insert_ip(od, ip, true); | |
741 | } | |
742 | ||
743 | destroy_lport_addresses(&laddrs); | |
744 | } | |
745 | ||
746 | static void | |
747 | ipam_add_port_addresses(struct ovn_datapath *od, struct ovn_port *op) | |
748 | { | |
749 | if (!od || !op) { | |
750 | return; | |
751 | } | |
752 | ||
753 | if (op->nbsp) { | |
754 | /* Add all the port's addresses to address data structures. */ | |
755 | for (size_t i = 0; i < op->nbsp->n_addresses; i++) { | |
756 | ipam_insert_lsp_addresses(od, op, op->nbsp->addresses[i]); | |
757 | } | |
758 | if (op->nbsp->dynamic_addresses) { | |
759 | ipam_insert_lsp_addresses(od, op, op->nbsp->dynamic_addresses); | |
760 | } | |
761 | } else if (op->nbrp) { | |
762 | struct lport_addresses lrp_networks; | |
763 | if (!extract_lrp_networks(op->nbrp, &lrp_networks)) { | |
764 | static struct vlog_rate_limit rl | |
765 | = VLOG_RATE_LIMIT_INIT(1, 1); | |
766 | VLOG_WARN_RL(&rl, "Extract addresses failed."); | |
767 | return; | |
768 | } | |
769 | ipam_insert_mac(&lrp_networks.ea, true); | |
770 | ||
771 | if (!op->peer || !op->peer->nbsp || !op->peer->od || !op->peer->od->nbs | |
772 | || !smap_get(&op->peer->od->nbs->other_config, "subnet")) { | |
773 | destroy_lport_addresses(&lrp_networks); | |
774 | return; | |
775 | } | |
776 | ||
777 | for (size_t i = 0; i < lrp_networks.n_ipv4_addrs; i++) { | |
778 | uint32_t ip = ntohl(lrp_networks.ipv4_addrs[i].addr); | |
779 | ipam_insert_ip(op->peer->od, ip, true); | |
780 | } | |
781 | ||
782 | destroy_lport_addresses(&lrp_networks); | |
783 | } | |
784 | } | |
785 | ||
786 | static uint64_t | |
787 | ipam_get_unused_mac(void) | |
788 | { | |
789 | /* Stores the suffix of the most recently ipam-allocated MAC address. */ | |
790 | static uint32_t last_mac; | |
791 | ||
792 | uint64_t mac64; | |
793 | struct eth_addr mac; | |
794 | uint32_t mac_addr_suffix, i; | |
795 | for (i = 0; i < MAC_ADDR_SPACE - 1; i++) { | |
796 | /* The tentative MAC's suffix will be in the interval (1, 0xfffffe). */ | |
797 | mac_addr_suffix = ((last_mac + i) % (MAC_ADDR_SPACE - 1)) + 1; | |
798 | mac64 = MAC_ADDR_PREFIX | mac_addr_suffix; | |
799 | eth_addr_from_uint64(mac64, &mac); | |
800 | if (!ipam_is_duplicate_mac(&mac, mac64, false)) { | |
801 | last_mac = mac_addr_suffix; | |
802 | break; | |
803 | } | |
804 | } | |
805 | ||
806 | if (i == MAC_ADDR_SPACE) { | |
807 | static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1); | |
808 | VLOG_WARN_RL(&rl, "MAC address space exhausted."); | |
809 | mac64 = 0; | |
810 | } | |
811 | ||
812 | return mac64; | |
813 | } | |
814 | ||
815 | static uint32_t | |
816 | ipam_get_unused_ip(struct ovn_datapath *od, uint32_t subnet, uint32_t mask) | |
817 | { | |
818 | if (!od) { | |
819 | return 0; | |
820 | } | |
821 | ||
822 | uint32_t ip = 0; | |
823 | ||
824 | /* Find an unused IP address in subnet. x.x.x.1 is reserved for a | |
825 | * logical router port. */ | |
826 | for (uint32_t i = 2; i < ~mask; i++) { | |
827 | uint32_t tentative_ip = subnet + i; | |
828 | if (!ipam_is_duplicate_ip(od, tentative_ip, false)) { | |
829 | ip = tentative_ip; | |
830 | break; | |
831 | } | |
832 | } | |
833 | ||
834 | if (!ip) { | |
835 | static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1); | |
836 | VLOG_WARN_RL( &rl, "Subnet address space has been exhausted."); | |
837 | } | |
838 | ||
839 | return ip; | |
840 | } | |
841 | ||
842 | static bool | |
843 | ipam_allocate_addresses(struct ovn_datapath *od, struct ovn_port *op, | |
844 | ovs_be32 subnet, ovs_be32 mask) | |
845 | { | |
846 | if (!od || !op || !op->nbsp) { | |
847 | return false; | |
848 | } | |
849 | ||
850 | uint32_t ip = ipam_get_unused_ip(od, ntohl(subnet), ntohl(mask)); | |
851 | if (!ip) { | |
852 | return false; | |
853 | } | |
854 | ||
855 | struct eth_addr mac; | |
856 | uint64_t mac64 = ipam_get_unused_mac(); | |
857 | if (!mac64) { | |
858 | return false; | |
859 | } | |
860 | eth_addr_from_uint64(mac64, &mac); | |
861 | ||
862 | /* Add MAC/IP to MACAM/IPAM hmaps if both addresses were allocated | |
863 | * successfully. */ | |
864 | ipam_insert_ip(od, ip, false); | |
865 | ipam_insert_mac(&mac, false); | |
866 | ||
867 | char *new_addr = xasprintf(ETH_ADDR_FMT" "IP_FMT, | |
868 | ETH_ADDR_ARGS(mac), IP_ARGS(htonl(ip))); | |
869 | nbrec_logical_switch_port_set_dynamic_addresses(op->nbsp, new_addr); | |
870 | free(new_addr); | |
871 | ||
872 | return true; | |
873 | } | |
874 | ||
875 | static void | |
876 | build_ipam(struct northd_context *ctx, struct hmap *datapaths, | |
877 | struct hmap *ports) | |
878 | { | |
879 | /* IPAM generally stands for IP address management. In non-virtualized | |
880 | * world, MAC addresses come with the hardware. But, with virtualized | |
881 | * workloads, they need to be assigned and managed. This function | |
882 | * does both IP address management (ipam) and MAC address management | |
883 | * (macam). */ | |
884 | ||
885 | if (!ctx->ovnnb_txn) { | |
886 | return; | |
887 | } | |
888 | ||
889 | /* If the switch's other_config:subnet is set, allocate new addresses for | |
890 | * ports that have the "dynamic" keyword in their addresses column. */ | |
891 | struct ovn_datapath *od; | |
892 | HMAP_FOR_EACH (od, key_node, datapaths) { | |
893 | if (od->nbs) { | |
894 | const char *subnet_str = smap_get(&od->nbs->other_config, | |
895 | "subnet"); | |
896 | if (!subnet_str) { | |
897 | continue; | |
898 | } | |
899 | ||
900 | ovs_be32 subnet, mask; | |
901 | char *error = ip_parse_masked(subnet_str, &subnet, &mask); | |
902 | if (error || mask == OVS_BE32_MAX || !ip_is_cidr(mask)) { | |
903 | static struct vlog_rate_limit rl | |
904 | = VLOG_RATE_LIMIT_INIT(5, 1); | |
905 | VLOG_WARN_RL(&rl, "bad 'subnet' %s", subnet_str); | |
906 | free(error); | |
907 | continue; | |
908 | } | |
909 | ||
910 | struct ovn_port *op; | |
911 | for (size_t i = 0; i < od->nbs->n_ports; i++) { | |
912 | const struct nbrec_logical_switch_port *nbsp = | |
913 | od->nbs->ports[i]; | |
914 | ||
915 | if (!nbsp) { | |
916 | continue; | |
917 | } | |
918 | ||
919 | op = ovn_port_find(ports, nbsp->name); | |
920 | if (!op || (op->nbsp && op->peer)) { | |
921 | /* Do not allocate addresses for logical switch ports that | |
922 | * have a peer. */ | |
923 | continue; | |
924 | } | |
925 | ||
926 | for (size_t j = 0; j < nbsp->n_addresses; j++) { | |
927 | if (!strcmp(nbsp->addresses[j], "dynamic") | |
928 | && !nbsp->dynamic_addresses) { | |
929 | if (!ipam_allocate_addresses(od, op, subnet, mask) | |
930 | || !extract_lsp_addresses(nbsp->dynamic_addresses, | |
931 | &op->lsp_addrs[op->n_lsp_addrs])) { | |
932 | static struct vlog_rate_limit rl | |
933 | = VLOG_RATE_LIMIT_INIT(1, 1); | |
934 | VLOG_INFO_RL(&rl, "Failed to allocate address."); | |
935 | } else { | |
936 | op->n_lsp_addrs++; | |
937 | } | |
938 | break; | |
939 | } | |
940 | } | |
941 | } | |
942 | } | |
943 | } | |
944 | } | |
945 | \f | |
946 | ||
5868eb24 BP |
947 | static void |
948 | join_logical_ports(struct northd_context *ctx, | |
949 | struct hmap *datapaths, struct hmap *ports, | |
950 | struct ovs_list *sb_only, struct ovs_list *nb_only, | |
951 | struct ovs_list *both) | |
952 | { | |
953 | hmap_init(ports); | |
417e7e66 BW |
954 | ovs_list_init(sb_only); |
955 | ovs_list_init(nb_only); | |
956 | ovs_list_init(both); | |
5868eb24 BP |
957 | |
958 | const struct sbrec_port_binding *sb; | |
959 | SBREC_PORT_BINDING_FOR_EACH (sb, ctx->ovnsb_idl) { | |
960 | struct ovn_port *op = ovn_port_create(ports, sb->logical_port, | |
9975d7be | 961 | NULL, NULL, sb); |
417e7e66 | 962 | ovs_list_push_back(sb_only, &op->list); |
5868eb24 BP |
963 | } |
964 | ||
965 | struct ovn_datapath *od; | |
966 | HMAP_FOR_EACH (od, key_node, datapaths) { | |
9975d7be BP |
967 | if (od->nbs) { |
968 | for (size_t i = 0; i < od->nbs->n_ports; i++) { | |
0ee00741 HK |
969 | const struct nbrec_logical_switch_port *nbsp |
970 | = od->nbs->ports[i]; | |
971 | struct ovn_port *op = ovn_port_find(ports, nbsp->name); | |
9975d7be | 972 | if (op) { |
0ee00741 | 973 | if (op->nbsp || op->nbrp) { |
9975d7be BP |
974 | static struct vlog_rate_limit rl |
975 | = VLOG_RATE_LIMIT_INIT(5, 1); | |
976 | VLOG_WARN_RL(&rl, "duplicate logical port %s", | |
0ee00741 | 977 | nbsp->name); |
9975d7be BP |
978 | continue; |
979 | } | |
0ee00741 | 980 | op->nbsp = nbsp; |
417e7e66 BW |
981 | ovs_list_remove(&op->list); |
982 | ovs_list_push_back(both, &op->list); | |
e93b43d6 JP |
983 | |
984 | /* This port exists due to a SB binding, but should | |
985 | * not have been initialized fully. */ | |
986 | ovs_assert(!op->n_lsp_addrs && !op->n_ps_addrs); | |
9975d7be | 987 | } else { |
0ee00741 | 988 | op = ovn_port_create(ports, nbsp->name, nbsp, NULL, NULL); |
417e7e66 | 989 | ovs_list_push_back(nb_only, &op->list); |
9975d7be BP |
990 | } |
991 | ||
e93b43d6 | 992 | op->lsp_addrs |
0ee00741 HK |
993 | = xmalloc(sizeof *op->lsp_addrs * nbsp->n_addresses); |
994 | for (size_t j = 0; j < nbsp->n_addresses; j++) { | |
995 | if (!strcmp(nbsp->addresses[j], "unknown")) { | |
e93b43d6 JP |
996 | continue; |
997 | } | |
8639f9be ND |
998 | if (!strcmp(nbsp->addresses[j], "dynamic")) { |
999 | if (nbsp->dynamic_addresses) { | |
1000 | if (!extract_lsp_addresses(nbsp->dynamic_addresses, | |
1001 | &op->lsp_addrs[op->n_lsp_addrs])) { | |
1002 | static struct vlog_rate_limit rl | |
1003 | = VLOG_RATE_LIMIT_INIT(1, 1); | |
1004 | VLOG_INFO_RL(&rl, "invalid syntax '%s' in " | |
1005 | "logical switch port " | |
1006 | "dynamic_addresses. No " | |
1007 | "MAC address found", | |
1008 | op->nbsp->dynamic_addresses); | |
1009 | continue; | |
1010 | } | |
1011 | } else { | |
1012 | continue; | |
1013 | } | |
1014 | } else if (!extract_lsp_addresses(nbsp->addresses[j], | |
e93b43d6 JP |
1015 | &op->lsp_addrs[op->n_lsp_addrs])) { |
1016 | static struct vlog_rate_limit rl | |
1017 | = VLOG_RATE_LIMIT_INIT(1, 1); | |
1018 | VLOG_INFO_RL(&rl, "invalid syntax '%s' in logical " | |
1019 | "switch port addresses. No MAC " | |
1020 | "address found", | |
0ee00741 | 1021 | op->nbsp->addresses[j]); |
e93b43d6 JP |
1022 | continue; |
1023 | } | |
1024 | op->n_lsp_addrs++; | |
1025 | } | |
1026 | ||
1027 | op->ps_addrs | |
0ee00741 HK |
1028 | = xmalloc(sizeof *op->ps_addrs * nbsp->n_port_security); |
1029 | for (size_t j = 0; j < nbsp->n_port_security; j++) { | |
1030 | if (!extract_lsp_addresses(nbsp->port_security[j], | |
e93b43d6 JP |
1031 | &op->ps_addrs[op->n_ps_addrs])) { |
1032 | static struct vlog_rate_limit rl | |
1033 | = VLOG_RATE_LIMIT_INIT(1, 1); | |
1034 | VLOG_INFO_RL(&rl, "invalid syntax '%s' in port " | |
1035 | "security. No MAC address found", | |
0ee00741 | 1036 | op->nbsp->port_security[j]); |
e93b43d6 JP |
1037 | continue; |
1038 | } | |
1039 | op->n_ps_addrs++; | |
1040 | } | |
1041 | ||
9975d7be | 1042 | op->od = od; |
8639f9be | 1043 | ipam_add_port_addresses(od, op); |
9975d7be BP |
1044 | } |
1045 | } else { | |
1046 | for (size_t i = 0; i < od->nbr->n_ports; i++) { | |
0ee00741 HK |
1047 | const struct nbrec_logical_router_port *nbrp |
1048 | = od->nbr->ports[i]; | |
9975d7be | 1049 | |
4685e523 | 1050 | struct lport_addresses lrp_networks; |
0ee00741 | 1051 | if (!extract_lrp_networks(nbrp, &lrp_networks)) { |
9975d7be BP |
1052 | static struct vlog_rate_limit rl |
1053 | = VLOG_RATE_LIMIT_INIT(5, 1); | |
0ee00741 | 1054 | VLOG_WARN_RL(&rl, "bad 'mac' %s", nbrp->mac); |
9975d7be BP |
1055 | continue; |
1056 | } | |
1057 | ||
4685e523 | 1058 | if (!lrp_networks.n_ipv4_addrs && !lrp_networks.n_ipv6_addrs) { |
9975d7be BP |
1059 | continue; |
1060 | } | |
1061 | ||
0ee00741 | 1062 | struct ovn_port *op = ovn_port_find(ports, nbrp->name); |
9975d7be | 1063 | if (op) { |
0ee00741 | 1064 | if (op->nbsp || op->nbrp) { |
9975d7be BP |
1065 | static struct vlog_rate_limit rl |
1066 | = VLOG_RATE_LIMIT_INIT(5, 1); | |
1067 | VLOG_WARN_RL(&rl, "duplicate logical router port %s", | |
0ee00741 | 1068 | nbrp->name); |
9975d7be BP |
1069 | continue; |
1070 | } | |
0ee00741 | 1071 | op->nbrp = nbrp; |
417e7e66 BW |
1072 | ovs_list_remove(&op->list); |
1073 | ovs_list_push_back(both, &op->list); | |
4685e523 JP |
1074 | |
1075 | /* This port exists but should not have been | |
1076 | * initialized fully. */ | |
1077 | ovs_assert(!op->lrp_networks.n_ipv4_addrs | |
1078 | && !op->lrp_networks.n_ipv6_addrs); | |
9975d7be | 1079 | } else { |
0ee00741 | 1080 | op = ovn_port_create(ports, nbrp->name, NULL, nbrp, NULL); |
417e7e66 | 1081 | ovs_list_push_back(nb_only, &op->list); |
9975d7be BP |
1082 | } |
1083 | ||
4685e523 | 1084 | op->lrp_networks = lrp_networks; |
9975d7be | 1085 | op->od = od; |
8639f9be | 1086 | ipam_add_port_addresses(op->od, op); |
5868eb24 | 1087 | } |
9975d7be BP |
1088 | } |
1089 | } | |
1090 | ||
1091 | /* Connect logical router ports, and logical switch ports of type "router", | |
1092 | * to their peers. */ | |
1093 | struct ovn_port *op; | |
1094 | HMAP_FOR_EACH (op, key_node, ports) { | |
0ee00741 HK |
1095 | if (op->nbsp && !strcmp(op->nbsp->type, "router")) { |
1096 | const char *peer_name = smap_get(&op->nbsp->options, "router-port"); | |
9975d7be BP |
1097 | if (!peer_name) { |
1098 | continue; | |
1099 | } | |
1100 | ||
1101 | struct ovn_port *peer = ovn_port_find(ports, peer_name); | |
0ee00741 | 1102 | if (!peer || !peer->nbrp) { |
9975d7be BP |
1103 | continue; |
1104 | } | |
1105 | ||
1106 | peer->peer = op; | |
1107 | op->peer = peer; | |
86e98048 BP |
1108 | op->od->router_ports = xrealloc( |
1109 | op->od->router_ports, | |
1110 | sizeof *op->od->router_ports * (op->od->n_router_ports + 1)); | |
1111 | op->od->router_ports[op->od->n_router_ports++] = op; | |
0ee00741 | 1112 | } else if (op->nbrp && op->nbrp->peer) { |
ad386c3f BP |
1113 | struct ovn_port *peer = ovn_port_find(ports, op->nbrp->peer); |
1114 | if (peer) { | |
1115 | if (peer->nbrp) { | |
1116 | op->peer = peer; | |
60fa6dbb | 1117 | } else if (peer->nbsp) { |
ad386c3f BP |
1118 | /* An ovn_port for a switch port of type "router" does have |
1119 | * a router port as its peer (see the case above for | |
1120 | * "router" ports), but this is set via options:router-port | |
1121 | * in Logical_Switch_Port and does not involve the | |
1122 | * Logical_Router_Port's 'peer' column. */ | |
1123 | static struct vlog_rate_limit rl = | |
1124 | VLOG_RATE_LIMIT_INIT(5, 1); | |
1125 | VLOG_WARN_RL(&rl, "Bad configuration: The peer of router " | |
1126 | "port %s is a switch port", op->key); | |
1127 | } | |
1128 | } | |
5868eb24 BP |
1129 | } |
1130 | } | |
1131 | } | |
1132 | ||
1133 | static void | |
1134 | ovn_port_update_sbrec(const struct ovn_port *op) | |
1135 | { | |
1136 | sbrec_port_binding_set_datapath(op->sb, op->od->sb); | |
0ee00741 | 1137 | if (op->nbrp) { |
c1645003 | 1138 | /* If the router is for l3 gateway, it resides on a chassis |
17bac0ff | 1139 | * and its port type is "l3gateway". */ |
c1645003 GS |
1140 | const char *chassis = smap_get(&op->od->nbr->options, "chassis"); |
1141 | if (chassis) { | |
17bac0ff | 1142 | sbrec_port_binding_set_type(op->sb, "l3gateway"); |
c1645003 GS |
1143 | } else { |
1144 | sbrec_port_binding_set_type(op->sb, "patch"); | |
1145 | } | |
9975d7be BP |
1146 | |
1147 | const char *peer = op->peer ? op->peer->key : "<error>"; | |
c1645003 GS |
1148 | struct smap new; |
1149 | smap_init(&new); | |
1150 | smap_add(&new, "peer", peer); | |
1151 | if (chassis) { | |
17bac0ff | 1152 | smap_add(&new, "l3gateway-chassis", chassis); |
c1645003 GS |
1153 | } |
1154 | sbrec_port_binding_set_options(op->sb, &new); | |
1155 | smap_destroy(&new); | |
9975d7be BP |
1156 | |
1157 | sbrec_port_binding_set_parent_port(op->sb, NULL); | |
1158 | sbrec_port_binding_set_tag(op->sb, NULL, 0); | |
1159 | sbrec_port_binding_set_mac(op->sb, NULL, 0); | |
1160 | } else { | |
0ee00741 HK |
1161 | if (strcmp(op->nbsp->type, "router")) { |
1162 | sbrec_port_binding_set_type(op->sb, op->nbsp->type); | |
1163 | sbrec_port_binding_set_options(op->sb, &op->nbsp->options); | |
9975d7be | 1164 | } else { |
c1645003 GS |
1165 | const char *chassis = NULL; |
1166 | if (op->peer && op->peer->od && op->peer->od->nbr) { | |
1167 | chassis = smap_get(&op->peer->od->nbr->options, "chassis"); | |
1168 | } | |
1169 | ||
1170 | /* A switch port connected to a gateway router is also of | |
17bac0ff | 1171 | * type "l3gateway". */ |
c1645003 | 1172 | if (chassis) { |
17bac0ff | 1173 | sbrec_port_binding_set_type(op->sb, "l3gateway"); |
c1645003 GS |
1174 | } else { |
1175 | sbrec_port_binding_set_type(op->sb, "patch"); | |
1176 | } | |
9975d7be | 1177 | |
f99f67bd BP |
1178 | const char *router_port = smap_get_def(&op->nbsp->options, |
1179 | "router-port", "<error>"); | |
c1645003 GS |
1180 | struct smap new; |
1181 | smap_init(&new); | |
1182 | smap_add(&new, "peer", router_port); | |
1183 | if (chassis) { | |
17bac0ff | 1184 | smap_add(&new, "l3gateway-chassis", chassis); |
c1645003 GS |
1185 | } |
1186 | sbrec_port_binding_set_options(op->sb, &new); | |
1187 | smap_destroy(&new); | |
9975d7be | 1188 | } |
0ee00741 HK |
1189 | sbrec_port_binding_set_parent_port(op->sb, op->nbsp->parent_name); |
1190 | sbrec_port_binding_set_tag(op->sb, op->nbsp->tag, op->nbsp->n_tag); | |
1191 | sbrec_port_binding_set_mac(op->sb, (const char **) op->nbsp->addresses, | |
1192 | op->nbsp->n_addresses); | |
9975d7be | 1193 | } |
5868eb24 BP |
1194 | } |
1195 | ||
0bac7164 | 1196 | /* Updates the southbound Port_Binding table so that it contains the logical |
80f408f4 | 1197 | * switch ports specified by the northbound database. |
0bac7164 BP |
1198 | * |
1199 | * Initializes 'ports' to contain a "struct ovn_port" for every logical port, | |
1200 | * using the "struct ovn_datapath"s in 'datapaths' to look up logical | |
1201 | * datapaths. */ | |
5868eb24 BP |
1202 | static void |
1203 | build_ports(struct northd_context *ctx, struct hmap *datapaths, | |
1204 | struct hmap *ports) | |
1205 | { | |
1206 | struct ovs_list sb_only, nb_only, both; | |
1207 | ||
1208 | join_logical_ports(ctx, datapaths, ports, &sb_only, &nb_only, &both); | |
1209 | ||
1210 | /* For logical ports that are in both databases, update the southbound | |
1211 | * record based on northbound data. Also index the in-use tunnel_keys. */ | |
1212 | struct ovn_port *op, *next; | |
1213 | LIST_FOR_EACH_SAFE (op, next, list, &both) { | |
1214 | ovn_port_update_sbrec(op); | |
1215 | ||
1216 | add_tnlid(&op->od->port_tnlids, op->sb->tunnel_key); | |
1217 | if (op->sb->tunnel_key > op->od->port_key_hint) { | |
1218 | op->od->port_key_hint = op->sb->tunnel_key; | |
1219 | } | |
1220 | } | |
1221 | ||
1222 | /* Add southbound record for each unmatched northbound record. */ | |
1223 | LIST_FOR_EACH_SAFE (op, next, list, &nb_only) { | |
1224 | uint16_t tunnel_key = ovn_port_allocate_key(op->od); | |
1225 | if (!tunnel_key) { | |
1226 | continue; | |
1227 | } | |
1228 | ||
1229 | op->sb = sbrec_port_binding_insert(ctx->ovnsb_txn); | |
1230 | ovn_port_update_sbrec(op); | |
1231 | ||
1232 | sbrec_port_binding_set_logical_port(op->sb, op->key); | |
1233 | sbrec_port_binding_set_tunnel_key(op->sb, tunnel_key); | |
1234 | } | |
1235 | ||
1236 | /* Delete southbound records without northbound matches. */ | |
1237 | LIST_FOR_EACH_SAFE(op, next, list, &sb_only) { | |
417e7e66 | 1238 | ovs_list_remove(&op->list); |
5868eb24 BP |
1239 | sbrec_port_binding_delete(op->sb); |
1240 | ovn_port_destroy(ports, op); | |
1241 | } | |
1242 | } | |
1243 | \f | |
/* Inclusive bounds of the key range used by multicast groups. */
#define OVN_MIN_MULTICAST 32768
#define OVN_MAX_MULTICAST 65535

/* A named multicast group together with its key. */
struct multicast_group {
    const char *name;
    uint16_t key;               /* OVN_MIN_MULTICAST...OVN_MAX_MULTICAST. */
};

/* The two predefined groups, "_MC_flood" and "_MC_unknown". */
#define MC_FLOOD "_MC_flood"
static const struct multicast_group mc_flood = { MC_FLOOD, 65535 };

#define MC_UNKNOWN "_MC_unknown"
static const struct multicast_group mc_unknown = { MC_UNKNOWN, 65534 };

/* Returns true if 'a' and 'b' have the same name and the same key. */
static bool
multicast_group_equal(const struct multicast_group *a,
                      const struct multicast_group *b)
{
    if (a->key != b->key) {
        return false;
    }
    return strcmp(a->name, b->name) == 0;
}
1264 | ||
1265 | /* Multicast group entry. */ | |
1266 | struct ovn_multicast { | |
1267 | struct hmap_node hmap_node; /* Index on 'datapath' and 'key'. */ | |
1268 | struct ovn_datapath *datapath; | |
1269 | const struct multicast_group *group; | |
1270 | ||
1271 | struct ovn_port **ports; | |
1272 | size_t n_ports, allocated_ports; | |
1273 | }; | |
1274 | ||
1275 | static uint32_t | |
1276 | ovn_multicast_hash(const struct ovn_datapath *datapath, | |
1277 | const struct multicast_group *group) | |
1278 | { | |
1279 | return hash_pointer(datapath, group->key); | |
1280 | } | |
1281 | ||
1282 | static struct ovn_multicast * | |
1283 | ovn_multicast_find(struct hmap *mcgroups, struct ovn_datapath *datapath, | |
1284 | const struct multicast_group *group) | |
1285 | { | |
1286 | struct ovn_multicast *mc; | |
1287 | ||
1288 | HMAP_FOR_EACH_WITH_HASH (mc, hmap_node, | |
1289 | ovn_multicast_hash(datapath, group), mcgroups) { | |
1290 | if (mc->datapath == datapath | |
1291 | && multicast_group_equal(mc->group, group)) { | |
1292 | return mc; | |
4edcdcf4 RB |
1293 | } |
1294 | } | |
5868eb24 BP |
1295 | return NULL; |
1296 | } | |
1297 | ||
1298 | static void | |
1299 | ovn_multicast_add(struct hmap *mcgroups, const struct multicast_group *group, | |
1300 | struct ovn_port *port) | |
1301 | { | |
1302 | struct ovn_datapath *od = port->od; | |
1303 | struct ovn_multicast *mc = ovn_multicast_find(mcgroups, od, group); | |
1304 | if (!mc) { | |
1305 | mc = xmalloc(sizeof *mc); | |
1306 | hmap_insert(mcgroups, &mc->hmap_node, ovn_multicast_hash(od, group)); | |
1307 | mc->datapath = od; | |
1308 | mc->group = group; | |
1309 | mc->n_ports = 0; | |
1310 | mc->allocated_ports = 4; | |
1311 | mc->ports = xmalloc(mc->allocated_ports * sizeof *mc->ports); | |
1312 | } | |
1313 | if (mc->n_ports >= mc->allocated_ports) { | |
1314 | mc->ports = x2nrealloc(mc->ports, &mc->allocated_ports, | |
1315 | sizeof *mc->ports); | |
1316 | } | |
1317 | mc->ports[mc->n_ports++] = port; | |
1318 | } | |
4edcdcf4 | 1319 | |
5868eb24 BP |
1320 | static void |
1321 | ovn_multicast_destroy(struct hmap *mcgroups, struct ovn_multicast *mc) | |
1322 | { | |
1323 | if (mc) { | |
1324 | hmap_remove(mcgroups, &mc->hmap_node); | |
1325 | free(mc->ports); | |
1326 | free(mc); | |
1327 | } | |
1328 | } | |
4edcdcf4 | 1329 | |
5868eb24 BP |
1330 | static void |
1331 | ovn_multicast_update_sbrec(const struct ovn_multicast *mc, | |
1332 | const struct sbrec_multicast_group *sb) | |
1333 | { | |
1334 | struct sbrec_port_binding **ports = xmalloc(mc->n_ports * sizeof *ports); | |
1335 | for (size_t i = 0; i < mc->n_ports; i++) { | |
1336 | ports[i] = CONST_CAST(struct sbrec_port_binding *, mc->ports[i]->sb); | |
1337 | } | |
1338 | sbrec_multicast_group_set_ports(sb, ports, mc->n_ports); | |
1339 | free(ports); | |
4edcdcf4 | 1340 | } |
bd39395f | 1341 | \f |
48605550 | 1342 | /* Logical flow generation. |
bd39395f | 1343 | * |
48605550 | 1344 | * This code generates the Logical_Flow table in the southbound database, as a |
bd39395f BP |
1345 | * function of most of the northbound database. |
1346 | */ | |
1347 | ||
5868eb24 BP |
1348 | struct ovn_lflow { |
1349 | struct hmap_node hmap_node; | |
bd39395f | 1350 | |
5868eb24 | 1351 | struct ovn_datapath *od; |
880fcd14 | 1352 | enum ovn_stage stage; |
5868eb24 BP |
1353 | uint16_t priority; |
1354 | char *match; | |
1355 | char *actions; | |
bd39395f BP |
1356 | }; |
1357 | ||
1358 | static size_t | |
5868eb24 | 1359 | ovn_lflow_hash(const struct ovn_lflow *lflow) |
bd39395f | 1360 | { |
5868eb24 | 1361 | size_t hash = uuid_hash(&lflow->od->key); |
880fcd14 | 1362 | hash = hash_2words((lflow->stage << 16) | lflow->priority, hash); |
5868eb24 BP |
1363 | hash = hash_string(lflow->match, hash); |
1364 | return hash_string(lflow->actions, hash); | |
bd39395f BP |
1365 | } |
1366 | ||
5868eb24 BP |
1367 | static bool |
1368 | ovn_lflow_equal(const struct ovn_lflow *a, const struct ovn_lflow *b) | |
1369 | { | |
1370 | return (a->od == b->od | |
880fcd14 | 1371 | && a->stage == b->stage |
5868eb24 BP |
1372 | && a->priority == b->priority |
1373 | && !strcmp(a->match, b->match) | |
1374 | && !strcmp(a->actions, b->actions)); | |
1375 | } | |
1376 | ||
1377 | static void | |
1378 | ovn_lflow_init(struct ovn_lflow *lflow, struct ovn_datapath *od, | |
880fcd14 | 1379 | enum ovn_stage stage, uint16_t priority, |
5868eb24 | 1380 | char *match, char *actions) |
bd39395f | 1381 | { |
5868eb24 | 1382 | lflow->od = od; |
880fcd14 | 1383 | lflow->stage = stage; |
5868eb24 BP |
1384 | lflow->priority = priority; |
1385 | lflow->match = match; | |
1386 | lflow->actions = actions; | |
bd39395f BP |
1387 | } |
1388 | ||
48605550 | 1389 | /* Adds a row with the specified contents to the Logical_Flow table. */ |
bd39395f | 1390 | static void |
5868eb24 | 1391 | ovn_lflow_add(struct hmap *lflow_map, struct ovn_datapath *od, |
880fcd14 | 1392 | enum ovn_stage stage, uint16_t priority, |
5868eb24 BP |
1393 | const char *match, const char *actions) |
1394 | { | |
9a9961d2 BP |
1395 | ovs_assert(ovn_stage_to_datapath_type(stage) == ovn_datapath_get_type(od)); |
1396 | ||
5868eb24 | 1397 | struct ovn_lflow *lflow = xmalloc(sizeof *lflow); |
880fcd14 | 1398 | ovn_lflow_init(lflow, od, stage, priority, |
5868eb24 BP |
1399 | xstrdup(match), xstrdup(actions)); |
1400 | hmap_insert(lflow_map, &lflow->hmap_node, ovn_lflow_hash(lflow)); | |
1401 | } | |
1402 | ||
1403 | static struct ovn_lflow * | |
1404 | ovn_lflow_find(struct hmap *lflows, struct ovn_datapath *od, | |
880fcd14 | 1405 | enum ovn_stage stage, uint16_t priority, |
5868eb24 BP |
1406 | const char *match, const char *actions) |
1407 | { | |
1408 | struct ovn_lflow target; | |
880fcd14 | 1409 | ovn_lflow_init(&target, od, stage, priority, |
5868eb24 BP |
1410 | CONST_CAST(char *, match), CONST_CAST(char *, actions)); |
1411 | ||
1412 | struct ovn_lflow *lflow; | |
1413 | HMAP_FOR_EACH_WITH_HASH (lflow, hmap_node, ovn_lflow_hash(&target), | |
1414 | lflows) { | |
1415 | if (ovn_lflow_equal(lflow, &target)) { | |
1416 | return lflow; | |
bd39395f BP |
1417 | } |
1418 | } | |
5868eb24 BP |
1419 | return NULL; |
1420 | } | |
bd39395f | 1421 | |
5868eb24 BP |
1422 | static void |
1423 | ovn_lflow_destroy(struct hmap *lflows, struct ovn_lflow *lflow) | |
1424 | { | |
1425 | if (lflow) { | |
1426 | hmap_remove(lflows, &lflow->hmap_node); | |
1427 | free(lflow->match); | |
1428 | free(lflow->actions); | |
1429 | free(lflow); | |
1430 | } | |
bd39395f BP |
1431 | } |
1432 | ||
bd39395f | 1433 | /* Appends port security constraints on L2 address field 'eth_addr_field' |
e93b43d6 JP |
1434 | * (e.g. "eth.src" or "eth.dst") to 'match'. 'ps_addrs', with 'n_ps_addrs' |
1435 | * elements, is the collection of port_security constraints from an | |
1436 | * OVN_NB Logical_Switch_Port row generated by extract_lsp_addresses(). */ | |
bd39395f | 1437 | static void |
685f4dfe | 1438 | build_port_security_l2(const char *eth_addr_field, |
e93b43d6 JP |
1439 | struct lport_addresses *ps_addrs, |
1440 | unsigned int n_ps_addrs, | |
685f4dfe | 1441 | struct ds *match) |
bd39395f | 1442 | { |
e93b43d6 JP |
1443 | if (!n_ps_addrs) { |
1444 | return; | |
1445 | } | |
bd39395f | 1446 | |
e93b43d6 | 1447 | ds_put_format(match, " && %s == {", eth_addr_field); |
f7cb14cd | 1448 | |
e93b43d6 JP |
1449 | for (size_t i = 0; i < n_ps_addrs; i++) { |
1450 | ds_put_format(match, "%s ", ps_addrs[i].ea_s); | |
bd39395f | 1451 | } |
f7cb14cd | 1452 | ds_chomp(match, ' '); |
bd39395f | 1453 | ds_put_cstr(match, "}"); |
bd39395f BP |
1454 | } |
1455 | ||
685f4dfe NS |
1456 | static void |
1457 | build_port_security_ipv6_nd_flow( | |
1458 | struct ds *match, struct eth_addr ea, struct ipv6_netaddr *ipv6_addrs, | |
1459 | int n_ipv6_addrs) | |
1460 | { | |
1461 | ds_put_format(match, " && ip6 && nd && ((nd.sll == "ETH_ADDR_FMT" || " | |
1462 | "nd.sll == "ETH_ADDR_FMT") || ((nd.tll == "ETH_ADDR_FMT" || " | |
1463 | "nd.tll == "ETH_ADDR_FMT")", ETH_ADDR_ARGS(eth_addr_zero), | |
1464 | ETH_ADDR_ARGS(ea), ETH_ADDR_ARGS(eth_addr_zero), | |
1465 | ETH_ADDR_ARGS(ea)); | |
1466 | if (!n_ipv6_addrs) { | |
1467 | ds_put_cstr(match, "))"); | |
1468 | return; | |
1469 | } | |
1470 | ||
1471 | char ip6_str[INET6_ADDRSTRLEN + 1]; | |
1472 | struct in6_addr lla; | |
1473 | in6_generate_lla(ea, &lla); | |
1474 | memset(ip6_str, 0, sizeof(ip6_str)); | |
1475 | ipv6_string_mapped(ip6_str, &lla); | |
1476 | ds_put_format(match, " && (nd.target == %s", ip6_str); | |
1477 | ||
1478 | for(int i = 0; i < n_ipv6_addrs; i++) { | |
1479 | memset(ip6_str, 0, sizeof(ip6_str)); | |
1480 | ipv6_string_mapped(ip6_str, &ipv6_addrs[i].addr); | |
1481 | ds_put_format(match, " || nd.target == %s", ip6_str); | |
1482 | } | |
1483 | ||
1484 | ds_put_format(match, ")))"); | |
1485 | } | |
1486 | ||
1487 | static void | |
1488 | build_port_security_ipv6_flow( | |
1489 | enum ovn_pipeline pipeline, struct ds *match, struct eth_addr ea, | |
1490 | struct ipv6_netaddr *ipv6_addrs, int n_ipv6_addrs) | |
1491 | { | |
1492 | char ip6_str[INET6_ADDRSTRLEN + 1]; | |
1493 | ||
1494 | ds_put_format(match, " && %s == {", | |
1495 | pipeline == P_IN ? "ip6.src" : "ip6.dst"); | |
1496 | ||
1497 | /* Allow link-local address. */ | |
1498 | struct in6_addr lla; | |
1499 | in6_generate_lla(ea, &lla); | |
1500 | ipv6_string_mapped(ip6_str, &lla); | |
1501 | ds_put_format(match, "%s, ", ip6_str); | |
1502 | ||
9e687b23 DL |
1503 | /* Allow ip6.dst=ff00::/8 for multicast packets */ |
1504 | if (pipeline == P_OUT) { | |
1505 | ds_put_cstr(match, "ff00::/8, "); | |
1506 | } | |
685f4dfe NS |
1507 | for(int i = 0; i < n_ipv6_addrs; i++) { |
1508 | ipv6_string_mapped(ip6_str, &ipv6_addrs[i].addr); | |
9e687b23 | 1509 | ds_put_format(match, "%s, ", ip6_str); |
685f4dfe | 1510 | } |
9e687b23 DL |
1511 | /* Replace ", " by "}". */ |
1512 | ds_chomp(match, ' '); | |
1513 | ds_chomp(match, ','); | |
685f4dfe NS |
1514 | ds_put_cstr(match, "}"); |
1515 | } | |
1516 | ||
1517 | /** | |
1518 | * Build port security constraints on ARP and IPv6 ND fields | |
1519 | * and add logical flows to S_SWITCH_IN_PORT_SEC_ND stage. | |
1520 | * | |
1521 | * For each port security of the logical port, following | |
1522 | * logical flows are added | |
1523 | * - If the port security has no IP (both IPv4 and IPv6) or | |
1524 | * if it has IPv4 address(es) | |
1525 | * - Priority 90 flow to allow ARP packets for known MAC addresses | |
1526 | * in the eth.src and arp.spa fields. If the port security | |
1527 | * has IPv4 addresses, allow known IPv4 addresses in the arp.tpa field. | |
1528 | * | |
1529 | * - If the port security has no IP (both IPv4 and IPv6) or | |
1530 | * if it has IPv6 address(es) | |
1531 | * - Priority 90 flow to allow IPv6 ND packets for known MAC addresses | |
1532 | * in the eth.src and nd.sll/nd.tll fields. If the port security | |
1533 | * has IPv6 addresses, allow known IPv6 addresses in the nd.target field | |
1534 | * for IPv6 Neighbor Advertisement packet. | |
1535 | * | |
1536 | * - Priority 80 flow to drop ARP and IPv6 ND packets. | |
1537 | */ | |
1538 | static void | |
1539 | build_port_security_nd(struct ovn_port *op, struct hmap *lflows) | |
1540 | { | |
e93b43d6 JP |
1541 | struct ds match = DS_EMPTY_INITIALIZER; |
1542 | ||
1543 | for (size_t i = 0; i < op->n_ps_addrs; i++) { | |
1544 | struct lport_addresses *ps = &op->ps_addrs[i]; | |
685f4dfe | 1545 | |
e93b43d6 | 1546 | bool no_ip = !(ps->n_ipv4_addrs || ps->n_ipv6_addrs); |
685f4dfe | 1547 | |
e93b43d6 JP |
1548 | ds_clear(&match); |
1549 | if (ps->n_ipv4_addrs || no_ip) { | |
1550 | ds_put_format(&match, | |
1551 | "inport == %s && eth.src == %s && arp.sha == %s", | |
1552 | op->json_key, ps->ea_s, ps->ea_s); | |
685f4dfe | 1553 | |
e93b43d6 JP |
1554 | if (ps->n_ipv4_addrs) { |
1555 | ds_put_cstr(&match, " && arp.spa == {"); | |
f95523c0 | 1556 | for (size_t j = 0; j < ps->n_ipv4_addrs; j++) { |
7d9d86ad NS |
1557 | /* When the netmask is applied, if the host portion is |
1558 | * non-zero, the host can only use the specified | |
1559 | * address in the arp.spa. If zero, the host is allowed | |
1560 | * to use any address in the subnet. */ | |
f95523c0 JP |
1561 | if (ps->ipv4_addrs[j].plen == 32 |
1562 | || ps->ipv4_addrs[j].addr & ~ps->ipv4_addrs[j].mask) { | |
1563 | ds_put_cstr(&match, ps->ipv4_addrs[j].addr_s); | |
7d9d86ad | 1564 | } else { |
e93b43d6 | 1565 | ds_put_format(&match, "%s/%d", |
f95523c0 JP |
1566 | ps->ipv4_addrs[j].network_s, |
1567 | ps->ipv4_addrs[j].plen); | |
7d9d86ad | 1568 | } |
e93b43d6 | 1569 | ds_put_cstr(&match, ", "); |
685f4dfe NS |
1570 | } |
1571 | ds_chomp(&match, ' '); | |
e93b43d6 JP |
1572 | ds_chomp(&match, ','); |
1573 | ds_put_cstr(&match, "}"); | |
685f4dfe NS |
1574 | } |
1575 | ovn_lflow_add(lflows, op->od, S_SWITCH_IN_PORT_SEC_ND, 90, | |
1576 | ds_cstr(&match), "next;"); | |
685f4dfe NS |
1577 | } |
1578 | ||
e93b43d6 JP |
1579 | if (ps->n_ipv6_addrs || no_ip) { |
1580 | ds_clear(&match); | |
1581 | ds_put_format(&match, "inport == %s && eth.src == %s", | |
1582 | op->json_key, ps->ea_s); | |
1583 | build_port_security_ipv6_nd_flow(&match, ps->ea, ps->ipv6_addrs, | |
1584 | ps->n_ipv6_addrs); | |
685f4dfe NS |
1585 | ovn_lflow_add(lflows, op->od, S_SWITCH_IN_PORT_SEC_ND, 90, |
1586 | ds_cstr(&match), "next;"); | |
685f4dfe | 1587 | } |
685f4dfe NS |
1588 | } |
1589 | ||
e93b43d6 JP |
1590 | ds_clear(&match); |
1591 | ds_put_format(&match, "inport == %s && (arp || nd)", op->json_key); | |
685f4dfe | 1592 | ovn_lflow_add(lflows, op->od, S_SWITCH_IN_PORT_SEC_ND, 80, |
e93b43d6 JP |
1593 | ds_cstr(&match), "drop;"); |
1594 | ds_destroy(&match); | |
685f4dfe NS |
1595 | } |
1596 | ||
1597 | /** | |
1598 | * Build port security constraints on IPv4 and IPv6 src and dst fields | |
1599 | * and add logical flows to S_SWITCH_(IN/OUT)_PORT_SEC_IP stage. | |
1600 | * | |
1601 | * For each port security of the logical port, following | |
1602 | * logical flows are added | |
1603 | * - If the port security has IPv4 addresses, | |
1604 | * - Priority 90 flow to allow IPv4 packets for known IPv4 addresses | |
1605 | * | |
1606 | * - If the port security has IPv6 addresses, | |
1607 | * - Priority 90 flow to allow IPv6 packets for known IPv6 addresses | |
1608 | * | |
1609 | * - If the port security has IPv4 addresses or IPv6 addresses or both | |
1610 | * - Priority 80 flow to drop all IPv4 and IPv6 traffic | |
1611 | */ | |
1612 | static void | |
1613 | build_port_security_ip(enum ovn_pipeline pipeline, struct ovn_port *op, | |
1614 | struct hmap *lflows) | |
1615 | { | |
1616 | char *port_direction; | |
1617 | enum ovn_stage stage; | |
1618 | if (pipeline == P_IN) { | |
1619 | port_direction = "inport"; | |
1620 | stage = S_SWITCH_IN_PORT_SEC_IP; | |
1621 | } else { | |
1622 | port_direction = "outport"; | |
1623 | stage = S_SWITCH_OUT_PORT_SEC_IP; | |
1624 | } | |
1625 | ||
e93b43d6 JP |
1626 | for (size_t i = 0; i < op->n_ps_addrs; i++) { |
1627 | struct lport_addresses *ps = &op->ps_addrs[i]; | |
685f4dfe | 1628 | |
e93b43d6 | 1629 | if (!(ps->n_ipv4_addrs || ps->n_ipv6_addrs)) { |
685f4dfe NS |
1630 | continue; |
1631 | } | |
1632 | ||
e93b43d6 | 1633 | if (ps->n_ipv4_addrs) { |
685f4dfe NS |
1634 | struct ds match = DS_EMPTY_INITIALIZER; |
1635 | if (pipeline == P_IN) { | |
9e687b23 DL |
1636 | /* Permit use of the unspecified address for DHCP discovery */ |
1637 | struct ds dhcp_match = DS_EMPTY_INITIALIZER; | |
1638 | ds_put_format(&dhcp_match, "inport == %s" | |
e93b43d6 | 1639 | " && eth.src == %s" |
9e687b23 DL |
1640 | " && ip4.src == 0.0.0.0" |
1641 | " && ip4.dst == 255.255.255.255" | |
e93b43d6 JP |
1642 | " && udp.src == 68 && udp.dst == 67", |
1643 | op->json_key, ps->ea_s); | |
9e687b23 DL |
1644 | ovn_lflow_add(lflows, op->od, stage, 90, |
1645 | ds_cstr(&dhcp_match), "next;"); | |
1646 | ds_destroy(&dhcp_match); | |
e93b43d6 | 1647 | ds_put_format(&match, "inport == %s && eth.src == %s" |
9e687b23 | 1648 | " && ip4.src == {", op->json_key, |
e93b43d6 | 1649 | ps->ea_s); |
685f4dfe | 1650 | } else { |
e93b43d6 | 1651 | ds_put_format(&match, "outport == %s && eth.dst == %s" |
685f4dfe | 1652 | " && ip4.dst == {255.255.255.255, 224.0.0.0/4, ", |
e93b43d6 | 1653 | op->json_key, ps->ea_s); |
685f4dfe NS |
1654 | } |
1655 | ||
f95523c0 JP |
1656 | for (int j = 0; j < ps->n_ipv4_addrs; j++) { |
1657 | ovs_be32 mask = ps->ipv4_addrs[j].mask; | |
7d9d86ad NS |
1658 | /* When the netmask is applied, if the host portion is |
1659 | * non-zero, the host can only use the specified | |
1660 | * address. If zero, the host is allowed to use any | |
1661 | * address in the subnet. | |
e93b43d6 | 1662 | */ |
f95523c0 JP |
1663 | if (ps->ipv4_addrs[j].plen == 32 |
1664 | || ps->ipv4_addrs[j].addr & ~mask) { | |
1665 | ds_put_format(&match, "%s", ps->ipv4_addrs[j].addr_s); | |
1666 | if (pipeline == P_OUT && ps->ipv4_addrs[j].plen != 32) { | |
e93b43d6 JP |
1667 | /* Host is also allowed to receive packets to the |
1668 | * broadcast address in the specified subnet. */ | |
1669 | ds_put_format(&match, ", %s", | |
f95523c0 | 1670 | ps->ipv4_addrs[j].bcast_s); |
7d9d86ad NS |
1671 | } |
1672 | } else { | |
1673 | /* host portion is zero */ | |
f95523c0 JP |
1674 | ds_put_format(&match, "%s/%d", ps->ipv4_addrs[j].network_s, |
1675 | ps->ipv4_addrs[j].plen); | |
7d9d86ad NS |
1676 | } |
1677 | ds_put_cstr(&match, ", "); | |
685f4dfe NS |
1678 | } |
1679 | ||
1680 | /* Replace ", " by "}". */ | |
1681 | ds_chomp(&match, ' '); | |
1682 | ds_chomp(&match, ','); | |
1683 | ds_put_cstr(&match, "}"); | |
1684 | ovn_lflow_add(lflows, op->od, stage, 90, ds_cstr(&match), "next;"); | |
1685 | ds_destroy(&match); | |
685f4dfe NS |
1686 | } |
1687 | ||
e93b43d6 | 1688 | if (ps->n_ipv6_addrs) { |
685f4dfe | 1689 | struct ds match = DS_EMPTY_INITIALIZER; |
9e687b23 DL |
1690 | if (pipeline == P_IN) { |
1691 | /* Permit use of unspecified address for duplicate address | |
1692 | * detection */ | |
1693 | struct ds dad_match = DS_EMPTY_INITIALIZER; | |
1694 | ds_put_format(&dad_match, "inport == %s" | |
e93b43d6 | 1695 | " && eth.src == %s" |
9e687b23 DL |
1696 | " && ip6.src == ::" |
1697 | " && ip6.dst == ff02::/16" | |
1698 | " && icmp6.type == {131, 135, 143}", op->json_key, | |
e93b43d6 | 1699 | ps->ea_s); |
9e687b23 DL |
1700 | ovn_lflow_add(lflows, op->od, stage, 90, |
1701 | ds_cstr(&dad_match), "next;"); | |
1702 | ds_destroy(&dad_match); | |
1703 | } | |
e93b43d6 | 1704 | ds_put_format(&match, "%s == %s && %s == %s", |
685f4dfe | 1705 | port_direction, op->json_key, |
e93b43d6 JP |
1706 | pipeline == P_IN ? "eth.src" : "eth.dst", ps->ea_s); |
1707 | build_port_security_ipv6_flow(pipeline, &match, ps->ea, | |
1708 | ps->ipv6_addrs, ps->n_ipv6_addrs); | |
685f4dfe NS |
1709 | ovn_lflow_add(lflows, op->od, stage, 90, |
1710 | ds_cstr(&match), "next;"); | |
1711 | ds_destroy(&match); | |
685f4dfe NS |
1712 | } |
1713 | ||
e93b43d6 JP |
1714 | char *match = xasprintf("%s == %s && %s == %s && ip", |
1715 | port_direction, op->json_key, | |
1716 | pipeline == P_IN ? "eth.src" : "eth.dst", | |
1717 | ps->ea_s); | |
685f4dfe NS |
1718 | ovn_lflow_add(lflows, op->od, stage, 80, match, "drop;"); |
1719 | free(match); | |
1720 | } | |
f2a715b5 | 1721 | |
685f4dfe NS |
1722 | } |
1723 | ||
95a9a275 | 1724 | static bool |
80f408f4 | 1725 | lsp_is_enabled(const struct nbrec_logical_switch_port *lsp) |
95a9a275 | 1726 | { |
80f408f4 | 1727 | return !lsp->enabled || *lsp->enabled; |
95a9a275 RB |
1728 | } |
1729 | ||
4c7bf534 | 1730 | static bool |
80f408f4 | 1731 | lsp_is_up(const struct nbrec_logical_switch_port *lsp) |
4c7bf534 | 1732 | { |
80f408f4 | 1733 | return !lsp->up || *lsp->up; |
4c7bf534 NS |
1734 | } |
1735 | ||
281977f7 NS |
1736 | static bool |
1737 | build_dhcpv4_action(struct ovn_port *op, ovs_be32 offer_ip, | |
1738 | struct ds *options_action, struct ds *response_action) | |
1739 | { | |
1740 | if (!op->nbsp->dhcpv4_options) { | |
1741 | /* CMS has disabled native DHCPv4 for this lport. */ | |
1742 | return false; | |
1743 | } | |
1744 | ||
1745 | ovs_be32 host_ip, mask; | |
1746 | char *error = ip_parse_masked(op->nbsp->dhcpv4_options->cidr, &host_ip, | |
1747 | &mask); | |
1748 | if (error || ((offer_ip ^ host_ip) & mask)) { | |
1749 | /* Either | |
1750 | * - cidr defined is invalid or | |
1751 | * - the offer ip of the logical port doesn't belong to the cidr | |
1752 | * defined in the DHCPv4 options. | |
1753 | * */ | |
1754 | free(error); | |
1755 | return false; | |
1756 | } | |
1757 | ||
1758 | const char *server_ip = smap_get( | |
1759 | &op->nbsp->dhcpv4_options->options, "server_id"); | |
1760 | const char *server_mac = smap_get( | |
1761 | &op->nbsp->dhcpv4_options->options, "server_mac"); | |
1762 | const char *lease_time = smap_get( | |
1763 | &op->nbsp->dhcpv4_options->options, "lease_time"); | |
1764 | const char *router = smap_get( | |
1765 | &op->nbsp->dhcpv4_options->options, "router"); | |
1766 | ||
1767 | if (!(server_ip && server_mac && lease_time && router)) { | |
1768 | /* "server_id", "server_mac", "lease_time" and "router" should be | |
1769 | * present in the dhcp_options. */ | |
1770 | static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); | |
1771 | VLOG_WARN_RL(&rl, "Required DHCPv4 options not defined for lport - %s", | |
1772 | op->json_key); | |
1773 | return false; | |
1774 | } | |
1775 | ||
1776 | struct smap dhcpv4_options = SMAP_INITIALIZER(&dhcpv4_options); | |
1777 | smap_clone(&dhcpv4_options, &op->nbsp->dhcpv4_options->options); | |
1778 | ||
1779 | /* server_mac is not DHCPv4 option, delete it from the smap. */ | |
1780 | smap_remove(&dhcpv4_options, "server_mac"); | |
1781 | char *netmask = xasprintf(IP_FMT, IP_ARGS(mask)); | |
1782 | smap_add(&dhcpv4_options, "netmask", netmask); | |
1783 | free(netmask); | |
1784 | ||
1785 | ds_put_format(options_action, | |
1786 | REGBIT_DHCP_OPTS_RESULT" = put_dhcp_opts(offerip = " | |
1787 | IP_FMT", ", IP_ARGS(offer_ip)); | |
1788 | struct smap_node *node; | |
1789 | SMAP_FOR_EACH(node, &dhcpv4_options) { | |
1790 | ds_put_format(options_action, "%s = %s, ", node->key, node->value); | |
1791 | } | |
1792 | ||
1793 | ds_chomp(options_action, ' '); | |
1794 | ds_chomp(options_action, ','); | |
1795 | ds_put_cstr(options_action, "); next;"); | |
1796 | ||
1797 | ds_put_format(response_action, "eth.dst = eth.src; eth.src = %s; " | |
1798 | "ip4.dst = "IP_FMT"; ip4.src = %s; udp.src = 67; " | |
bf143492 JP |
1799 | "udp.dst = 68; outport = inport; flags.loopback = 1; " |
1800 | "output;", | |
281977f7 NS |
1801 | server_mac, IP_ARGS(offer_ip), server_ip); |
1802 | ||
1803 | smap_destroy(&dhcpv4_options); | |
1804 | return true; | |
1805 | } | |
1806 | ||
33ac3c83 NS |
1807 | static bool |
1808 | build_dhcpv6_action(struct ovn_port *op, struct in6_addr *offer_ip, | |
1809 | struct ds *options_action, struct ds *response_action) | |
1810 | { | |
1811 | if (!op->nbsp->dhcpv6_options) { | |
1812 | /* CMS has disabled native DHCPv6 for this lport. */ | |
1813 | return false; | |
1814 | } | |
1815 | ||
1816 | struct in6_addr host_ip, mask; | |
1817 | ||
1818 | char *error = ipv6_parse_masked(op->nbsp->dhcpv6_options->cidr, &host_ip, | |
1819 | &mask); | |
1820 | if (error) { | |
1821 | free(error); | |
1822 | return false; | |
1823 | } | |
1824 | struct in6_addr ip6_mask = ipv6_addr_bitxor(offer_ip, &host_ip); | |
1825 | ip6_mask = ipv6_addr_bitand(&ip6_mask, &mask); | |
1826 | if (!ipv6_mask_is_any(&ip6_mask)) { | |
1827 | /* offer_ip doesn't belongs to the cidr defined in lport's DHCPv6 | |
1828 | * options.*/ | |
1829 | return false; | |
1830 | } | |
1831 | ||
1832 | /* "server_id" should be the MAC address. */ | |
1833 | const char *server_mac = smap_get(&op->nbsp->dhcpv6_options->options, | |
1834 | "server_id"); | |
1835 | struct eth_addr ea; | |
1836 | if (!server_mac || !eth_addr_from_string(server_mac, &ea)) { | |
1837 | /* "server_id" should be present in the dhcpv6_options. */ | |
1838 | static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1); | |
1839 | VLOG_WARN_RL(&rl, "server_id not present in the DHCPv6 options" | |
1840 | " for lport %s", op->json_key); | |
1841 | return false; | |
1842 | } | |
1843 | ||
1844 | /* Get the link local IP of the DHCPv6 server from the server MAC. */ | |
1845 | struct in6_addr lla; | |
1846 | in6_generate_lla(ea, &lla); | |
1847 | ||
1848 | char server_ip[INET6_ADDRSTRLEN + 1]; | |
1849 | ipv6_string_mapped(server_ip, &lla); | |
1850 | ||
1851 | char ia_addr[INET6_ADDRSTRLEN + 1]; | |
1852 | ipv6_string_mapped(ia_addr, offer_ip); | |
1853 | ||
1854 | ds_put_format(options_action, | |
1855 | REGBIT_DHCP_OPTS_RESULT" = put_dhcpv6_opts(ia_addr = %s, ", | |
1856 | ia_addr); | |
1857 | struct smap_node *node; | |
1858 | SMAP_FOR_EACH (node, &op->nbsp->dhcpv6_options->options) { | |
1859 | ds_put_format(options_action, "%s = %s, ", node->key, node->value); | |
1860 | } | |
1861 | ds_chomp(options_action, ' '); | |
1862 | ds_chomp(options_action, ','); | |
1863 | ds_put_cstr(options_action, "); next;"); | |
1864 | ||
1865 | ds_put_format(response_action, "eth.dst = eth.src; eth.src = %s; " | |
1866 | "ip6.dst = ip6.src; ip6.src = %s; udp.src = 547; " | |
1867 | "udp.dst = 546; outport = inport; flags.loopback = 1; " | |
1868 | "output;", | |
1869 | server_mac, server_ip); | |
1870 | return true; | |
1871 | } | |
1872 | ||
78aab811 JP |
1873 | static bool |
1874 | has_stateful_acl(struct ovn_datapath *od) | |
1875 | { | |
9975d7be BP |
1876 | for (size_t i = 0; i < od->nbs->n_acls; i++) { |
1877 | struct nbrec_acl *acl = od->nbs->acls[i]; | |
78aab811 JP |
1878 | if (!strcmp(acl->action, "allow-related")) { |
1879 | return true; | |
1880 | } | |
1881 | } | |
1882 | ||
1883 | return false; | |
1884 | } | |
1885 | ||
1886 | static void | |
2d018f9b GS |
1887 | build_pre_acls(struct ovn_datapath *od, struct hmap *lflows, |
1888 | struct hmap *ports) | |
78aab811 JP |
1889 | { |
1890 | bool has_stateful = has_stateful_acl(od); | |
48fcdb47 | 1891 | struct ovn_port *op; |
78aab811 JP |
1892 | |
1893 | /* Ingress and Egress Pre-ACL Table (Priority 0): Packets are | |
1894 | * allowed by default. */ | |
880fcd14 BP |
1895 | ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 0, "1", "next;"); |
1896 | ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 0, "1", "next;"); | |
78aab811 | 1897 | |
78aab811 JP |
1898 | /* If there are any stateful ACL rules in this dapapath, we must |
1899 | * send all IP packets through the conntrack action, which handles | |
1900 | * defragmentation, in order to match L4 headers. */ | |
1901 | if (has_stateful) { | |
48fcdb47 | 1902 | HMAP_FOR_EACH (op, key_node, ports) { |
0ee00741 | 1903 | if (op->od == od && !strcmp(op->nbsp->type, "router")) { |
501f95e1 JP |
1904 | /* Can't use ct() for router ports. Consider the |
1905 | * following configuration: lp1(10.0.0.2) on | |
1906 | * hostA--ls1--lr0--ls2--lp2(10.0.1.2) on hostB, For a | |
1907 | * ping from lp1 to lp2, First, the response will go | |
1908 | * through ct() with a zone for lp2 in the ls2 ingress | |
1909 | * pipeline on hostB. That ct zone knows about this | |
1910 | * connection. Next, it goes through ct() with the zone | |
1911 | * for the router port in the egress pipeline of ls2 on | |
1912 | * hostB. This zone does not know about the connection, | |
1913 | * as the icmp request went through the logical router | |
1914 | * on hostA, not hostB. This would only work with | |
1915 | * distributed conntrack state across all chassis. */ | |
1916 | struct ds match_in = DS_EMPTY_INITIALIZER; | |
1917 | struct ds match_out = DS_EMPTY_INITIALIZER; | |
1918 | ||
48fcdb47 WL |
1919 | ds_put_format(&match_in, "ip && inport == %s", op->json_key); |
1920 | ds_put_format(&match_out, "ip && outport == %s", op->json_key); | |
501f95e1 JP |
1921 | ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 110, |
1922 | ds_cstr(&match_in), "next;"); | |
1923 | ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 110, | |
1924 | ds_cstr(&match_out), "next;"); | |
48fcdb47 WL |
1925 | |
1926 | ds_destroy(&match_in); | |
1927 | ds_destroy(&match_out); | |
1928 | } | |
1929 | } | |
2d018f9b GS |
1930 | /* Ingress and Egress Pre-ACL Table (Priority 110). |
1931 | * | |
1932 | * Not to do conntrack on ND packets. */ | |
1933 | ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 110, "nd", "next;"); | |
1934 | ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 110, "nd", "next;"); | |
48fcdb47 | 1935 | |
78aab811 JP |
1936 | /* Ingress and Egress Pre-ACL Table (Priority 100). |
1937 | * | |
1938 | * Regardless of whether the ACL is "from-lport" or "to-lport", | |
1939 | * we need rules in both the ingress and egress table, because | |
facf8652 GS |
1940 | * the return traffic needs to be followed. |
1941 | * | |
1942 | * 'REGBIT_CONNTRACK_DEFRAG' is set to let the pre-stateful table send | |
1943 | * it to conntrack for tracking and defragmentation. */ | |
1944 | ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 100, "ip", | |
1945 | REGBIT_CONNTRACK_DEFRAG" = 1; next;"); | |
1946 | ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 100, "ip", | |
1947 | REGBIT_CONNTRACK_DEFRAG" = 1; next;"); | |
2d018f9b GS |
1948 | } |
1949 | } | |
78aab811 | 1950 | |
7a15be69 GS |
1951 | /* For a 'key' of the form "IP:port" or just "IP", sets 'port' and |
1952 | * 'ip_address'. The caller must free() the memory allocated for | |
1953 | * 'ip_address'. */ | |
1954 | static void | |
1955 | ip_address_and_port_from_lb_key(const char *key, char **ip_address, | |
1956 | uint16_t *port) | |
1957 | { | |
1958 | char *ip_str, *start, *next; | |
1959 | *ip_address = NULL; | |
1960 | *port = 0; | |
1961 | ||
1962 | next = start = xstrdup(key); | |
1963 | ip_str = strsep(&next, ":"); | |
1964 | if (!ip_str || !ip_str[0]) { | |
1965 | static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1); | |
1966 | VLOG_WARN_RL(&rl, "bad ip address for load balancer key %s", key); | |
1967 | free(start); | |
1968 | return; | |
1969 | } | |
1970 | ||
1971 | ovs_be32 ip, mask; | |
1972 | char *error = ip_parse_masked(ip_str, &ip, &mask); | |
1973 | if (error || mask != OVS_BE32_MAX) { | |
1974 | static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1); | |
1975 | VLOG_WARN_RL(&rl, "bad ip address for load balancer key %s", key); | |
1976 | free(start); | |
1977 | free(error); | |
1978 | return; | |
1979 | } | |
1980 | ||
1981 | int l4_port = 0; | |
1982 | if (next && next[0]) { | |
1983 | if (!str_to_int(next, 0, &l4_port) || l4_port < 0 || l4_port > 65535) { | |
1984 | static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1); | |
1985 | VLOG_WARN_RL(&rl, "bad ip port for load balancer key %s", key); | |
1986 | free(start); | |
1987 | return; | |
1988 | } | |
1989 | } | |
1990 | ||
1991 | *port = l4_port; | |
1992 | *ip_address = strdup(ip_str); | |
1993 | free(start); | |
1994 | } | |
1995 | ||
1996 | static void | |
1997 | build_pre_lb(struct ovn_datapath *od, struct hmap *lflows) | |
1998 | { | |
1999 | /* Allow all packets to go to next tables by default. */ | |
2000 | ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_LB, 0, "1", "next;"); | |
2001 | ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_LB, 0, "1", "next;"); | |
2002 | ||
2003 | struct sset all_ips = SSET_INITIALIZER(&all_ips); | |
61591ad9 GS |
2004 | bool vip_configured = false; |
2005 | for (int i = 0; i < od->nbs->n_load_balancer; i++) { | |
2006 | struct nbrec_load_balancer *lb = od->nbs->load_balancer[i]; | |
7a15be69 GS |
2007 | struct smap *vips = &lb->vips; |
2008 | struct smap_node *node; | |
7a15be69 GS |
2009 | |
2010 | SMAP_FOR_EACH (node, vips) { | |
2011 | vip_configured = true; | |
2012 | ||
2013 | /* node->key contains IP:port or just IP. */ | |
2014 | char *ip_address = NULL; | |
2015 | uint16_t port; | |
2016 | ip_address_and_port_from_lb_key(node->key, &ip_address, &port); | |
2017 | if (!ip_address) { | |
2018 | continue; | |
2019 | } | |
2020 | ||
2021 | if (!sset_contains(&all_ips, ip_address)) { | |
2022 | sset_add(&all_ips, ip_address); | |
2023 | } | |
2024 | ||
2025 | free(ip_address); | |
2026 | ||
2027 | /* Ignore L4 port information in the key because fragmented packets | |
2028 | * may not have L4 information. The pre-stateful table will send | |
2029 | * the packet through ct() action to de-fragment. In stateful | |
2030 | * table, we will eventually look at L4 information. */ | |
2031 | } | |
61591ad9 | 2032 | } |
7a15be69 | 2033 | |
61591ad9 GS |
2034 | /* 'REGBIT_CONNTRACK_DEFRAG' is set to let the pre-stateful table send |
2035 | * packet to conntrack for defragmentation. */ | |
2036 | const char *ip_address; | |
2037 | SSET_FOR_EACH(ip_address, &all_ips) { | |
2038 | char *match = xasprintf("ip && ip4.dst == %s", ip_address); | |
2039 | ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_LB, | |
2040 | 100, match, REGBIT_CONNTRACK_DEFRAG" = 1; next;"); | |
2041 | free(match); | |
2042 | } | |
7a15be69 | 2043 | |
61591ad9 | 2044 | sset_destroy(&all_ips); |
7a15be69 | 2045 | |
61591ad9 GS |
2046 | if (vip_configured) { |
2047 | ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_LB, | |
2048 | 100, "ip", REGBIT_CONNTRACK_DEFRAG" = 1; next;"); | |
7a15be69 GS |
2049 | } |
2050 | } | |
2051 | ||
facf8652 GS |
2052 | static void |
2053 | build_pre_stateful(struct ovn_datapath *od, struct hmap *lflows) | |
2054 | { | |
2055 | /* Ingress and Egress pre-stateful Table (Priority 0): Packets are | |
2056 | * allowed by default. */ | |
2057 | ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_STATEFUL, 0, "1", "next;"); | |
2058 | ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_STATEFUL, 0, "1", "next;"); | |
2059 | ||
2060 | /* If REGBIT_CONNTRACK_DEFRAG is set as 1, then the packets should be | |
2061 | * sent to conntrack for tracking and defragmentation. */ | |
2062 | ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_STATEFUL, 100, | |
2063 | REGBIT_CONNTRACK_DEFRAG" == 1", "ct_next;"); | |
2064 | ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_STATEFUL, 100, | |
2065 | REGBIT_CONNTRACK_DEFRAG" == 1", "ct_next;"); | |
2066 | } | |
2067 | ||
2d018f9b GS |
2068 | static void |
2069 | build_acls(struct ovn_datapath *od, struct hmap *lflows) | |
2070 | { | |
2071 | bool has_stateful = has_stateful_acl(od); | |
e75451fe | 2072 | |
2d018f9b GS |
2073 | /* Ingress and Egress ACL Table (Priority 0): Packets are allowed by |
2074 | * default. A related rule at priority 1 is added below if there | |
2075 | * are any stateful ACLs in this datapath. */ | |
2076 | ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, 0, "1", "next;"); | |
2077 | ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, 0, "1", "next;"); | |
2078 | ||
2079 | if (has_stateful) { | |
78aab811 JP |
2080 | /* Ingress and Egress ACL Table (Priority 1). |
2081 | * | |
2082 | * By default, traffic is allowed. This is partially handled by | |
2083 | * the Priority 0 ACL flows added earlier, but we also need to | |
2084 | * commit IP flows. This is because, while the initiater's | |
2085 | * direction may not have any stateful rules, the server's may | |
2086 | * and then its return traffic would not have an associated | |
cc58e1f2 RB |
2087 | * conntrack entry and would return "+invalid". |
2088 | * | |
2089 | * We use "ct_commit" for a connection that is not already known | |
2090 | * by the connection tracker. Once a connection is committed, | |
2091 | * subsequent packets will hit the flow at priority 0 that just | |
2092 | * uses "next;" | |
2093 | * | |
2094 | * We also check for established connections that have ct_label[0] | |
2095 | * set on them. That's a connection that was disallowed, but is | |
2096 | * now allowed by policy again since it hit this default-allow flow. | |
2097 | * We need to set ct_label[0]=0 to let the connection continue, | |
2098 | * which will be done by ct_commit() in the "stateful" stage. | |
2099 | * Subsequent packets will hit the flow at priority 0 that just | |
2100 | * uses "next;". */ | |
2101 | ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, 1, | |
2102 | "ip && (!ct.est || (ct.est && ct_label[0] == 1))", | |
2103 | REGBIT_CONNTRACK_COMMIT" = 1; next;"); | |
2104 | ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, 1, | |
2105 | "ip && (!ct.est || (ct.est && ct_label[0] == 1))", | |
2106 | REGBIT_CONNTRACK_COMMIT" = 1; next;"); | |
78aab811 JP |
2107 | |
2108 | /* Ingress and Egress ACL Table (Priority 65535). | |
2109 | * | |
cc58e1f2 RB |
2110 | * Always drop traffic that's in an invalid state. Also drop |
2111 | * reply direction packets for connections that have been marked | |
2112 | * for deletion (bit 0 of ct_label is set). | |
2113 | * | |
2114 | * This is enforced at a higher priority than ACLs can be defined. */ | |
880fcd14 | 2115 | ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX, |
cc58e1f2 RB |
2116 | "ct.inv || (ct.est && ct.rpl && ct_label[0] == 1)", |
2117 | "drop;"); | |
880fcd14 | 2118 | ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX, |
cc58e1f2 RB |
2119 | "ct.inv || (ct.est && ct.rpl && ct_label[0] == 1)", |
2120 | "drop;"); | |
78aab811 JP |
2121 | |
2122 | /* Ingress and Egress ACL Table (Priority 65535). | |
2123 | * | |
cc58e1f2 RB |
2124 | * Allow reply traffic that is part of an established |
2125 | * conntrack entry that has not been marked for deletion | |
2126 | * (bit 0 of ct_label). We only match traffic in the | |
2127 | * reply direction because we want traffic in the request | |
2128 | * direction to hit the currently defined policy from ACLs. | |
2129 | * | |
2130 | * This is enforced at a higher priority than ACLs can be defined. */ | |
880fcd14 | 2131 | ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX, |
cc58e1f2 RB |
2132 | "ct.est && !ct.rel && !ct.new && !ct.inv " |
2133 | "&& ct.rpl && ct_label[0] == 0", | |
78aab811 | 2134 | "next;"); |
880fcd14 | 2135 | ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX, |
cc58e1f2 RB |
2136 | "ct.est && !ct.rel && !ct.new && !ct.inv " |
2137 | "&& ct.rpl && ct_label[0] == 0", | |
78aab811 JP |
2138 | "next;"); |
2139 | ||
2140 | /* Ingress and Egress ACL Table (Priority 65535). | |
2141 | * | |
cc58e1f2 RB |
2142 | * Allow traffic that is related to an existing conntrack entry that |
2143 | * has not been marked for deletion (bit 0 of ct_label). | |
2144 | * | |
2145 | * This is enforced at a higher priority than ACLs can be defined. | |
78aab811 JP |
2146 | * |
2147 | * NOTE: This does not support related data sessions (eg, | |
2148 | * a dynamically negotiated FTP data channel), but will allow | |
2149 | * related traffic such as an ICMP Port Unreachable through | |
2150 | * that's generated from a non-listening UDP port. */ | |
880fcd14 | 2151 | ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX, |
cc58e1f2 RB |
2152 | "!ct.est && ct.rel && !ct.new && !ct.inv " |
2153 | "&& ct_label[0] == 0", | |
78aab811 | 2154 | "next;"); |
880fcd14 | 2155 | ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX, |
cc58e1f2 RB |
2156 | "!ct.est && ct.rel && !ct.new && !ct.inv " |
2157 | "&& ct_label[0] == 0", | |
78aab811 | 2158 | "next;"); |
e75451fe ZKL |
2159 | |
2160 | /* Ingress and Egress ACL Table (Priority 65535). | |
2161 | * | |
2162 | * Not to do conntrack on ND packets. */ | |
2163 | ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX, "nd", "next;"); | |
2164 | ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX, "nd", "next;"); | |
78aab811 JP |
2165 | } |
2166 | ||
2167 | /* Ingress or Egress ACL Table (Various priorities). */ | |
9975d7be BP |
2168 | for (size_t i = 0; i < od->nbs->n_acls; i++) { |
2169 | struct nbrec_acl *acl = od->nbs->acls[i]; | |
78aab811 | 2170 | bool ingress = !strcmp(acl->direction, "from-lport") ? true :false; |
880fcd14 | 2171 | enum ovn_stage stage = ingress ? S_SWITCH_IN_ACL : S_SWITCH_OUT_ACL; |
78aab811 | 2172 | |
cc58e1f2 RB |
2173 | if (!strcmp(acl->action, "allow") |
2174 | || !strcmp(acl->action, "allow-related")) { | |
78aab811 JP |
2175 | /* If there are any stateful flows, we must even commit "allow" |
2176 | * actions. This is because, while the initiater's | |
2177 | * direction may not have any stateful rules, the server's | |
2178 | * may and then its return traffic would not have an | |
2179 | * associated conntrack entry and would return "+invalid". */ | |
cc58e1f2 RB |
2180 | if (!has_stateful) { |
2181 | ovn_lflow_add(lflows, od, stage, | |
2182 | acl->priority + OVN_ACL_PRI_OFFSET, | |
2183 | acl->match, "next;"); | |
2184 | } else { | |
2185 | struct ds match = DS_EMPTY_INITIALIZER; | |
2186 | ||
2187 | /* Commit the connection tracking entry if it's a new | |
2188 | * connection that matches this ACL. After this commit, | |
2189 | * the reply traffic is allowed by a flow we create at | |
2190 | * priority 65535, defined earlier. | |
2191 | * | |
2192 | * It's also possible that a known connection was marked for | |
2193 | * deletion after a policy was deleted, but the policy was | |
2194 | * re-added while that connection is still known. We catch | |
2195 | * that case here and un-set ct_label[0] (which will be done | |
2196 | * by ct_commit in the "stateful" stage) to indicate that the | |
2197 | * connection should be allowed to resume. | |
2198 | */ | |
2199 | ds_put_format(&match, "((ct.new && !ct.est)" | |
2200 | " || (!ct.new && ct.est && !ct.rpl " | |
2201 | "&& ct_label[0] == 1)) " | |
2202 | "&& (%s)", acl->match); | |
2203 | ovn_lflow_add(lflows, od, stage, | |
2204 | acl->priority + OVN_ACL_PRI_OFFSET, | |
2205 | ds_cstr(&match), | |
2206 | REGBIT_CONNTRACK_COMMIT" = 1; next;"); | |
2207 | ||
2208 | /* Match on traffic in the request direction for an established | |
2209 | * connection tracking entry that has not been marked for | |
2210 | * deletion. There is no need to commit here, so we can just | |
2211 | * proceed to the next table. We use this to ensure that this | |
2212 | * connection is still allowed by the currently defined | |
2213 | * policy. */ | |
2214 | ds_clear(&match); | |
2215 | ds_put_format(&match, | |
2216 | "!ct.new && ct.est && !ct.rpl" | |
2217 | " && ct_label[0] == 0 && (%s)", | |
2218 | acl->match); | |
2219 | ovn_lflow_add(lflows, od, stage, | |
2220 | acl->priority + OVN_ACL_PRI_OFFSET, | |
2221 | ds_cstr(&match), "next;"); | |
2222 | ||
2223 | ds_destroy(&match); | |
2224 | } | |
2225 | } else if (!strcmp(acl->action, "drop") | |
2226 | || !strcmp(acl->action, "reject")) { | |
78aab811 JP |
2227 | struct ds match = DS_EMPTY_INITIALIZER; |
2228 | ||
cc58e1f2 RB |
2229 | /* XXX Need to support "reject", treat it as "drop;" for now. */ |
2230 | if (!strcmp(acl->action, "reject")) { | |
2231 | VLOG_INFO("reject is not a supported action"); | |
2232 | } | |
78aab811 | 2233 | |
cc58e1f2 RB |
2234 | /* The implementation of "drop" differs if stateful ACLs are in |
2235 | * use for this datapath. In that case, the actions differ | |
2236 | * depending on whether the connection was previously committed | |
2237 | * to the connection tracker with ct_commit. */ | |
2238 | if (has_stateful) { | |
2239 | /* If the packet is not part of an established connection, then | |
2240 | * we can simply drop it. */ | |
2241 | ds_put_format(&match, | |
2242 | "(!ct.est || (ct.est && ct_label[0] == 1)) " | |
2243 | "&& (%s)", | |
2244 | acl->match); | |
2245 | ovn_lflow_add(lflows, od, stage, acl->priority + | |
2246 | OVN_ACL_PRI_OFFSET, ds_cstr(&match), "drop;"); | |
2247 | ||
2248 | /* For an existing connection without ct_label set, we've | |
2249 | * encountered a policy change. ACLs previously allowed | |
2250 | * this connection and we committed the connection tracking | |
2251 | * entry. Current policy says that we should drop this | |
2252 | * connection. First, we set bit 0 of ct_label to indicate | |
2253 | * that this connection is set for deletion. By not | |
2254 | * specifying "next;", we implicitly drop the packet after | |
2255 | * updating conntrack state. We would normally defer | |
2256 | * ct_commit() to the "stateful" stage, but since we're | |
2257 | * dropping the packet, we go ahead and do it here. */ | |
2258 | ds_clear(&match); | |
2259 | ds_put_format(&match, | |
2260 | "ct.est && ct_label[0] == 0 && (%s)", | |
2261 | acl->match); | |
2262 | ovn_lflow_add(lflows, od, stage, | |
2263 | acl->priority + OVN_ACL_PRI_OFFSET, | |
2264 | ds_cstr(&match), "ct_commit(ct_label=1/1);"); | |
2265 | ||
2266 | ds_destroy(&match); | |
2267 | } else { | |
2268 | /* There are no stateful ACLs in use on this datapath, | |
2269 | * so a "drop" ACL is simply the "drop" logical flow action | |
2270 | * in all cases. */ | |
2271 | ovn_lflow_add(lflows, od, stage, | |
2272 | acl->priority + OVN_ACL_PRI_OFFSET, | |
2273 | acl->match, "drop;"); | |
2274 | } | |
78aab811 JP |
2275 | } |
2276 | } | |
281977f7 NS |
2277 | |
2278 | /* Add 34000 priority flow to allow DHCP reply from ovn-controller to all | |
2279 | * logical ports of the datapath if the CMS has configured DHCPv4 options*/ | |
2280 | if (od->nbs && od->nbs->n_ports) { | |
2281 | for (size_t i = 0; i < od->nbs->n_ports; i++) { | |
2282 | if (od->nbs->ports[i]->dhcpv4_options) { | |
2283 | const char *server_id = smap_get( | |
2284 | &od->nbs->ports[i]->dhcpv4_options->options, "server_id"); | |
2285 | const char *server_mac = smap_get( | |
2286 | &od->nbs->ports[i]->dhcpv4_options->options, "server_mac"); | |
2287 | const char *lease_time = smap_get( | |
2288 | &od->nbs->ports[i]->dhcpv4_options->options, "lease_time"); | |
2289 | const char *router = smap_get( | |
2290 | &od->nbs->ports[i]->dhcpv4_options->options, "router"); | |
2291 | if (server_id && server_mac && lease_time && router) { | |
2292 | struct ds match = DS_EMPTY_INITIALIZER; | |
2293 | const char *actions = | |
2294 | has_stateful ? "ct_commit; next;" : "next;"; | |
2295 | ds_put_format(&match, "outport == \"%s\" && eth.src == %s " | |
2296 | "&& ip4.src == %s && udp && udp.src == 67 " | |
2297 | "&& udp.dst == 68", od->nbs->ports[i]->name, | |
2298 | server_mac, server_id); | |
2299 | ovn_lflow_add( | |
2300 | lflows, od, S_SWITCH_OUT_ACL, 34000, ds_cstr(&match), | |
2301 | actions); | |
2302 | } | |
2303 | } | |
33ac3c83 NS |
2304 | |
2305 | if (od->nbs->ports[i]->dhcpv6_options) { | |
2306 | const char *server_mac = smap_get( | |
2307 | &od->nbs->ports[i]->dhcpv6_options->options, "server_id"); | |
2308 | struct eth_addr ea; | |
2309 | if (server_mac && eth_addr_from_string(server_mac, &ea)) { | |
2310 | /* Get the link local IP of the DHCPv6 server from the | |
2311 | * server MAC. */ | |
2312 | struct in6_addr lla; | |
2313 | in6_generate_lla(ea, &lla); | |
2314 | ||
2315 | char server_ip[INET6_ADDRSTRLEN + 1]; | |
2316 | ipv6_string_mapped(server_ip, &lla); | |
2317 | ||
2318 | struct ds match = DS_EMPTY_INITIALIZER; | |
2319 | const char *actions = has_stateful ? "ct_commit; next;" : | |
2320 | "next;"; | |
2321 | ds_put_format(&match, "outport == \"%s\" && eth.src == %s " | |
2322 | "&& ip6.src == %s && udp && udp.src == 547 " | |
2323 | "&& udp.dst == 546", od->nbs->ports[i]->name, | |
2324 | server_mac, server_ip); | |
2325 | ovn_lflow_add( | |
2326 | lflows, od, S_SWITCH_OUT_ACL, 34000, ds_cstr(&match), | |
2327 | actions); | |
2328 | } | |
2329 | } | |
281977f7 NS |
2330 | } |
2331 | } | |
78aab811 JP |
2332 | } |
2333 | ||
7a15be69 GS |
2334 | static void |
2335 | build_lb(struct ovn_datapath *od, struct hmap *lflows) | |
2336 | { | |
2337 | /* Ingress and Egress LB Table (Priority 0): Packets are allowed by | |
2338 | * default. */ | |
2339 | ovn_lflow_add(lflows, od, S_SWITCH_IN_LB, 0, "1", "next;"); | |
2340 | ovn_lflow_add(lflows, od, S_SWITCH_OUT_LB, 0, "1", "next;"); | |
2341 | ||
2342 | if (od->nbs->load_balancer) { | |
2343 | /* Ingress and Egress LB Table (Priority 65535). | |
2344 | * | |
2345 | * Send established traffic through conntrack for just NAT. */ | |
2346 | ovn_lflow_add(lflows, od, S_SWITCH_IN_LB, UINT16_MAX, | |
2347 | "ct.est && !ct.rel && !ct.new && !ct.inv", | |
2348 | REGBIT_CONNTRACK_NAT" = 1; next;"); | |
2349 | ovn_lflow_add(lflows, od, S_SWITCH_OUT_LB, UINT16_MAX, | |
2350 | "ct.est && !ct.rel && !ct.new && !ct.inv", | |
2351 | REGBIT_CONNTRACK_NAT" = 1; next;"); | |
2352 | } | |
2353 | } | |
2354 | ||
fa313a8c GS |
2355 | static void |
2356 | build_stateful(struct ovn_datapath *od, struct hmap *lflows) | |
2357 | { | |
2358 | /* Ingress and Egress stateful Table (Priority 0): Packets are | |
2359 | * allowed by default. */ | |
2360 | ovn_lflow_add(lflows, od, S_SWITCH_IN_STATEFUL, 0, "1", "next;"); | |
2361 | ovn_lflow_add(lflows, od, S_SWITCH_OUT_STATEFUL, 0, "1", "next;"); | |
2362 | ||
2363 | /* If REGBIT_CONNTRACK_COMMIT is set as 1, then the packets should be | |
cc58e1f2 RB |
2364 | * committed to conntrack. We always set ct_label[0] to 0 here as |
2365 | * any packet that makes it this far is part of a connection we | |
2366 | * want to allow to continue. */ | |
fa313a8c | 2367 | ovn_lflow_add(lflows, od, S_SWITCH_IN_STATEFUL, 100, |
cc58e1f2 | 2368 | REGBIT_CONNTRACK_COMMIT" == 1", "ct_commit(ct_label=0/1); next;"); |
fa313a8c | 2369 | ovn_lflow_add(lflows, od, S_SWITCH_OUT_STATEFUL, 100, |
cc58e1f2 | 2370 | REGBIT_CONNTRACK_COMMIT" == 1", "ct_commit(ct_label=0/1); next;"); |
7a15be69 GS |
2371 | |
2372 | /* If REGBIT_CONNTRACK_NAT is set as 1, then packets should just be sent | |
2373 | * through nat (without committing). | |
2374 | * | |
2375 | * REGBIT_CONNTRACK_COMMIT is set for new connections and | |
2376 | * REGBIT_CONNTRACK_NAT is set for established connections. So they | |
2377 | * don't overlap. | |
2378 | */ | |
2379 | ovn_lflow_add(lflows, od, S_SWITCH_IN_STATEFUL, 100, | |
2380 | REGBIT_CONNTRACK_NAT" == 1", "ct_lb;"); | |
2381 | ovn_lflow_add(lflows, od, S_SWITCH_OUT_STATEFUL, 100, | |
2382 | REGBIT_CONNTRACK_NAT" == 1", "ct_lb;"); | |
2383 | ||
2384 | /* Load balancing rules for new connections get committed to conntrack | |
2385 | * table. So even if REGBIT_CONNTRACK_COMMIT is set in a previous table | |
2386 | * a higher priority rule for load balancing below also commits the | |
2387 | * connection, so it is okay if we do not hit the above match on | |
2388 | * REGBIT_CONNTRACK_COMMIT. */ | |
61591ad9 GS |
2389 | for (int i = 0; i < od->nbs->n_load_balancer; i++) { |
2390 | struct nbrec_load_balancer *lb = od->nbs->load_balancer[i]; | |
7a15be69 GS |
2391 | struct smap *vips = &lb->vips; |
2392 | struct smap_node *node; | |
2393 | ||
2394 | SMAP_FOR_EACH (node, vips) { | |
2395 | uint16_t port = 0; | |
2396 | ||
2397 | /* node->key contains IP:port or just IP. */ | |
2398 | char *ip_address = NULL; | |
2399 | ip_address_and_port_from_lb_key(node->key, &ip_address, &port); | |
2400 | if (!ip_address) { | |
2401 | continue; | |
2402 | } | |
2403 | ||
2404 | /* New connections in Ingress table. */ | |
2405 | char *action = xasprintf("ct_lb(%s);", node->value); | |
2406 | struct ds match = DS_EMPTY_INITIALIZER; | |
2407 | ds_put_format(&match, "ct.new && ip && ip4.dst == %s", ip_address); | |
2408 | if (port) { | |
2409 | if (lb->protocol && !strcmp(lb->protocol, "udp")) { | |
2410 | ds_put_format(&match, "&& udp && udp.dst == %d", port); | |
2411 | } else { | |
2412 | ds_put_format(&match, "&& tcp && tcp.dst == %d", port); | |
2413 | } | |
2414 | ovn_lflow_add(lflows, od, S_SWITCH_IN_STATEFUL, | |
2415 | 120, ds_cstr(&match), action); | |
2416 | } else { | |
2417 | ovn_lflow_add(lflows, od, S_SWITCH_IN_STATEFUL, | |
2418 | 110, ds_cstr(&match), action); | |
2419 | } | |
2420 | ||
2421 | ds_destroy(&match); | |
2422 | free(action); | |
2423 | } | |
2424 | } | |
fa313a8c GS |
2425 | } |
2426 | ||
bd39395f | 2427 | static void |
9975d7be BP |
2428 | build_lswitch_flows(struct hmap *datapaths, struct hmap *ports, |
2429 | struct hmap *lflows, struct hmap *mcgroups) | |
bd39395f | 2430 | { |
5cff6b99 BP |
2431 | /* This flow table structure is documented in ovn-northd(8), so please |
2432 | * update ovn-northd.8.xml if you change anything. */ | |
2433 | ||
09b39248 JP |
2434 | struct ds match = DS_EMPTY_INITIALIZER; |
2435 | struct ds actions = DS_EMPTY_INITIALIZER; | |
2436 | ||
9975d7be | 2437 | /* Build pre-ACL and ACL tables for both ingress and egress. |
685f4dfe | 2438 | * Ingress tables 3 and 4. Egress tables 0 and 1. */ |
5868eb24 BP |
2439 | struct ovn_datapath *od; |
2440 | HMAP_FOR_EACH (od, key_node, datapaths) { | |
9975d7be BP |
2441 | if (!od->nbs) { |
2442 | continue; | |
2443 | } | |
2444 | ||
2d018f9b | 2445 | build_pre_acls(od, lflows, ports); |
7a15be69 | 2446 | build_pre_lb(od, lflows); |
facf8652 | 2447 | build_pre_stateful(od, lflows); |
2d018f9b | 2448 | build_acls(od, lflows); |
7a15be69 | 2449 | build_lb(od, lflows); |
fa313a8c | 2450 | build_stateful(od, lflows); |
9975d7be BP |
2451 | } |
2452 | ||
2453 | /* Logical switch ingress table 0: Admission control framework (priority | |
2454 | * 100). */ | |
2455 | HMAP_FOR_EACH (od, key_node, datapaths) { | |
2456 | if (!od->nbs) { | |
2457 | continue; | |
2458 | } | |
2459 | ||
bd39395f | 2460 | /* Logical VLANs not supported. */ |
685f4dfe | 2461 | ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC_L2, 100, "vlan.present", |
091e3af9 | 2462 | "drop;"); |
bd39395f BP |
2463 | |
2464 | /* Broadcast/multicast source address is invalid. */ | |
685f4dfe | 2465 | ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC_L2, 100, "eth.src[40]", |
091e3af9 | 2466 | "drop;"); |
bd39395f | 2467 | |
35060cdc BP |
2468 | /* Port security flows have priority 50 (see below) and will continue |
2469 | * to the next table if packet source is acceptable. */ | |
bd39395f BP |
2470 | } |
2471 | ||
685f4dfe NS |
2472 | /* Logical switch ingress table 0: Ingress port security - L2 |
2473 | * (priority 50). | |
2474 | * Ingress table 1: Ingress port security - IP (priority 90 and 80) | |
2475 | * Ingress table 2: Ingress port security - ND (priority 90 and 80) | |
2476 | */ | |
5868eb24 BP |
2477 | struct ovn_port *op; |
2478 | HMAP_FOR_EACH (op, key_node, ports) { | |
0ee00741 | 2479 | if (!op->nbsp) { |
9975d7be BP |
2480 | continue; |
2481 | } | |
2482 | ||
0ee00741 | 2483 | if (!lsp_is_enabled(op->nbsp)) { |
96af668a BP |
2484 | /* Drop packets from disabled logical ports (since logical flow |
2485 | * tables are default-drop). */ | |
2486 | continue; | |
2487 | } | |
2488 | ||
09b39248 | 2489 | ds_clear(&match); |
9975d7be | 2490 | ds_put_format(&match, "inport == %s", op->json_key); |
e93b43d6 JP |
2491 | build_port_security_l2("eth.src", op->ps_addrs, op->n_ps_addrs, |
2492 | &match); | |
685f4dfe | 2493 | ovn_lflow_add(lflows, op->od, S_SWITCH_IN_PORT_SEC_L2, 50, |
96af668a | 2494 | ds_cstr(&match), "next;"); |
685f4dfe | 2495 | |
0ee00741 | 2496 | if (op->nbsp->n_port_security) { |
685f4dfe NS |
2497 | build_port_security_ip(P_IN, op, lflows); |
2498 | build_port_security_nd(op, lflows); | |
2499 | } | |
2500 | } | |
2501 | ||
2502 | /* Ingress table 1 and 2: Port security - IP and ND, by default goto next. | |
2503 | * (priority 0)*/ | |
2504 | HMAP_FOR_EACH (od, key_node, datapaths) { | |
2505 | if (!od->nbs) { | |
2506 | continue; | |
2507 | } | |
2508 | ||
2509 | ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC_ND, 0, "1", "next;"); | |
2510 | ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC_IP, 0, "1", "next;"); | |
5868eb24 | 2511 | } |
445a266a | 2512 | |
281977f7 NS |
2513 | /* Ingress table 9: ARP/ND responder, skip requests coming from localnet |
2514 | * ports. (priority 100). */ | |
fa128126 | 2515 | HMAP_FOR_EACH (op, key_node, ports) { |
0ee00741 | 2516 | if (!op->nbsp) { |
fa128126 HZ |
2517 | continue; |
2518 | } | |
2519 | ||
0ee00741 | 2520 | if (!strcmp(op->nbsp->type, "localnet")) { |
09b39248 JP |
2521 | ds_clear(&match); |
2522 | ds_put_format(&match, "inport == %s", op->json_key); | |
e75451fe | 2523 | ovn_lflow_add(lflows, op->od, S_SWITCH_IN_ARP_ND_RSP, 100, |
09b39248 | 2524 | ds_cstr(&match), "next;"); |
fa128126 HZ |
2525 | } |
2526 | } | |
2527 | ||
94300e09 | 2528 | /* Ingress table 9: ARP/ND responder, reply for known IPs. |
fa128126 | 2529 | * (priority 50). */ |
57d143eb | 2530 | HMAP_FOR_EACH (op, key_node, ports) { |
0ee00741 | 2531 | if (!op->nbsp) { |
57d143eb HZ |
2532 | continue; |
2533 | } | |
2534 | ||
4c7bf534 | 2535 | /* |
e75451fe | 2536 | * Add ARP/ND reply flows if either the |
4c7bf534 NS |
2537 | * - port is up or |
2538 | * - port type is router | |
2539 | */ | |
0ee00741 | 2540 | if (!lsp_is_up(op->nbsp) && strcmp(op->nbsp->type, "router")) { |
4c7bf534 NS |
2541 | continue; |
2542 | } | |
2543 | ||
e93b43d6 JP |
2544 | for (size_t i = 0; i < op->n_lsp_addrs; i++) { |
2545 | for (size_t j = 0; j < op->lsp_addrs[i].n_ipv4_addrs; j++) { | |
09b39248 | 2546 | ds_clear(&match); |
e93b43d6 JP |
2547 | ds_put_format(&match, "arp.tpa == %s && arp.op == 1", |
2548 | op->lsp_addrs[i].ipv4_addrs[j].addr_s); | |
09b39248 JP |
2549 | ds_clear(&actions); |
2550 | ds_put_format(&actions, | |
57d143eb | 2551 | "eth.dst = eth.src; " |
e93b43d6 | 2552 | "eth.src = %s; " |
57d143eb HZ |
2553 | "arp.op = 2; /* ARP reply */ " |
2554 | "arp.tha = arp.sha; " | |
e93b43d6 | 2555 | "arp.sha = %s; " |
57d143eb | 2556 | "arp.tpa = arp.spa; " |
e93b43d6 | 2557 | "arp.spa = %s; " |
57d143eb | 2558 | "outport = inport; " |
bf143492 | 2559 | "flags.loopback = 1; " |
57d143eb | 2560 | "output;", |
e93b43d6 JP |
2561 | op->lsp_addrs[i].ea_s, op->lsp_addrs[i].ea_s, |
2562 | op->lsp_addrs[i].ipv4_addrs[j].addr_s); | |
e75451fe | 2563 | ovn_lflow_add(lflows, op->od, S_SWITCH_IN_ARP_ND_RSP, 50, |
09b39248 | 2564 | ds_cstr(&match), ds_cstr(&actions)); |
57d143eb | 2565 | } |
7dc88496 | 2566 | |
6fdb7cd6 JP |
2567 | /* For ND solicitations, we need to listen for both the |
2568 | * unicast IPv6 address and its all-nodes multicast address, | |
2569 | * but always respond with the unicast IPv6 address. */ | |
2570 | for (size_t j = 0; j < op->lsp_addrs[i].n_ipv6_addrs; j++) { | |
09b39248 | 2571 | ds_clear(&match); |
6fdb7cd6 JP |
2572 | ds_put_format(&match, |
2573 | "nd_ns && ip6.dst == {%s, %s} && nd.target == %s", | |
2574 | op->lsp_addrs[i].ipv6_addrs[j].addr_s, | |
2575 | op->lsp_addrs[i].ipv6_addrs[j].sn_addr_s, | |
2576 | op->lsp_addrs[i].ipv6_addrs[j].addr_s); | |
2577 | ||
09b39248 JP |
2578 | ds_clear(&actions); |
2579 | ds_put_format(&actions, | |
6fdb7cd6 JP |
2580 | "nd_na { " |
2581 | "eth.src = %s; " | |
2582 | "ip6.src = %s; " | |
2583 | "nd.target = %s; " | |
2584 | "nd.tll = %s; " | |
2585 | "outport = inport; " | |
bf143492 | 2586 | "flags.loopback = 1; " |
6fdb7cd6 JP |
2587 | "output; " |
2588 | "};", | |
2589 | op->lsp_addrs[i].ea_s, | |
2590 | op->lsp_addrs[i].ipv6_addrs[j].addr_s, | |
2591 | op->lsp_addrs[i].ipv6_addrs[j].addr_s, | |
2592 | op->lsp_addrs[i].ea_s); | |
e75451fe | 2593 | ovn_lflow_add(lflows, op->od, S_SWITCH_IN_ARP_ND_RSP, 50, |
09b39248 | 2594 | ds_cstr(&match), ds_cstr(&actions)); |
e75451fe | 2595 | } |
57d143eb HZ |
2596 | } |
2597 | } | |
2598 | ||
94300e09 | 2599 | /* Ingress table 9: ARP/ND responder, by default goto next. |
fa128126 HZ |
2600 | * (priority 0)*/ |
2601 | HMAP_FOR_EACH (od, key_node, datapaths) { | |
2602 | if (!od->nbs) { | |
2603 | continue; | |
2604 | } | |
2605 | ||
e75451fe | 2606 | ovn_lflow_add(lflows, od, S_SWITCH_IN_ARP_ND_RSP, 0, "1", "next;"); |
fa128126 HZ |
2607 | } |
2608 | ||
281977f7 NS |
2609 | /* Logical switch ingress table 10 and 11: DHCP options and response |
2610 | * priority 100 flows. */ | |
2611 | HMAP_FOR_EACH (op, key_node, ports) { | |
2612 | if (!op->nbsp) { | |
2613 | continue; | |
2614 | } | |
2615 | ||
2616 | if (!lsp_is_enabled(op->nbsp) || !strcmp(op->nbsp->type, "router")) { | |
2617 | /* Don't add the DHCP flows if the port is not enabled or if the | |
2618 | * port is a router port. */ | |
2619 | continue; | |
2620 | } | |
2621 | ||
33ac3c83 NS |
2622 | if (!op->nbsp->dhcpv4_options && !op->nbsp->dhcpv6_options) { |
2623 | /* CMS has disabled both native DHCPv4 and DHCPv6 for this lport. | |
2624 | */ | |
281977f7 NS |
2625 | continue; |
2626 | } | |
2627 | ||
2628 | for (size_t i = 0; i < op->n_lsp_addrs; i++) { | |
2629 | for (size_t j = 0; j < op->lsp_addrs[i].n_ipv4_addrs; j++) { | |
2630 | struct ds options_action = DS_EMPTY_INITIALIZER; | |
2631 | struct ds response_action = DS_EMPTY_INITIALIZER; | |
2632 | if (build_dhcpv4_action( | |
2633 | op, op->lsp_addrs[i].ipv4_addrs[j].addr, | |
2634 | &options_action, &response_action)) { | |
2635 | struct ds match = DS_EMPTY_INITIALIZER; | |
2636 | ds_put_format( | |
2637 | &match, "inport == %s && eth.src == %s && " | |
2638 | "ip4.src == 0.0.0.0 && ip4.dst == 255.255.255.255 && " | |
2639 | "udp.src == 68 && udp.dst == 67", op->json_key, | |
2640 | op->lsp_addrs[i].ea_s); | |
2641 | ||
2642 | ovn_lflow_add(lflows, op->od, S_SWITCH_IN_DHCP_OPTIONS, | |
2643 | 100, ds_cstr(&match), | |
2644 | ds_cstr(&options_action)); | |
2645 | /* If REGBIT_DHCP_OPTS_RESULT is set, it means the | |
2646 | * put_dhcp_opts action is successful */ | |
2647 | ds_put_cstr(&match, " && "REGBIT_DHCP_OPTS_RESULT); | |
2648 | ovn_lflow_add(lflows, op->od, S_SWITCH_IN_DHCP_RESPONSE, | |
2649 | 100, ds_cstr(&match), | |
2650 | ds_cstr(&response_action)); | |
2651 | ds_destroy(&match); | |
2652 | ds_destroy(&options_action); | |
2653 | ds_destroy(&response_action); | |
2654 | break; | |
2655 | } | |
2656 | } | |
33ac3c83 NS |
2657 | |
2658 | for (size_t j = 0; j < op->lsp_addrs[i].n_ipv6_addrs; j++) { | |
2659 | struct ds options_action = DS_EMPTY_INITIALIZER; | |
2660 | struct ds response_action = DS_EMPTY_INITIALIZER; | |
2661 | if (build_dhcpv6_action( | |
2662 | op, &op->lsp_addrs[i].ipv6_addrs[j].addr, | |
2663 | &options_action, &response_action)) { | |
2664 | struct ds match = DS_EMPTY_INITIALIZER; | |
2665 | ds_put_format( | |
2666 | &match, "inport == %s && eth.src == %s" | |
2667 | " && ip6.dst == ff02::1:2 && udp.src == 546 &&" | |
2668 | " udp.dst == 547", op->json_key, | |
2669 | op->lsp_addrs[i].ea_s); | |
2670 | ||
2671 | ovn_lflow_add(lflows, op->od, S_SWITCH_IN_DHCP_OPTIONS, 100, | |
2672 | ds_cstr(&match), ds_cstr(&options_action)); | |
2673 | ||
2674 | /* If REGBIT_DHCP_OPTS_RESULT is set to 1, it means the | |
2675 | * put_dhcpv6_opts action is successful */ | |
2676 | ds_put_cstr(&match, " && "REGBIT_DHCP_OPTS_RESULT); | |
2677 | ovn_lflow_add(lflows, op->od, S_SWITCH_IN_DHCP_RESPONSE, 100, | |
2678 | ds_cstr(&match), ds_cstr(&response_action)); | |
2679 | ds_destroy(&match); | |
2680 | ds_destroy(&options_action); | |
2681 | ds_destroy(&response_action); | |
2682 | break; | |
2683 | } | |
2684 | } | |
281977f7 NS |
2685 | } |
2686 | } | |
2687 | ||
2688 | /* Ingress table 10 and 11: DHCP options and response, by default goto next. | |
2689 | * (priority 0). */ | |
2690 | ||
2691 | HMAP_FOR_EACH (od, key_node, datapaths) { | |
2692 | if (!od->nbs) { | |
2693 | continue; | |
2694 | } | |
2695 | ||
2696 | ovn_lflow_add(lflows, od, S_SWITCH_IN_DHCP_OPTIONS, 0, "1", "next;"); | |
2697 | ovn_lflow_add(lflows, od, S_SWITCH_IN_DHCP_RESPONSE, 0, "1", "next;"); | |
2698 | } | |
2699 | ||
2700 | /* Ingress table 12: Destination lookup, broadcast and multicast handling | |
5868eb24 BP |
2701 | * (priority 100). */ |
2702 | HMAP_FOR_EACH (op, key_node, ports) { | |
0ee00741 | 2703 | if (!op->nbsp) { |
9975d7be BP |
2704 | continue; |
2705 | } | |
2706 | ||
0ee00741 | 2707 | if (lsp_is_enabled(op->nbsp)) { |
9975d7be | 2708 | ovn_multicast_add(mcgroups, &mc_flood, op); |
445a266a | 2709 | } |
5868eb24 BP |
2710 | } |
2711 | HMAP_FOR_EACH (od, key_node, datapaths) { | |
9975d7be BP |
2712 | if (!od->nbs) { |
2713 | continue; | |
2714 | } | |
2715 | ||
2716 | ovn_lflow_add(lflows, od, S_SWITCH_IN_L2_LKUP, 100, "eth.mcast", | |
5868eb24 | 2717 | "outport = \""MC_FLOOD"\"; output;"); |
bd39395f | 2718 | } |
bd39395f | 2719 | |
281977f7 | 2720 | /* Ingress table 12: Destination lookup, unicast handling (priority 50), */ |
5868eb24 | 2721 | HMAP_FOR_EACH (op, key_node, ports) { |
0ee00741 | 2722 | if (!op->nbsp) { |
9975d7be BP |
2723 | continue; |
2724 | } | |
2725 | ||
0ee00741 | 2726 | for (size_t i = 0; i < op->nbsp->n_addresses; i++) { |
74ff3298 | 2727 | struct eth_addr mac; |
5868eb24 | 2728 | |
0ee00741 | 2729 | if (eth_addr_from_string(op->nbsp->addresses[i], &mac)) { |
09b39248 | 2730 | ds_clear(&match); |
9975d7be BP |
2731 | ds_put_format(&match, "eth.dst == "ETH_ADDR_FMT, |
2732 | ETH_ADDR_ARGS(mac)); | |
5868eb24 | 2733 | |
09b39248 | 2734 | ds_clear(&actions); |
9975d7be BP |
2735 | ds_put_format(&actions, "outport = %s; output;", op->json_key); |
2736 | ovn_lflow_add(lflows, op->od, S_SWITCH_IN_L2_LKUP, 50, | |
5868eb24 | 2737 | ds_cstr(&match), ds_cstr(&actions)); |
0ee00741 HK |
2738 | } else if (!strcmp(op->nbsp->addresses[i], "unknown")) { |
2739 | if (lsp_is_enabled(op->nbsp)) { | |
9975d7be | 2740 | ovn_multicast_add(mcgroups, &mc_unknown, op); |
96af668a BP |
2741 | op->od->has_unknown = true; |
2742 | } | |
8639f9be ND |
2743 | } else if (!strcmp(op->nbsp->addresses[i], "dynamic")) { |
2744 | if (!op->nbsp->dynamic_addresses | |
2745 | || !eth_addr_from_string(op->nbsp->dynamic_addresses, | |
2746 | &mac)) { | |
2747 | continue; | |
2748 | } | |
2749 | ds_clear(&match); | |
2750 | ds_put_format(&match, "eth.dst == "ETH_ADDR_FMT, | |
2751 | ETH_ADDR_ARGS(mac)); | |
2752 | ||
2753 | ds_clear(&actions); | |
2754 | ds_put_format(&actions, "outport = %s; output;", op->json_key); | |
2755 | ovn_lflow_add(lflows, op->od, S_SWITCH_IN_L2_LKUP, 50, | |
2756 | ds_cstr(&match), ds_cstr(&actions)); | |
5868eb24 BP |
2757 | } else { |
2758 | static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1); | |
445a266a | 2759 | |
2fa326a3 BP |
2760 | VLOG_INFO_RL(&rl, |
2761 | "%s: invalid syntax '%s' in addresses column", | |
0ee00741 | 2762 | op->nbsp->name, op->nbsp->addresses[i]); |
445a266a BP |
2763 | } |
2764 | } | |
bd39395f BP |
2765 | } |
2766 | ||
281977f7 | 2767 | /* Ingress table 12: Destination lookup for unknown MACs (priority 0). */ |
5868eb24 | 2768 | HMAP_FOR_EACH (od, key_node, datapaths) { |
9975d7be BP |
2769 | if (!od->nbs) { |
2770 | continue; | |
2771 | } | |
2772 | ||
5868eb24 | 2773 | if (od->has_unknown) { |
9975d7be | 2774 | ovn_lflow_add(lflows, od, S_SWITCH_IN_L2_LKUP, 0, "1", |
5868eb24 | 2775 | "outport = \""MC_UNKNOWN"\"; output;"); |
445a266a | 2776 | } |
bd39395f BP |
2777 | } |
2778 | ||
94300e09 JP |
2779 | /* Egress tables 6: Egress port security - IP (priority 0) |
2780 | * Egress table 7: Egress port security L2 - multicast/broadcast | |
2781 | * (priority 100). */ | |
5868eb24 | 2782 | HMAP_FOR_EACH (od, key_node, datapaths) { |
9975d7be BP |
2783 | if (!od->nbs) { |
2784 | continue; | |
2785 | } | |
2786 | ||
685f4dfe NS |
2787 | ovn_lflow_add(lflows, od, S_SWITCH_OUT_PORT_SEC_IP, 0, "1", "next;"); |
2788 | ovn_lflow_add(lflows, od, S_SWITCH_OUT_PORT_SEC_L2, 100, "eth.mcast", | |
091e3af9 | 2789 | "output;"); |
48f42f3a RB |
2790 | } |
2791 | ||
94300e09 | 2792 | /* Egress table 6: Egress port security - IP (priorities 90 and 80) |
685f4dfe NS |
2793 | * if port security enabled. |
2794 | * | |
94300e09 | 2795 | * Egress table 7: Egress port security - L2 (priorities 50 and 150). |
d770a830 BP |
2796 | * |
2797 | * Priority 50 rules implement port security for enabled logical port. | |
2798 | * | |
2799 | * Priority 150 rules drop packets to disabled logical ports, so that they | |
2800 | * don't even receive multicast or broadcast packets. */ | |
5868eb24 | 2801 | HMAP_FOR_EACH (op, key_node, ports) { |
0ee00741 | 2802 | if (!op->nbsp) { |
9975d7be BP |
2803 | continue; |
2804 | } | |
2805 | ||
09b39248 | 2806 | ds_clear(&match); |
9975d7be | 2807 | ds_put_format(&match, "outport == %s", op->json_key); |
0ee00741 | 2808 | if (lsp_is_enabled(op->nbsp)) { |
e93b43d6 JP |
2809 | build_port_security_l2("eth.dst", op->ps_addrs, op->n_ps_addrs, |
2810 | &match); | |
685f4dfe | 2811 | ovn_lflow_add(lflows, op->od, S_SWITCH_OUT_PORT_SEC_L2, 50, |
d770a830 BP |
2812 | ds_cstr(&match), "output;"); |
2813 | } else { | |
685f4dfe | 2814 | ovn_lflow_add(lflows, op->od, S_SWITCH_OUT_PORT_SEC_L2, 150, |
d770a830 BP |
2815 | ds_cstr(&match), "drop;"); |
2816 | } | |
eb00399e | 2817 | |
0ee00741 | 2818 | if (op->nbsp->n_port_security) { |
685f4dfe NS |
2819 | build_port_security_ip(P_OUT, op, lflows); |
2820 | } | |
eb00399e | 2821 | } |
09b39248 JP |
2822 | |
2823 | ds_destroy(&match); | |
2824 | ds_destroy(&actions); | |
9975d7be | 2825 | } |
eb00399e | 2826 | |
9975d7be BP |
2827 | static bool |
2828 | lrport_is_enabled(const struct nbrec_logical_router_port *lrport) | |
2829 | { | |
2830 | return !lrport->enabled || *lrport->enabled; | |
2831 | } | |
2832 | ||
4685e523 JP |
2833 | /* Returns a string of the IP address of the router port 'op' that |
2834 | * overlaps with 'ip_s". If one is not found, returns NULL. | |
2835 | * | |
2836 | * The caller must not free the returned string. */ | |
2837 | static const char * | |
2838 | find_lrp_member_ip(const struct ovn_port *op, const char *ip_s) | |
2839 | { | |
6fdb7cd6 | 2840 | bool is_ipv4 = strchr(ip_s, '.') ? true : false; |
4685e523 | 2841 | |
6fdb7cd6 JP |
2842 | if (is_ipv4) { |
2843 | ovs_be32 ip; | |
4685e523 | 2844 | |
6fdb7cd6 JP |
2845 | if (!ip_parse(ip_s, &ip)) { |
2846 | static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1); | |
2847 | VLOG_WARN_RL(&rl, "bad ip address %s", ip_s); | |
2848 | return NULL; | |
2849 | } | |
4685e523 | 2850 | |
6fdb7cd6 JP |
2851 | for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) { |
2852 | const struct ipv4_netaddr *na = &op->lrp_networks.ipv4_addrs[i]; | |
2853 | ||
2854 | if (!((na->network ^ ip) & na->mask)) { | |
2855 | /* There should be only 1 interface that matches the | |
2856 | * supplied IP. Otherwise, it's a configuration error, | |
2857 | * because subnets of a router's interfaces should NOT | |
2858 | * overlap. */ | |
2859 | return na->addr_s; | |
2860 | } | |
2861 | } | |
2862 | } else { | |
2863 | struct in6_addr ip6; | |
2864 | ||
2865 | if (!ipv6_parse(ip_s, &ip6)) { | |
2866 | static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1); | |
2867 | VLOG_WARN_RL(&rl, "bad ipv6 address %s", ip_s); | |
2868 | return NULL; | |
2869 | } | |
2870 | ||
2871 | for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) { | |
2872 | const struct ipv6_netaddr *na = &op->lrp_networks.ipv6_addrs[i]; | |
2873 | struct in6_addr xor_addr = ipv6_addr_bitxor(&na->network, &ip6); | |
2874 | struct in6_addr and_addr = ipv6_addr_bitand(&xor_addr, &na->mask); | |
2875 | ||
2876 | if (ipv6_is_zero(&and_addr)) { | |
2877 | /* There should be only 1 interface that matches the | |
2878 | * supplied IP. Otherwise, it's a configuration error, | |
2879 | * because subnets of a router's interfaces should NOT | |
2880 | * overlap. */ | |
2881 | return na->addr_s; | |
2882 | } | |
4685e523 JP |
2883 | } |
2884 | } | |
2885 | ||
2886 | return NULL; | |
2887 | } | |
2888 | ||
9975d7be | 2889 | static void |
0bac7164 | 2890 | add_route(struct hmap *lflows, const struct ovn_port *op, |
4685e523 JP |
2891 | const char *lrp_addr_s, const char *network_s, int plen, |
2892 | const char *gateway) | |
9975d7be | 2893 | { |
6fdb7cd6 | 2894 | bool is_ipv4 = strchr(network_s, '.') ? true : false; |
a63f7235 | 2895 | struct ds match = DS_EMPTY_INITIALIZER; |
6fdb7cd6 | 2896 | |
a63f7235 JP |
2897 | /* IPv6 link-local addresses must be scoped to the local router port. */ |
2898 | if (!is_ipv4) { | |
2899 | struct in6_addr network; | |
2900 | ovs_assert(ipv6_parse(network_s, &network)); | |
2901 | if (in6_is_lla(&network)) { | |
2902 | ds_put_format(&match, "inport == %s && ", op->json_key); | |
2903 | } | |
2904 | } | |
2905 | ds_put_format(&match, "ip%s.dst == %s/%d", is_ipv4 ? "4" : "6", | |
2906 | network_s, plen); | |
9975d7be BP |
2907 | |
2908 | struct ds actions = DS_EMPTY_INITIALIZER; | |
6fdb7cd6 JP |
2909 | ds_put_format(&actions, "ip.ttl--; %sreg0 = ", is_ipv4 ? "" : "xx"); |
2910 | ||
9975d7be | 2911 | if (gateway) { |
c9bdf7bd | 2912 | ds_put_cstr(&actions, gateway); |
9975d7be | 2913 | } else { |
6fdb7cd6 | 2914 | ds_put_format(&actions, "ip%s.dst", is_ipv4 ? "4" : "6"); |
9975d7be | 2915 | } |
4685e523 | 2916 | ds_put_format(&actions, "; " |
6fdb7cd6 | 2917 | "%sreg1 = %s; " |
4685e523 | 2918 | "eth.src = %s; " |
0bac7164 | 2919 | "outport = %s; " |
bf143492 | 2920 | "flags.loopback = 1; " |
0bac7164 | 2921 | "next;", |
6fdb7cd6 | 2922 | is_ipv4 ? "" : "xx", |
4685e523 JP |
2923 | lrp_addr_s, |
2924 | op->lrp_networks.ea_s, | |
2925 | op->json_key); | |
9975d7be BP |
2926 | |
2927 | /* The priority here is calculated to implement longest-prefix-match | |
2928 | * routing. */ | |
a63f7235 JP |
2929 | ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_ROUTING, plen, |
2930 | ds_cstr(&match), ds_cstr(&actions)); | |
2931 | ds_destroy(&match); | |
9975d7be | 2932 | ds_destroy(&actions); |
9975d7be BP |
2933 | } |
2934 | ||
28dc3fe9 SR |
2935 | static void |
2936 | build_static_route_flow(struct hmap *lflows, struct ovn_datapath *od, | |
2937 | struct hmap *ports, | |
2938 | const struct nbrec_logical_router_static_route *route) | |
2939 | { | |
6fdb7cd6 | 2940 | ovs_be32 nexthop; |
4685e523 | 2941 | const char *lrp_addr_s; |
6fdb7cd6 JP |
2942 | unsigned int plen; |
2943 | bool is_ipv4; | |
28dc3fe9 | 2944 | |
6fdb7cd6 JP |
2945 | /* Verify that the next hop is an IP address with an all-ones mask. */ |
2946 | char *error = ip_parse_cidr(route->nexthop, &nexthop, &plen); | |
2947 | if (!error) { | |
2948 | if (plen != 32) { | |
2949 | static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1); | |
2950 | VLOG_WARN_RL(&rl, "bad next hop mask %s", route->nexthop); | |
2951 | return; | |
2952 | } | |
2953 | is_ipv4 = true; | |
2954 | } else { | |
28dc3fe9 | 2955 | free(error); |
6fdb7cd6 JP |
2956 | |
2957 | struct in6_addr ip6; | |
2958 | char *error = ipv6_parse_cidr(route->nexthop, &ip6, &plen); | |
2959 | if (!error) { | |
2960 | if (plen != 128) { | |
2961 | static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1); | |
2962 | VLOG_WARN_RL(&rl, "bad next hop mask %s", route->nexthop); | |
2963 | return; | |
2964 | } | |
2965 | is_ipv4 = false; | |
2966 | } else { | |
2967 | static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1); | |
2968 | VLOG_WARN_RL(&rl, "bad next hop ip address %s", route->nexthop); | |
2969 | free(error); | |
2970 | return; | |
2971 | } | |
28dc3fe9 SR |
2972 | } |
2973 | ||
6fdb7cd6 JP |
2974 | char *prefix_s; |
2975 | if (is_ipv4) { | |
2976 | ovs_be32 prefix; | |
2977 | /* Verify that ip prefix is a valid IPv4 address. */ | |
2978 | error = ip_parse_cidr(route->ip_prefix, &prefix, &plen); | |
2979 | if (error) { | |
2980 | static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1); | |
2981 | VLOG_WARN_RL(&rl, "bad 'ip_prefix' in static routes %s", | |
2982 | route->ip_prefix); | |
2983 | free(error); | |
2984 | return; | |
2985 | } | |
2986 | prefix_s = xasprintf(IP_FMT, IP_ARGS(prefix & be32_prefix_mask(plen))); | |
2987 | } else { | |
2988 | /* Verify that ip prefix is a valid IPv6 address. */ | |
2989 | struct in6_addr prefix; | |
2990 | error = ipv6_parse_cidr(route->ip_prefix, &prefix, &plen); | |
2991 | if (error) { | |
2992 | static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1); | |
2993 | VLOG_WARN_RL(&rl, "bad 'ip_prefix' in static routes %s", | |
2994 | route->ip_prefix); | |
2995 | free(error); | |
2996 | return; | |
2997 | } | |
2998 | struct in6_addr mask = ipv6_create_mask(plen); | |
2999 | struct in6_addr network = ipv6_addr_bitand(&prefix, &mask); | |
3000 | prefix_s = xmalloc(INET6_ADDRSTRLEN); | |
3001 | inet_ntop(AF_INET6, &network, prefix_s, INET6_ADDRSTRLEN); | |
28dc3fe9 SR |
3002 | } |
3003 | ||
3004 | /* Find the outgoing port. */ | |
3005 | struct ovn_port *out_port = NULL; | |
3006 | if (route->output_port) { | |
3007 | out_port = ovn_port_find(ports, route->output_port); | |
3008 | if (!out_port) { | |
3009 | static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1); | |
3010 | VLOG_WARN_RL(&rl, "Bad out port %s for static route %s", | |
3011 | route->output_port, route->ip_prefix); | |
6fdb7cd6 | 3012 | goto free_prefix_s; |
28dc3fe9 | 3013 | } |
4685e523 | 3014 | lrp_addr_s = find_lrp_member_ip(out_port, route->nexthop); |
28dc3fe9 SR |
3015 | } else { |
3016 | /* output_port is not specified, find the | |
3017 | * router port matching the next hop. */ | |
3018 | int i; | |
3019 | for (i = 0; i < od->nbr->n_ports; i++) { | |
3020 | struct nbrec_logical_router_port *lrp = od->nbr->ports[i]; | |
3021 | out_port = ovn_port_find(ports, lrp->name); | |
3022 | if (!out_port) { | |
3023 | /* This should not happen. */ | |
3024 | continue; | |
3025 | } | |
3026 | ||
4685e523 JP |
3027 | lrp_addr_s = find_lrp_member_ip(out_port, route->nexthop); |
3028 | if (lrp_addr_s) { | |
28dc3fe9 SR |
3029 | break; |
3030 | } | |
3031 | } | |
28dc3fe9 SR |
3032 | } |
3033 | ||
4685e523 JP |
3034 | if (!lrp_addr_s) { |
3035 | /* There is no matched out port. */ | |
3036 | static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1); | |
3037 | VLOG_WARN_RL(&rl, "No path for static route %s; next hop %s", | |
3038 | route->ip_prefix, route->nexthop); | |
6fdb7cd6 | 3039 | goto free_prefix_s; |
4685e523 JP |
3040 | } |
3041 | ||
6fdb7cd6 JP |
3042 | add_route(lflows, out_port, lrp_addr_s, prefix_s, plen, route->nexthop); |
3043 | ||
3044 | free_prefix_s: | |
c9bdf7bd | 3045 | free(prefix_s); |
28dc3fe9 SR |
3046 | } |
3047 | ||
4685e523 | 3048 | static void |
6fdb7cd6 | 3049 | op_put_v4_networks(struct ds *ds, const struct ovn_port *op, bool add_bcast) |
4685e523 JP |
3050 | { |
3051 | if (!add_bcast && op->lrp_networks.n_ipv4_addrs == 1) { | |
3052 | ds_put_format(ds, "%s", op->lrp_networks.ipv4_addrs[0].addr_s); | |
3053 | return; | |
3054 | } | |
3055 | ||
3056 | ds_put_cstr(ds, "{"); | |
3057 | for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) { | |
3058 | ds_put_format(ds, "%s, ", op->lrp_networks.ipv4_addrs[i].addr_s); | |
3059 | if (add_bcast) { | |
3060 | ds_put_format(ds, "%s, ", op->lrp_networks.ipv4_addrs[i].bcast_s); | |
3061 | } | |
3062 | } | |
3063 | ds_chomp(ds, ' '); | |
3064 | ds_chomp(ds, ','); | |
3065 | ds_put_cstr(ds, "}"); | |
3066 | } | |
3067 | ||
6fdb7cd6 JP |
3068 | static void |
3069 | op_put_v6_networks(struct ds *ds, const struct ovn_port *op) | |
3070 | { | |
3071 | if (op->lrp_networks.n_ipv6_addrs == 1) { | |
3072 | ds_put_format(ds, "%s", op->lrp_networks.ipv6_addrs[0].addr_s); | |
3073 | return; | |
3074 | } | |
3075 | ||
3076 | ds_put_cstr(ds, "{"); | |
3077 | for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) { | |
3078 | ds_put_format(ds, "%s, ", op->lrp_networks.ipv6_addrs[i].addr_s); | |
3079 | } | |
3080 | ds_chomp(ds, ' '); | |
3081 | ds_chomp(ds, ','); | |
3082 | ds_put_cstr(ds, "}"); | |
3083 | } | |
3084 | ||
9975d7be BP |
3085 | static void |
3086 | build_lrouter_flows(struct hmap *datapaths, struct hmap *ports, | |
3087 | struct hmap *lflows) | |
3088 | { | |
3089 | /* This flow table structure is documented in ovn-northd(8), so please | |
3090 | * update ovn-northd.8.xml if you change anything. */ | |
3091 | ||
09b39248 JP |
3092 | struct ds match = DS_EMPTY_INITIALIZER; |
3093 | struct ds actions = DS_EMPTY_INITIALIZER; | |
3094 | ||
9975d7be BP |
3095 | /* Logical router ingress table 0: Admission control framework. */ |
3096 | struct ovn_datapath *od; | |
3097 | HMAP_FOR_EACH (od, key_node, datapaths) { | |
3098 | if (!od->nbr) { | |
3099 | continue; | |
3100 | } | |
3101 | ||
3102 | /* Logical VLANs not supported. | |
3103 | * Broadcast/multicast source address is invalid. */ | |
3104 | ovn_lflow_add(lflows, od, S_ROUTER_IN_ADMISSION, 100, | |
3105 | "vlan.present || eth.src[40]", "drop;"); | |
3106 | } | |
3107 | ||
3108 | /* Logical router ingress table 0: match (priority 50). */ | |
3109 | struct ovn_port *op; | |
3110 | HMAP_FOR_EACH (op, key_node, ports) { | |
0ee00741 | 3111 | if (!op->nbrp) { |
9975d7be BP |
3112 | continue; |
3113 | } | |
3114 | ||
0ee00741 | 3115 | if (!lrport_is_enabled(op->nbrp)) { |
9975d7be BP |
3116 | /* Drop packets from disabled logical ports (since logical flow |
3117 | * tables are default-drop). */ | |
3118 | continue; | |
3119 | } | |
3120 | ||
09b39248 | 3121 | ds_clear(&match); |
4685e523 JP |
3122 | ds_put_format(&match, "(eth.mcast || eth.dst == %s) && inport == %s", |
3123 | op->lrp_networks.ea_s, op->json_key); | |
9975d7be | 3124 | ovn_lflow_add(lflows, op->od, S_ROUTER_IN_ADMISSION, 50, |
09b39248 | 3125 | ds_cstr(&match), "next;"); |
9975d7be BP |
3126 | } |
3127 | ||
3128 | /* Logical router ingress table 1: IP Input. */ | |
78aab811 | 3129 | HMAP_FOR_EACH (od, key_node, datapaths) { |
9975d7be BP |
3130 | if (!od->nbr) { |
3131 | continue; | |
3132 | } | |
3133 | ||
3134 | /* L3 admission control: drop multicast and broadcast source, localhost | |
3135 | * source or destination, and zero network source or destination | |
3136 | * (priority 100). */ | |
3137 | ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 100, | |
3138 | "ip4.mcast || " | |
3139 | "ip4.src == 255.255.255.255 || " | |
3140 | "ip4.src == 127.0.0.0/8 || " | |
3141 | "ip4.dst == 127.0.0.0/8 || " | |
3142 | "ip4.src == 0.0.0.0/8 || " | |
3143 | "ip4.dst == 0.0.0.0/8", | |
3144 | "drop;"); | |
3145 | ||
0bac7164 BP |
3146 | /* ARP reply handling. Use ARP replies to populate the logical |
3147 | * router's ARP table. */ | |
3148 | ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 90, "arp.op == 2", | |
3149 | "put_arp(inport, arp.spa, arp.sha);"); | |
3150 | ||
9975d7be BP |
3151 | /* Drop Ethernet local broadcast. By definition this traffic should |
3152 | * not be forwarded.*/ | |
3153 | ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 50, | |
3154 | "eth.bcast", "drop;"); | |
3155 | ||
9975d7be BP |
3156 | /* TTL discard. |
3157 | * | |
3158 | * XXX Need to send ICMP time exceeded if !ip.later_frag. */ | |
09b39248 JP |
3159 | ds_clear(&match); |
3160 | ds_put_cstr(&match, "ip4 && ip.ttl == {0, 1}"); | |
3161 | ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 30, | |
3162 | ds_cstr(&match), "drop;"); | |
9975d7be | 3163 | |
c34a87b6 JP |
3164 | /* ND advertisement handling. Use advertisements to populate |
3165 | * the logical router's ARP/ND table. */ | |
3166 | ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 90, "nd_na", | |
3167 | "put_nd(inport, nd.target, nd.tll);"); | |
3168 | ||
3169 | /* Lean from neighbor solicitations that were not directed at | |
3170 | * us. (A priority-90 flow will respond to requests to us and | |
3171 | * learn the sender's mac address. */ | |
3172 | ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 80, "nd_ns", | |
3173 | "put_nd(inport, ip6.src, nd.sll);"); | |
3174 | ||
9975d7be BP |
3175 | /* Pass other traffic not already handled to the next table for |
3176 | * routing. */ | |
3177 | ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 0, "1", "next;"); | |
78aab811 JP |
3178 | } |
3179 | ||
6fdb7cd6 | 3180 | /* Logical router ingress table 1: IP Input for IPv4. */ |
9975d7be | 3181 | HMAP_FOR_EACH (op, key_node, ports) { |
0ee00741 | 3182 | if (!op->nbrp) { |
9975d7be BP |
3183 | continue; |
3184 | } | |
3185 | ||
9975d7be | 3186 | |
6fdb7cd6 JP |
3187 | if (op->lrp_networks.n_ipv4_addrs) { |
3188 | /* L3 admission control: drop packets that originate from an | |
3189 | * IPv4 address owned by the router or a broadcast address | |
3190 | * known to the router (priority 100). */ | |
3191 | ds_clear(&match); | |
3192 | ds_put_cstr(&match, "ip4.src == "); | |
3193 | op_put_v4_networks(&match, op, true); | |
3194 | ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 100, | |
3195 | ds_cstr(&match), "drop;"); | |
3196 | ||
3197 | /* ICMP echo reply. These flows reply to ICMP echo requests | |
3198 | * received for the router's IP address. Since packets only | |
3199 | * get here as part of the logical router datapath, the inport | |
3200 | * (i.e. the incoming locally attached net) does not matter. | |
3201 | * The ip.ttl also does not matter (RFC1812 section 4.2.2.9) */ | |
3202 | ds_clear(&match); | |
3203 | ds_put_cstr(&match, "ip4.dst == "); | |
3204 | op_put_v4_networks(&match, op, false); | |
3205 | ds_put_cstr(&match, " && icmp4.type == 8 && icmp4.code == 0"); | |
3206 | ||
3207 | ds_clear(&actions); | |
3208 | ds_put_format(&actions, | |
3209 | "ip4.dst <-> ip4.src; " | |
3210 | "ip.ttl = 255; " | |
3211 | "icmp4.type = 0; " | |
bf143492 | 3212 | "flags.loopback = 1; " |
6fdb7cd6 JP |
3213 | "next; "); |
3214 | ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90, | |
3215 | ds_cstr(&match), ds_cstr(&actions)); | |
3216 | } | |
dd7652e6 | 3217 | |
9975d7be BP |
3218 | /* ARP reply. These flows reply to ARP requests for the router's own |
3219 | * IP address. */ | |
4685e523 JP |
3220 | for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) { |
3221 | ds_clear(&match); | |
3222 | ds_put_format(&match, | |
3223 | "inport == %s && arp.tpa == %s && arp.op == 1", | |
3224 | op->json_key, op->lrp_networks.ipv4_addrs[i].addr_s); | |
3225 | ||
3226 | ds_clear(&actions); | |
3227 | ds_put_format(&actions, | |
3228 | "eth.dst = eth.src; " | |
3229 | "eth.src = %s; " | |
3230 | "arp.op = 2; /* ARP reply */ " | |
3231 | "arp.tha = arp.sha; " | |
3232 | "arp.sha = %s; " | |
3233 | "arp.tpa = arp.spa; " | |
3234 | "arp.spa = %s; " | |
3235 | "outport = %s; " | |
bf143492 | 3236 | "flags.loopback = 1; " |
4685e523 JP |
3237 | "output;", |
3238 | op->lrp_networks.ea_s, | |
3239 | op->lrp_networks.ea_s, | |
3240 | op->lrp_networks.ipv4_addrs[i].addr_s, | |
3241 | op->json_key); | |
3242 | ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90, | |
3243 | ds_cstr(&match), ds_cstr(&actions)); | |
3244 | } | |
9975d7be | 3245 | |
dde5ea7b GS |
3246 | ovs_be32 *snat_ips = xmalloc(sizeof *snat_ips * op->od->nbr->n_nat); |
3247 | size_t n_snat_ips = 0; | |
de297547 GS |
3248 | for (int i = 0; i < op->od->nbr->n_nat; i++) { |
3249 | const struct nbrec_nat *nat; | |
3250 | ||
3251 | nat = op->od->nbr->nat[i]; | |
3252 | ||
de297547 GS |
3253 | ovs_be32 ip; |
3254 | if (!ip_parse(nat->external_ip, &ip) || !ip) { | |
3255 | static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1); | |
dde5ea7b | 3256 | VLOG_WARN_RL(&rl, "bad ip address %s in nat configuration " |
de297547 GS |
3257 | "for router %s", nat->external_ip, op->key); |
3258 | continue; | |
3259 | } | |
3260 | ||
dde5ea7b GS |
3261 | if (!strcmp(nat->type, "snat")) { |
3262 | snat_ips[n_snat_ips++] = ip; | |
3263 | continue; | |
3264 | } | |
3265 | ||
3266 | /* ARP handling for external IP addresses. | |
3267 | * | |
3268 | * DNAT IP addresses are external IP addresses that need ARP | |
3269 | * handling. */ | |
09b39248 JP |
3270 | ds_clear(&match); |
3271 | ds_put_format(&match, | |
3272 | "inport == %s && arp.tpa == "IP_FMT" && arp.op == 1", | |
3273 | op->json_key, IP_ARGS(ip)); | |
4685e523 | 3274 | |
09b39248 JP |
3275 | ds_clear(&actions); |
3276 | ds_put_format(&actions, | |
de297547 | 3277 | "eth.dst = eth.src; " |
4685e523 | 3278 | "eth.src = %s; " |
de297547 GS |
3279 | "arp.op = 2; /* ARP reply */ " |
3280 | "arp.tha = arp.sha; " | |
4685e523 | 3281 | "arp.sha = %s; " |
de297547 GS |
3282 | "arp.tpa = arp.spa; " |
3283 | "arp.spa = "IP_FMT"; " | |
3284 | "outport = %s; " | |
bf143492 | 3285 | "flags.loopback = 1; " |
de297547 | 3286 | "output;", |
4685e523 JP |
3287 | op->lrp_networks.ea_s, |
3288 | op->lrp_networks.ea_s, | |
de297547 GS |
3289 | IP_ARGS(ip), |
3290 | op->json_key); | |
3291 | ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90, | |
09b39248 | 3292 | ds_cstr(&match), ds_cstr(&actions)); |
de297547 GS |
3293 | } |
3294 | ||
4685e523 JP |
3295 | ds_clear(&match); |
3296 | ds_put_cstr(&match, "ip4.dst == {"); | |
3297 | bool has_drop_ips = false; | |
3298 | for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) { | |
49da9ec0 | 3299 | bool snat_ip_is_router_ip = false; |
dde5ea7b GS |
3300 | for (int j = 0; j < n_snat_ips; j++) { |
3301 | /* Packets to SNAT IPs should not be dropped. */ | |
3302 | if (op->lrp_networks.ipv4_addrs[i].addr == snat_ips[j]) { | |
49da9ec0 CSV |
3303 | snat_ip_is_router_ip = true; |
3304 | break; | |
4685e523 | 3305 | } |
4ef48e9d | 3306 | } |
49da9ec0 CSV |
3307 | if (snat_ip_is_router_ip) { |
3308 | continue; | |
3309 | } | |
4685e523 JP |
3310 | ds_put_format(&match, "%s, ", |
3311 | op->lrp_networks.ipv4_addrs[i].addr_s); | |
3312 | has_drop_ips = true; | |
4ef48e9d | 3313 | } |
4685e523 JP |
3314 | ds_chomp(&match, ' '); |
3315 | ds_chomp(&match, ','); | |
3316 | ds_put_cstr(&match, "}"); | |
4ef48e9d | 3317 | |
4685e523 JP |
3318 | if (has_drop_ips) { |
3319 | /* Drop IP traffic to this router. */ | |
09b39248 JP |
3320 | ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 60, |
3321 | ds_cstr(&match), "drop;"); | |
4ef48e9d | 3322 | } |
4685e523 | 3323 | |
dde5ea7b | 3324 | free(snat_ips); |
9975d7be BP |
3325 | } |
3326 | ||
6fdb7cd6 JP |
3327 | /* Logical router ingress table 1: IP Input for IPv6. */ |
3328 | HMAP_FOR_EACH (op, key_node, ports) { | |
3329 | if (!op->nbrp) { | |
3330 | continue; | |
3331 | } | |
3332 | ||
3333 | if (op->lrp_networks.n_ipv6_addrs) { | |
3334 | /* L3 admission control: drop packets that originate from an | |
3335 | * IPv6 address owned by the router (priority 100). */ | |
3336 | ds_clear(&match); | |
3337 | ds_put_cstr(&match, "ip6.src == "); | |
3338 | op_put_v6_networks(&match, op); | |
3339 | ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 100, | |
3340 | ds_cstr(&match), "drop;"); | |
3341 | ||
3342 | /* ICMPv6 echo reply. These flows reply to echo requests | |
3343 | * received for the router's IP address. */ | |
3344 | ds_clear(&match); | |
3345 | ds_put_cstr(&match, "ip6.dst == "); | |
3346 | op_put_v6_networks(&match, op); | |
3347 | ds_put_cstr(&match, " && icmp6.type == 128 && icmp6.code == 0"); | |
3348 | ||
3349 | ds_clear(&actions); | |
3350 | ds_put_cstr(&actions, | |
3351 | "ip6.dst <-> ip6.src; " | |
3352 | "ip.ttl = 255; " | |
3353 | "icmp6.type = 129; " | |
bf143492 | 3354 | "flags.loopback = 1; " |
6fdb7cd6 JP |
3355 | "next; "); |
3356 | ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90, | |
3357 | ds_cstr(&match), ds_cstr(&actions)); | |
3358 | ||
3359 | /* Drop IPv6 traffic to this router. */ | |
3360 | ds_clear(&match); | |
3361 | ds_put_cstr(&match, "ip6.dst == "); | |
3362 | op_put_v6_networks(&match, op); | |
3363 | ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 60, | |
3364 | ds_cstr(&match), "drop;"); | |
3365 | } | |
3366 | ||
3367 | /* ND reply. These flows reply to ND solicitations for the | |
3368 | * router's own IP address. */ | |
3369 | for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) { | |
3370 | ds_clear(&match); | |
3371 | ds_put_format(&match, | |
3372 | "inport == %s && nd_ns && ip6.dst == {%s, %s} " | |
3373 | "&& nd.target == %s", | |
3374 | op->json_key, | |
3375 | op->lrp_networks.ipv6_addrs[i].addr_s, | |
3376 | op->lrp_networks.ipv6_addrs[i].sn_addr_s, | |
3377 | op->lrp_networks.ipv6_addrs[i].addr_s); | |
3378 | ||
3379 | ds_clear(&actions); | |
3380 | ds_put_format(&actions, | |
c34a87b6 | 3381 | "put_nd(inport, ip6.src, nd.sll); " |
6fdb7cd6 JP |
3382 | "nd_na { " |
3383 | "eth.src = %s; " | |
3384 | "ip6.src = %s; " | |
3385 | "nd.target = %s; " | |
3386 | "nd.tll = %s; " | |
3387 | "outport = inport; " | |
bf143492 | 3388 | "flags.loopback = 1; " |
6fdb7cd6 JP |
3389 | "output; " |
3390 | "};", | |
3391 | op->lrp_networks.ea_s, | |
3392 | op->lrp_networks.ipv6_addrs[i].addr_s, | |
3393 | op->lrp_networks.ipv6_addrs[i].addr_s, | |
3394 | op->lrp_networks.ea_s); | |
3395 | ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90, | |
3396 | ds_cstr(&match), ds_cstr(&actions)); | |
3397 | } | |
3398 | } | |
3399 | ||
de297547 GS |
3400 | /* NAT in Gateway routers. */ |
3401 | HMAP_FOR_EACH (od, key_node, datapaths) { | |
3402 | if (!od->nbr) { | |
3403 | continue; | |
3404 | } | |
3405 | ||
3406 | /* Packets are allowed by default. */ | |
3407 | ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 0, "1", "next;"); | |
3408 | ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT, 0, "1", "next;"); | |
3409 | ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 0, "1", "next;"); | |
3410 | ||
3411 | /* NAT rules are only valid on Gateway routers. */ | |
3412 | if (!smap_get(&od->nbr->options, "chassis")) { | |
3413 | continue; | |
3414 | } | |
3415 | ||
3416 | for (int i = 0; i < od->nbr->n_nat; i++) { | |
3417 | const struct nbrec_nat *nat; | |
3418 | ||
3419 | nat = od->nbr->nat[i]; | |
3420 | ||
3421 | ovs_be32 ip, mask; | |
3422 | ||
3423 | char *error = ip_parse_masked(nat->external_ip, &ip, &mask); | |
3424 | if (error || mask != OVS_BE32_MAX) { | |
3425 | static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1); | |
3426 | VLOG_WARN_RL(&rl, "bad external ip %s for nat", | |
3427 | nat->external_ip); | |
3428 | free(error); | |
3429 | continue; | |
3430 | } | |
3431 | ||
3432 | /* Check the validity of nat->logical_ip. 'logical_ip' can | |
3433 | * be a subnet when the type is "snat". */ | |
3434 | error = ip_parse_masked(nat->logical_ip, &ip, &mask); | |
3435 | if (!strcmp(nat->type, "snat")) { | |
3436 | if (error) { | |
3437 | static struct vlog_rate_limit rl = | |
3438 | VLOG_RATE_LIMIT_INIT(5, 1); | |
3439 | VLOG_WARN_RL(&rl, "bad ip network or ip %s for snat " | |
3440 | "in router "UUID_FMT"", | |
3441 | nat->logical_ip, UUID_ARGS(&od->key)); | |
3442 | free(error); | |
3443 | continue; | |
3444 | } | |
3445 | } else { | |
3446 | if (error || mask != OVS_BE32_MAX) { | |
3447 | static struct vlog_rate_limit rl = | |
3448 | VLOG_RATE_LIMIT_INIT(5, 1); | |
3449 | VLOG_WARN_RL(&rl, "bad ip %s for dnat in router " | |
3450 | ""UUID_FMT"", nat->logical_ip, UUID_ARGS(&od->key)); | |
3451 | free(error); | |
3452 | continue; | |
3453 | } | |
3454 | } | |
3455 | ||
de297547 GS |
3456 | /* Ingress UNSNAT table: It is for already established connections' |
3457 | * reverse traffic. i.e., SNAT has already been done in egress | |
3458 | * pipeline and now the packet has entered the ingress pipeline as | |
3459 | * part of a reply. We undo the SNAT here. | |
3460 | * | |
3461 | * Undoing SNAT has to happen before DNAT processing. This is | |
3462 | * because when the packet was DNATed in ingress pipeline, it did | |
3463 | * not know about the possibility of eventual additional SNAT in | |
3464 | * egress pipeline. */ | |
3465 | if (!strcmp(nat->type, "snat") | |
3466 | || !strcmp(nat->type, "dnat_and_snat")) { | |
09b39248 JP |
3467 | ds_clear(&match); |
3468 | ds_put_format(&match, "ip && ip4.dst == %s", nat->external_ip); | |
de297547 | 3469 | ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 100, |
09b39248 | 3470 | ds_cstr(&match), "ct_snat; next;"); |
de297547 GS |
3471 | } |
3472 | ||
3473 | /* Ingress DNAT table: Packets enter the pipeline with destination | |
3474 | * IP address that needs to be DNATted from a external IP address | |
3475 | * to a logical IP address. */ | |
3476 | if (!strcmp(nat->type, "dnat") | |
3477 | || !strcmp(nat->type, "dnat_and_snat")) { | |
3478 | /* Packet when it goes from the initiator to destination. | |
3479 | * We need to zero the inport because the router can | |
3480 | * send the packet back through the same interface. */ | |
09b39248 JP |
3481 | ds_clear(&match); |
3482 | ds_put_format(&match, "ip && ip4.dst == %s", nat->external_ip); | |
3483 | ds_clear(&actions); | |
bf143492 | 3484 | ds_put_format(&actions,"flags.loopback = 1; ct_dnat(%s);", |
09b39248 | 3485 | nat->logical_ip); |
de297547 | 3486 | ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 100, |
09b39248 | 3487 | ds_cstr(&match), ds_cstr(&actions)); |
de297547 GS |
3488 | } |
3489 | ||
3490 | /* Egress SNAT table: Packets enter the egress pipeline with | |
3491 | * source ip address that needs to be SNATted to a external ip | |
3492 | * address. */ | |
3493 | if (!strcmp(nat->type, "snat") | |
3494 | || !strcmp(nat->type, "dnat_and_snat")) { | |
09b39248 JP |
3495 | ds_clear(&match); |
3496 | ds_put_format(&match, "ip && ip4.src == %s", nat->logical_ip); | |
3497 | ds_clear(&actions); | |
3498 | ds_put_format(&actions, "ct_snat(%s);", nat->external_ip); | |
de297547 GS |
3499 | |
3500 | /* The priority here is calculated such that the | |
3501 | * nat->logical_ip with the longest mask gets a higher | |
3502 | * priority. */ | |
3503 | ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT, | |
09b39248 JP |
3504 | count_1bits(ntohl(mask)) + 1, |
3505 | ds_cstr(&match), ds_cstr(&actions)); | |
de297547 GS |
3506 | } |
3507 | } | |
3508 | ||
3509 | /* Re-circulate every packet through the DNAT zone. | |
3510 | * This helps with two things. | |
3511 | * | |
3512 | * 1. Any packet that needs to be unDNATed in the reverse | |
3513 | * direction gets unDNATed. Ideally this could be done in | |
3514 | * the egress pipeline. But since the gateway router | |
3515 | * does not have any feature that depends on the source | |
3516 | * ip address being external IP address for IP routing, | |
3517 | * we can do it here, saving a future re-circulation. | |
3518 | * | |
3519 | * 2. Any packet that was sent through SNAT zone in the | |
3520 | * previous table automatically gets re-circulated to get | |
3521 | * back the new destination IP address that is needed for | |
3522 | * routing in the openflow pipeline. */ | |
3523 | ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 50, | |
bf143492 | 3524 | "ip", "flags.loopback = 1; ct_dnat;"); |
de297547 GS |
3525 | } |
3526 | ||
94300e09 | 3527 | /* Logical router ingress table 4: IP Routing. |
9975d7be BP |
3528 | * |
3529 | * A packet that arrives at this table is an IP packet that should be | |
6fdb7cd6 JP |
3530 | * routed to the address in 'ip[46].dst'. This table sets outport to |
3531 | * the correct output port, eth.src to the output port's MAC | |
3532 | * address, and '[xx]reg0' to the next-hop IP address (leaving | |
3533 | * 'ip[46].dst', the packet’s final destination, unchanged), and | |
3534 | * advances to the next table for ARP/ND resolution. */ | |
9975d7be | 3535 | HMAP_FOR_EACH (op, key_node, ports) { |
0ee00741 | 3536 | if (!op->nbrp) { |
9975d7be BP |
3537 | continue; |
3538 | } | |
3539 | ||
4685e523 JP |
3540 | for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) { |
3541 | add_route(lflows, op, op->lrp_networks.ipv4_addrs[i].addr_s, | |
3542 | op->lrp_networks.ipv4_addrs[i].network_s, | |
3543 | op->lrp_networks.ipv4_addrs[i].plen, NULL); | |
3544 | } | |
6fdb7cd6 JP |
3545 | |
3546 | for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) { | |
3547 | add_route(lflows, op, op->lrp_networks.ipv6_addrs[i].addr_s, | |
3548 | op->lrp_networks.ipv6_addrs[i].network_s, | |
3549 | op->lrp_networks.ipv6_addrs[i].plen, NULL); | |
3550 | } | |
9975d7be | 3551 | } |
4685e523 | 3552 | |
6fdb7cd6 | 3553 | /* Convert the static routes to flows. */ |
9975d7be BP |
3554 | HMAP_FOR_EACH (od, key_node, datapaths) { |
3555 | if (!od->nbr) { | |
3556 | continue; | |
3557 | } | |
3558 | ||
28dc3fe9 SR |
3559 | for (int i = 0; i < od->nbr->n_static_routes; i++) { |
3560 | const struct nbrec_logical_router_static_route *route; | |
3561 | ||
3562 | route = od->nbr->static_routes[i]; | |
3563 | build_static_route_flow(lflows, od, ports, route); | |
3564 | } | |
9975d7be | 3565 | } |
6fdb7cd6 | 3566 | |
9975d7be BP |
3567 | /* XXX destination unreachable */ |
3568 | ||
94300e09 | 3569 | /* Local router ingress table 5: ARP Resolution. |
9975d7be BP |
3570 | * |
3571 | * Any packet that reaches this table is an IP packet whose next-hop IP | |
3572 | * address is in reg0. (ip4.dst is the final destination.) This table | |
3573 | * resolves the IP address in reg0 into an output port in outport and an | |
3574 | * Ethernet address in eth.dst. */ | |
3575 | HMAP_FOR_EACH (op, key_node, ports) { | |
0ee00741 | 3576 | if (op->nbrp) { |
6fdb7cd6 JP |
3577 | /* This is a logical router port. If next-hop IP address in |
3578 | * '[xx]reg0' matches IP address of this router port, then | |
3579 | * the packet is intended to eventually be sent to this | |
3580 | * logical port. Set the destination mac address using this | |
3581 | * port's mac address. | |
509afdc3 GS |
3582 | * |
3583 | * The packet is still in peer's logical pipeline. So the match | |
3584 | * should be on peer's outport. */ | |
6fdb7cd6 JP |
3585 | if (op->peer && op->nbrp->peer) { |
3586 | if (op->lrp_networks.n_ipv4_addrs) { | |
3587 | ds_clear(&match); | |
3588 | ds_put_format(&match, "outport == %s && reg0 == ", | |
3589 | op->peer->json_key); | |
3590 | op_put_v4_networks(&match, op, false); | |
3591 | ||
3592 | ds_clear(&actions); | |
3593 | ds_put_format(&actions, "eth.dst = %s; next;", | |
3594 | op->lrp_networks.ea_s); | |
3595 | ovn_lflow_add(lflows, op->peer->od, S_ROUTER_IN_ARP_RESOLVE, | |
3596 | 100, ds_cstr(&match), ds_cstr(&actions)); | |
3597 | } | |
4685e523 | 3598 | |
6fdb7cd6 JP |
3599 | if (op->lrp_networks.n_ipv6_addrs) { |
3600 | ds_clear(&match); | |
3601 | ds_put_format(&match, "outport == %s && xxreg0 == ", | |
3602 | op->peer->json_key); | |
3603 | op_put_v6_networks(&match, op); | |
3604 | ||
3605 | ds_clear(&actions); | |
3606 | ds_put_format(&actions, "eth.dst = %s; next;", | |
3607 | op->lrp_networks.ea_s); | |
3608 | ovn_lflow_add(lflows, op->peer->od, S_ROUTER_IN_ARP_RESOLVE, | |
3609 | 100, ds_cstr(&match), ds_cstr(&actions)); | |
3610 | } | |
509afdc3 | 3611 | } |
0ee00741 | 3612 | } else if (op->od->n_router_ports && strcmp(op->nbsp->type, "router")) { |
75cf9d2b GS |
3613 | /* This is a logical switch port that backs a VM or a container. |
3614 | * Extract its addresses. For each of the address, go through all | |
3615 | * the router ports attached to the switch (to which this port | |
3616 | * connects) and if the address in question is reachable from the | |
6fdb7cd6 | 3617 | * router port, add an ARP/ND entry in that router's pipeline. */ |
75cf9d2b | 3618 | |
e93b43d6 | 3619 | for (size_t i = 0; i < op->n_lsp_addrs; i++) { |
4685e523 | 3620 | const char *ea_s = op->lsp_addrs[i].ea_s; |
e93b43d6 | 3621 | for (size_t j = 0; j < op->lsp_addrs[i].n_ipv4_addrs; j++) { |
4685e523 | 3622 | const char *ip_s = op->lsp_addrs[i].ipv4_addrs[j].addr_s; |
e93b43d6 | 3623 | for (size_t k = 0; k < op->od->n_router_ports; k++) { |
80f408f4 JP |
3624 | /* Get the Logical_Router_Port that the |
3625 | * Logical_Switch_Port is connected to, as | |
3626 | * 'peer'. */ | |
86e98048 | 3627 | const char *peer_name = smap_get( |
0ee00741 | 3628 | &op->od->router_ports[k]->nbsp->options, |
86e98048 BP |
3629 | "router-port"); |
3630 | if (!peer_name) { | |
3631 | continue; | |
3632 | } | |
3633 | ||
e93b43d6 | 3634 | struct ovn_port *peer = ovn_port_find(ports, peer_name); |
0ee00741 | 3635 | if (!peer || !peer->nbrp) { |
86e98048 BP |
3636 | continue; |
3637 | } | |
3638 | ||
4685e523 | 3639 | if (!find_lrp_member_ip(peer, ip_s)) { |
86e98048 BP |
3640 | continue; |
3641 | } | |
3642 | ||
09b39248 | 3643 | ds_clear(&match); |
e93b43d6 | 3644 | ds_put_format(&match, "outport == %s && reg0 == %s", |
4685e523 JP |
3645 | peer->json_key, ip_s); |
3646 | ||
09b39248 | 3647 | ds_clear(&actions); |
4685e523 | 3648 | ds_put_format(&actions, "eth.dst = %s; next;", ea_s); |
86e98048 | 3649 | ovn_lflow_add(lflows, peer->od, |
09b39248 JP |
3650 | S_ROUTER_IN_ARP_RESOLVE, 100, |
3651 | ds_cstr(&match), ds_cstr(&actions)); | |
86e98048 | 3652 | } |
9975d7be | 3653 | } |
6fdb7cd6 JP |
3654 | |
3655 | for (size_t j = 0; j < op->lsp_addrs[i].n_ipv6_addrs; j++) { | |
3656 | const char *ip_s = op->lsp_addrs[i].ipv6_addrs[j].addr_s; | |
3657 | for (size_t k = 0; k < op->od->n_router_ports; k++) { | |
3658 | /* Get the Logical_Router_Port that the | |
3659 | * Logical_Switch_Port is connected to, as | |
3660 | * 'peer'. */ | |
3661 | const char *peer_name = smap_get( | |
3662 | &op->od->router_ports[k]->nbsp->options, | |
3663 | "router-port"); | |
3664 | if (!peer_name) { | |
3665 | continue; | |
3666 | } | |
3667 | ||
3668 | struct ovn_port *peer = ovn_port_find(ports, peer_name); | |
3669 | if (!peer || !peer->nbrp) { | |
3670 | continue; | |
3671 | } | |
3672 | ||
3673 | if (!find_lrp_member_ip(peer, ip_s)) { | |
3674 | continue; | |
3675 | } | |
3676 | ||
3677 | ds_clear(&match); | |
3678 | ds_put_format(&match, "outport == %s && xxreg0 == %s", | |
3679 | peer->json_key, ip_s); | |
3680 | ||
3681 | ds_clear(&actions); | |
3682 | ds_put_format(&actions, "eth.dst = %s; next;", ea_s); | |
3683 | ovn_lflow_add(lflows, peer->od, | |
3684 | S_ROUTER_IN_ARP_RESOLVE, 100, | |
3685 | ds_cstr(&match), ds_cstr(&actions)); | |
3686 | } | |
3687 | } | |
9975d7be | 3688 | } |
0ee00741 | 3689 | } else if (!strcmp(op->nbsp->type, "router")) { |
75cf9d2b GS |
3690 | /* This is a logical switch port that connects to a router. */ |
3691 | ||
3692 | /* The peer of this switch port is the router port for which | |
3693 | * we need to add logical flows such that it can resolve | |
3694 | * ARP entries for all the other router ports connected to | |
3695 | * the switch in question. */ | |
3696 | ||
0ee00741 | 3697 | const char *peer_name = smap_get(&op->nbsp->options, |
75cf9d2b GS |
3698 | "router-port"); |
3699 | if (!peer_name) { | |
3700 | continue; | |
3701 | } | |
3702 | ||
3703 | struct ovn_port *peer = ovn_port_find(ports, peer_name); | |
0ee00741 | 3704 | if (!peer || !peer->nbrp) { |
75cf9d2b GS |
3705 | continue; |
3706 | } | |
3707 | ||
4685e523 | 3708 | for (size_t i = 0; i < op->od->n_router_ports; i++) { |
75cf9d2b | 3709 | const char *router_port_name = smap_get( |
0ee00741 | 3710 | &op->od->router_ports[i]->nbsp->options, |
75cf9d2b GS |
3711 | "router-port"); |
3712 | struct ovn_port *router_port = ovn_port_find(ports, | |
3713 | router_port_name); | |
0ee00741 | 3714 | if (!router_port || !router_port->nbrp) { |
75cf9d2b GS |
3715 | continue; |
3716 | } | |
3717 | ||
3718 | /* Skip the router port under consideration. */ | |
3719 | if (router_port == peer) { | |
3720 | continue; | |
3721 | } | |
3722 | ||
6fdb7cd6 JP |
3723 | if (router_port->lrp_networks.n_ipv4_addrs) { |
3724 | ds_clear(&match); | |
3725 | ds_put_format(&match, "outport == %s && reg0 == ", | |
3726 | peer->json_key); | |
3727 | op_put_v4_networks(&match, router_port, false); | |
3728 | ||
3729 | ds_clear(&actions); | |
3730 | ds_put_format(&actions, "eth.dst = %s; next;", | |
3731 | router_port->lrp_networks.ea_s); | |
3732 | ovn_lflow_add(lflows, peer->od, S_ROUTER_IN_ARP_RESOLVE, | |
3733 | 100, ds_cstr(&match), ds_cstr(&actions)); | |
3734 | } | |
4685e523 | 3735 | |
6fdb7cd6 JP |
3736 | if (router_port->lrp_networks.n_ipv6_addrs) { |
3737 | ds_clear(&match); | |
3738 | ds_put_format(&match, "outport == %s && xxreg0 == ", | |
3739 | peer->json_key); | |
3740 | op_put_v6_networks(&match, router_port); | |
3741 | ||
3742 | ds_clear(&actions); | |
3743 | ds_put_format(&actions, "eth.dst = %s; next;", | |
3744 | router_port->lrp_networks.ea_s); | |
3745 | ovn_lflow_add(lflows, peer->od, S_ROUTER_IN_ARP_RESOLVE, | |
3746 | 100, ds_cstr(&match), ds_cstr(&actions)); | |
3747 | } | |
75cf9d2b | 3748 | } |
9975d7be BP |
3749 | } |
3750 | } | |
75cf9d2b | 3751 | |
0bac7164 BP |
3752 | HMAP_FOR_EACH (od, key_node, datapaths) { |
3753 | if (!od->nbr) { | |
3754 | continue; | |
3755 | } | |
3756 | ||
3757 | ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_RESOLVE, 0, "1", | |
3758 | "get_arp(outport, reg0); next;"); | |
c34a87b6 JP |
3759 | ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_RESOLVE, 0, "ip4", |
3760 | "get_arp(outport, reg0); next;"); | |
3761 | ||
3762 | ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_RESOLVE, 0, "ip6", | |
3763 | "get_nd(outport, xxreg0); next;"); | |
0bac7164 BP |
3764 | } |
3765 | ||
94300e09 | 3766 | /* Local router ingress table 6: ARP request. |
0bac7164 BP |
3767 | * |
3768 | * In the common case where the Ethernet destination has been resolved, | |
94300e09 JP |
3769 | * this table outputs the packet (priority 0). Otherwise, it composes |
3770 | * and sends an ARP request (priority 100). */ | |
0bac7164 BP |
3771 | HMAP_FOR_EACH (od, key_node, datapaths) { |
3772 | if (!od->nbr) { | |
3773 | continue; | |
3774 | } | |
3775 | ||
3776 | ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_REQUEST, 100, | |
3777 | "eth.dst == 00:00:00:00:00:00", | |
3778 | "arp { " | |
3779 | "eth.dst = ff:ff:ff:ff:ff:ff; " | |
3780 | "arp.spa = reg1; " | |
3781 | "arp.op = 1; " /* ARP request */ | |
3782 | "output; " | |
3783 | "};"); | |
3784 | ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_REQUEST, 0, "1", "output;"); | |
3785 | } | |
9975d7be | 3786 | |
de297547 | 3787 | /* Logical router egress table 1: Delivery (priority 100). |
9975d7be BP |
3788 | * |
3789 | * Priority 100 rules deliver packets to enabled logical ports. */ | |
3790 | HMAP_FOR_EACH (op, key_node, ports) { | |
0ee00741 | 3791 | if (!op->nbrp) { |
9975d7be BP |
3792 | continue; |
3793 | } | |
3794 | ||
0ee00741 | 3795 | if (!lrport_is_enabled(op->nbrp)) { |
9975d7be BP |
3796 | /* Drop packets to disabled logical ports (since logical flow |
3797 | * tables are default-drop). */ | |
3798 | continue; | |
3799 | } | |
3800 | ||
09b39248 JP |
3801 | ds_clear(&match); |
3802 | ds_put_format(&match, "outport == %s", op->json_key); | |
9975d7be | 3803 | ovn_lflow_add(lflows, op->od, S_ROUTER_OUT_DELIVERY, 100, |
09b39248 | 3804 | ds_cstr(&match), "output;"); |
9975d7be | 3805 | } |
09b39248 JP |
3806 | |
3807 | ds_destroy(&match); | |
3808 | ds_destroy(&actions); | |
9975d7be BP |
3809 | } |
3810 | ||
3811 | /* Updates the Logical_Flow and Multicast_Group tables in the OVN_SB database, | |
3812 | * constructing their contents based on the OVN_NB database. */ | |
3813 | static void | |
3814 | build_lflows(struct northd_context *ctx, struct hmap *datapaths, | |
3815 | struct hmap *ports) | |
3816 | { | |
3817 | struct hmap lflows = HMAP_INITIALIZER(&lflows); | |
3818 | struct hmap mcgroups = HMAP_INITIALIZER(&mcgroups); | |
3819 | ||
3820 | build_lswitch_flows(datapaths, ports, &lflows, &mcgroups); | |
3821 | build_lrouter_flows(datapaths, ports, &lflows); | |
3822 | ||
5868eb24 BP |
3823 | /* Push changes to the Logical_Flow table to database. */ |
3824 | const struct sbrec_logical_flow *sbflow, *next_sbflow; | |
3825 | SBREC_LOGICAL_FLOW_FOR_EACH_SAFE (sbflow, next_sbflow, ctx->ovnsb_idl) { | |
3826 | struct ovn_datapath *od | |
3827 | = ovn_datapath_from_sbrec(datapaths, sbflow->logical_datapath); | |
3828 | if (!od) { | |
3829 | sbrec_logical_flow_delete(sbflow); | |
3830 | continue; | |
eb00399e | 3831 | } |
eb00399e | 3832 | |
9975d7be | 3833 | enum ovn_datapath_type dp_type = od->nbs ? DP_SWITCH : DP_ROUTER; |
880fcd14 BP |
3834 | enum ovn_pipeline pipeline |
3835 | = !strcmp(sbflow->pipeline, "ingress") ? P_IN : P_OUT; | |
5868eb24 | 3836 | struct ovn_lflow *lflow = ovn_lflow_find( |
880fcd14 BP |
3837 | &lflows, od, ovn_stage_build(dp_type, pipeline, sbflow->table_id), |
3838 | sbflow->priority, sbflow->match, sbflow->actions); | |
5868eb24 BP |
3839 | if (lflow) { |
3840 | ovn_lflow_destroy(&lflows, lflow); | |
3841 | } else { | |
3842 | sbrec_logical_flow_delete(sbflow); | |
4edcdcf4 RB |
3843 | } |
3844 | } | |
5868eb24 BP |
3845 | struct ovn_lflow *lflow, *next_lflow; |
3846 | HMAP_FOR_EACH_SAFE (lflow, next_lflow, hmap_node, &lflows) { | |
880fcd14 BP |
3847 | enum ovn_pipeline pipeline = ovn_stage_get_pipeline(lflow->stage); |
3848 | uint8_t table = ovn_stage_get_table(lflow->stage); | |
3849 | ||
5868eb24 BP |
3850 | sbflow = sbrec_logical_flow_insert(ctx->ovnsb_txn); |
3851 | sbrec_logical_flow_set_logical_datapath(sbflow, lflow->od->sb); | |
9975d7be BP |
3852 | sbrec_logical_flow_set_pipeline( |
3853 | sbflow, pipeline == P_IN ? "ingress" : "egress"); | |
880fcd14 | 3854 | sbrec_logical_flow_set_table_id(sbflow, table); |
5868eb24 BP |
3855 | sbrec_logical_flow_set_priority(sbflow, lflow->priority); |
3856 | sbrec_logical_flow_set_match(sbflow, lflow->match); | |
3857 | sbrec_logical_flow_set_actions(sbflow, lflow->actions); | |
091e3af9 | 3858 | |
880fcd14 BP |
3859 | const struct smap ids = SMAP_CONST1(&ids, "stage-name", |
3860 | ovn_stage_to_str(lflow->stage)); | |
aaf881c6 | 3861 | sbrec_logical_flow_set_external_ids(sbflow, &ids); |
091e3af9 | 3862 | |
5868eb24 | 3863 | ovn_lflow_destroy(&lflows, lflow); |
eb00399e | 3864 | } |
5868eb24 BP |
3865 | hmap_destroy(&lflows); |
3866 | ||
3867 | /* Push changes to the Multicast_Group table to database. */ | |
3868 | const struct sbrec_multicast_group *sbmc, *next_sbmc; | |
3869 | SBREC_MULTICAST_GROUP_FOR_EACH_SAFE (sbmc, next_sbmc, ctx->ovnsb_idl) { | |
3870 | struct ovn_datapath *od = ovn_datapath_from_sbrec(datapaths, | |
3871 | sbmc->datapath); | |
3872 | if (!od) { | |
3873 | sbrec_multicast_group_delete(sbmc); | |
3874 | continue; | |
3875 | } | |
eb00399e | 3876 | |
5868eb24 BP |
3877 | struct multicast_group group = { .name = sbmc->name, |
3878 | .key = sbmc->tunnel_key }; | |
3879 | struct ovn_multicast *mc = ovn_multicast_find(&mcgroups, od, &group); | |
3880 | if (mc) { | |
3881 | ovn_multicast_update_sbrec(mc, sbmc); | |
3882 | ovn_multicast_destroy(&mcgroups, mc); | |
3883 | } else { | |
3884 | sbrec_multicast_group_delete(sbmc); | |
3885 | } | |
3886 | } | |
3887 | struct ovn_multicast *mc, *next_mc; | |
3888 | HMAP_FOR_EACH_SAFE (mc, next_mc, hmap_node, &mcgroups) { | |
3889 | sbmc = sbrec_multicast_group_insert(ctx->ovnsb_txn); | |
3890 | sbrec_multicast_group_set_datapath(sbmc, mc->datapath->sb); | |
3891 | sbrec_multicast_group_set_name(sbmc, mc->group->name); | |
3892 | sbrec_multicast_group_set_tunnel_key(sbmc, mc->group->key); | |
3893 | ovn_multicast_update_sbrec(mc, sbmc); | |
3894 | ovn_multicast_destroy(&mcgroups, mc); | |
4edcdcf4 | 3895 | } |
5868eb24 | 3896 | hmap_destroy(&mcgroups); |
4edcdcf4 | 3897 | } |
ea382567 RB |
3898 | |
3899 | /* OVN_Northbound and OVN_Southbound have an identical Address_Set table. | |
3900 | * We always update OVN_Southbound to match the current data in | |
3901 | * OVN_Northbound, so that the address sets used in Logical_Flows in | |
3902 | * OVN_Southbound is checked against the proper set.*/ | |
3903 | static void | |
3904 | sync_address_sets(struct northd_context *ctx) | |
3905 | { | |
3906 | struct shash sb_address_sets = SHASH_INITIALIZER(&sb_address_sets); | |
3907 | ||
3908 | const struct sbrec_address_set *sb_address_set; | |
3909 | SBREC_ADDRESS_SET_FOR_EACH (sb_address_set, ctx->ovnsb_idl) { | |
3910 | shash_add(&sb_address_sets, sb_address_set->name, sb_address_set); | |
3911 | } | |
3912 | ||
3913 | const struct nbrec_address_set *nb_address_set; | |
3914 | NBREC_ADDRESS_SET_FOR_EACH (nb_address_set, ctx->ovnnb_idl) { | |
3915 | sb_address_set = shash_find_and_delete(&sb_address_sets, | |
3916 | nb_address_set->name); | |
3917 | if (!sb_address_set) { | |
3918 | sb_address_set = sbrec_address_set_insert(ctx->ovnsb_txn); | |
3919 | sbrec_address_set_set_name(sb_address_set, nb_address_set->name); | |
3920 | } | |
3921 | ||
3922 | sbrec_address_set_set_addresses(sb_address_set, | |
3923 | /* "char **" is not compatible with "const char **" */ | |
3924 | (const char **) nb_address_set->addresses, | |
3925 | nb_address_set->n_addresses); | |
3926 | } | |
3927 | ||
3928 | struct shash_node *node, *next; | |
3929 | SHASH_FOR_EACH_SAFE (node, next, &sb_address_sets) { | |
3930 | sbrec_address_set_delete(node->data); | |
3931 | shash_delete(&sb_address_sets, node); | |
3932 | } | |
3933 | shash_destroy(&sb_address_sets); | |
3934 | } | |
5868eb24 | 3935 | \f |
4edcdcf4 | 3936 | static void |
fa183acc | 3937 | ovnnb_db_run(struct northd_context *ctx, struct ovsdb_idl_loop *sb_loop) |
4edcdcf4 | 3938 | { |
fa183acc | 3939 | if (!ctx->ovnsb_txn || !ovsdb_idl_has_ever_connected(ctx->ovnnb_idl)) { |
331e7aef NS |
3940 | return; |
3941 | } | |
5868eb24 BP |
3942 | struct hmap datapaths, ports; |
3943 | build_datapaths(ctx, &datapaths); | |
3944 | build_ports(ctx, &datapaths, &ports); | |
8639f9be | 3945 | build_ipam(ctx, &datapaths, &ports); |
5868eb24 BP |
3946 | build_lflows(ctx, &datapaths, &ports); |
3947 | ||
ea382567 RB |
3948 | sync_address_sets(ctx); |
3949 | ||
5868eb24 BP |
3950 | struct ovn_datapath *dp, *next_dp; |
3951 | HMAP_FOR_EACH_SAFE (dp, next_dp, key_node, &datapaths) { | |
3952 | ovn_datapath_destroy(&datapaths, dp); | |
3953 | } | |
3954 | hmap_destroy(&datapaths); | |
3955 | ||
3956 | struct ovn_port *port, *next_port; | |
3957 | HMAP_FOR_EACH_SAFE (port, next_port, key_node, &ports) { | |
3958 | ovn_port_destroy(&ports, port); | |
3959 | } | |
3960 | hmap_destroy(&ports); | |
fa183acc BP |
3961 | |
3962 | /* Copy nb_cfg from northbound to southbound database. | |
3963 | * | |
3964 | * Also set up to update sb_cfg once our southbound transaction commits. */ | |
3965 | const struct nbrec_nb_global *nb = nbrec_nb_global_first(ctx->ovnnb_idl); | |
3966 | const struct sbrec_sb_global *sb = sbrec_sb_global_first(ctx->ovnsb_idl); | |
3967 | if (nb && sb) { | |
3968 | sbrec_sb_global_set_nb_cfg(sb, nb->nb_cfg); | |
3969 | sb_loop->next_cfg = nb->nb_cfg; | |
3970 | } | |
8639f9be ND |
3971 | |
3972 | cleanup_macam(&macam); | |
ac0630a2 RB |
3973 | } |
3974 | ||
fa183acc BP |
3975 | /* Handle changes to the 'chassis' column of the 'Port_Binding' table. When |
3976 | * this column is not empty, it means we need to set the corresponding logical | |
3977 | * port as 'up' in the northbound DB. */ | |
ac0630a2 | 3978 | static void |
fa183acc | 3979 | update_logical_port_status(struct northd_context *ctx) |
ac0630a2 | 3980 | { |
fc3113bc | 3981 | struct hmap lports_hmap; |
5868eb24 | 3982 | const struct sbrec_port_binding *sb; |
0ee00741 | 3983 | const struct nbrec_logical_switch_port *nbsp; |
fc3113bc RB |
3984 | |
3985 | struct lport_hash_node { | |
3986 | struct hmap_node node; | |
0ee00741 | 3987 | const struct nbrec_logical_switch_port *nbsp; |
4ec3d7c7 | 3988 | } *hash_node; |
f93818dd | 3989 | |
fc3113bc | 3990 | hmap_init(&lports_hmap); |
f93818dd | 3991 | |
0ee00741 | 3992 | NBREC_LOGICAL_SWITCH_PORT_FOR_EACH(nbsp, ctx->ovnnb_idl) { |
fc3113bc | 3993 | hash_node = xzalloc(sizeof *hash_node); |
0ee00741 HK |
3994 | hash_node->nbsp = nbsp; |
3995 | hmap_insert(&lports_hmap, &hash_node->node, hash_string(nbsp->name, 0)); | |
fc3113bc RB |
3996 | } |
3997 | ||
5868eb24 | 3998 | SBREC_PORT_BINDING_FOR_EACH(sb, ctx->ovnsb_idl) { |
0ee00741 | 3999 | nbsp = NULL; |
fc3113bc | 4000 | HMAP_FOR_EACH_WITH_HASH(hash_node, node, |
5868eb24 BP |
4001 | hash_string(sb->logical_port, 0), |
4002 | &lports_hmap) { | |
0ee00741 HK |
4003 | if (!strcmp(sb->logical_port, hash_node->nbsp->name)) { |
4004 | nbsp = hash_node->nbsp; | |
fc3113bc RB |
4005 | break; |
4006 | } | |
f93818dd RB |
4007 | } |
4008 | ||
0ee00741 | 4009 | if (!nbsp) { |
dcda6e0d | 4010 | /* The logical port doesn't exist for this port binding. This can |
2e2762d4 | 4011 | * happen under normal circumstances when ovn-northd hasn't gotten |
dcda6e0d | 4012 | * around to pruning the Port_Binding yet. */ |
f93818dd RB |
4013 | continue; |
4014 | } | |
4015 | ||
0ee00741 | 4016 | if (sb->chassis && (!nbsp->up || !*nbsp->up)) { |
f93818dd | 4017 | bool up = true; |
0ee00741 HK |
4018 | nbrec_logical_switch_port_set_up(nbsp, &up, 1); |
4019 | } else if (!sb->chassis && (!nbsp->up || *nbsp->up)) { | |
f93818dd | 4020 | bool up = false; |
0ee00741 | 4021 | nbrec_logical_switch_port_set_up(nbsp, &up, 1); |
f93818dd RB |
4022 | } |
4023 | } | |
fc3113bc | 4024 | |
4ec3d7c7 | 4025 | HMAP_FOR_EACH_POP(hash_node, node, &lports_hmap) { |
fc3113bc RB |
4026 | free(hash_node); |
4027 | } | |
4028 | hmap_destroy(&lports_hmap); | |
ac0630a2 | 4029 | } |
45f98d4c | 4030 | |
281977f7 NS |
4031 | static struct dhcp_opts_map supported_dhcp_opts[] = { |
4032 | OFFERIP, | |
4033 | DHCP_OPT_NETMASK, | |
4034 | DHCP_OPT_ROUTER, | |
4035 | DHCP_OPT_DNS_SERVER, | |
4036 | DHCP_OPT_LOG_SERVER, | |
4037 | DHCP_OPT_LPR_SERVER, | |
4038 | DHCP_OPT_SWAP_SERVER, | |
4039 | DHCP_OPT_POLICY_FILTER, | |
4040 | DHCP_OPT_ROUTER_SOLICITATION, | |
4041 | DHCP_OPT_NIS_SERVER, | |
4042 | DHCP_OPT_NTP_SERVER, | |
4043 | DHCP_OPT_SERVER_ID, | |
4044 | DHCP_OPT_TFTP_SERVER, | |
4045 | DHCP_OPT_CLASSLESS_STATIC_ROUTE, | |
4046 | DHCP_OPT_MS_CLASSLESS_STATIC_ROUTE, | |
4047 | DHCP_OPT_IP_FORWARD_ENABLE, | |
4048 | DHCP_OPT_ROUTER_DISCOVERY, | |
4049 | DHCP_OPT_ETHERNET_ENCAP, | |
4050 | DHCP_OPT_DEFAULT_TTL, | |
4051 | DHCP_OPT_TCP_TTL, | |
4052 | DHCP_OPT_MTU, | |
4053 | DHCP_OPT_LEASE_TIME, | |
4054 | DHCP_OPT_T1, | |
4055 | DHCP_OPT_T2 | |
4056 | }; | |
4057 | ||
33ac3c83 NS |
4058 | static struct dhcp_opts_map supported_dhcpv6_opts[] = { |
4059 | DHCPV6_OPT_IA_ADDR, | |
4060 | DHCPV6_OPT_SERVER_ID, | |
4061 | DHCPV6_OPT_DOMAIN_SEARCH, | |
4062 | DHCPV6_OPT_DNS_SERVER | |
4063 | }; | |
4064 | ||
281977f7 NS |
4065 | static void |
4066 | check_and_add_supported_dhcp_opts_to_sb_db(struct northd_context *ctx) | |
4067 | { | |
4068 | struct hmap dhcp_opts_to_add = HMAP_INITIALIZER(&dhcp_opts_to_add); | |
4069 | for (size_t i = 0; (i < sizeof(supported_dhcp_opts) / | |
4070 | sizeof(supported_dhcp_opts[0])); i++) { | |
4071 | hmap_insert(&dhcp_opts_to_add, &supported_dhcp_opts[i].hmap_node, | |
4072 | dhcp_opt_hash(supported_dhcp_opts[i].name)); | |
4073 | } | |
4074 | ||
4075 | const struct sbrec_dhcp_options *opt_row, *opt_row_next; | |
4076 | SBREC_DHCP_OPTIONS_FOR_EACH_SAFE(opt_row, opt_row_next, ctx->ovnsb_idl) { | |
4077 | struct dhcp_opts_map *dhcp_opt = | |
4078 | dhcp_opts_find(&dhcp_opts_to_add, opt_row->name); | |
4079 | if (dhcp_opt) { | |
4080 | hmap_remove(&dhcp_opts_to_add, &dhcp_opt->hmap_node); | |
4081 | } else { | |
4082 | sbrec_dhcp_options_delete(opt_row); | |
4083 | } | |
4084 | } | |
4085 | ||
4086 | struct dhcp_opts_map *opt; | |
4087 | HMAP_FOR_EACH (opt, hmap_node, &dhcp_opts_to_add) { | |
4088 | struct sbrec_dhcp_options *sbrec_dhcp_option = | |
4089 | sbrec_dhcp_options_insert(ctx->ovnsb_txn); | |
4090 | sbrec_dhcp_options_set_name(sbrec_dhcp_option, opt->name); | |
4091 | sbrec_dhcp_options_set_code(sbrec_dhcp_option, opt->code); | |
4092 | sbrec_dhcp_options_set_type(sbrec_dhcp_option, opt->type); | |
4093 | } | |
4094 | ||
4095 | hmap_destroy(&dhcp_opts_to_add); | |
4096 | } | |
4097 | ||
33ac3c83 NS |
4098 | static void |
4099 | check_and_add_supported_dhcpv6_opts_to_sb_db(struct northd_context *ctx) | |
4100 | { | |
4101 | struct hmap dhcpv6_opts_to_add = HMAP_INITIALIZER(&dhcpv6_opts_to_add); | |
4102 | for (size_t i = 0; (i < sizeof(supported_dhcpv6_opts) / | |
4103 | sizeof(supported_dhcpv6_opts[0])); i++) { | |
4104 | hmap_insert(&dhcpv6_opts_to_add, &supported_dhcpv6_opts[i].hmap_node, | |
4105 | dhcp_opt_hash(supported_dhcpv6_opts[i].name)); | |
4106 | } | |
4107 | ||
4108 | const struct sbrec_dhcpv6_options *opt_row, *opt_row_next; | |
4109 | SBREC_DHCPV6_OPTIONS_FOR_EACH_SAFE(opt_row, opt_row_next, ctx->ovnsb_idl) { | |
4110 | struct dhcp_opts_map *dhcp_opt = | |
4111 | dhcp_opts_find(&dhcpv6_opts_to_add, opt_row->name); | |
4112 | if (dhcp_opt) { | |
4113 | hmap_remove(&dhcpv6_opts_to_add, &dhcp_opt->hmap_node); | |
4114 | } else { | |
4115 | sbrec_dhcpv6_options_delete(opt_row); | |
4116 | } | |
4117 | } | |
4118 | ||
4119 | struct dhcp_opts_map *opt; | |
4120 | HMAP_FOR_EACH(opt, hmap_node, &dhcpv6_opts_to_add) { | |
4121 | struct sbrec_dhcpv6_options *sbrec_dhcpv6_option = | |
4122 | sbrec_dhcpv6_options_insert(ctx->ovnsb_txn); | |
4123 | sbrec_dhcpv6_options_set_name(sbrec_dhcpv6_option, opt->name); | |
4124 | sbrec_dhcpv6_options_set_code(sbrec_dhcpv6_option, opt->code); | |
4125 | sbrec_dhcpv6_options_set_type(sbrec_dhcpv6_option, opt->type); | |
4126 | } | |
4127 | ||
4128 | hmap_destroy(&dhcpv6_opts_to_add); | |
4129 | } | |
4130 | ||
fa183acc BP |
4131 | /* Updates the sb_cfg and hv_cfg columns in the northbound NB_Global table. */ |
4132 | static void | |
4133 | update_northbound_cfg(struct northd_context *ctx, | |
4134 | struct ovsdb_idl_loop *sb_loop) | |
4135 | { | |
4136 | /* Update northbound sb_cfg if appropriate. */ | |
4137 | const struct nbrec_nb_global *nbg = nbrec_nb_global_first(ctx->ovnnb_idl); | |
4138 | int64_t sb_cfg = sb_loop->cur_cfg; | |
4139 | if (nbg && sb_cfg && nbg->sb_cfg != sb_cfg) { | |
4140 | nbrec_nb_global_set_sb_cfg(nbg, sb_cfg); | |
4141 | } | |
4142 | ||
4143 | /* Update northbound hv_cfg if appropriate. */ | |
4144 | if (nbg) { | |
4145 | /* Find minimum nb_cfg among all chassis. */ | |
4146 | const struct sbrec_chassis *chassis; | |
4147 | int64_t hv_cfg = nbg->nb_cfg; | |
4148 | SBREC_CHASSIS_FOR_EACH (chassis, ctx->ovnsb_idl) { | |
4149 | if (chassis->nb_cfg < hv_cfg) { | |
4150 | hv_cfg = chassis->nb_cfg; | |
4151 | } | |
4152 | } | |
4153 | ||
4154 | /* Update hv_cfg. */ | |
4155 | if (nbg->hv_cfg != hv_cfg) { | |
4156 | nbrec_nb_global_set_hv_cfg(nbg, hv_cfg); | |
4157 | } | |
4158 | } | |
4159 | } | |
4160 | ||
4161 | /* Handle a fairly small set of changes in the southbound database. */ | |
4162 | static void | |
4163 | ovnsb_db_run(struct northd_context *ctx, struct ovsdb_idl_loop *sb_loop) | |
4164 | { | |
4165 | if (!ctx->ovnnb_txn || !ovsdb_idl_has_ever_connected(ctx->ovnsb_idl)) { | |
4166 | return; | |
4167 | } | |
4168 | ||
4169 | update_logical_port_status(ctx); | |
4170 | update_northbound_cfg(ctx, sb_loop); | |
4171 | } | |
4172 | \f | |
ac0630a2 RB |
4173 | static void |
4174 | parse_options(int argc OVS_UNUSED, char *argv[] OVS_UNUSED) | |
4175 | { | |
4176 | enum { | |
67d9b930 | 4177 | DAEMON_OPTION_ENUMS, |
ac0630a2 RB |
4178 | VLOG_OPTION_ENUMS, |
4179 | }; | |
4180 | static const struct option long_options[] = { | |
ec78987f | 4181 | {"ovnsb-db", required_argument, NULL, 'd'}, |
ac0630a2 RB |
4182 | {"ovnnb-db", required_argument, NULL, 'D'}, |
4183 | {"help", no_argument, NULL, 'h'}, | |
4184 | {"options", no_argument, NULL, 'o'}, | |
4185 | {"version", no_argument, NULL, 'V'}, | |
67d9b930 | 4186 | DAEMON_LONG_OPTIONS, |
ac0630a2 RB |
4187 | VLOG_LONG_OPTIONS, |
4188 | STREAM_SSL_LONG_OPTIONS, | |
4189 | {NULL, 0, NULL, 0}, | |
4190 | }; | |
4191 | char *short_options = ovs_cmdl_long_options_to_short_options(long_options); | |
4192 | ||
4193 | for (;;) { | |
4194 | int c; | |
4195 | ||
4196 | c = getopt_long(argc, argv, short_options, long_options, NULL); | |
4197 | if (c == -1) { | |
4198 | break; | |
4199 | } | |
4200 | ||
4201 | switch (c) { | |
67d9b930 | 4202 | DAEMON_OPTION_HANDLERS; |
ac0630a2 RB |
4203 | VLOG_OPTION_HANDLERS; |
4204 | STREAM_SSL_OPTION_HANDLERS; | |
4205 | ||
4206 | case 'd': | |
ec78987f | 4207 | ovnsb_db = optarg; |
ac0630a2 RB |
4208 | break; |
4209 | ||
4210 | case 'D': | |
4211 | ovnnb_db = optarg; | |
4212 | break; | |
4213 | ||
4214 | case 'h': | |
4215 | usage(); | |
4216 | exit(EXIT_SUCCESS); | |
4217 | ||
4218 | case 'o': | |
4219 | ovs_cmdl_print_options(long_options); | |
4220 | exit(EXIT_SUCCESS); | |
4221 | ||
4222 | case 'V': | |
4223 | ovs_print_version(0, 0); | |
4224 | exit(EXIT_SUCCESS); | |
4225 | ||
4226 | default: | |
4227 | break; | |
4228 | } | |
4229 | } | |
4230 | ||
ec78987f | 4231 | if (!ovnsb_db) { |
60bdd011 | 4232 | ovnsb_db = default_sb_db(); |
ac0630a2 RB |
4233 | } |
4234 | ||
4235 | if (!ovnnb_db) { | |
60bdd011 | 4236 | ovnnb_db = default_nb_db(); |
ac0630a2 RB |
4237 | } |
4238 | ||
4239 | free(short_options); | |
4240 | } | |
4241 | ||
5868eb24 BP |
/* Registers 'column' with 'idl' via ovsdb_idl_add_column() and then calls
 * ovsdb_idl_omit_alert() on it.  main() uses this for southbound columns
 * whose changes it does not want to detect. */
static void
add_column_noalert(struct ovsdb_idl *idl,
                   const struct ovsdb_idl_column *column)
{
    /* The column is added first, then its alerts are turned off. */
    ovsdb_idl_add_column(idl, column);
    ovsdb_idl_omit_alert(idl, column);
}
4249 | ||
ac0630a2 RB |
4250 | int |
4251 | main(int argc, char *argv[]) | |
4252 | { | |
ac0630a2 | 4253 | int res = EXIT_SUCCESS; |
7b303ff9 AW |
4254 | struct unixctl_server *unixctl; |
4255 | int retval; | |
4256 | bool exiting; | |
ac0630a2 RB |
4257 | |
4258 | fatal_ignore_sigpipe(); | |
4259 | set_program_name(argv[0]); | |
485f0696 | 4260 | service_start(&argc, &argv); |
ac0630a2 | 4261 | parse_options(argc, argv); |
67d9b930 | 4262 | |
e91b927d | 4263 | daemonize_start(false); |
7b303ff9 AW |
4264 | |
4265 | retval = unixctl_server_create(NULL, &unixctl); | |
4266 | if (retval) { | |
4267 | exit(EXIT_FAILURE); | |
4268 | } | |
4269 | unixctl_command_register("exit", "", 0, 0, ovn_northd_exit, &exiting); | |
4270 | ||
4271 | daemonize_complete(); | |
67d9b930 | 4272 | |
ac0630a2 | 4273 | nbrec_init(); |
ec78987f | 4274 | sbrec_init(); |
ac0630a2 | 4275 | |
fa183acc | 4276 | /* We want to detect (almost) all changes to the ovn-nb db. */ |
331e7aef NS |
4277 | struct ovsdb_idl_loop ovnnb_idl_loop = OVSDB_IDL_LOOP_INITIALIZER( |
4278 | ovsdb_idl_create(ovnnb_db, &nbrec_idl_class, true, true)); | |
fa183acc BP |
4279 | ovsdb_idl_omit_alert(ovnnb_idl_loop.idl, &nbrec_nb_global_col_sb_cfg); |
4280 | ovsdb_idl_omit_alert(ovnnb_idl_loop.idl, &nbrec_nb_global_col_hv_cfg); | |
331e7aef | 4281 | |
fa183acc | 4282 | /* We want to detect only selected changes to the ovn-sb db. */ |
331e7aef NS |
4283 | struct ovsdb_idl_loop ovnsb_idl_loop = OVSDB_IDL_LOOP_INITIALIZER( |
4284 | ovsdb_idl_create(ovnsb_db, &sbrec_idl_class, false, true)); | |
4285 | ||
fa183acc BP |
4286 | ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_sb_global); |
4287 | add_column_noalert(ovnsb_idl_loop.idl, &sbrec_sb_global_col_nb_cfg); | |
4288 | ||
331e7aef NS |
4289 | ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_logical_flow); |
4290 | add_column_noalert(ovnsb_idl_loop.idl, | |
4291 | &sbrec_logical_flow_col_logical_datapath); | |
4292 | add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_pipeline); | |
4293 | add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_table_id); | |
4294 | add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_priority); | |
4295 | add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_match); | |
4296 | add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_actions); | |
4297 | ||
4298 | ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_multicast_group); | |
4299 | add_column_noalert(ovnsb_idl_loop.idl, | |
4300 | &sbrec_multicast_group_col_datapath); | |
4301 | add_column_noalert(ovnsb_idl_loop.idl, | |
4302 | &sbrec_multicast_group_col_tunnel_key); | |
4303 | add_column_noalert(ovnsb_idl_loop.idl, &sbrec_multicast_group_col_name); | |
4304 | add_column_noalert(ovnsb_idl_loop.idl, &sbrec_multicast_group_col_ports); | |
4305 | ||
4306 | ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_datapath_binding); | |
4307 | add_column_noalert(ovnsb_idl_loop.idl, | |
4308 | &sbrec_datapath_binding_col_tunnel_key); | |
4309 | add_column_noalert(ovnsb_idl_loop.idl, | |
4310 | &sbrec_datapath_binding_col_external_ids); | |
4311 | ||
4312 | ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_port_binding); | |
4313 | add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_datapath); | |
4314 | add_column_noalert(ovnsb_idl_loop.idl, | |
4315 | &sbrec_port_binding_col_logical_port); | |
4316 | add_column_noalert(ovnsb_idl_loop.idl, | |
4317 | &sbrec_port_binding_col_tunnel_key); | |
4318 | add_column_noalert(ovnsb_idl_loop.idl, | |
4319 | &sbrec_port_binding_col_parent_port); | |
4320 | add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_tag); | |
4321 | add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_type); | |
4322 | add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_options); | |
4323 | add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_mac); | |
4324 | ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_port_binding_col_chassis); | |
281977f7 NS |
4325 | ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_dhcp_options); |
4326 | add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dhcp_options_col_code); | |
4327 | add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dhcp_options_col_type); | |
4328 | add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dhcp_options_col_name); | |
33ac3c83 NS |
4329 | ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_dhcpv6_options); |
4330 | add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dhcpv6_options_col_code); | |
4331 | add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dhcpv6_options_col_type); | |
4332 | add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dhcpv6_options_col_name); | |
ea382567 RB |
4333 | ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_address_set); |
4334 | add_column_noalert(ovnsb_idl_loop.idl, &sbrec_address_set_col_name); | |
4335 | add_column_noalert(ovnsb_idl_loop.idl, &sbrec_address_set_col_addresses); | |
4336 | ||
fa183acc BP |
4337 | ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_chassis); |
4338 | ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_chassis_col_nb_cfg); | |
4339 | ||
331e7aef | 4340 | /* Main loop. */ |
7b303ff9 AW |
4341 | exiting = false; |
4342 | while (!exiting) { | |
331e7aef NS |
4343 | struct northd_context ctx = { |
4344 | .ovnnb_idl = ovnnb_idl_loop.idl, | |
4345 | .ovnnb_txn = ovsdb_idl_loop_run(&ovnnb_idl_loop), | |
4346 | .ovnsb_idl = ovnsb_idl_loop.idl, | |
4347 | .ovnsb_txn = ovsdb_idl_loop_run(&ovnsb_idl_loop), | |
4348 | }; | |
ac0630a2 | 4349 | |
fa183acc BP |
4350 | ovnnb_db_run(&ctx, &ovnsb_idl_loop); |
4351 | ovnsb_db_run(&ctx, &ovnsb_idl_loop); | |
281977f7 NS |
4352 | if (ctx.ovnsb_txn) { |
4353 | check_and_add_supported_dhcp_opts_to_sb_db(&ctx); | |
33ac3c83 | 4354 | check_and_add_supported_dhcpv6_opts_to_sb_db(&ctx); |
281977f7 | 4355 | } |
f93818dd | 4356 | |
331e7aef NS |
4357 | unixctl_server_run(unixctl); |
4358 | unixctl_server_wait(unixctl); | |
4359 | if (exiting) { | |
4360 | poll_immediate_wake(); | |
ac0630a2 | 4361 | } |
331e7aef NS |
4362 | ovsdb_idl_loop_commit_and_wait(&ovnnb_idl_loop); |
4363 | ovsdb_idl_loop_commit_and_wait(&ovnsb_idl_loop); | |
ac0630a2 | 4364 | |
331e7aef | 4365 | poll_block(); |
485f0696 GS |
4366 | if (should_service_stop()) { |
4367 | exiting = true; | |
4368 | } | |
ac0630a2 RB |
4369 | } |
4370 | ||
7b303ff9 | 4371 | unixctl_server_destroy(unixctl); |
331e7aef NS |
4372 | ovsdb_idl_loop_destroy(&ovnnb_idl_loop); |
4373 | ovsdb_idl_loop_destroy(&ovnsb_idl_loop); | |
485f0696 | 4374 | service_stop(); |
ac0630a2 RB |
4375 | |
4376 | exit(res); | |
4377 | } | |
7b303ff9 AW |
4378 | |
4379 | static void | |
4380 | ovn_northd_exit(struct unixctl_conn *conn, int argc OVS_UNUSED, | |
4381 | const char *argv[] OVS_UNUSED, void *exiting_) | |
4382 | { | |
4383 | bool *exiting = exiting_; | |
4384 | *exiting = true; | |
4385 | ||
4386 | unixctl_command_reply(conn, NULL); | |
4387 | } |