]>
Commit | Line | Data |
---|---|---|
ac0630a2 RB |
1 | /* |
2 | * Licensed under the Apache License, Version 2.0 (the "License"); | |
3 | * you may not use this file except in compliance with the License. | |
4 | * You may obtain a copy of the License at: | |
5 | * | |
6 | * http://www.apache.org/licenses/LICENSE-2.0 | |
7 | * | |
8 | * Unless required by applicable law or agreed to in writing, software | |
9 | * distributed under the License is distributed on an "AS IS" BASIS, | |
10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
11 | * See the License for the specific language governing permissions and | |
12 | * limitations under the License. | |
13 | */ | |
14 | ||
15 | #include <config.h> | |
16 | ||
17 | #include <getopt.h> | |
18 | #include <stdlib.h> | |
19 | #include <stdio.h> | |
20 | ||
21 | #include "command-line.h" | |
67d9b930 | 22 | #include "daemon.h" |
ac0630a2 | 23 | #include "dirs.h" |
3e8a2ad1 | 24 | #include "openvswitch/dynamic-string.h" |
ac0630a2 | 25 | #include "fatal-signal.h" |
4edcdcf4 | 26 | #include "hash.h" |
ee89ea7b TW |
27 | #include "openvswitch/hmap.h" |
28 | #include "openvswitch/json.h" | |
bd39395f | 29 | #include "ovn/lib/lex.h" |
e3df8838 BP |
30 | #include "ovn/lib/ovn-nb-idl.h" |
31 | #include "ovn/lib/ovn-sb-idl.h" | |
218351dd | 32 | #include "ovn/lib/ovn-util.h" |
064d7f84 | 33 | #include "packets.h" |
ac0630a2 | 34 | #include "poll-loop.h" |
5868eb24 | 35 | #include "smap.h" |
7a15be69 | 36 | #include "sset.h" |
ac0630a2 RB |
37 | #include "stream.h" |
38 | #include "stream-ssl.h" | |
7b303ff9 | 39 | #include "unixctl.h" |
ac0630a2 | 40 | #include "util.h" |
4edcdcf4 | 41 | #include "uuid.h" |
ac0630a2 RB |
42 | #include "openvswitch/vlog.h" |
43 | ||
2e2762d4 | 44 | VLOG_DEFINE_THIS_MODULE(ovn_northd); |
ac0630a2 | 45 | |
7b303ff9 AW |
46 | static unixctl_cb_func ovn_northd_exit; |
47 | ||
/* Handles for the OVN northbound ("nb") and southbound ("sb") databases,
 * together with one transaction on each. */
struct northd_context {
    /* Database connections. */
    struct ovsdb_idl *ovnnb_idl;        /* Northbound database. */
    struct ovsdb_idl *ovnsb_idl;        /* Southbound database. */

    /* Transactions, one per database above. */
    struct ovsdb_idl_txn *ovnnb_txn;    /* On 'ovnnb_idl'. */
    struct ovsdb_idl_txn *ovnsb_txn;    /* On 'ovnsb_idl'. */
};
54 | ||
ac0630a2 | 55 | static const char *ovnnb_db; |
ec78987f | 56 | static const char *ovnsb_db; |
ac0630a2 | 57 | |
60bdd011 RM |
58 | static const char *default_nb_db(void); |
59 | static const char *default_sb_db(void); | |
880fcd14 BP |
60 | \f |
61 | /* Pipeline stages. */ | |
ac0630a2 | 62 | |
880fcd14 BP |
/* Selects one of the two pipelines of an OVN logical flow table.
 *
 * The numeric values matter: OVN_STAGE_BUILD() stores the pipeline in a
 * single bit of an "enum ovn_stage". */
enum ovn_pipeline {
    P_IN = 0,                   /* Ingress pipeline. */
    P_OUT = 1                   /* Egress pipeline. */
};
091e3af9 | 68 | |
880fcd14 BP |
/* What a given OVN logical datapath is used for by ovn-northd: either a
 * logical switch or a logical router. */
enum ovn_datapath_type {
    DP_SWITCH = 0,              /* OVN logical switch. */
    DP_ROUTER = 1               /* OVN logical router. */
};
74 | ||
880fcd14 BP |
/* Expands to the "enum ovn_stage" value for the given datapath type, pipeline
 * and table number.
 *
 * Bit layout of the result: TABLE occupies the low bits, PIPELINE is bit 8,
 * and DP_TYPE starts at bit 9.
 *
 * ovn_stage_build() is the type-safe way to do this, but only a macro can be
 * used where a constant expression is required, such as an enumerator value
 * or a "case" label. */
#define OVN_STAGE_BUILD(DP_TYPE, PIPELINE, TABLE) \
    (((DP_TYPE) << 9) | ((PIPELINE) << 8) | (TABLE))
81 | ||
82 | /* A stage within an OVN logical switch or router. | |
091e3af9 | 83 | * |
880fcd14 BP |
84 | * An "enum ovn_stage" indicates whether the stage is part of a logical switch |
85 | * or router, whether the stage is part of the ingress or egress pipeline, and | |
86 | * the table within that pipeline. The first three components are combined to | |
685f4dfe | 87 | * form the stage's full name, e.g. S_SWITCH_IN_PORT_SEC_L2, |
880fcd14 BP |
88 | * S_ROUTER_OUT_DELIVERY. */ |
89 | enum ovn_stage { | |
e0c9e58b JP |
90 | #define PIPELINE_STAGES \ |
91 | /* Logical switch ingress stages. */ \ | |
685f4dfe NS |
92 | PIPELINE_STAGE(SWITCH, IN, PORT_SEC_L2, 0, "ls_in_port_sec_l2") \ |
93 | PIPELINE_STAGE(SWITCH, IN, PORT_SEC_IP, 1, "ls_in_port_sec_ip") \ | |
94 | PIPELINE_STAGE(SWITCH, IN, PORT_SEC_ND, 2, "ls_in_port_sec_nd") \ | |
95 | PIPELINE_STAGE(SWITCH, IN, PRE_ACL, 3, "ls_in_pre_acl") \ | |
7a15be69 GS |
96 | PIPELINE_STAGE(SWITCH, IN, PRE_LB, 4, "ls_in_pre_lb") \ |
97 | PIPELINE_STAGE(SWITCH, IN, PRE_STATEFUL, 5, "ls_in_pre_stateful") \ | |
98 | PIPELINE_STAGE(SWITCH, IN, ACL, 6, "ls_in_acl") \ | |
99 | PIPELINE_STAGE(SWITCH, IN, LB, 7, "ls_in_lb") \ | |
100 | PIPELINE_STAGE(SWITCH, IN, STATEFUL, 8, "ls_in_stateful") \ | |
101 | PIPELINE_STAGE(SWITCH, IN, ARP_ND_RSP, 9, "ls_in_arp_rsp") \ | |
102 | PIPELINE_STAGE(SWITCH, IN, L2_LKUP, 10, "ls_in_l2_lkup") \ | |
e0c9e58b JP |
103 | \ |
104 | /* Logical switch egress stages. */ \ | |
7a15be69 GS |
105 | PIPELINE_STAGE(SWITCH, OUT, PRE_LB, 0, "ls_out_pre_lb") \ |
106 | PIPELINE_STAGE(SWITCH, OUT, PRE_ACL, 1, "ls_out_pre_acl") \ | |
107 | PIPELINE_STAGE(SWITCH, OUT, PRE_STATEFUL, 2, "ls_out_pre_stateful") \ | |
108 | PIPELINE_STAGE(SWITCH, OUT, LB, 3, "ls_out_lb") \ | |
109 | PIPELINE_STAGE(SWITCH, OUT, ACL, 4, "ls_out_acl") \ | |
110 | PIPELINE_STAGE(SWITCH, OUT, STATEFUL, 5, "ls_out_stateful") \ | |
111 | PIPELINE_STAGE(SWITCH, OUT, PORT_SEC_IP, 6, "ls_out_port_sec_ip") \ | |
112 | PIPELINE_STAGE(SWITCH, OUT, PORT_SEC_L2, 7, "ls_out_port_sec_l2") \ | |
e0c9e58b JP |
113 | \ |
114 | /* Logical router ingress stages. */ \ | |
115 | PIPELINE_STAGE(ROUTER, IN, ADMISSION, 0, "lr_in_admission") \ | |
116 | PIPELINE_STAGE(ROUTER, IN, IP_INPUT, 1, "lr_in_ip_input") \ | |
de297547 GS |
117 | PIPELINE_STAGE(ROUTER, IN, UNSNAT, 2, "lr_in_unsnat") \ |
118 | PIPELINE_STAGE(ROUTER, IN, DNAT, 3, "lr_in_dnat") \ | |
119 | PIPELINE_STAGE(ROUTER, IN, IP_ROUTING, 4, "lr_in_ip_routing") \ | |
120 | PIPELINE_STAGE(ROUTER, IN, ARP_RESOLVE, 5, "lr_in_arp_resolve") \ | |
121 | PIPELINE_STAGE(ROUTER, IN, ARP_REQUEST, 6, "lr_in_arp_request") \ | |
e0c9e58b JP |
122 | \ |
123 | /* Logical router egress stages. */ \ | |
de297547 GS |
124 | PIPELINE_STAGE(ROUTER, OUT, SNAT, 0, "lr_out_snat") \ |
125 | PIPELINE_STAGE(ROUTER, OUT, DELIVERY, 1, "lr_out_delivery") | |
880fcd14 BP |
126 | |
127 | #define PIPELINE_STAGE(DP_TYPE, PIPELINE, STAGE, TABLE, NAME) \ | |
128 | S_##DP_TYPE##_##PIPELINE##_##STAGE \ | |
129 | = OVN_STAGE_BUILD(DP_##DP_TYPE, P_##PIPELINE, TABLE), | |
130 | PIPELINE_STAGES | |
131 | #undef PIPELINE_STAGE | |
091e3af9 JP |
132 | }; |
133 | ||
6bb4a18e JP |
/* Amount added to an ACL's northbound priority to obtain the priority of the
 * logical flow that implements it.
 *
 * Implementing ACLs needs a number of hard-coded logical flow priorities, so
 * the northbound database offers a smaller range of ACL priorities than
 * logical flows do. */
#define OVN_ACL_PRI_OFFSET 1000
139 | ||
/* Names of individual bits of "reg0", one per connection-tracking related
 * flag.  Each expands to a string literal, so it can be pasted directly into
 * match and action strings. */
#define REGBIT_CONNTRACK_DEFRAG "reg0[0]"
#define REGBIT_CONNTRACK_COMMIT "reg0[1]"
#define REGBIT_CONNTRACK_NAT    "reg0[2]"
facf8652 | 143 | |
880fcd14 BP |
144 | /* Returns an "enum ovn_stage" built from the arguments. */ |
145 | static enum ovn_stage | |
146 | ovn_stage_build(enum ovn_datapath_type dp_type, enum ovn_pipeline pipeline, | |
147 | uint8_t table) | |
148 | { | |
149 | return OVN_STAGE_BUILD(dp_type, pipeline, table); | |
150 | } | |
151 | ||
152 | /* Returns the pipeline to which 'stage' belongs. */ | |
153 | static enum ovn_pipeline | |
154 | ovn_stage_get_pipeline(enum ovn_stage stage) | |
155 | { | |
156 | return (stage >> 8) & 1; | |
157 | } | |
158 | ||
159 | /* Returns the table to which 'stage' belongs. */ | |
160 | static uint8_t | |
161 | ovn_stage_get_table(enum ovn_stage stage) | |
162 | { | |
163 | return stage & 0xff; | |
164 | } | |
165 | ||
166 | /* Returns a string name for 'stage'. */ | |
167 | static const char * | |
168 | ovn_stage_to_str(enum ovn_stage stage) | |
169 | { | |
170 | switch (stage) { | |
171 | #define PIPELINE_STAGE(DP_TYPE, PIPELINE, STAGE, TABLE, NAME) \ | |
172 | case S_##DP_TYPE##_##PIPELINE##_##STAGE: return NAME; | |
173 | PIPELINE_STAGES | |
174 | #undef PIPELINE_STAGE | |
175 | default: return "<unknown>"; | |
176 | } | |
177 | } | |
9a9961d2 BP |
178 | |
179 | /* Returns the type of the datapath to which a flow with the given 'stage' may | |
180 | * be added. */ | |
181 | static enum ovn_datapath_type | |
182 | ovn_stage_to_datapath_type(enum ovn_stage stage) | |
183 | { | |
184 | switch (stage) { | |
185 | #define PIPELINE_STAGE(DP_TYPE, PIPELINE, STAGE, TABLE, NAME) \ | |
186 | case S_##DP_TYPE##_##PIPELINE##_##STAGE: return DP_##DP_TYPE; | |
187 | PIPELINE_STAGES | |
188 | #undef PIPELINE_STAGE | |
189 | default: OVS_NOT_REACHED(); | |
190 | } | |
191 | } | |
880fcd14 | 192 | \f |
ac0630a2 RB |
/* Prints the command-line help text with printf(): the program name and
 * synopsis, the --ovnnb-db and --ovnsb-db options with the defaults returned
 * by default_nb_db() and default_sb_db(), and the -h/-o/-V options.  Then
 * calls daemon_usage(), vlog_usage() and stream_usage(). */
193 | static void |
194 | usage(void) | |
195 | { | |
196 | printf("\ | |
197 | %s: OVN northbound management daemon\n\ | |
198 | usage: %s [OPTIONS]\n\ | |
199 | \n\ | |
200 | Options:\n\ | |
201 | --ovnnb-db=DATABASE connect to ovn-nb database at DATABASE\n\ | |
202 | (default: %s)\n\ | |
ec78987f | 203 | --ovnsb-db=DATABASE connect to ovn-sb database at DATABASE\n\ |
ac0630a2 RB |
204 | (default: %s)\n\ |
205 | -h, --help display this help message\n\ | |
206 | -o, --options list available options\n\ | |
207 | -V, --version display version information\n\ | |
60bdd011 | 208 | ", program_name, program_name, default_nb_db(), default_sb_db()); |
67d9b930 | 209 | daemon_usage(); |
ac0630a2 RB |
210 | vlog_usage(); |
211 | stream_usage("database", true, true, false); | |
212 | } | |
213 | \f | |
5868eb24 BP |
214 | struct tnlid_node { |
215 | struct hmap_node hmap_node; | |
216 | uint32_t tnlid; | |
217 | }; | |
218 | ||
219 | static void | |
220 | destroy_tnlids(struct hmap *tnlids) | |
4edcdcf4 | 221 | { |
4ec3d7c7 DDP |
222 | struct tnlid_node *node; |
223 | HMAP_FOR_EACH_POP (node, hmap_node, tnlids) { | |
5868eb24 BP |
224 | free(node); |
225 | } | |
226 | hmap_destroy(tnlids); | |
227 | } | |
228 | ||
229 | static void | |
230 | add_tnlid(struct hmap *set, uint32_t tnlid) | |
231 | { | |
232 | struct tnlid_node *node = xmalloc(sizeof *node); | |
233 | hmap_insert(set, &node->hmap_node, hash_int(tnlid, 0)); | |
234 | node->tnlid = tnlid; | |
4edcdcf4 RB |
235 | } |
236 | ||
4edcdcf4 | 237 | static bool |
5868eb24 | 238 | tnlid_in_use(const struct hmap *set, uint32_t tnlid) |
4edcdcf4 | 239 | { |
5868eb24 BP |
240 | const struct tnlid_node *node; |
241 | HMAP_FOR_EACH_IN_BUCKET (node, hmap_node, hash_int(tnlid, 0), set) { | |
242 | if (node->tnlid == tnlid) { | |
243 | return true; | |
244 | } | |
245 | } | |
246 | return false; | |
247 | } | |
4edcdcf4 | 248 | |
5868eb24 BP |
249 | static uint32_t |
250 | allocate_tnlid(struct hmap *set, const char *name, uint32_t max, | |
251 | uint32_t *hint) | |
252 | { | |
253 | for (uint32_t tnlid = *hint + 1; tnlid != *hint; | |
254 | tnlid = tnlid + 1 <= max ? tnlid + 1 : 1) { | |
255 | if (!tnlid_in_use(set, tnlid)) { | |
256 | add_tnlid(set, tnlid); | |
257 | *hint = tnlid; | |
258 | return tnlid; | |
259 | } | |
4edcdcf4 RB |
260 | } |
261 | ||
5868eb24 BP |
262 | static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1); |
263 | VLOG_WARN_RL(&rl, "all %s tunnel ids exhausted", name); | |
264 | return 0; | |
265 | } | |
266 | \f | |
9975d7be BP |
267 | /* The 'key' comes from nbs->header_.uuid or nbr->header_.uuid or |
268 | * sb->external_ids:logical-switch. */ | |
5868eb24 BP |
269 | struct ovn_datapath { |
270 | struct hmap_node key_node; /* Index on 'key'. */ | |
9975d7be | 271 | struct uuid key; /* (nbs/nbr)->header_.uuid. */ |
4edcdcf4 | 272 | |
9975d7be BP |
273 | const struct nbrec_logical_switch *nbs; /* May be NULL. */ |
274 | const struct nbrec_logical_router *nbr; /* May be NULL. */ | |
5868eb24 | 275 | const struct sbrec_datapath_binding *sb; /* May be NULL. */ |
4edcdcf4 | 276 | |
5868eb24 | 277 | struct ovs_list list; /* In list of similar records. */ |
4edcdcf4 | 278 | |
9975d7be | 279 | /* Logical switch data. */ |
86e98048 BP |
280 | struct ovn_port **router_ports; |
281 | size_t n_router_ports; | |
9975d7be | 282 | |
5868eb24 BP |
283 | struct hmap port_tnlids; |
284 | uint32_t port_key_hint; | |
285 | ||
286 | bool has_unknown; | |
287 | }; | |
288 | ||
289 | static struct ovn_datapath * | |
290 | ovn_datapath_create(struct hmap *datapaths, const struct uuid *key, | |
9975d7be BP |
291 | const struct nbrec_logical_switch *nbs, |
292 | const struct nbrec_logical_router *nbr, | |
5868eb24 BP |
293 | const struct sbrec_datapath_binding *sb) |
294 | { | |
295 | struct ovn_datapath *od = xzalloc(sizeof *od); | |
296 | od->key = *key; | |
297 | od->sb = sb; | |
9975d7be BP |
298 | od->nbs = nbs; |
299 | od->nbr = nbr; | |
5868eb24 BP |
300 | hmap_init(&od->port_tnlids); |
301 | od->port_key_hint = 0; | |
302 | hmap_insert(datapaths, &od->key_node, uuid_hash(&od->key)); | |
303 | return od; | |
304 | } | |
305 | ||
306 | static void | |
307 | ovn_datapath_destroy(struct hmap *datapaths, struct ovn_datapath *od) | |
308 | { | |
309 | if (od) { | |
310 | /* Don't remove od->list. It is used within build_datapaths() as a | |
311 | * private list and once we've exited that function it is not safe to | |
312 | * use it. */ | |
313 | hmap_remove(datapaths, &od->key_node); | |
314 | destroy_tnlids(&od->port_tnlids); | |
86e98048 | 315 | free(od->router_ports); |
5868eb24 BP |
316 | free(od); |
317 | } | |
318 | } | |
319 | ||
9a9961d2 BP |
320 | /* Returns 'od''s datapath type. */ |
321 | static enum ovn_datapath_type | |
322 | ovn_datapath_get_type(const struct ovn_datapath *od) | |
323 | { | |
324 | return od->nbs ? DP_SWITCH : DP_ROUTER; | |
325 | } | |
326 | ||
5868eb24 BP |
327 | static struct ovn_datapath * |
328 | ovn_datapath_find(struct hmap *datapaths, const struct uuid *uuid) | |
329 | { | |
330 | struct ovn_datapath *od; | |
331 | ||
332 | HMAP_FOR_EACH_WITH_HASH (od, key_node, uuid_hash(uuid), datapaths) { | |
333 | if (uuid_equals(uuid, &od->key)) { | |
334 | return od; | |
335 | } | |
336 | } | |
337 | return NULL; | |
338 | } | |
339 | ||
340 | static struct ovn_datapath * | |
341 | ovn_datapath_from_sbrec(struct hmap *datapaths, | |
342 | const struct sbrec_datapath_binding *sb) | |
343 | { | |
344 | struct uuid key; | |
345 | ||
9975d7be BP |
346 | if (!smap_get_uuid(&sb->external_ids, "logical-switch", &key) && |
347 | !smap_get_uuid(&sb->external_ids, "logical-router", &key)) { | |
5868eb24 BP |
348 | return NULL; |
349 | } | |
350 | return ovn_datapath_find(datapaths, &key); | |
351 | } | |
352 | ||
5412db30 J |
353 | static bool |
354 | lrouter_is_enabled(const struct nbrec_logical_router *lrouter) | |
355 | { | |
356 | return !lrouter->enabled || *lrouter->enabled; | |
357 | } | |
358 | ||
5868eb24 BP |
359 | static void |
360 | join_datapaths(struct northd_context *ctx, struct hmap *datapaths, | |
361 | struct ovs_list *sb_only, struct ovs_list *nb_only, | |
362 | struct ovs_list *both) | |
363 | { | |
364 | hmap_init(datapaths); | |
417e7e66 BW |
365 | ovs_list_init(sb_only); |
366 | ovs_list_init(nb_only); | |
367 | ovs_list_init(both); | |
5868eb24 BP |
368 | |
369 | const struct sbrec_datapath_binding *sb, *sb_next; | |
370 | SBREC_DATAPATH_BINDING_FOR_EACH_SAFE (sb, sb_next, ctx->ovnsb_idl) { | |
371 | struct uuid key; | |
9975d7be BP |
372 | if (!smap_get_uuid(&sb->external_ids, "logical-switch", &key) && |
373 | !smap_get_uuid(&sb->external_ids, "logical-router", &key)) { | |
374 | ovsdb_idl_txn_add_comment( | |
375 | ctx->ovnsb_txn, | |
376 | "deleting Datapath_Binding "UUID_FMT" that lacks " | |
377 | "external-ids:logical-switch and " | |
378 | "external-ids:logical-router", | |
379 | UUID_ARGS(&sb->header_.uuid)); | |
5868eb24 BP |
380 | sbrec_datapath_binding_delete(sb); |
381 | continue; | |
382 | } | |
383 | ||
384 | if (ovn_datapath_find(datapaths, &key)) { | |
385 | static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1); | |
9975d7be BP |
386 | VLOG_INFO_RL( |
387 | &rl, "deleting Datapath_Binding "UUID_FMT" with " | |
388 | "duplicate external-ids:logical-switch/router "UUID_FMT, | |
389 | UUID_ARGS(&sb->header_.uuid), UUID_ARGS(&key)); | |
5868eb24 BP |
390 | sbrec_datapath_binding_delete(sb); |
391 | continue; | |
392 | } | |
393 | ||
394 | struct ovn_datapath *od = ovn_datapath_create(datapaths, &key, | |
9975d7be | 395 | NULL, NULL, sb); |
417e7e66 | 396 | ovs_list_push_back(sb_only, &od->list); |
5868eb24 BP |
397 | } |
398 | ||
9975d7be BP |
399 | const struct nbrec_logical_switch *nbs; |
400 | NBREC_LOGICAL_SWITCH_FOR_EACH (nbs, ctx->ovnnb_idl) { | |
5868eb24 | 401 | struct ovn_datapath *od = ovn_datapath_find(datapaths, |
9975d7be | 402 | &nbs->header_.uuid); |
5868eb24 | 403 | if (od) { |
9975d7be | 404 | od->nbs = nbs; |
417e7e66 BW |
405 | ovs_list_remove(&od->list); |
406 | ovs_list_push_back(both, &od->list); | |
5868eb24 | 407 | } else { |
9975d7be BP |
408 | od = ovn_datapath_create(datapaths, &nbs->header_.uuid, |
409 | nbs, NULL, NULL); | |
417e7e66 | 410 | ovs_list_push_back(nb_only, &od->list); |
5868eb24 BP |
411 | } |
412 | } | |
9975d7be BP |
413 | |
414 | const struct nbrec_logical_router *nbr; | |
415 | NBREC_LOGICAL_ROUTER_FOR_EACH (nbr, ctx->ovnnb_idl) { | |
5412db30 J |
416 | if (!lrouter_is_enabled(nbr)) { |
417 | continue; | |
418 | } | |
419 | ||
9975d7be BP |
420 | struct ovn_datapath *od = ovn_datapath_find(datapaths, |
421 | &nbr->header_.uuid); | |
422 | if (od) { | |
423 | if (!od->nbs) { | |
424 | od->nbr = nbr; | |
417e7e66 BW |
425 | ovs_list_remove(&od->list); |
426 | ovs_list_push_back(both, &od->list); | |
9975d7be BP |
427 | } else { |
428 | /* Can't happen! */ | |
429 | static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1); | |
430 | VLOG_WARN_RL(&rl, | |
431 | "duplicate UUID "UUID_FMT" in OVN_Northbound", | |
432 | UUID_ARGS(&nbr->header_.uuid)); | |
433 | continue; | |
434 | } | |
435 | } else { | |
436 | od = ovn_datapath_create(datapaths, &nbr->header_.uuid, | |
437 | NULL, nbr, NULL); | |
417e7e66 | 438 | ovs_list_push_back(nb_only, &od->list); |
9975d7be | 439 | } |
9975d7be | 440 | } |
5868eb24 BP |
441 | } |
442 | ||
/* Allocates a datapath tunnel key that is not yet in 'dp_tnlids' and adds it
 * to that set.  Keys range from 1 to 2**24 - 1.  Returns the key, or 0 if
 * none is free.
 *
 * The search hint is kept in a function-local static, so it persists across
 * calls regardless of which set is passed in. */
static uint32_t
ovn_datapath_allocate_key(struct hmap *dp_tnlids)
{
    static uint32_t hint;
    const uint32_t max_key = (1u << 24) - 1;

    return allocate_tnlid(dp_tnlids, "datapath", max_key, &hint);
}
449 | ||
0bac7164 BP |
450 | /* Updates the southbound Datapath_Binding table so that it contains the |
451 | * logical switches and routers specified by the northbound database. | |
452 | * | |
453 | * Initializes 'datapaths' to contain a "struct ovn_datapath" for every logical | |
454 | * switch and router. */ | |
5868eb24 BP |
455 | static void |
456 | build_datapaths(struct northd_context *ctx, struct hmap *datapaths) | |
457 | { | |
458 | struct ovs_list sb_only, nb_only, both; | |
459 | ||
460 | join_datapaths(ctx, datapaths, &sb_only, &nb_only, &both); | |
461 | ||
417e7e66 | 462 | if (!ovs_list_is_empty(&nb_only)) { |
5868eb24 BP |
463 | /* First index the in-use datapath tunnel IDs. */ |
464 | struct hmap dp_tnlids = HMAP_INITIALIZER(&dp_tnlids); | |
465 | struct ovn_datapath *od; | |
466 | LIST_FOR_EACH (od, list, &both) { | |
467 | add_tnlid(&dp_tnlids, od->sb->tunnel_key); | |
468 | } | |
469 | ||
470 | /* Add southbound record for each unmatched northbound record. */ | |
471 | LIST_FOR_EACH (od, list, &nb_only) { | |
472 | uint16_t tunnel_key = ovn_datapath_allocate_key(&dp_tnlids); | |
473 | if (!tunnel_key) { | |
474 | break; | |
475 | } | |
476 | ||
477 | od->sb = sbrec_datapath_binding_insert(ctx->ovnsb_txn); | |
478 | ||
5868eb24 | 479 | char uuid_s[UUID_LEN + 1]; |
9975d7be BP |
480 | sprintf(uuid_s, UUID_FMT, UUID_ARGS(&od->key)); |
481 | const char *key = od->nbs ? "logical-switch" : "logical-router"; | |
482 | const struct smap id = SMAP_CONST1(&id, key, uuid_s); | |
aaf881c6 | 483 | sbrec_datapath_binding_set_external_ids(od->sb, &id); |
5868eb24 BP |
484 | |
485 | sbrec_datapath_binding_set_tunnel_key(od->sb, tunnel_key); | |
486 | } | |
487 | destroy_tnlids(&dp_tnlids); | |
488 | } | |
489 | ||
490 | /* Delete southbound records without northbound matches. */ | |
491 | struct ovn_datapath *od, *next; | |
492 | LIST_FOR_EACH_SAFE (od, next, list, &sb_only) { | |
417e7e66 | 493 | ovs_list_remove(&od->list); |
5868eb24 BP |
494 | sbrec_datapath_binding_delete(od->sb); |
495 | ovn_datapath_destroy(datapaths, od); | |
496 | } | |
497 | } | |
498 | \f | |
499 | struct ovn_port { | |
500 | struct hmap_node key_node; /* Index on 'key'. */ | |
9975d7be BP |
501 | char *key; /* nbs->name, nbr->name, sb->logical_port. */ |
502 | char *json_key; /* 'key', quoted for use in JSON. */ | |
5868eb24 | 503 | |
9975d7be BP |
504 | const struct sbrec_port_binding *sb; /* May be NULL. */ |
505 | ||
e93b43d6 | 506 | /* Logical switch port data. */ |
0ee00741 | 507 | const struct nbrec_logical_switch_port *nbsp; /* May be NULL. */ |
e93b43d6 JP |
508 | |
509 | struct lport_addresses *lsp_addrs; /* Logical switch port addresses. */ | |
510 | unsigned int n_lsp_addrs; | |
511 | ||
512 | struct lport_addresses *ps_addrs; /* Port security addresses. */ | |
513 | unsigned int n_ps_addrs; | |
514 | ||
9975d7be | 515 | /* Logical router port data. */ |
0ee00741 | 516 | const struct nbrec_logical_router_port *nbrp; /* May be NULL. */ |
e93b43d6 | 517 | |
4685e523 | 518 | struct lport_addresses lrp_networks; |
c9bdf7bd | 519 | |
ad386c3f BP |
520 | /* The port's peer: |
521 | * | |
522 | * - A switch port S of type "router" has a router port R as a peer, | |
523 | * and R in turn has S has its peer. | |
524 | * | |
525 | * - Two connected logical router ports have each other as peer. */ | |
9975d7be | 526 | struct ovn_port *peer; |
5868eb24 BP |
527 | |
528 | struct ovn_datapath *od; | |
529 | ||
530 | struct ovs_list list; /* In list of similar records. */ | |
531 | }; | |
532 | ||
533 | static struct ovn_port * | |
534 | ovn_port_create(struct hmap *ports, const char *key, | |
0ee00741 HK |
535 | const struct nbrec_logical_switch_port *nbsp, |
536 | const struct nbrec_logical_router_port *nbrp, | |
5868eb24 BP |
537 | const struct sbrec_port_binding *sb) |
538 | { | |
539 | struct ovn_port *op = xzalloc(sizeof *op); | |
9975d7be BP |
540 | |
541 | struct ds json_key = DS_EMPTY_INITIALIZER; | |
542 | json_string_escape(key, &json_key); | |
543 | op->json_key = ds_steal_cstr(&json_key); | |
544 | ||
545 | op->key = xstrdup(key); | |
5868eb24 | 546 | op->sb = sb; |
0ee00741 HK |
547 | op->nbsp = nbsp; |
548 | op->nbrp = nbrp; | |
5868eb24 BP |
549 | hmap_insert(ports, &op->key_node, hash_string(op->key, 0)); |
550 | return op; | |
551 | } | |
552 | ||
553 | static void | |
554 | ovn_port_destroy(struct hmap *ports, struct ovn_port *port) | |
555 | { | |
556 | if (port) { | |
557 | /* Don't remove port->list. It is used within build_ports() as a | |
558 | * private list and once we've exited that function it is not safe to | |
559 | * use it. */ | |
560 | hmap_remove(ports, &port->key_node); | |
e93b43d6 JP |
561 | |
562 | for (int i = 0; i < port->n_lsp_addrs; i++) { | |
563 | destroy_lport_addresses(&port->lsp_addrs[i]); | |
564 | } | |
565 | free(port->lsp_addrs); | |
566 | ||
567 | for (int i = 0; i < port->n_ps_addrs; i++) { | |
568 | destroy_lport_addresses(&port->ps_addrs[i]); | |
569 | } | |
570 | free(port->ps_addrs); | |
571 | ||
4685e523 | 572 | destroy_lport_addresses(&port->lrp_networks); |
9975d7be BP |
573 | free(port->json_key); |
574 | free(port->key); | |
5868eb24 BP |
575 | free(port); |
576 | } | |
577 | } | |
578 | ||
579 | static struct ovn_port * | |
580 | ovn_port_find(struct hmap *ports, const char *name) | |
581 | { | |
582 | struct ovn_port *op; | |
583 | ||
584 | HMAP_FOR_EACH_WITH_HASH (op, key_node, hash_string(name, 0), ports) { | |
585 | if (!strcmp(op->key, name)) { | |
586 | return op; | |
587 | } | |
588 | } | |
589 | return NULL; | |
590 | } | |
591 | ||
592 | static uint32_t | |
593 | ovn_port_allocate_key(struct ovn_datapath *od) | |
594 | { | |
595 | return allocate_tnlid(&od->port_tnlids, "port", | |
596 | (1u << 15) - 1, &od->port_key_hint); | |
597 | } | |
598 | ||
599 | static void | |
600 | join_logical_ports(struct northd_context *ctx, | |
601 | struct hmap *datapaths, struct hmap *ports, | |
602 | struct ovs_list *sb_only, struct ovs_list *nb_only, | |
603 | struct ovs_list *both) | |
604 | { | |
605 | hmap_init(ports); | |
417e7e66 BW |
606 | ovs_list_init(sb_only); |
607 | ovs_list_init(nb_only); | |
608 | ovs_list_init(both); | |
5868eb24 BP |
609 | |
610 | const struct sbrec_port_binding *sb; | |
611 | SBREC_PORT_BINDING_FOR_EACH (sb, ctx->ovnsb_idl) { | |
612 | struct ovn_port *op = ovn_port_create(ports, sb->logical_port, | |
9975d7be | 613 | NULL, NULL, sb); |
417e7e66 | 614 | ovs_list_push_back(sb_only, &op->list); |
5868eb24 BP |
615 | } |
616 | ||
617 | struct ovn_datapath *od; | |
618 | HMAP_FOR_EACH (od, key_node, datapaths) { | |
9975d7be BP |
619 | if (od->nbs) { |
620 | for (size_t i = 0; i < od->nbs->n_ports; i++) { | |
0ee00741 HK |
621 | const struct nbrec_logical_switch_port *nbsp |
622 | = od->nbs->ports[i]; | |
623 | struct ovn_port *op = ovn_port_find(ports, nbsp->name); | |
9975d7be | 624 | if (op) { |
0ee00741 | 625 | if (op->nbsp || op->nbrp) { |
9975d7be BP |
626 | static struct vlog_rate_limit rl |
627 | = VLOG_RATE_LIMIT_INIT(5, 1); | |
628 | VLOG_WARN_RL(&rl, "duplicate logical port %s", | |
0ee00741 | 629 | nbsp->name); |
9975d7be BP |
630 | continue; |
631 | } | |
0ee00741 | 632 | op->nbsp = nbsp; |
417e7e66 BW |
633 | ovs_list_remove(&op->list); |
634 | ovs_list_push_back(both, &op->list); | |
e93b43d6 JP |
635 | |
636 | /* This port exists due to a SB binding, but should | |
637 | * not have been initialized fully. */ | |
638 | ovs_assert(!op->n_lsp_addrs && !op->n_ps_addrs); | |
9975d7be | 639 | } else { |
0ee00741 | 640 | op = ovn_port_create(ports, nbsp->name, nbsp, NULL, NULL); |
417e7e66 | 641 | ovs_list_push_back(nb_only, &op->list); |
9975d7be BP |
642 | } |
643 | ||
e93b43d6 | 644 | op->lsp_addrs |
0ee00741 HK |
645 | = xmalloc(sizeof *op->lsp_addrs * nbsp->n_addresses); |
646 | for (size_t j = 0; j < nbsp->n_addresses; j++) { | |
647 | if (!strcmp(nbsp->addresses[j], "unknown")) { | |
e93b43d6 JP |
648 | continue; |
649 | } | |
0ee00741 | 650 | if (!extract_lsp_addresses(nbsp->addresses[j], |
e93b43d6 JP |
651 | &op->lsp_addrs[op->n_lsp_addrs])) { |
652 | static struct vlog_rate_limit rl | |
653 | = VLOG_RATE_LIMIT_INIT(1, 1); | |
654 | VLOG_INFO_RL(&rl, "invalid syntax '%s' in logical " | |
655 | "switch port addresses. No MAC " | |
656 | "address found", | |
0ee00741 | 657 | op->nbsp->addresses[j]); |
e93b43d6 JP |
658 | continue; |
659 | } | |
660 | op->n_lsp_addrs++; | |
661 | } | |
662 | ||
663 | op->ps_addrs | |
0ee00741 HK |
664 | = xmalloc(sizeof *op->ps_addrs * nbsp->n_port_security); |
665 | for (size_t j = 0; j < nbsp->n_port_security; j++) { | |
666 | if (!extract_lsp_addresses(nbsp->port_security[j], | |
e93b43d6 JP |
667 | &op->ps_addrs[op->n_ps_addrs])) { |
668 | static struct vlog_rate_limit rl | |
669 | = VLOG_RATE_LIMIT_INIT(1, 1); | |
670 | VLOG_INFO_RL(&rl, "invalid syntax '%s' in port " | |
671 | "security. No MAC address found", | |
0ee00741 | 672 | op->nbsp->port_security[j]); |
e93b43d6 JP |
673 | continue; |
674 | } | |
675 | op->n_ps_addrs++; | |
676 | } | |
677 | ||
9975d7be BP |
678 | op->od = od; |
679 | } | |
680 | } else { | |
681 | for (size_t i = 0; i < od->nbr->n_ports; i++) { | |
0ee00741 HK |
682 | const struct nbrec_logical_router_port *nbrp |
683 | = od->nbr->ports[i]; | |
9975d7be | 684 | |
4685e523 | 685 | struct lport_addresses lrp_networks; |
0ee00741 | 686 | if (!extract_lrp_networks(nbrp, &lrp_networks)) { |
9975d7be BP |
687 | static struct vlog_rate_limit rl |
688 | = VLOG_RATE_LIMIT_INIT(5, 1); | |
0ee00741 | 689 | VLOG_WARN_RL(&rl, "bad 'mac' %s", nbrp->mac); |
9975d7be BP |
690 | continue; |
691 | } | |
692 | ||
4685e523 | 693 | if (!lrp_networks.n_ipv4_addrs && !lrp_networks.n_ipv6_addrs) { |
9975d7be BP |
694 | continue; |
695 | } | |
696 | ||
0ee00741 | 697 | struct ovn_port *op = ovn_port_find(ports, nbrp->name); |
9975d7be | 698 | if (op) { |
0ee00741 | 699 | if (op->nbsp || op->nbrp) { |
9975d7be BP |
700 | static struct vlog_rate_limit rl |
701 | = VLOG_RATE_LIMIT_INIT(5, 1); | |
702 | VLOG_WARN_RL(&rl, "duplicate logical router port %s", | |
0ee00741 | 703 | nbrp->name); |
9975d7be BP |
704 | continue; |
705 | } | |
0ee00741 | 706 | op->nbrp = nbrp; |
417e7e66 BW |
707 | ovs_list_remove(&op->list); |
708 | ovs_list_push_back(both, &op->list); | |
4685e523 JP |
709 | |
710 | /* This port exists but should not have been | |
711 | * initialized fully. */ | |
712 | ovs_assert(!op->lrp_networks.n_ipv4_addrs | |
713 | && !op->lrp_networks.n_ipv6_addrs); | |
9975d7be | 714 | } else { |
0ee00741 | 715 | op = ovn_port_create(ports, nbrp->name, NULL, nbrp, NULL); |
417e7e66 | 716 | ovs_list_push_back(nb_only, &op->list); |
9975d7be BP |
717 | } |
718 | ||
4685e523 | 719 | op->lrp_networks = lrp_networks; |
9975d7be | 720 | op->od = od; |
5868eb24 | 721 | } |
9975d7be BP |
722 | } |
723 | } | |
724 | ||
725 | /* Connect logical router ports, and logical switch ports of type "router", | |
726 | * to their peers. */ | |
727 | struct ovn_port *op; | |
728 | HMAP_FOR_EACH (op, key_node, ports) { | |
0ee00741 HK |
729 | if (op->nbsp && !strcmp(op->nbsp->type, "router")) { |
730 | const char *peer_name = smap_get(&op->nbsp->options, "router-port"); | |
9975d7be BP |
731 | if (!peer_name) { |
732 | continue; | |
733 | } | |
734 | ||
735 | struct ovn_port *peer = ovn_port_find(ports, peer_name); | |
0ee00741 | 736 | if (!peer || !peer->nbrp) { |
9975d7be BP |
737 | continue; |
738 | } | |
739 | ||
740 | peer->peer = op; | |
741 | op->peer = peer; | |
86e98048 BP |
742 | op->od->router_ports = xrealloc( |
743 | op->od->router_ports, | |
744 | sizeof *op->od->router_ports * (op->od->n_router_ports + 1)); | |
745 | op->od->router_ports[op->od->n_router_ports++] = op; | |
0ee00741 | 746 | } else if (op->nbrp && op->nbrp->peer) { |
ad386c3f BP |
747 | struct ovn_port *peer = ovn_port_find(ports, op->nbrp->peer); |
748 | if (peer) { | |
749 | if (peer->nbrp) { | |
750 | op->peer = peer; | |
751 | } else { | |
752 | /* An ovn_port for a switch port of type "router" does have | |
753 | * a router port as its peer (see the case above for | |
754 | * "router" ports), but this is set via options:router-port | |
755 | * in Logical_Switch_Port and does not involve the | |
756 | * Logical_Router_Port's 'peer' column. */ | |
757 | static struct vlog_rate_limit rl = | |
758 | VLOG_RATE_LIMIT_INIT(5, 1); | |
759 | VLOG_WARN_RL(&rl, "Bad configuration: The peer of router " | |
760 | "port %s is a switch port", op->key); | |
761 | } | |
762 | } | |
5868eb24 BP |
763 | } |
764 | } | |
765 | } | |
766 | ||
767 | static void | |
768 | ovn_port_update_sbrec(const struct ovn_port *op) | |
769 | { | |
770 | sbrec_port_binding_set_datapath(op->sb, op->od->sb); | |
0ee00741 | 771 | if (op->nbrp) { |
c1645003 GS |
772 | /* If the router is for l3 gateway, it resides on a chassis |
773 | * and its port type is "gateway". */ | |
774 | const char *chassis = smap_get(&op->od->nbr->options, "chassis"); | |
775 | if (chassis) { | |
776 | sbrec_port_binding_set_type(op->sb, "gateway"); | |
777 | } else { | |
778 | sbrec_port_binding_set_type(op->sb, "patch"); | |
779 | } | |
9975d7be BP |
780 | |
781 | const char *peer = op->peer ? op->peer->key : "<error>"; | |
c1645003 GS |
782 | struct smap new; |
783 | smap_init(&new); | |
784 | smap_add(&new, "peer", peer); | |
785 | if (chassis) { | |
786 | smap_add(&new, "gateway-chassis", chassis); | |
787 | } | |
788 | sbrec_port_binding_set_options(op->sb, &new); | |
789 | smap_destroy(&new); | |
9975d7be BP |
790 | |
791 | sbrec_port_binding_set_parent_port(op->sb, NULL); | |
792 | sbrec_port_binding_set_tag(op->sb, NULL, 0); | |
793 | sbrec_port_binding_set_mac(op->sb, NULL, 0); | |
794 | } else { | |
0ee00741 HK |
795 | if (strcmp(op->nbsp->type, "router")) { |
796 | sbrec_port_binding_set_type(op->sb, op->nbsp->type); | |
797 | sbrec_port_binding_set_options(op->sb, &op->nbsp->options); | |
9975d7be | 798 | } else { |
c1645003 GS |
799 | const char *chassis = NULL; |
800 | if (op->peer && op->peer->od && op->peer->od->nbr) { | |
801 | chassis = smap_get(&op->peer->od->nbr->options, "chassis"); | |
802 | } | |
803 | ||
804 | /* A switch port connected to a gateway router is also of | |
805 | * type "gateway". */ | |
806 | if (chassis) { | |
807 | sbrec_port_binding_set_type(op->sb, "gateway"); | |
808 | } else { | |
809 | sbrec_port_binding_set_type(op->sb, "patch"); | |
810 | } | |
9975d7be | 811 | |
0ee00741 | 812 | const char *router_port = smap_get(&op->nbsp->options, |
9975d7be BP |
813 | "router-port"); |
814 | if (!router_port) { | |
815 | router_port = "<error>"; | |
816 | } | |
c1645003 GS |
817 | struct smap new; |
818 | smap_init(&new); | |
819 | smap_add(&new, "peer", router_port); | |
820 | if (chassis) { | |
821 | smap_add(&new, "gateway-chassis", chassis); | |
822 | } | |
823 | sbrec_port_binding_set_options(op->sb, &new); | |
824 | smap_destroy(&new); | |
9975d7be | 825 | } |
0ee00741 HK |
826 | sbrec_port_binding_set_parent_port(op->sb, op->nbsp->parent_name); |
827 | sbrec_port_binding_set_tag(op->sb, op->nbsp->tag, op->nbsp->n_tag); | |
828 | sbrec_port_binding_set_mac(op->sb, (const char **) op->nbsp->addresses, | |
829 | op->nbsp->n_addresses); | |
9975d7be | 830 | } |
5868eb24 BP |
831 | } |
832 | ||
0bac7164 | 833 | /* Updates the southbound Port_Binding table so that it contains the logical |
80f408f4 | 834 | * switch ports specified by the northbound database. |
0bac7164 BP |
835 | * |
836 | * Initializes 'ports' to contain a "struct ovn_port" for every logical port, | |
837 | * using the "struct ovn_datapath"s in 'datapaths' to look up logical | |
838 | * datapaths. */ | |
5868eb24 BP |
839 | static void |
840 | build_ports(struct northd_context *ctx, struct hmap *datapaths, | |
841 | struct hmap *ports) | |
842 | { | |
843 | struct ovs_list sb_only, nb_only, both; | |
844 | ||
845 | join_logical_ports(ctx, datapaths, ports, &sb_only, &nb_only, &both); | |
846 | ||
847 | /* For logical ports that are in both databases, update the southbound | |
848 | * record based on northbound data. Also index the in-use tunnel_keys. */ | |
849 | struct ovn_port *op, *next; | |
850 | LIST_FOR_EACH_SAFE (op, next, list, &both) { | |
851 | ovn_port_update_sbrec(op); | |
852 | ||
853 | add_tnlid(&op->od->port_tnlids, op->sb->tunnel_key); | |
854 | if (op->sb->tunnel_key > op->od->port_key_hint) { | |
855 | op->od->port_key_hint = op->sb->tunnel_key; | |
856 | } | |
857 | } | |
858 | ||
859 | /* Add southbound record for each unmatched northbound record. */ | |
860 | LIST_FOR_EACH_SAFE (op, next, list, &nb_only) { | |
861 | uint16_t tunnel_key = ovn_port_allocate_key(op->od); | |
862 | if (!tunnel_key) { | |
863 | continue; | |
864 | } | |
865 | ||
866 | op->sb = sbrec_port_binding_insert(ctx->ovnsb_txn); | |
867 | ovn_port_update_sbrec(op); | |
868 | ||
869 | sbrec_port_binding_set_logical_port(op->sb, op->key); | |
870 | sbrec_port_binding_set_tunnel_key(op->sb, tunnel_key); | |
871 | } | |
872 | ||
873 | /* Delete southbound records without northbound matches. */ | |
874 | LIST_FOR_EACH_SAFE(op, next, list, &sb_only) { | |
417e7e66 | 875 | ovs_list_remove(&op->list); |
5868eb24 BP |
876 | sbrec_port_binding_delete(op->sb); |
877 | ovn_port_destroy(ports, op); | |
878 | } | |
879 | } | |
880 | \f | |
#define OVN_MIN_MULTICAST 32768
#define OVN_MAX_MULTICAST 65535

/* Description of a multicast group: its name and its tunnel key. */
struct multicast_group {
    const char *name;
    uint16_t key;               /* OVN_MIN_MULTICAST...OVN_MAX_MULTICAST. */
};

#define MC_FLOOD "_MC_flood"
static const struct multicast_group mc_flood = {
    .name = MC_FLOOD,
    .key = 65535,
};

#define MC_UNKNOWN "_MC_unknown"
static const struct multicast_group mc_unknown = {
    .name = MC_UNKNOWN,
    .key = 65534,
};

/* Returns true if 'a' and 'b' have the same name and the same key. */
static bool
multicast_group_equal(const struct multicast_group *a,
                      const struct multicast_group *b)
{
    if (a->key != b->key) {
        return false;
    }
    return strcmp(a->name, b->name) == 0;
}
901 | ||
902 | /* Multicast group entry. */ | |
903 | struct ovn_multicast { | |
904 | struct hmap_node hmap_node; /* Index on 'datapath' and 'key'. */ | |
905 | struct ovn_datapath *datapath; | |
906 | const struct multicast_group *group; | |
907 | ||
908 | struct ovn_port **ports; | |
909 | size_t n_ports, allocated_ports; | |
910 | }; | |
911 | ||
912 | static uint32_t | |
913 | ovn_multicast_hash(const struct ovn_datapath *datapath, | |
914 | const struct multicast_group *group) | |
915 | { | |
916 | return hash_pointer(datapath, group->key); | |
917 | } | |
918 | ||
919 | static struct ovn_multicast * | |
920 | ovn_multicast_find(struct hmap *mcgroups, struct ovn_datapath *datapath, | |
921 | const struct multicast_group *group) | |
922 | { | |
923 | struct ovn_multicast *mc; | |
924 | ||
925 | HMAP_FOR_EACH_WITH_HASH (mc, hmap_node, | |
926 | ovn_multicast_hash(datapath, group), mcgroups) { | |
927 | if (mc->datapath == datapath | |
928 | && multicast_group_equal(mc->group, group)) { | |
929 | return mc; | |
4edcdcf4 RB |
930 | } |
931 | } | |
5868eb24 BP |
932 | return NULL; |
933 | } | |
934 | ||
935 | static void | |
936 | ovn_multicast_add(struct hmap *mcgroups, const struct multicast_group *group, | |
937 | struct ovn_port *port) | |
938 | { | |
939 | struct ovn_datapath *od = port->od; | |
940 | struct ovn_multicast *mc = ovn_multicast_find(mcgroups, od, group); | |
941 | if (!mc) { | |
942 | mc = xmalloc(sizeof *mc); | |
943 | hmap_insert(mcgroups, &mc->hmap_node, ovn_multicast_hash(od, group)); | |
944 | mc->datapath = od; | |
945 | mc->group = group; | |
946 | mc->n_ports = 0; | |
947 | mc->allocated_ports = 4; | |
948 | mc->ports = xmalloc(mc->allocated_ports * sizeof *mc->ports); | |
949 | } | |
950 | if (mc->n_ports >= mc->allocated_ports) { | |
951 | mc->ports = x2nrealloc(mc->ports, &mc->allocated_ports, | |
952 | sizeof *mc->ports); | |
953 | } | |
954 | mc->ports[mc->n_ports++] = port; | |
955 | } | |
4edcdcf4 | 956 | |
5868eb24 BP |
957 | static void |
958 | ovn_multicast_destroy(struct hmap *mcgroups, struct ovn_multicast *mc) | |
959 | { | |
960 | if (mc) { | |
961 | hmap_remove(mcgroups, &mc->hmap_node); | |
962 | free(mc->ports); | |
963 | free(mc); | |
964 | } | |
965 | } | |
4edcdcf4 | 966 | |
5868eb24 BP |
967 | static void |
968 | ovn_multicast_update_sbrec(const struct ovn_multicast *mc, | |
969 | const struct sbrec_multicast_group *sb) | |
970 | { | |
971 | struct sbrec_port_binding **ports = xmalloc(mc->n_ports * sizeof *ports); | |
972 | for (size_t i = 0; i < mc->n_ports; i++) { | |
973 | ports[i] = CONST_CAST(struct sbrec_port_binding *, mc->ports[i]->sb); | |
974 | } | |
975 | sbrec_multicast_group_set_ports(sb, ports, mc->n_ports); | |
976 | free(ports); | |
4edcdcf4 | 977 | } |
bd39395f | 978 | \f |
48605550 | 979 | /* Logical flow generation. |
bd39395f | 980 | * |
48605550 | 981 | * This code generates the Logical_Flow table in the southbound database, as a |
bd39395f BP |
982 | * function of most of the northbound database. |
983 | */ | |
984 | ||
5868eb24 BP |
985 | struct ovn_lflow { |
986 | struct hmap_node hmap_node; | |
bd39395f | 987 | |
5868eb24 | 988 | struct ovn_datapath *od; |
880fcd14 | 989 | enum ovn_stage stage; |
5868eb24 BP |
990 | uint16_t priority; |
991 | char *match; | |
992 | char *actions; | |
bd39395f BP |
993 | }; |
994 | ||
995 | static size_t | |
5868eb24 | 996 | ovn_lflow_hash(const struct ovn_lflow *lflow) |
bd39395f | 997 | { |
5868eb24 | 998 | size_t hash = uuid_hash(&lflow->od->key); |
880fcd14 | 999 | hash = hash_2words((lflow->stage << 16) | lflow->priority, hash); |
5868eb24 BP |
1000 | hash = hash_string(lflow->match, hash); |
1001 | return hash_string(lflow->actions, hash); | |
bd39395f BP |
1002 | } |
1003 | ||
5868eb24 BP |
1004 | static bool |
1005 | ovn_lflow_equal(const struct ovn_lflow *a, const struct ovn_lflow *b) | |
1006 | { | |
1007 | return (a->od == b->od | |
880fcd14 | 1008 | && a->stage == b->stage |
5868eb24 BP |
1009 | && a->priority == b->priority |
1010 | && !strcmp(a->match, b->match) | |
1011 | && !strcmp(a->actions, b->actions)); | |
1012 | } | |
1013 | ||
1014 | static void | |
1015 | ovn_lflow_init(struct ovn_lflow *lflow, struct ovn_datapath *od, | |
880fcd14 | 1016 | enum ovn_stage stage, uint16_t priority, |
5868eb24 | 1017 | char *match, char *actions) |
bd39395f | 1018 | { |
5868eb24 | 1019 | lflow->od = od; |
880fcd14 | 1020 | lflow->stage = stage; |
5868eb24 BP |
1021 | lflow->priority = priority; |
1022 | lflow->match = match; | |
1023 | lflow->actions = actions; | |
bd39395f BP |
1024 | } |
1025 | ||
48605550 | 1026 | /* Adds a row with the specified contents to the Logical_Flow table. */ |
bd39395f | 1027 | static void |
5868eb24 | 1028 | ovn_lflow_add(struct hmap *lflow_map, struct ovn_datapath *od, |
880fcd14 | 1029 | enum ovn_stage stage, uint16_t priority, |
5868eb24 BP |
1030 | const char *match, const char *actions) |
1031 | { | |
9a9961d2 BP |
1032 | ovs_assert(ovn_stage_to_datapath_type(stage) == ovn_datapath_get_type(od)); |
1033 | ||
5868eb24 | 1034 | struct ovn_lflow *lflow = xmalloc(sizeof *lflow); |
880fcd14 | 1035 | ovn_lflow_init(lflow, od, stage, priority, |
5868eb24 BP |
1036 | xstrdup(match), xstrdup(actions)); |
1037 | hmap_insert(lflow_map, &lflow->hmap_node, ovn_lflow_hash(lflow)); | |
1038 | } | |
1039 | ||
1040 | static struct ovn_lflow * | |
1041 | ovn_lflow_find(struct hmap *lflows, struct ovn_datapath *od, | |
880fcd14 | 1042 | enum ovn_stage stage, uint16_t priority, |
5868eb24 BP |
1043 | const char *match, const char *actions) |
1044 | { | |
1045 | struct ovn_lflow target; | |
880fcd14 | 1046 | ovn_lflow_init(&target, od, stage, priority, |
5868eb24 BP |
1047 | CONST_CAST(char *, match), CONST_CAST(char *, actions)); |
1048 | ||
1049 | struct ovn_lflow *lflow; | |
1050 | HMAP_FOR_EACH_WITH_HASH (lflow, hmap_node, ovn_lflow_hash(&target), | |
1051 | lflows) { | |
1052 | if (ovn_lflow_equal(lflow, &target)) { | |
1053 | return lflow; | |
bd39395f BP |
1054 | } |
1055 | } | |
5868eb24 BP |
1056 | return NULL; |
1057 | } | |
bd39395f | 1058 | |
5868eb24 BP |
1059 | static void |
1060 | ovn_lflow_destroy(struct hmap *lflows, struct ovn_lflow *lflow) | |
1061 | { | |
1062 | if (lflow) { | |
1063 | hmap_remove(lflows, &lflow->hmap_node); | |
1064 | free(lflow->match); | |
1065 | free(lflow->actions); | |
1066 | free(lflow); | |
1067 | } | |
bd39395f BP |
1068 | } |
1069 | ||
bd39395f | 1070 | /* Appends port security constraints on L2 address field 'eth_addr_field' |
e93b43d6 JP |
1071 | * (e.g. "eth.src" or "eth.dst") to 'match'. 'ps_addrs', with 'n_ps_addrs' |
1072 | * elements, is the collection of port_security constraints from an | |
1073 | * OVN_NB Logical_Switch_Port row generated by extract_lsp_addresses(). */ | |
bd39395f | 1074 | static void |
685f4dfe | 1075 | build_port_security_l2(const char *eth_addr_field, |
e93b43d6 JP |
1076 | struct lport_addresses *ps_addrs, |
1077 | unsigned int n_ps_addrs, | |
685f4dfe | 1078 | struct ds *match) |
bd39395f | 1079 | { |
e93b43d6 JP |
1080 | if (!n_ps_addrs) { |
1081 | return; | |
1082 | } | |
bd39395f | 1083 | |
e93b43d6 | 1084 | ds_put_format(match, " && %s == {", eth_addr_field); |
f7cb14cd | 1085 | |
e93b43d6 JP |
1086 | for (size_t i = 0; i < n_ps_addrs; i++) { |
1087 | ds_put_format(match, "%s ", ps_addrs[i].ea_s); | |
bd39395f | 1088 | } |
f7cb14cd | 1089 | ds_chomp(match, ' '); |
bd39395f | 1090 | ds_put_cstr(match, "}"); |
bd39395f BP |
1091 | } |
1092 | ||
685f4dfe NS |
1093 | static void |
1094 | build_port_security_ipv6_nd_flow( | |
1095 | struct ds *match, struct eth_addr ea, struct ipv6_netaddr *ipv6_addrs, | |
1096 | int n_ipv6_addrs) | |
1097 | { | |
1098 | ds_put_format(match, " && ip6 && nd && ((nd.sll == "ETH_ADDR_FMT" || " | |
1099 | "nd.sll == "ETH_ADDR_FMT") || ((nd.tll == "ETH_ADDR_FMT" || " | |
1100 | "nd.tll == "ETH_ADDR_FMT")", ETH_ADDR_ARGS(eth_addr_zero), | |
1101 | ETH_ADDR_ARGS(ea), ETH_ADDR_ARGS(eth_addr_zero), | |
1102 | ETH_ADDR_ARGS(ea)); | |
1103 | if (!n_ipv6_addrs) { | |
1104 | ds_put_cstr(match, "))"); | |
1105 | return; | |
1106 | } | |
1107 | ||
1108 | char ip6_str[INET6_ADDRSTRLEN + 1]; | |
1109 | struct in6_addr lla; | |
1110 | in6_generate_lla(ea, &lla); | |
1111 | memset(ip6_str, 0, sizeof(ip6_str)); | |
1112 | ipv6_string_mapped(ip6_str, &lla); | |
1113 | ds_put_format(match, " && (nd.target == %s", ip6_str); | |
1114 | ||
1115 | for(int i = 0; i < n_ipv6_addrs; i++) { | |
1116 | memset(ip6_str, 0, sizeof(ip6_str)); | |
1117 | ipv6_string_mapped(ip6_str, &ipv6_addrs[i].addr); | |
1118 | ds_put_format(match, " || nd.target == %s", ip6_str); | |
1119 | } | |
1120 | ||
1121 | ds_put_format(match, ")))"); | |
1122 | } | |
1123 | ||
1124 | static void | |
1125 | build_port_security_ipv6_flow( | |
1126 | enum ovn_pipeline pipeline, struct ds *match, struct eth_addr ea, | |
1127 | struct ipv6_netaddr *ipv6_addrs, int n_ipv6_addrs) | |
1128 | { | |
1129 | char ip6_str[INET6_ADDRSTRLEN + 1]; | |
1130 | ||
1131 | ds_put_format(match, " && %s == {", | |
1132 | pipeline == P_IN ? "ip6.src" : "ip6.dst"); | |
1133 | ||
1134 | /* Allow link-local address. */ | |
1135 | struct in6_addr lla; | |
1136 | in6_generate_lla(ea, &lla); | |
1137 | ipv6_string_mapped(ip6_str, &lla); | |
1138 | ds_put_format(match, "%s, ", ip6_str); | |
1139 | ||
9e687b23 DL |
1140 | /* Allow ip6.dst=ff00::/8 for multicast packets */ |
1141 | if (pipeline == P_OUT) { | |
1142 | ds_put_cstr(match, "ff00::/8, "); | |
1143 | } | |
685f4dfe NS |
1144 | for(int i = 0; i < n_ipv6_addrs; i++) { |
1145 | ipv6_string_mapped(ip6_str, &ipv6_addrs[i].addr); | |
9e687b23 | 1146 | ds_put_format(match, "%s, ", ip6_str); |
685f4dfe | 1147 | } |
9e687b23 DL |
1148 | /* Replace ", " by "}". */ |
1149 | ds_chomp(match, ' '); | |
1150 | ds_chomp(match, ','); | |
685f4dfe NS |
1151 | ds_put_cstr(match, "}"); |
1152 | } | |
1153 | ||
1154 | /** | |
1155 | * Build port security constraints on ARP and IPv6 ND fields | |
1156 | * and add logical flows to S_SWITCH_IN_PORT_SEC_ND stage. | |
1157 | * | |
1158 | * For each port security of the logical port, following | |
1159 | * logical flows are added | |
1160 | * - If the port security has no IP (both IPv4 and IPv6) or | |
1161 | * if it has IPv4 address(es) | |
1162 | * - Priority 90 flow to allow ARP packets for known MAC addresses | |
1163 | * in the eth.src and arp.spa fields. If the port security | |
1164 | * has IPv4 addresses, allow known IPv4 addresses in the arp.tpa field. | |
1165 | * | |
1166 | * - If the port security has no IP (both IPv4 and IPv6) or | |
1167 | * if it has IPv6 address(es) | |
1168 | * - Priority 90 flow to allow IPv6 ND packets for known MAC addresses | |
1169 | * in the eth.src and nd.sll/nd.tll fields. If the port security | |
1170 | * has IPv6 addresses, allow known IPv6 addresses in the nd.target field | |
1171 | * for IPv6 Neighbor Advertisement packet. | |
1172 | * | |
1173 | * - Priority 80 flow to drop ARP and IPv6 ND packets. | |
1174 | */ | |
1175 | static void | |
1176 | build_port_security_nd(struct ovn_port *op, struct hmap *lflows) | |
1177 | { | |
e93b43d6 JP |
1178 | struct ds match = DS_EMPTY_INITIALIZER; |
1179 | ||
1180 | for (size_t i = 0; i < op->n_ps_addrs; i++) { | |
1181 | struct lport_addresses *ps = &op->ps_addrs[i]; | |
685f4dfe | 1182 | |
e93b43d6 | 1183 | bool no_ip = !(ps->n_ipv4_addrs || ps->n_ipv6_addrs); |
685f4dfe | 1184 | |
e93b43d6 JP |
1185 | ds_clear(&match); |
1186 | if (ps->n_ipv4_addrs || no_ip) { | |
1187 | ds_put_format(&match, | |
1188 | "inport == %s && eth.src == %s && arp.sha == %s", | |
1189 | op->json_key, ps->ea_s, ps->ea_s); | |
685f4dfe | 1190 | |
e93b43d6 JP |
1191 | if (ps->n_ipv4_addrs) { |
1192 | ds_put_cstr(&match, " && arp.spa == {"); | |
f95523c0 | 1193 | for (size_t j = 0; j < ps->n_ipv4_addrs; j++) { |
7d9d86ad NS |
1194 | /* When the netmask is applied, if the host portion is |
1195 | * non-zero, the host can only use the specified | |
1196 | * address in the arp.spa. If zero, the host is allowed | |
1197 | * to use any address in the subnet. */ | |
f95523c0 JP |
1198 | if (ps->ipv4_addrs[j].plen == 32 |
1199 | || ps->ipv4_addrs[j].addr & ~ps->ipv4_addrs[j].mask) { | |
1200 | ds_put_cstr(&match, ps->ipv4_addrs[j].addr_s); | |
7d9d86ad | 1201 | } else { |
e93b43d6 | 1202 | ds_put_format(&match, "%s/%d", |
f95523c0 JP |
1203 | ps->ipv4_addrs[j].network_s, |
1204 | ps->ipv4_addrs[j].plen); | |
7d9d86ad | 1205 | } |
e93b43d6 | 1206 | ds_put_cstr(&match, ", "); |
685f4dfe NS |
1207 | } |
1208 | ds_chomp(&match, ' '); | |
e93b43d6 JP |
1209 | ds_chomp(&match, ','); |
1210 | ds_put_cstr(&match, "}"); | |
685f4dfe NS |
1211 | } |
1212 | ovn_lflow_add(lflows, op->od, S_SWITCH_IN_PORT_SEC_ND, 90, | |
1213 | ds_cstr(&match), "next;"); | |
685f4dfe NS |
1214 | } |
1215 | ||
e93b43d6 JP |
1216 | if (ps->n_ipv6_addrs || no_ip) { |
1217 | ds_clear(&match); | |
1218 | ds_put_format(&match, "inport == %s && eth.src == %s", | |
1219 | op->json_key, ps->ea_s); | |
1220 | build_port_security_ipv6_nd_flow(&match, ps->ea, ps->ipv6_addrs, | |
1221 | ps->n_ipv6_addrs); | |
685f4dfe NS |
1222 | ovn_lflow_add(lflows, op->od, S_SWITCH_IN_PORT_SEC_ND, 90, |
1223 | ds_cstr(&match), "next;"); | |
685f4dfe | 1224 | } |
685f4dfe NS |
1225 | } |
1226 | ||
e93b43d6 JP |
1227 | ds_clear(&match); |
1228 | ds_put_format(&match, "inport == %s && (arp || nd)", op->json_key); | |
685f4dfe | 1229 | ovn_lflow_add(lflows, op->od, S_SWITCH_IN_PORT_SEC_ND, 80, |
e93b43d6 JP |
1230 | ds_cstr(&match), "drop;"); |
1231 | ds_destroy(&match); | |
685f4dfe NS |
1232 | } |
1233 | ||
1234 | /** | |
1235 | * Build port security constraints on IPv4 and IPv6 src and dst fields | |
1236 | * and add logical flows to S_SWITCH_(IN/OUT)_PORT_SEC_IP stage. | |
1237 | * | |
1238 | * For each port security of the logical port, following | |
1239 | * logical flows are added | |
1240 | * - If the port security has IPv4 addresses, | |
1241 | * - Priority 90 flow to allow IPv4 packets for known IPv4 addresses | |
1242 | * | |
1243 | * - If the port security has IPv6 addresses, | |
1244 | * - Priority 90 flow to allow IPv6 packets for known IPv6 addresses | |
1245 | * | |
1246 | * - If the port security has IPv4 addresses or IPv6 addresses or both | |
1247 | * - Priority 80 flow to drop all IPv4 and IPv6 traffic | |
1248 | */ | |
1249 | static void | |
1250 | build_port_security_ip(enum ovn_pipeline pipeline, struct ovn_port *op, | |
1251 | struct hmap *lflows) | |
1252 | { | |
1253 | char *port_direction; | |
1254 | enum ovn_stage stage; | |
1255 | if (pipeline == P_IN) { | |
1256 | port_direction = "inport"; | |
1257 | stage = S_SWITCH_IN_PORT_SEC_IP; | |
1258 | } else { | |
1259 | port_direction = "outport"; | |
1260 | stage = S_SWITCH_OUT_PORT_SEC_IP; | |
1261 | } | |
1262 | ||
e93b43d6 JP |
1263 | for (size_t i = 0; i < op->n_ps_addrs; i++) { |
1264 | struct lport_addresses *ps = &op->ps_addrs[i]; | |
685f4dfe | 1265 | |
e93b43d6 | 1266 | if (!(ps->n_ipv4_addrs || ps->n_ipv6_addrs)) { |
685f4dfe NS |
1267 | continue; |
1268 | } | |
1269 | ||
e93b43d6 | 1270 | if (ps->n_ipv4_addrs) { |
685f4dfe NS |
1271 | struct ds match = DS_EMPTY_INITIALIZER; |
1272 | if (pipeline == P_IN) { | |
9e687b23 DL |
1273 | /* Permit use of the unspecified address for DHCP discovery */ |
1274 | struct ds dhcp_match = DS_EMPTY_INITIALIZER; | |
1275 | ds_put_format(&dhcp_match, "inport == %s" | |
e93b43d6 | 1276 | " && eth.src == %s" |
9e687b23 DL |
1277 | " && ip4.src == 0.0.0.0" |
1278 | " && ip4.dst == 255.255.255.255" | |
e93b43d6 JP |
1279 | " && udp.src == 68 && udp.dst == 67", |
1280 | op->json_key, ps->ea_s); | |
9e687b23 DL |
1281 | ovn_lflow_add(lflows, op->od, stage, 90, |
1282 | ds_cstr(&dhcp_match), "next;"); | |
1283 | ds_destroy(&dhcp_match); | |
e93b43d6 | 1284 | ds_put_format(&match, "inport == %s && eth.src == %s" |
9e687b23 | 1285 | " && ip4.src == {", op->json_key, |
e93b43d6 | 1286 | ps->ea_s); |
685f4dfe | 1287 | } else { |
e93b43d6 | 1288 | ds_put_format(&match, "outport == %s && eth.dst == %s" |
685f4dfe | 1289 | " && ip4.dst == {255.255.255.255, 224.0.0.0/4, ", |
e93b43d6 | 1290 | op->json_key, ps->ea_s); |
685f4dfe NS |
1291 | } |
1292 | ||
f95523c0 JP |
1293 | for (int j = 0; j < ps->n_ipv4_addrs; j++) { |
1294 | ovs_be32 mask = ps->ipv4_addrs[j].mask; | |
7d9d86ad NS |
1295 | /* When the netmask is applied, if the host portion is |
1296 | * non-zero, the host can only use the specified | |
1297 | * address. If zero, the host is allowed to use any | |
1298 | * address in the subnet. | |
e93b43d6 | 1299 | */ |
f95523c0 JP |
1300 | if (ps->ipv4_addrs[j].plen == 32 |
1301 | || ps->ipv4_addrs[j].addr & ~mask) { | |
1302 | ds_put_format(&match, "%s", ps->ipv4_addrs[j].addr_s); | |
1303 | if (pipeline == P_OUT && ps->ipv4_addrs[j].plen != 32) { | |
e93b43d6 JP |
1304 | /* Host is also allowed to receive packets to the |
1305 | * broadcast address in the specified subnet. */ | |
1306 | ds_put_format(&match, ", %s", | |
f95523c0 | 1307 | ps->ipv4_addrs[j].bcast_s); |
7d9d86ad NS |
1308 | } |
1309 | } else { | |
1310 | /* host portion is zero */ | |
f95523c0 JP |
1311 | ds_put_format(&match, "%s/%d", ps->ipv4_addrs[j].network_s, |
1312 | ps->ipv4_addrs[j].plen); | |
7d9d86ad NS |
1313 | } |
1314 | ds_put_cstr(&match, ", "); | |
685f4dfe NS |
1315 | } |
1316 | ||
1317 | /* Replace ", " by "}". */ | |
1318 | ds_chomp(&match, ' '); | |
1319 | ds_chomp(&match, ','); | |
1320 | ds_put_cstr(&match, "}"); | |
1321 | ovn_lflow_add(lflows, op->od, stage, 90, ds_cstr(&match), "next;"); | |
1322 | ds_destroy(&match); | |
685f4dfe NS |
1323 | } |
1324 | ||
e93b43d6 | 1325 | if (ps->n_ipv6_addrs) { |
685f4dfe | 1326 | struct ds match = DS_EMPTY_INITIALIZER; |
9e687b23 DL |
1327 | if (pipeline == P_IN) { |
1328 | /* Permit use of unspecified address for duplicate address | |
1329 | * detection */ | |
1330 | struct ds dad_match = DS_EMPTY_INITIALIZER; | |
1331 | ds_put_format(&dad_match, "inport == %s" | |
e93b43d6 | 1332 | " && eth.src == %s" |
9e687b23 DL |
1333 | " && ip6.src == ::" |
1334 | " && ip6.dst == ff02::/16" | |
1335 | " && icmp6.type == {131, 135, 143}", op->json_key, | |
e93b43d6 | 1336 | ps->ea_s); |
9e687b23 DL |
1337 | ovn_lflow_add(lflows, op->od, stage, 90, |
1338 | ds_cstr(&dad_match), "next;"); | |
1339 | ds_destroy(&dad_match); | |
1340 | } | |
e93b43d6 | 1341 | ds_put_format(&match, "%s == %s && %s == %s", |
685f4dfe | 1342 | port_direction, op->json_key, |
e93b43d6 JP |
1343 | pipeline == P_IN ? "eth.src" : "eth.dst", ps->ea_s); |
1344 | build_port_security_ipv6_flow(pipeline, &match, ps->ea, | |
1345 | ps->ipv6_addrs, ps->n_ipv6_addrs); | |
685f4dfe NS |
1346 | ovn_lflow_add(lflows, op->od, stage, 90, |
1347 | ds_cstr(&match), "next;"); | |
1348 | ds_destroy(&match); | |
685f4dfe NS |
1349 | } |
1350 | ||
e93b43d6 JP |
1351 | char *match = xasprintf("%s == %s && %s == %s && ip", |
1352 | port_direction, op->json_key, | |
1353 | pipeline == P_IN ? "eth.src" : "eth.dst", | |
1354 | ps->ea_s); | |
685f4dfe NS |
1355 | ovn_lflow_add(lflows, op->od, stage, 80, match, "drop;"); |
1356 | free(match); | |
1357 | } | |
f2a715b5 | 1358 | |
685f4dfe NS |
1359 | } |
1360 | ||
95a9a275 | 1361 | static bool |
80f408f4 | 1362 | lsp_is_enabled(const struct nbrec_logical_switch_port *lsp) |
95a9a275 | 1363 | { |
80f408f4 | 1364 | return !lsp->enabled || *lsp->enabled; |
95a9a275 RB |
1365 | } |
1366 | ||
4c7bf534 | 1367 | static bool |
80f408f4 | 1368 | lsp_is_up(const struct nbrec_logical_switch_port *lsp) |
4c7bf534 | 1369 | { |
80f408f4 | 1370 | return !lsp->up || *lsp->up; |
4c7bf534 NS |
1371 | } |
1372 | ||
78aab811 JP |
1373 | static bool |
1374 | has_stateful_acl(struct ovn_datapath *od) | |
1375 | { | |
9975d7be BP |
1376 | for (size_t i = 0; i < od->nbs->n_acls; i++) { |
1377 | struct nbrec_acl *acl = od->nbs->acls[i]; | |
78aab811 JP |
1378 | if (!strcmp(acl->action, "allow-related")) { |
1379 | return true; | |
1380 | } | |
1381 | } | |
1382 | ||
1383 | return false; | |
1384 | } | |
1385 | ||
1386 | static void | |
2d018f9b GS |
1387 | build_pre_acls(struct ovn_datapath *od, struct hmap *lflows, |
1388 | struct hmap *ports) | |
78aab811 JP |
1389 | { |
1390 | bool has_stateful = has_stateful_acl(od); | |
48fcdb47 | 1391 | struct ovn_port *op; |
78aab811 JP |
1392 | |
1393 | /* Ingress and Egress Pre-ACL Table (Priority 0): Packets are | |
1394 | * allowed by default. */ | |
880fcd14 BP |
1395 | ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 0, "1", "next;"); |
1396 | ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 0, "1", "next;"); | |
78aab811 | 1397 | |
78aab811 JP |
1398 | /* If there are any stateful ACL rules in this dapapath, we must |
1399 | * send all IP packets through the conntrack action, which handles | |
1400 | * defragmentation, in order to match L4 headers. */ | |
1401 | if (has_stateful) { | |
48fcdb47 | 1402 | HMAP_FOR_EACH (op, key_node, ports) { |
0ee00741 | 1403 | if (op->od == od && !strcmp(op->nbsp->type, "router")) { |
501f95e1 JP |
1404 | /* Can't use ct() for router ports. Consider the |
1405 | * following configuration: lp1(10.0.0.2) on | |
1406 | * hostA--ls1--lr0--ls2--lp2(10.0.1.2) on hostB, For a | |
1407 | * ping from lp1 to lp2, First, the response will go | |
1408 | * through ct() with a zone for lp2 in the ls2 ingress | |
1409 | * pipeline on hostB. That ct zone knows about this | |
1410 | * connection. Next, it goes through ct() with the zone | |
1411 | * for the router port in the egress pipeline of ls2 on | |
1412 | * hostB. This zone does not know about the connection, | |
1413 | * as the icmp request went through the logical router | |
1414 | * on hostA, not hostB. This would only work with | |
1415 | * distributed conntrack state across all chassis. */ | |
1416 | struct ds match_in = DS_EMPTY_INITIALIZER; | |
1417 | struct ds match_out = DS_EMPTY_INITIALIZER; | |
1418 | ||
48fcdb47 WL |
1419 | ds_put_format(&match_in, "ip && inport == %s", op->json_key); |
1420 | ds_put_format(&match_out, "ip && outport == %s", op->json_key); | |
501f95e1 JP |
1421 | ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 110, |
1422 | ds_cstr(&match_in), "next;"); | |
1423 | ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 110, | |
1424 | ds_cstr(&match_out), "next;"); | |
48fcdb47 WL |
1425 | |
1426 | ds_destroy(&match_in); | |
1427 | ds_destroy(&match_out); | |
1428 | } | |
1429 | } | |
2d018f9b GS |
1430 | /* Ingress and Egress Pre-ACL Table (Priority 110). |
1431 | * | |
1432 | * Not to do conntrack on ND packets. */ | |
1433 | ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 110, "nd", "next;"); | |
1434 | ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 110, "nd", "next;"); | |
48fcdb47 | 1435 | |
78aab811 JP |
1436 | /* Ingress and Egress Pre-ACL Table (Priority 100). |
1437 | * | |
1438 | * Regardless of whether the ACL is "from-lport" or "to-lport", | |
1439 | * we need rules in both the ingress and egress table, because | |
facf8652 GS |
1440 | * the return traffic needs to be followed. |
1441 | * | |
1442 | * 'REGBIT_CONNTRACK_DEFRAG' is set to let the pre-stateful table send | |
1443 | * it to conntrack for tracking and defragmentation. */ | |
1444 | ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 100, "ip", | |
1445 | REGBIT_CONNTRACK_DEFRAG" = 1; next;"); | |
1446 | ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 100, "ip", | |
1447 | REGBIT_CONNTRACK_DEFRAG" = 1; next;"); | |
2d018f9b GS |
1448 | } |
1449 | } | |
78aab811 | 1450 | |
7a15be69 GS |
1451 | /* For a 'key' of the form "IP:port" or just "IP", sets 'port' and |
1452 | * 'ip_address'. The caller must free() the memory allocated for | |
1453 | * 'ip_address'. */ | |
1454 | static void | |
1455 | ip_address_and_port_from_lb_key(const char *key, char **ip_address, | |
1456 | uint16_t *port) | |
1457 | { | |
1458 | char *ip_str, *start, *next; | |
1459 | *ip_address = NULL; | |
1460 | *port = 0; | |
1461 | ||
1462 | next = start = xstrdup(key); | |
1463 | ip_str = strsep(&next, ":"); | |
1464 | if (!ip_str || !ip_str[0]) { | |
1465 | static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1); | |
1466 | VLOG_WARN_RL(&rl, "bad ip address for load balancer key %s", key); | |
1467 | free(start); | |
1468 | return; | |
1469 | } | |
1470 | ||
1471 | ovs_be32 ip, mask; | |
1472 | char *error = ip_parse_masked(ip_str, &ip, &mask); | |
1473 | if (error || mask != OVS_BE32_MAX) { | |
1474 | static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1); | |
1475 | VLOG_WARN_RL(&rl, "bad ip address for load balancer key %s", key); | |
1476 | free(start); | |
1477 | free(error); | |
1478 | return; | |
1479 | } | |
1480 | ||
1481 | int l4_port = 0; | |
1482 | if (next && next[0]) { | |
1483 | if (!str_to_int(next, 0, &l4_port) || l4_port < 0 || l4_port > 65535) { | |
1484 | static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1); | |
1485 | VLOG_WARN_RL(&rl, "bad ip port for load balancer key %s", key); | |
1486 | free(start); | |
1487 | return; | |
1488 | } | |
1489 | } | |
1490 | ||
1491 | *port = l4_port; | |
1492 | *ip_address = strdup(ip_str); | |
1493 | free(start); | |
1494 | } | |
1495 | ||
1496 | static void | |
1497 | build_pre_lb(struct ovn_datapath *od, struct hmap *lflows) | |
1498 | { | |
1499 | /* Allow all packets to go to next tables by default. */ | |
1500 | ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_LB, 0, "1", "next;"); | |
1501 | ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_LB, 0, "1", "next;"); | |
1502 | ||
1503 | struct sset all_ips = SSET_INITIALIZER(&all_ips); | |
1504 | if (od->nbs->load_balancer) { | |
1505 | struct nbrec_load_balancer *lb = od->nbs->load_balancer; | |
1506 | struct smap *vips = &lb->vips; | |
1507 | struct smap_node *node; | |
1508 | bool vip_configured = false; | |
1509 | ||
1510 | SMAP_FOR_EACH (node, vips) { | |
1511 | vip_configured = true; | |
1512 | ||
1513 | /* node->key contains IP:port or just IP. */ | |
1514 | char *ip_address = NULL; | |
1515 | uint16_t port; | |
1516 | ip_address_and_port_from_lb_key(node->key, &ip_address, &port); | |
1517 | if (!ip_address) { | |
1518 | continue; | |
1519 | } | |
1520 | ||
1521 | if (!sset_contains(&all_ips, ip_address)) { | |
1522 | sset_add(&all_ips, ip_address); | |
1523 | } | |
1524 | ||
1525 | free(ip_address); | |
1526 | ||
1527 | /* Ignore L4 port information in the key because fragmented packets | |
1528 | * may not have L4 information. The pre-stateful table will send | |
1529 | * the packet through ct() action to de-fragment. In stateful | |
1530 | * table, we will eventually look at L4 information. */ | |
1531 | } | |
1532 | ||
1533 | /* 'REGBIT_CONNTRACK_DEFRAG' is set to let the pre-stateful table send | |
1534 | * packet to conntrack for defragmentation. */ | |
1535 | const char *ip_address; | |
1536 | SSET_FOR_EACH(ip_address, &all_ips) { | |
1537 | char *match = xasprintf("ip && ip4.dst == %s", ip_address); | |
1538 | ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_LB, | |
1539 | 100, match, REGBIT_CONNTRACK_DEFRAG" = 1; next;"); | |
1540 | free(match); | |
1541 | } | |
1542 | ||
1543 | sset_destroy(&all_ips); | |
1544 | ||
1545 | if (vip_configured) { | |
1546 | ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_LB, | |
1547 | 100, "ip", REGBIT_CONNTRACK_DEFRAG" = 1; next;"); | |
1548 | } | |
1549 | } | |
1550 | } | |
1551 | ||
facf8652 GS |
1552 | static void |
1553 | build_pre_stateful(struct ovn_datapath *od, struct hmap *lflows) | |
1554 | { | |
1555 | /* Ingress and Egress pre-stateful Table (Priority 0): Packets are | |
1556 | * allowed by default. */ | |
1557 | ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_STATEFUL, 0, "1", "next;"); | |
1558 | ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_STATEFUL, 0, "1", "next;"); | |
1559 | ||
1560 | /* If REGBIT_CONNTRACK_DEFRAG is set as 1, then the packets should be | |
1561 | * sent to conntrack for tracking and defragmentation. */ | |
1562 | ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_STATEFUL, 100, | |
1563 | REGBIT_CONNTRACK_DEFRAG" == 1", "ct_next;"); | |
1564 | ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_STATEFUL, 100, | |
1565 | REGBIT_CONNTRACK_DEFRAG" == 1", "ct_next;"); | |
1566 | } | |
1567 | ||
2d018f9b GS |
1568 | static void |
1569 | build_acls(struct ovn_datapath *od, struct hmap *lflows) | |
1570 | { | |
1571 | bool has_stateful = has_stateful_acl(od); | |
e75451fe | 1572 | |
2d018f9b GS |
1573 | /* Ingress and Egress ACL Table (Priority 0): Packets are allowed by |
1574 | * default. A related rule at priority 1 is added below if there | |
1575 | * are any stateful ACLs in this datapath. */ | |
1576 | ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, 0, "1", "next;"); | |
1577 | ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, 0, "1", "next;"); | |
1578 | ||
1579 | if (has_stateful) { | |
78aab811 JP |
1580 | /* Ingress and Egress ACL Table (Priority 1). |
1581 | * | |
1582 | * By default, traffic is allowed. This is partially handled by | |
1583 | * the Priority 0 ACL flows added earlier, but we also need to | |
1584 | * commit IP flows. This is because, while the initiater's | |
1585 | * direction may not have any stateful rules, the server's may | |
1586 | * and then its return traffic would not have an associated | |
cc58e1f2 RB |
1587 | * conntrack entry and would return "+invalid". |
1588 | * | |
1589 | * We use "ct_commit" for a connection that is not already known | |
1590 | * by the connection tracker. Once a connection is committed, | |
1591 | * subsequent packets will hit the flow at priority 0 that just | |
1592 | * uses "next;" | |
1593 | * | |
1594 | * We also check for established connections that have ct_label[0] | |
1595 | * set on them. That's a connection that was disallowed, but is | |
1596 | * now allowed by policy again since it hit this default-allow flow. | |
1597 | * We need to set ct_label[0]=0 to let the connection continue, | |
1598 | * which will be done by ct_commit() in the "stateful" stage. | |
1599 | * Subsequent packets will hit the flow at priority 0 that just | |
1600 | * uses "next;". */ | |
1601 | ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, 1, | |
1602 | "ip && (!ct.est || (ct.est && ct_label[0] == 1))", | |
1603 | REGBIT_CONNTRACK_COMMIT" = 1; next;"); | |
1604 | ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, 1, | |
1605 | "ip && (!ct.est || (ct.est && ct_label[0] == 1))", | |
1606 | REGBIT_CONNTRACK_COMMIT" = 1; next;"); | |
78aab811 JP |
1607 | |
1608 | /* Ingress and Egress ACL Table (Priority 65535). | |
1609 | * | |
cc58e1f2 RB |
1610 | * Always drop traffic that's in an invalid state. Also drop |
1611 | * reply direction packets for connections that have been marked | |
1612 | * for deletion (bit 0 of ct_label is set). | |
1613 | * | |
1614 | * This is enforced at a higher priority than ACLs can be defined. */ | |
880fcd14 | 1615 | ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX, |
cc58e1f2 RB |
1616 | "ct.inv || (ct.est && ct.rpl && ct_label[0] == 1)", |
1617 | "drop;"); | |
880fcd14 | 1618 | ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX, |
cc58e1f2 RB |
1619 | "ct.inv || (ct.est && ct.rpl && ct_label[0] == 1)", |
1620 | "drop;"); | |
78aab811 JP |
1621 | |
1622 | /* Ingress and Egress ACL Table (Priority 65535). | |
1623 | * | |
cc58e1f2 RB |
1624 | * Allow reply traffic that is part of an established |
1625 | * conntrack entry that has not been marked for deletion | |
1626 | * (bit 0 of ct_label). We only match traffic in the | |
1627 | * reply direction because we want traffic in the request | |
1628 | * direction to hit the currently defined policy from ACLs. | |
1629 | * | |
1630 | * This is enforced at a higher priority than ACLs can be defined. */ | |
880fcd14 | 1631 | ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX, |
cc58e1f2 RB |
1632 | "ct.est && !ct.rel && !ct.new && !ct.inv " |
1633 | "&& ct.rpl && ct_label[0] == 0", | |
78aab811 | 1634 | "next;"); |
880fcd14 | 1635 | ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX, |
cc58e1f2 RB |
1636 | "ct.est && !ct.rel && !ct.new && !ct.inv " |
1637 | "&& ct.rpl && ct_label[0] == 0", | |
78aab811 JP |
1638 | "next;"); |
1639 | ||
1640 | /* Ingress and Egress ACL Table (Priority 65535). | |
1641 | * | |
cc58e1f2 RB |
1642 | * Allow traffic that is related to an existing conntrack entry that |
1643 | * has not been marked for deletion (bit 0 of ct_label). | |
1644 | * | |
1645 | * This is enforced at a higher priority than ACLs can be defined. | |
78aab811 JP |
1646 | * |
1647 | * NOTE: This does not support related data sessions (eg, | |
1648 | * a dynamically negotiated FTP data channel), but will allow | |
1649 | * related traffic such as an ICMP Port Unreachable through | |
1650 | * that's generated from a non-listening UDP port. */ | |
880fcd14 | 1651 | ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX, |
cc58e1f2 RB |
1652 | "!ct.est && ct.rel && !ct.new && !ct.inv " |
1653 | "&& ct_label[0] == 0", | |
78aab811 | 1654 | "next;"); |
880fcd14 | 1655 | ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX, |
cc58e1f2 RB |
1656 | "!ct.est && ct.rel && !ct.new && !ct.inv " |
1657 | "&& ct_label[0] == 0", | |
78aab811 | 1658 | "next;"); |
e75451fe ZKL |
1659 | |
1660 | /* Ingress and Egress ACL Table (Priority 65535). | |
1661 | * | |
1662 | * Not to do conntrack on ND packets. */ | |
1663 | ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX, "nd", "next;"); | |
1664 | ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX, "nd", "next;"); | |
78aab811 JP |
1665 | } |
1666 | ||
1667 | /* Ingress or Egress ACL Table (Various priorities). */ | |
9975d7be BP |
1668 | for (size_t i = 0; i < od->nbs->n_acls; i++) { |
1669 | struct nbrec_acl *acl = od->nbs->acls[i]; | |
78aab811 | 1670 | bool ingress = !strcmp(acl->direction, "from-lport") ? true :false; |
880fcd14 | 1671 | enum ovn_stage stage = ingress ? S_SWITCH_IN_ACL : S_SWITCH_OUT_ACL; |
78aab811 | 1672 | |
cc58e1f2 RB |
1673 | if (!strcmp(acl->action, "allow") |
1674 | || !strcmp(acl->action, "allow-related")) { | |
78aab811 JP |
1675 | /* If there are any stateful flows, we must even commit "allow" |
1676 | * actions. This is because, while the initiater's | |
1677 | * direction may not have any stateful rules, the server's | |
1678 | * may and then its return traffic would not have an | |
1679 | * associated conntrack entry and would return "+invalid". */ | |
cc58e1f2 RB |
1680 | if (!has_stateful) { |
1681 | ovn_lflow_add(lflows, od, stage, | |
1682 | acl->priority + OVN_ACL_PRI_OFFSET, | |
1683 | acl->match, "next;"); | |
1684 | } else { | |
1685 | struct ds match = DS_EMPTY_INITIALIZER; | |
1686 | ||
1687 | /* Commit the connection tracking entry if it's a new | |
1688 | * connection that matches this ACL. After this commit, | |
1689 | * the reply traffic is allowed by a flow we create at | |
1690 | * priority 65535, defined earlier. | |
1691 | * | |
1692 | * It's also possible that a known connection was marked for | |
1693 | * deletion after a policy was deleted, but the policy was | |
1694 | * re-added while that connection is still known. We catch | |
1695 | * that case here and un-set ct_label[0] (which will be done | |
1696 | * by ct_commit in the "stateful" stage) to indicate that the | |
1697 | * connection should be allowed to resume. | |
1698 | */ | |
1699 | ds_put_format(&match, "((ct.new && !ct.est)" | |
1700 | " || (!ct.new && ct.est && !ct.rpl " | |
1701 | "&& ct_label[0] == 1)) " | |
1702 | "&& (%s)", acl->match); | |
1703 | ovn_lflow_add(lflows, od, stage, | |
1704 | acl->priority + OVN_ACL_PRI_OFFSET, | |
1705 | ds_cstr(&match), | |
1706 | REGBIT_CONNTRACK_COMMIT" = 1; next;"); | |
1707 | ||
1708 | /* Match on traffic in the request direction for an established | |
1709 | * connection tracking entry that has not been marked for | |
1710 | * deletion. There is no need to commit here, so we can just | |
1711 | * proceed to the next table. We use this to ensure that this | |
1712 | * connection is still allowed by the currently defined | |
1713 | * policy. */ | |
1714 | ds_clear(&match); | |
1715 | ds_put_format(&match, | |
1716 | "!ct.new && ct.est && !ct.rpl" | |
1717 | " && ct_label[0] == 0 && (%s)", | |
1718 | acl->match); | |
1719 | ovn_lflow_add(lflows, od, stage, | |
1720 | acl->priority + OVN_ACL_PRI_OFFSET, | |
1721 | ds_cstr(&match), "next;"); | |
1722 | ||
1723 | ds_destroy(&match); | |
1724 | } | |
1725 | } else if (!strcmp(acl->action, "drop") | |
1726 | || !strcmp(acl->action, "reject")) { | |
78aab811 JP |
1727 | struct ds match = DS_EMPTY_INITIALIZER; |
1728 | ||
cc58e1f2 RB |
1729 | /* XXX Need to support "reject", treat it as "drop;" for now. */ |
1730 | if (!strcmp(acl->action, "reject")) { | |
1731 | VLOG_INFO("reject is not a supported action"); | |
1732 | } | |
78aab811 | 1733 | |
cc58e1f2 RB |
1734 | /* The implementation of "drop" differs if stateful ACLs are in |
1735 | * use for this datapath. In that case, the actions differ | |
1736 | * depending on whether the connection was previously committed | |
1737 | * to the connection tracker with ct_commit. */ | |
1738 | if (has_stateful) { | |
1739 | /* If the packet is not part of an established connection, then | |
1740 | * we can simply drop it. */ | |
1741 | ds_put_format(&match, | |
1742 | "(!ct.est || (ct.est && ct_label[0] == 1)) " | |
1743 | "&& (%s)", | |
1744 | acl->match); | |
1745 | ovn_lflow_add(lflows, od, stage, acl->priority + | |
1746 | OVN_ACL_PRI_OFFSET, ds_cstr(&match), "drop;"); | |
1747 | ||
1748 | /* For an existing connection without ct_label set, we've | |
1749 | * encountered a policy change. ACLs previously allowed | |
1750 | * this connection and we committed the connection tracking | |
1751 | * entry. Current policy says that we should drop this | |
1752 | * connection. First, we set bit 0 of ct_label to indicate | |
1753 | * that this connection is set for deletion. By not | |
1754 | * specifying "next;", we implicitly drop the packet after | |
1755 | * updating conntrack state. We would normally defer | |
1756 | * ct_commit() to the "stateful" stage, but since we're | |
1757 | * dropping the packet, we go ahead and do it here. */ | |
1758 | ds_clear(&match); | |
1759 | ds_put_format(&match, | |
1760 | "ct.est && ct_label[0] == 0 && (%s)", | |
1761 | acl->match); | |
1762 | ovn_lflow_add(lflows, od, stage, | |
1763 | acl->priority + OVN_ACL_PRI_OFFSET, | |
1764 | ds_cstr(&match), "ct_commit(ct_label=1/1);"); | |
1765 | ||
1766 | ds_destroy(&match); | |
1767 | } else { | |
1768 | /* There are no stateful ACLs in use on this datapath, | |
1769 | * so a "drop" ACL is simply the "drop" logical flow action | |
1770 | * in all cases. */ | |
1771 | ovn_lflow_add(lflows, od, stage, | |
1772 | acl->priority + OVN_ACL_PRI_OFFSET, | |
1773 | acl->match, "drop;"); | |
1774 | } | |
78aab811 JP |
1775 | } |
1776 | } | |
1777 | } | |
1778 | ||
7a15be69 GS |
1779 | static void |
1780 | build_lb(struct ovn_datapath *od, struct hmap *lflows) | |
1781 | { | |
1782 | /* Ingress and Egress LB Table (Priority 0): Packets are allowed by | |
1783 | * default. */ | |
1784 | ovn_lflow_add(lflows, od, S_SWITCH_IN_LB, 0, "1", "next;"); | |
1785 | ovn_lflow_add(lflows, od, S_SWITCH_OUT_LB, 0, "1", "next;"); | |
1786 | ||
1787 | if (od->nbs->load_balancer) { | |
1788 | /* Ingress and Egress LB Table (Priority 65535). | |
1789 | * | |
1790 | * Send established traffic through conntrack for just NAT. */ | |
1791 | ovn_lflow_add(lflows, od, S_SWITCH_IN_LB, UINT16_MAX, | |
1792 | "ct.est && !ct.rel && !ct.new && !ct.inv", | |
1793 | REGBIT_CONNTRACK_NAT" = 1; next;"); | |
1794 | ovn_lflow_add(lflows, od, S_SWITCH_OUT_LB, UINT16_MAX, | |
1795 | "ct.est && !ct.rel && !ct.new && !ct.inv", | |
1796 | REGBIT_CONNTRACK_NAT" = 1; next;"); | |
1797 | } | |
1798 | } | |
1799 | ||
fa313a8c GS |
1800 | static void |
1801 | build_stateful(struct ovn_datapath *od, struct hmap *lflows) | |
1802 | { | |
1803 | /* Ingress and Egress stateful Table (Priority 0): Packets are | |
1804 | * allowed by default. */ | |
1805 | ovn_lflow_add(lflows, od, S_SWITCH_IN_STATEFUL, 0, "1", "next;"); | |
1806 | ovn_lflow_add(lflows, od, S_SWITCH_OUT_STATEFUL, 0, "1", "next;"); | |
1807 | ||
1808 | /* If REGBIT_CONNTRACK_COMMIT is set as 1, then the packets should be | |
cc58e1f2 RB |
1809 | * committed to conntrack. We always set ct_label[0] to 0 here as |
1810 | * any packet that makes it this far is part of a connection we | |
1811 | * want to allow to continue. */ | |
fa313a8c | 1812 | ovn_lflow_add(lflows, od, S_SWITCH_IN_STATEFUL, 100, |
cc58e1f2 | 1813 | REGBIT_CONNTRACK_COMMIT" == 1", "ct_commit(ct_label=0/1); next;"); |
fa313a8c | 1814 | ovn_lflow_add(lflows, od, S_SWITCH_OUT_STATEFUL, 100, |
cc58e1f2 | 1815 | REGBIT_CONNTRACK_COMMIT" == 1", "ct_commit(ct_label=0/1); next;"); |
7a15be69 GS |
1816 | |
1817 | /* If REGBIT_CONNTRACK_NAT is set as 1, then packets should just be sent | |
1818 | * through nat (without committing). | |
1819 | * | |
1820 | * REGBIT_CONNTRACK_COMMIT is set for new connections and | |
1821 | * REGBIT_CONNTRACK_NAT is set for established connections. So they | |
1822 | * don't overlap. | |
1823 | */ | |
1824 | ovn_lflow_add(lflows, od, S_SWITCH_IN_STATEFUL, 100, | |
1825 | REGBIT_CONNTRACK_NAT" == 1", "ct_lb;"); | |
1826 | ovn_lflow_add(lflows, od, S_SWITCH_OUT_STATEFUL, 100, | |
1827 | REGBIT_CONNTRACK_NAT" == 1", "ct_lb;"); | |
1828 | ||
1829 | /* Load balancing rules for new connections get committed to conntrack | |
1830 | * table. So even if REGBIT_CONNTRACK_COMMIT is set in a previous table | |
1831 | * a higher priority rule for load balancing below also commits the | |
1832 | * connection, so it is okay if we do not hit the above match on | |
1833 | * REGBIT_CONNTRACK_COMMIT. */ | |
1834 | if (od->nbs->load_balancer) { | |
1835 | struct nbrec_load_balancer *lb = od->nbs->load_balancer; | |
1836 | struct smap *vips = &lb->vips; | |
1837 | struct smap_node *node; | |
1838 | ||
1839 | SMAP_FOR_EACH (node, vips) { | |
1840 | uint16_t port = 0; | |
1841 | ||
1842 | /* node->key contains IP:port or just IP. */ | |
1843 | char *ip_address = NULL; | |
1844 | ip_address_and_port_from_lb_key(node->key, &ip_address, &port); | |
1845 | if (!ip_address) { | |
1846 | continue; | |
1847 | } | |
1848 | ||
1849 | /* New connections in Ingress table. */ | |
1850 | char *action = xasprintf("ct_lb(%s);", node->value); | |
1851 | struct ds match = DS_EMPTY_INITIALIZER; | |
1852 | ds_put_format(&match, "ct.new && ip && ip4.dst == %s", ip_address); | |
1853 | if (port) { | |
1854 | if (lb->protocol && !strcmp(lb->protocol, "udp")) { | |
1855 | ds_put_format(&match, "&& udp && udp.dst == %d", port); | |
1856 | } else { | |
1857 | ds_put_format(&match, "&& tcp && tcp.dst == %d", port); | |
1858 | } | |
1859 | ovn_lflow_add(lflows, od, S_SWITCH_IN_STATEFUL, | |
1860 | 120, ds_cstr(&match), action); | |
1861 | } else { | |
1862 | ovn_lflow_add(lflows, od, S_SWITCH_IN_STATEFUL, | |
1863 | 110, ds_cstr(&match), action); | |
1864 | } | |
1865 | ||
1866 | ds_destroy(&match); | |
1867 | free(action); | |
1868 | } | |
1869 | } | |
fa313a8c GS |
1870 | } |
1871 | ||
bd39395f | 1872 | static void |
9975d7be BP |
1873 | build_lswitch_flows(struct hmap *datapaths, struct hmap *ports, |
1874 | struct hmap *lflows, struct hmap *mcgroups) | |
bd39395f | 1875 | { |
5cff6b99 BP |
1876 | /* This flow table structure is documented in ovn-northd(8), so please |
1877 | * update ovn-northd.8.xml if you change anything. */ | |
1878 | ||
09b39248 JP |
1879 | struct ds match = DS_EMPTY_INITIALIZER; |
1880 | struct ds actions = DS_EMPTY_INITIALIZER; | |
1881 | ||
9975d7be | 1882 | /* Build pre-ACL and ACL tables for both ingress and egress. |
685f4dfe | 1883 | * Ingress tables 3 and 4. Egress tables 0 and 1. */ |
5868eb24 BP |
1884 | struct ovn_datapath *od; |
1885 | HMAP_FOR_EACH (od, key_node, datapaths) { | |
9975d7be BP |
1886 | if (!od->nbs) { |
1887 | continue; | |
1888 | } | |
1889 | ||
2d018f9b | 1890 | build_pre_acls(od, lflows, ports); |
7a15be69 | 1891 | build_pre_lb(od, lflows); |
facf8652 | 1892 | build_pre_stateful(od, lflows); |
2d018f9b | 1893 | build_acls(od, lflows); |
7a15be69 | 1894 | build_lb(od, lflows); |
fa313a8c | 1895 | build_stateful(od, lflows); |
9975d7be BP |
1896 | } |
1897 | ||
1898 | /* Logical switch ingress table 0: Admission control framework (priority | |
1899 | * 100). */ | |
1900 | HMAP_FOR_EACH (od, key_node, datapaths) { | |
1901 | if (!od->nbs) { | |
1902 | continue; | |
1903 | } | |
1904 | ||
bd39395f | 1905 | /* Logical VLANs not supported. */ |
685f4dfe | 1906 | ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC_L2, 100, "vlan.present", |
091e3af9 | 1907 | "drop;"); |
bd39395f BP |
1908 | |
1909 | /* Broadcast/multicast source address is invalid. */ | |
685f4dfe | 1910 | ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC_L2, 100, "eth.src[40]", |
091e3af9 | 1911 | "drop;"); |
bd39395f | 1912 | |
35060cdc BP |
1913 | /* Port security flows have priority 50 (see below) and will continue |
1914 | * to the next table if packet source is acceptable. */ | |
bd39395f BP |
1915 | } |
1916 | ||
685f4dfe NS |
1917 | /* Logical switch ingress table 0: Ingress port security - L2 |
1918 | * (priority 50). | |
1919 | * Ingress table 1: Ingress port security - IP (priority 90 and 80) | |
1920 | * Ingress table 2: Ingress port security - ND (priority 90 and 80) | |
1921 | */ | |
5868eb24 BP |
1922 | struct ovn_port *op; |
1923 | HMAP_FOR_EACH (op, key_node, ports) { | |
0ee00741 | 1924 | if (!op->nbsp) { |
9975d7be BP |
1925 | continue; |
1926 | } | |
1927 | ||
0ee00741 | 1928 | if (!lsp_is_enabled(op->nbsp)) { |
96af668a BP |
1929 | /* Drop packets from disabled logical ports (since logical flow |
1930 | * tables are default-drop). */ | |
1931 | continue; | |
1932 | } | |
1933 | ||
09b39248 | 1934 | ds_clear(&match); |
9975d7be | 1935 | ds_put_format(&match, "inport == %s", op->json_key); |
e93b43d6 JP |
1936 | build_port_security_l2("eth.src", op->ps_addrs, op->n_ps_addrs, |
1937 | &match); | |
685f4dfe | 1938 | ovn_lflow_add(lflows, op->od, S_SWITCH_IN_PORT_SEC_L2, 50, |
96af668a | 1939 | ds_cstr(&match), "next;"); |
685f4dfe | 1940 | |
0ee00741 | 1941 | if (op->nbsp->n_port_security) { |
685f4dfe NS |
1942 | build_port_security_ip(P_IN, op, lflows); |
1943 | build_port_security_nd(op, lflows); | |
1944 | } | |
1945 | } | |
1946 | ||
1947 | /* Ingress table 1 and 2: Port security - IP and ND, by default goto next. | |
1948 | * (priority 0)*/ | |
1949 | HMAP_FOR_EACH (od, key_node, datapaths) { | |
1950 | if (!od->nbs) { | |
1951 | continue; | |
1952 | } | |
1953 | ||
1954 | ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC_ND, 0, "1", "next;"); | |
1955 | ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC_IP, 0, "1", "next;"); | |
5868eb24 | 1956 | } |
445a266a | 1957 | |
94300e09 | 1958 | /* Ingress table 9: ARP responder, skip requests coming from localnet ports. |
fa128126 HZ |
1959 | * (priority 100). */ |
1960 | HMAP_FOR_EACH (op, key_node, ports) { | |
0ee00741 | 1961 | if (!op->nbsp) { |
fa128126 HZ |
1962 | continue; |
1963 | } | |
1964 | ||
0ee00741 | 1965 | if (!strcmp(op->nbsp->type, "localnet")) { |
09b39248 JP |
1966 | ds_clear(&match); |
1967 | ds_put_format(&match, "inport == %s", op->json_key); | |
e75451fe | 1968 | ovn_lflow_add(lflows, op->od, S_SWITCH_IN_ARP_ND_RSP, 100, |
09b39248 | 1969 | ds_cstr(&match), "next;"); |
fa128126 HZ |
1970 | } |
1971 | } | |
1972 | ||
94300e09 | 1973 | /* Ingress table 9: ARP/ND responder, reply for known IPs. |
fa128126 | 1974 | * (priority 50). */ |
57d143eb | 1975 | HMAP_FOR_EACH (op, key_node, ports) { |
0ee00741 | 1976 | if (!op->nbsp) { |
57d143eb HZ |
1977 | continue; |
1978 | } | |
1979 | ||
4c7bf534 | 1980 | /* |
e75451fe | 1981 | * Add ARP/ND reply flows if either the |
4c7bf534 NS |
1982 | * - port is up or |
1983 | * - port type is router | |
1984 | */ | |
0ee00741 | 1985 | if (!lsp_is_up(op->nbsp) && strcmp(op->nbsp->type, "router")) { |
4c7bf534 NS |
1986 | continue; |
1987 | } | |
1988 | ||
e93b43d6 JP |
1989 | for (size_t i = 0; i < op->n_lsp_addrs; i++) { |
1990 | for (size_t j = 0; j < op->lsp_addrs[i].n_ipv4_addrs; j++) { | |
09b39248 | 1991 | ds_clear(&match); |
e93b43d6 JP |
1992 | ds_put_format(&match, "arp.tpa == %s && arp.op == 1", |
1993 | op->lsp_addrs[i].ipv4_addrs[j].addr_s); | |
09b39248 JP |
1994 | ds_clear(&actions); |
1995 | ds_put_format(&actions, | |
57d143eb | 1996 | "eth.dst = eth.src; " |
e93b43d6 | 1997 | "eth.src = %s; " |
57d143eb HZ |
1998 | "arp.op = 2; /* ARP reply */ " |
1999 | "arp.tha = arp.sha; " | |
e93b43d6 | 2000 | "arp.sha = %s; " |
57d143eb | 2001 | "arp.tpa = arp.spa; " |
e93b43d6 | 2002 | "arp.spa = %s; " |
57d143eb HZ |
2003 | "outport = inport; " |
2004 | "inport = \"\"; /* Allow sending out inport. */ " | |
2005 | "output;", | |
e93b43d6 JP |
2006 | op->lsp_addrs[i].ea_s, op->lsp_addrs[i].ea_s, |
2007 | op->lsp_addrs[i].ipv4_addrs[j].addr_s); | |
e75451fe | 2008 | ovn_lflow_add(lflows, op->od, S_SWITCH_IN_ARP_ND_RSP, 50, |
09b39248 | 2009 | ds_cstr(&match), ds_cstr(&actions)); |
57d143eb | 2010 | } |
7dc88496 | 2011 | |
e93b43d6 | 2012 | if (op->lsp_addrs[i].n_ipv6_addrs > 0) { |
09b39248 | 2013 | ds_clear(&match); |
e75451fe | 2014 | ds_put_cstr(&match, "icmp6 && icmp6.type == 135 && "); |
e93b43d6 JP |
2015 | if (op->lsp_addrs[i].n_ipv6_addrs == 1) { |
2016 | ds_put_format(&match, "nd.target == %s", | |
2017 | op->lsp_addrs[i].ipv6_addrs[0].addr_s); | |
e75451fe | 2018 | } else { |
e93b43d6 JP |
2019 | ds_put_format(&match, "nd.target == {"); |
2020 | for (size_t j = 0; j < op->lsp_addrs[i].n_ipv6_addrs; j++) { | |
2021 | ds_put_cstr(&match, | |
2022 | op->lsp_addrs[i].ipv6_addrs[j].addr_s); | |
e75451fe ZKL |
2023 | } |
2024 | ds_chomp(&match, ' '); | |
e93b43d6 JP |
2025 | ds_chomp(&match, ','); |
2026 | ds_put_cstr(&match, "}"); | |
e75451fe | 2027 | } |
09b39248 JP |
2028 | ds_clear(&actions); |
2029 | ds_put_format(&actions, | |
e93b43d6 JP |
2030 | "na { eth.src = %s; " |
2031 | "nd.tll = %s; " | |
e75451fe ZKL |
2032 | "outport = inport; " |
2033 | "inport = \"\"; /* Allow sending out inport. */ " | |
2034 | "output; };", | |
e93b43d6 JP |
2035 | op->lsp_addrs[i].ea_s, |
2036 | op->lsp_addrs[i].ea_s); | |
e75451fe ZKL |
2037 | |
2038 | ovn_lflow_add(lflows, op->od, S_SWITCH_IN_ARP_ND_RSP, 50, | |
09b39248 | 2039 | ds_cstr(&match), ds_cstr(&actions)); |
e75451fe | 2040 | |
e75451fe | 2041 | } |
57d143eb HZ |
2042 | } |
2043 | } | |
2044 | ||
94300e09 | 2045 | /* Ingress table 9: ARP/ND responder, by default goto next. |
fa128126 HZ |
2046 | * (priority 0)*/ |
2047 | HMAP_FOR_EACH (od, key_node, datapaths) { | |
2048 | if (!od->nbs) { | |
2049 | continue; | |
2050 | } | |
2051 | ||
e75451fe | 2052 | ovn_lflow_add(lflows, od, S_SWITCH_IN_ARP_ND_RSP, 0, "1", "next;"); |
fa128126 HZ |
2053 | } |
2054 | ||
94300e09 | 2055 | /* Ingress table 10: Destination lookup, broadcast and multicast handling |
5868eb24 BP |
2056 | * (priority 100). */ |
2057 | HMAP_FOR_EACH (op, key_node, ports) { | |
0ee00741 | 2058 | if (!op->nbsp) { |
9975d7be BP |
2059 | continue; |
2060 | } | |
2061 | ||
0ee00741 | 2062 | if (lsp_is_enabled(op->nbsp)) { |
9975d7be | 2063 | ovn_multicast_add(mcgroups, &mc_flood, op); |
445a266a | 2064 | } |
5868eb24 BP |
2065 | } |
2066 | HMAP_FOR_EACH (od, key_node, datapaths) { | |
9975d7be BP |
2067 | if (!od->nbs) { |
2068 | continue; | |
2069 | } | |
2070 | ||
2071 | ovn_lflow_add(lflows, od, S_SWITCH_IN_L2_LKUP, 100, "eth.mcast", | |
5868eb24 | 2072 | "outport = \""MC_FLOOD"\"; output;"); |
bd39395f | 2073 | } |
bd39395f | 2074 | |
94300e09 | 2075 | /* Ingress table 10: Destination lookup, unicast handling (priority 50), */ |
5868eb24 | 2076 | HMAP_FOR_EACH (op, key_node, ports) { |
0ee00741 | 2077 | if (!op->nbsp) { |
9975d7be BP |
2078 | continue; |
2079 | } | |
2080 | ||
0ee00741 | 2081 | for (size_t i = 0; i < op->nbsp->n_addresses; i++) { |
74ff3298 | 2082 | struct eth_addr mac; |
5868eb24 | 2083 | |
0ee00741 | 2084 | if (eth_addr_from_string(op->nbsp->addresses[i], &mac)) { |
09b39248 | 2085 | ds_clear(&match); |
9975d7be BP |
2086 | ds_put_format(&match, "eth.dst == "ETH_ADDR_FMT, |
2087 | ETH_ADDR_ARGS(mac)); | |
5868eb24 | 2088 | |
09b39248 | 2089 | ds_clear(&actions); |
9975d7be BP |
2090 | ds_put_format(&actions, "outport = %s; output;", op->json_key); |
2091 | ovn_lflow_add(lflows, op->od, S_SWITCH_IN_L2_LKUP, 50, | |
5868eb24 | 2092 | ds_cstr(&match), ds_cstr(&actions)); |
0ee00741 HK |
2093 | } else if (!strcmp(op->nbsp->addresses[i], "unknown")) { |
2094 | if (lsp_is_enabled(op->nbsp)) { | |
9975d7be | 2095 | ovn_multicast_add(mcgroups, &mc_unknown, op); |
96af668a BP |
2096 | op->od->has_unknown = true; |
2097 | } | |
5868eb24 BP |
2098 | } else { |
2099 | static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1); | |
445a266a | 2100 | |
2fa326a3 BP |
2101 | VLOG_INFO_RL(&rl, |
2102 | "%s: invalid syntax '%s' in addresses column", | |
0ee00741 | 2103 | op->nbsp->name, op->nbsp->addresses[i]); |
445a266a BP |
2104 | } |
2105 | } | |
bd39395f BP |
2106 | } |
2107 | ||
94300e09 | 2108 | /* Ingress table 10: Destination lookup for unknown MACs (priority 0). */ |
5868eb24 | 2109 | HMAP_FOR_EACH (od, key_node, datapaths) { |
9975d7be BP |
2110 | if (!od->nbs) { |
2111 | continue; | |
2112 | } | |
2113 | ||
5868eb24 | 2114 | if (od->has_unknown) { |
9975d7be | 2115 | ovn_lflow_add(lflows, od, S_SWITCH_IN_L2_LKUP, 0, "1", |
5868eb24 | 2116 | "outport = \""MC_UNKNOWN"\"; output;"); |
445a266a | 2117 | } |
bd39395f BP |
2118 | } |
2119 | ||
94300e09 JP |
2120 | /* Egress tables 6: Egress port security - IP (priority 0) |
2121 | * Egress table 7: Egress port security L2 - multicast/broadcast | |
2122 | * (priority 100). */ | |
5868eb24 | 2123 | HMAP_FOR_EACH (od, key_node, datapaths) { |
9975d7be BP |
2124 | if (!od->nbs) { |
2125 | continue; | |
2126 | } | |
2127 | ||
685f4dfe NS |
2128 | ovn_lflow_add(lflows, od, S_SWITCH_OUT_PORT_SEC_IP, 0, "1", "next;"); |
2129 | ovn_lflow_add(lflows, od, S_SWITCH_OUT_PORT_SEC_L2, 100, "eth.mcast", | |
091e3af9 | 2130 | "output;"); |
48f42f3a RB |
2131 | } |
2132 | ||
94300e09 | 2133 | /* Egress table 6: Egress port security - IP (priorities 90 and 80) |
685f4dfe NS |
2134 | * if port security enabled. |
2135 | * | |
94300e09 | 2136 | * Egress table 7: Egress port security - L2 (priorities 50 and 150). |
d770a830 BP |
2137 | * |
2138 | * Priority 50 rules implement port security for enabled logical port. | |
2139 | * | |
2140 | * Priority 150 rules drop packets to disabled logical ports, so that they | |
2141 | * don't even receive multicast or broadcast packets. */ | |
5868eb24 | 2142 | HMAP_FOR_EACH (op, key_node, ports) { |
0ee00741 | 2143 | if (!op->nbsp) { |
9975d7be BP |
2144 | continue; |
2145 | } | |
2146 | ||
09b39248 | 2147 | ds_clear(&match); |
9975d7be | 2148 | ds_put_format(&match, "outport == %s", op->json_key); |
0ee00741 | 2149 | if (lsp_is_enabled(op->nbsp)) { |
e93b43d6 JP |
2150 | build_port_security_l2("eth.dst", op->ps_addrs, op->n_ps_addrs, |
2151 | &match); | |
685f4dfe | 2152 | ovn_lflow_add(lflows, op->od, S_SWITCH_OUT_PORT_SEC_L2, 50, |
d770a830 BP |
2153 | ds_cstr(&match), "output;"); |
2154 | } else { | |
685f4dfe | 2155 | ovn_lflow_add(lflows, op->od, S_SWITCH_OUT_PORT_SEC_L2, 150, |
d770a830 BP |
2156 | ds_cstr(&match), "drop;"); |
2157 | } | |
eb00399e | 2158 | |
0ee00741 | 2159 | if (op->nbsp->n_port_security) { |
685f4dfe NS |
2160 | build_port_security_ip(P_OUT, op, lflows); |
2161 | } | |
eb00399e | 2162 | } |
09b39248 JP |
2163 | |
2164 | ds_destroy(&match); | |
2165 | ds_destroy(&actions); | |
9975d7be | 2166 | } |
eb00399e | 2167 | |
9975d7be BP |
2168 | static bool |
2169 | lrport_is_enabled(const struct nbrec_logical_router_port *lrport) | |
2170 | { | |
2171 | return !lrport->enabled || *lrport->enabled; | |
2172 | } | |
2173 | ||
4685e523 JP |
2174 | /* Returns a string of the IP address of the router port 'op' that |
2175 | * overlaps with 'ip_s". If one is not found, returns NULL. | |
2176 | * | |
2177 | * The caller must not free the returned string. */ | |
2178 | static const char * | |
2179 | find_lrp_member_ip(const struct ovn_port *op, const char *ip_s) | |
2180 | { | |
6026f534 | 2181 | ovs_be32 ip; |
4685e523 JP |
2182 | |
2183 | if (!ip_parse(ip_s, &ip)) { | |
2184 | static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1); | |
2185 | VLOG_WARN_RL(&rl, "bad ip address %s", ip_s); | |
2186 | return NULL; | |
2187 | } | |
2188 | ||
2189 | for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) { | |
2190 | const struct ipv4_netaddr *na = &op->lrp_networks.ipv4_addrs[i]; | |
2191 | ||
2192 | if (!((na->network ^ ip) & na->mask)) { | |
2193 | /* There should be only 1 interface that matches the | |
2194 | * next hop. Otherwise, it's a configuration error, | |
2195 | * because subnets of router's interfaces should NOT | |
2196 | * overlap. */ | |
2197 | return na->addr_s; | |
2198 | } | |
2199 | } | |
2200 | ||
2201 | return NULL; | |
2202 | } | |
2203 | ||
9975d7be | 2204 | static void |
0bac7164 | 2205 | add_route(struct hmap *lflows, const struct ovn_port *op, |
4685e523 JP |
2206 | const char *lrp_addr_s, const char *network_s, int plen, |
2207 | const char *gateway) | |
9975d7be | 2208 | { |
c9bdf7bd | 2209 | char *match = xasprintf("ip4.dst == %s/%d", network_s, plen); |
9975d7be BP |
2210 | |
2211 | struct ds actions = DS_EMPTY_INITIALIZER; | |
47f3b59b | 2212 | ds_put_cstr(&actions, "ip.ttl--; reg0 = "); |
9975d7be | 2213 | if (gateway) { |
c9bdf7bd | 2214 | ds_put_cstr(&actions, gateway); |
9975d7be BP |
2215 | } else { |
2216 | ds_put_cstr(&actions, "ip4.dst"); | |
2217 | } | |
4685e523 | 2218 | ds_put_format(&actions, "; " |
c9bdf7bd | 2219 | "reg1 = %s; " |
4685e523 | 2220 | "eth.src = %s; " |
0bac7164 | 2221 | "outport = %s; " |
4685e523 | 2222 | "inport = \"\"; /* Allow sending out inport. */ " |
0bac7164 | 2223 | "next;", |
4685e523 JP |
2224 | lrp_addr_s, |
2225 | op->lrp_networks.ea_s, | |
2226 | op->json_key); | |
9975d7be BP |
2227 | |
2228 | /* The priority here is calculated to implement longest-prefix-match | |
2229 | * routing. */ | |
c9bdf7bd JP |
2230 | ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_ROUTING, plen, match, |
2231 | ds_cstr(&actions)); | |
9975d7be BP |
2232 | ds_destroy(&actions); |
2233 | free(match); | |
2234 | } | |
2235 | ||
28dc3fe9 SR |
2236 | static void |
2237 | build_static_route_flow(struct hmap *lflows, struct ovn_datapath *od, | |
2238 | struct hmap *ports, | |
2239 | const struct nbrec_logical_router_static_route *route) | |
2240 | { | |
4685e523 JP |
2241 | ovs_be32 prefix, nexthop, mask; |
2242 | const char *lrp_addr_s; | |
28dc3fe9 SR |
2243 | |
2244 | /* Verify that next hop is an IP address with 32 bits mask. */ | |
4685e523 | 2245 | char *error = ip_parse_masked(route->nexthop, &nexthop, &mask); |
28dc3fe9 SR |
2246 | if (error || mask != OVS_BE32_MAX) { |
2247 | static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1); | |
2248 | VLOG_WARN_RL(&rl, "bad next hop ip address %s", route->nexthop); | |
2249 | free(error); | |
2250 | return; | |
2251 | } | |
2252 | ||
2253 | /* Verify that ip prefix is a valid CIDR address. */ | |
2254 | error = ip_parse_masked(route->ip_prefix, &prefix, &mask); | |
2255 | if (error || !ip_is_cidr(mask)) { | |
2256 | static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1); | |
c9bdf7bd | 2257 | VLOG_WARN_RL(&rl, "bad 'ip_prefix' in static routes %s", |
28dc3fe9 SR |
2258 | route->ip_prefix); |
2259 | free(error); | |
2260 | return; | |
2261 | } | |
2262 | ||
2263 | /* Find the outgoing port. */ | |
2264 | struct ovn_port *out_port = NULL; | |
2265 | if (route->output_port) { | |
2266 | out_port = ovn_port_find(ports, route->output_port); | |
2267 | if (!out_port) { | |
2268 | static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1); | |
2269 | VLOG_WARN_RL(&rl, "Bad out port %s for static route %s", | |
2270 | route->output_port, route->ip_prefix); | |
2271 | return; | |
2272 | } | |
4685e523 | 2273 | lrp_addr_s = find_lrp_member_ip(out_port, route->nexthop); |
28dc3fe9 SR |
2274 | } else { |
2275 | /* output_port is not specified, find the | |
2276 | * router port matching the next hop. */ | |
2277 | int i; | |
2278 | for (i = 0; i < od->nbr->n_ports; i++) { | |
2279 | struct nbrec_logical_router_port *lrp = od->nbr->ports[i]; | |
2280 | out_port = ovn_port_find(ports, lrp->name); | |
2281 | if (!out_port) { | |
2282 | /* This should not happen. */ | |
2283 | continue; | |
2284 | } | |
2285 | ||
4685e523 JP |
2286 | lrp_addr_s = find_lrp_member_ip(out_port, route->nexthop); |
2287 | if (lrp_addr_s) { | |
28dc3fe9 SR |
2288 | break; |
2289 | } | |
2290 | } | |
28dc3fe9 SR |
2291 | } |
2292 | ||
4685e523 JP |
2293 | if (!lrp_addr_s) { |
2294 | /* There is no matched out port. */ | |
2295 | static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1); | |
2296 | VLOG_WARN_RL(&rl, "No path for static route %s; next hop %s", | |
2297 | route->ip_prefix, route->nexthop); | |
2298 | return; | |
2299 | } | |
2300 | ||
2301 | char *prefix_s = xasprintf(IP_FMT, IP_ARGS(prefix & mask)); | |
2302 | add_route(lflows, out_port, lrp_addr_s, prefix_s, | |
2303 | ip_count_cidr_bits(mask), route->nexthop); | |
c9bdf7bd | 2304 | free(prefix_s); |
28dc3fe9 SR |
2305 | } |
2306 | ||
4685e523 JP |
2307 | static void |
2308 | op_put_networks(struct ds *ds, const struct ovn_port *op, bool add_bcast) | |
2309 | { | |
2310 | if (!add_bcast && op->lrp_networks.n_ipv4_addrs == 1) { | |
2311 | ds_put_format(ds, "%s", op->lrp_networks.ipv4_addrs[0].addr_s); | |
2312 | return; | |
2313 | } | |
2314 | ||
2315 | ds_put_cstr(ds, "{"); | |
2316 | for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) { | |
2317 | ds_put_format(ds, "%s, ", op->lrp_networks.ipv4_addrs[i].addr_s); | |
2318 | if (add_bcast) { | |
2319 | ds_put_format(ds, "%s, ", op->lrp_networks.ipv4_addrs[i].bcast_s); | |
2320 | } | |
2321 | } | |
2322 | ds_chomp(ds, ' '); | |
2323 | ds_chomp(ds, ','); | |
2324 | ds_put_cstr(ds, "}"); | |
2325 | } | |
2326 | ||
9975d7be BP |
2327 | static void |
2328 | build_lrouter_flows(struct hmap *datapaths, struct hmap *ports, | |
2329 | struct hmap *lflows) | |
2330 | { | |
2331 | /* This flow table structure is documented in ovn-northd(8), so please | |
2332 | * update ovn-northd.8.xml if you change anything. */ | |
2333 | ||
09b39248 JP |
2334 | struct ds match = DS_EMPTY_INITIALIZER; |
2335 | struct ds actions = DS_EMPTY_INITIALIZER; | |
2336 | ||
9975d7be BP |
2337 | /* Logical router ingress table 0: Admission control framework. */ |
2338 | struct ovn_datapath *od; | |
2339 | HMAP_FOR_EACH (od, key_node, datapaths) { | |
2340 | if (!od->nbr) { | |
2341 | continue; | |
2342 | } | |
2343 | ||
2344 | /* Logical VLANs not supported. | |
2345 | * Broadcast/multicast source address is invalid. */ | |
2346 | ovn_lflow_add(lflows, od, S_ROUTER_IN_ADMISSION, 100, | |
2347 | "vlan.present || eth.src[40]", "drop;"); | |
2348 | } | |
2349 | ||
2350 | /* Logical router ingress table 0: match (priority 50). */ | |
2351 | struct ovn_port *op; | |
2352 | HMAP_FOR_EACH (op, key_node, ports) { | |
0ee00741 | 2353 | if (!op->nbrp) { |
9975d7be BP |
2354 | continue; |
2355 | } | |
2356 | ||
0ee00741 | 2357 | if (!lrport_is_enabled(op->nbrp)) { |
9975d7be BP |
2358 | /* Drop packets from disabled logical ports (since logical flow |
2359 | * tables are default-drop). */ | |
2360 | continue; | |
2361 | } | |
2362 | ||
09b39248 | 2363 | ds_clear(&match); |
4685e523 JP |
2364 | ds_put_format(&match, "(eth.mcast || eth.dst == %s) && inport == %s", |
2365 | op->lrp_networks.ea_s, op->json_key); | |
9975d7be | 2366 | ovn_lflow_add(lflows, op->od, S_ROUTER_IN_ADMISSION, 50, |
09b39248 | 2367 | ds_cstr(&match), "next;"); |
9975d7be BP |
2368 | } |
2369 | ||
2370 | /* Logical router ingress table 1: IP Input. */ | |
78aab811 | 2371 | HMAP_FOR_EACH (od, key_node, datapaths) { |
9975d7be BP |
2372 | if (!od->nbr) { |
2373 | continue; | |
2374 | } | |
2375 | ||
2376 | /* L3 admission control: drop multicast and broadcast source, localhost | |
2377 | * source or destination, and zero network source or destination | |
2378 | * (priority 100). */ | |
2379 | ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 100, | |
2380 | "ip4.mcast || " | |
2381 | "ip4.src == 255.255.255.255 || " | |
2382 | "ip4.src == 127.0.0.0/8 || " | |
2383 | "ip4.dst == 127.0.0.0/8 || " | |
2384 | "ip4.src == 0.0.0.0/8 || " | |
2385 | "ip4.dst == 0.0.0.0/8", | |
2386 | "drop;"); | |
2387 | ||
0bac7164 BP |
2388 | /* ARP reply handling. Use ARP replies to populate the logical |
2389 | * router's ARP table. */ | |
2390 | ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 90, "arp.op == 2", | |
2391 | "put_arp(inport, arp.spa, arp.sha);"); | |
2392 | ||
9975d7be BP |
2393 | /* Drop Ethernet local broadcast. By definition this traffic should |
2394 | * not be forwarded.*/ | |
2395 | ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 50, | |
2396 | "eth.bcast", "drop;"); | |
2397 | ||
9975d7be BP |
2398 | /* TTL discard. |
2399 | * | |
2400 | * XXX Need to send ICMP time exceeded if !ip.later_frag. */ | |
09b39248 JP |
2401 | ds_clear(&match); |
2402 | ds_put_cstr(&match, "ip4 && ip.ttl == {0, 1}"); | |
2403 | ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 30, | |
2404 | ds_cstr(&match), "drop;"); | |
9975d7be BP |
2405 | |
2406 | /* Pass other traffic not already handled to the next table for | |
2407 | * routing. */ | |
2408 | ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 0, "1", "next;"); | |
78aab811 JP |
2409 | } |
2410 | ||
9975d7be | 2411 | HMAP_FOR_EACH (op, key_node, ports) { |
0ee00741 | 2412 | if (!op->nbrp) { |
9975d7be BP |
2413 | continue; |
2414 | } | |
2415 | ||
2416 | /* L3 admission control: drop packets that originate from an IP address | |
2417 | * owned by the router or a broadcast address known to the router | |
2418 | * (priority 100). */ | |
09b39248 | 2419 | ds_clear(&match); |
4685e523 JP |
2420 | ds_put_cstr(&match, "ip4.src == "); |
2421 | op_put_networks(&match, op, true); | |
9975d7be | 2422 | ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 100, |
09b39248 | 2423 | ds_cstr(&match), "drop;"); |
9975d7be | 2424 | |
dd7652e6 | 2425 | /* ICMP echo reply. These flows reply to ICMP echo requests |
bb3c4568 FF |
2426 | * received for the router's IP address. Since packets only |
2427 | * get here as part of the logical router datapath, the inport | |
2428 | * (i.e. the incoming locally attached net) does not matter. | |
2429 | * The ip.ttl also does not matter (RFC1812 section 4.2.2.9) */ | |
09b39248 | 2430 | ds_clear(&match); |
4685e523 JP |
2431 | ds_put_cstr(&match, "ip4.dst == "); |
2432 | op_put_networks(&match, op, false); | |
2433 | ds_put_cstr(&match, " && icmp4.type == 8 && icmp4.code == 0"); | |
2434 | ||
09b39248 JP |
2435 | ds_clear(&actions); |
2436 | ds_put_format(&actions, | |
4685e523 | 2437 | "ip4.dst <-> ip4.src; " |
dd7652e6 JP |
2438 | "ip.ttl = 255; " |
2439 | "icmp4.type = 0; " | |
2440 | "inport = \"\"; /* Allow sending out inport. */ " | |
4685e523 | 2441 | "next; "); |
dd7652e6 | 2442 | ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90, |
09b39248 | 2443 | ds_cstr(&match), ds_cstr(&actions)); |
dd7652e6 | 2444 | |
9975d7be BP |
2445 | /* ARP reply. These flows reply to ARP requests for the router's own |
2446 | * IP address. */ | |
4685e523 JP |
2447 | for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) { |
2448 | ds_clear(&match); | |
2449 | ds_put_format(&match, | |
2450 | "inport == %s && arp.tpa == %s && arp.op == 1", | |
2451 | op->json_key, op->lrp_networks.ipv4_addrs[i].addr_s); | |
2452 | ||
2453 | ds_clear(&actions); | |
2454 | ds_put_format(&actions, | |
2455 | "eth.dst = eth.src; " | |
2456 | "eth.src = %s; " | |
2457 | "arp.op = 2; /* ARP reply */ " | |
2458 | "arp.tha = arp.sha; " | |
2459 | "arp.sha = %s; " | |
2460 | "arp.tpa = arp.spa; " | |
2461 | "arp.spa = %s; " | |
2462 | "outport = %s; " | |
2463 | "inport = \"\"; /* Allow sending out inport. */ " | |
2464 | "output;", | |
2465 | op->lrp_networks.ea_s, | |
2466 | op->lrp_networks.ea_s, | |
2467 | op->lrp_networks.ipv4_addrs[i].addr_s, | |
2468 | op->json_key); | |
2469 | ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90, | |
2470 | ds_cstr(&match), ds_cstr(&actions)); | |
2471 | } | |
9975d7be | 2472 | |
de297547 GS |
2473 | /* ARP handling for external IP addresses. |
2474 | * | |
2475 | * DNAT IP addresses are external IP addresses that need ARP | |
2476 | * handling. */ | |
2477 | for (int i = 0; i < op->od->nbr->n_nat; i++) { | |
2478 | const struct nbrec_nat *nat; | |
2479 | ||
2480 | nat = op->od->nbr->nat[i]; | |
2481 | ||
2482 | if(!strcmp(nat->type, "snat")) { | |
2483 | continue; | |
2484 | } | |
2485 | ||
2486 | ovs_be32 ip; | |
2487 | if (!ip_parse(nat->external_ip, &ip) || !ip) { | |
2488 | static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1); | |
2489 | VLOG_WARN_RL(&rl, "bad ip address %s in dnat configuration " | |
2490 | "for router %s", nat->external_ip, op->key); | |
2491 | continue; | |
2492 | } | |
2493 | ||
09b39248 JP |
2494 | ds_clear(&match); |
2495 | ds_put_format(&match, | |
2496 | "inport == %s && arp.tpa == "IP_FMT" && arp.op == 1", | |
2497 | op->json_key, IP_ARGS(ip)); | |
4685e523 | 2498 | |
09b39248 JP |
2499 | ds_clear(&actions); |
2500 | ds_put_format(&actions, | |
de297547 | 2501 | "eth.dst = eth.src; " |
4685e523 | 2502 | "eth.src = %s; " |
de297547 GS |
2503 | "arp.op = 2; /* ARP reply */ " |
2504 | "arp.tha = arp.sha; " | |
4685e523 | 2505 | "arp.sha = %s; " |
de297547 GS |
2506 | "arp.tpa = arp.spa; " |
2507 | "arp.spa = "IP_FMT"; " | |
2508 | "outport = %s; " | |
2509 | "inport = \"\"; /* Allow sending out inport. */ " | |
2510 | "output;", | |
4685e523 JP |
2511 | op->lrp_networks.ea_s, |
2512 | op->lrp_networks.ea_s, | |
de297547 GS |
2513 | IP_ARGS(ip), |
2514 | op->json_key); | |
2515 | ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90, | |
09b39248 | 2516 | ds_cstr(&match), ds_cstr(&actions)); |
de297547 GS |
2517 | } |
2518 | ||
4ef48e9d CSV |
2519 | /* Drop IP traffic to this router, unless the router ip is used as |
2520 | * SNAT ip. */ | |
4685e523 JP |
2521 | ovs_be32 *nat_ips = xmalloc(sizeof *nat_ips * op->od->nbr->n_nat); |
2522 | size_t n_nat_ips = 0; | |
4ef48e9d CSV |
2523 | for (int i = 0; i < op->od->nbr->n_nat; i++) { |
2524 | const struct nbrec_nat *nat; | |
2525 | ovs_be32 ip; | |
2526 | ||
2527 | nat = op->od->nbr->nat[i]; | |
2528 | if (strcmp(nat->type, "snat")) { | |
2529 | continue; | |
2530 | } | |
2531 | ||
2532 | if (!ip_parse(nat->external_ip, &ip) || !ip) { | |
2533 | static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1); | |
2534 | VLOG_WARN_RL(&rl, "bad ip address %s in snat configuration " | |
2535 | "for router %s", nat->external_ip, op->key); | |
2536 | continue; | |
2537 | } | |
2538 | ||
4685e523 JP |
2539 | nat_ips[n_nat_ips++] = ip; |
2540 | } | |
2541 | ||
2542 | ds_clear(&match); | |
2543 | ds_put_cstr(&match, "ip4.dst == {"); | |
2544 | bool has_drop_ips = false; | |
2545 | for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) { | |
2546 | for (int j = 0; j < n_nat_ips; j++) { | |
2547 | if (op->lrp_networks.ipv4_addrs[i].addr == nat_ips[j]) { | |
2548 | continue; | |
2549 | } | |
4ef48e9d | 2550 | } |
4685e523 JP |
2551 | ds_put_format(&match, "%s, ", |
2552 | op->lrp_networks.ipv4_addrs[i].addr_s); | |
2553 | has_drop_ips = true; | |
4ef48e9d | 2554 | } |
4685e523 JP |
2555 | ds_chomp(&match, ' '); |
2556 | ds_chomp(&match, ','); | |
2557 | ds_put_cstr(&match, "}"); | |
4ef48e9d | 2558 | |
4685e523 JP |
2559 | if (has_drop_ips) { |
2560 | /* Drop IP traffic to this router. */ | |
09b39248 JP |
2561 | ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 60, |
2562 | ds_cstr(&match), "drop;"); | |
4ef48e9d | 2563 | } |
4685e523 JP |
2564 | |
2565 | free(nat_ips); | |
9975d7be BP |
2566 | } |
2567 | ||
de297547 GS |
2568 | /* NAT in Gateway routers. */ |
2569 | HMAP_FOR_EACH (od, key_node, datapaths) { | |
2570 | if (!od->nbr) { | |
2571 | continue; | |
2572 | } | |
2573 | ||
2574 | /* Packets are allowed by default. */ | |
2575 | ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 0, "1", "next;"); | |
2576 | ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT, 0, "1", "next;"); | |
2577 | ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 0, "1", "next;"); | |
2578 | ||
2579 | /* NAT rules are only valid on Gateway routers. */ | |
2580 | if (!smap_get(&od->nbr->options, "chassis")) { | |
2581 | continue; | |
2582 | } | |
2583 | ||
2584 | for (int i = 0; i < od->nbr->n_nat; i++) { | |
2585 | const struct nbrec_nat *nat; | |
2586 | ||
2587 | nat = od->nbr->nat[i]; | |
2588 | ||
2589 | ovs_be32 ip, mask; | |
2590 | ||
2591 | char *error = ip_parse_masked(nat->external_ip, &ip, &mask); | |
2592 | if (error || mask != OVS_BE32_MAX) { | |
2593 | static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1); | |
2594 | VLOG_WARN_RL(&rl, "bad external ip %s for nat", | |
2595 | nat->external_ip); | |
2596 | free(error); | |
2597 | continue; | |
2598 | } | |
2599 | ||
2600 | /* Check the validity of nat->logical_ip. 'logical_ip' can | |
2601 | * be a subnet when the type is "snat". */ | |
2602 | error = ip_parse_masked(nat->logical_ip, &ip, &mask); | |
2603 | if (!strcmp(nat->type, "snat")) { | |
2604 | if (error) { | |
2605 | static struct vlog_rate_limit rl = | |
2606 | VLOG_RATE_LIMIT_INIT(5, 1); | |
2607 | VLOG_WARN_RL(&rl, "bad ip network or ip %s for snat " | |
2608 | "in router "UUID_FMT"", | |
2609 | nat->logical_ip, UUID_ARGS(&od->key)); | |
2610 | free(error); | |
2611 | continue; | |
2612 | } | |
2613 | } else { | |
2614 | if (error || mask != OVS_BE32_MAX) { | |
2615 | static struct vlog_rate_limit rl = | |
2616 | VLOG_RATE_LIMIT_INIT(5, 1); | |
2617 | VLOG_WARN_RL(&rl, "bad ip %s for dnat in router " | |
2618 | ""UUID_FMT"", nat->logical_ip, UUID_ARGS(&od->key)); | |
2619 | free(error); | |
2620 | continue; | |
2621 | } | |
2622 | } | |
2623 | ||
de297547 GS |
2624 | /* Ingress UNSNAT table: It is for already established connections' |
2625 | * reverse traffic. i.e., SNAT has already been done in egress | |
2626 | * pipeline and now the packet has entered the ingress pipeline as | |
2627 | * part of a reply. We undo the SNAT here. | |
2628 | * | |
2629 | * Undoing SNAT has to happen before DNAT processing. This is | |
2630 | * because when the packet was DNATed in ingress pipeline, it did | |
2631 | * not know about the possibility of eventual additional SNAT in | |
2632 | * egress pipeline. */ | |
2633 | if (!strcmp(nat->type, "snat") | |
2634 | || !strcmp(nat->type, "dnat_and_snat")) { | |
09b39248 JP |
2635 | ds_clear(&match); |
2636 | ds_put_format(&match, "ip && ip4.dst == %s", nat->external_ip); | |
de297547 | 2637 | ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 100, |
09b39248 | 2638 | ds_cstr(&match), "ct_snat; next;"); |
de297547 GS |
2639 | } |
2640 | ||
2641 | /* Ingress DNAT table: Packets enter the pipeline with destination | |
2642 | * IP address that needs to be DNATted from a external IP address | |
2643 | * to a logical IP address. */ | |
2644 | if (!strcmp(nat->type, "dnat") | |
2645 | || !strcmp(nat->type, "dnat_and_snat")) { | |
2646 | /* Packet when it goes from the initiator to destination. | |
2647 | * We need to zero the inport because the router can | |
2648 | * send the packet back through the same interface. */ | |
09b39248 JP |
2649 | ds_clear(&match); |
2650 | ds_put_format(&match, "ip && ip4.dst == %s", nat->external_ip); | |
2651 | ds_clear(&actions); | |
2652 | ds_put_format(&actions,"inport = \"\"; ct_dnat(%s);", | |
2653 | nat->logical_ip); | |
de297547 | 2654 | ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 100, |
09b39248 | 2655 | ds_cstr(&match), ds_cstr(&actions)); |
de297547 GS |
2656 | } |
2657 | ||
2658 | /* Egress SNAT table: Packets enter the egress pipeline with | |
2659 | * source ip address that needs to be SNATted to a external ip | |
2660 | * address. */ | |
2661 | if (!strcmp(nat->type, "snat") | |
2662 | || !strcmp(nat->type, "dnat_and_snat")) { | |
09b39248 JP |
2663 | ds_clear(&match); |
2664 | ds_put_format(&match, "ip && ip4.src == %s", nat->logical_ip); | |
2665 | ds_clear(&actions); | |
2666 | ds_put_format(&actions, "ct_snat(%s);", nat->external_ip); | |
de297547 GS |
2667 | |
2668 | /* The priority here is calculated such that the | |
2669 | * nat->logical_ip with the longest mask gets a higher | |
2670 | * priority. */ | |
2671 | ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT, | |
09b39248 JP |
2672 | count_1bits(ntohl(mask)) + 1, |
2673 | ds_cstr(&match), ds_cstr(&actions)); | |
de297547 GS |
2674 | } |
2675 | } | |
2676 | ||
2677 | /* Re-circulate every packet through the DNAT zone. | |
2678 | * This helps with two things. | |
2679 | * | |
2680 | * 1. Any packet that needs to be unDNATed in the reverse | |
2681 | * direction gets unDNATed. Ideally this could be done in | |
2682 | * the egress pipeline. But since the gateway router | |
2683 | * does not have any feature that depends on the source | |
2684 | * ip address being external IP address for IP routing, | |
2685 | * we can do it here, saving a future re-circulation. | |
2686 | * | |
2687 | * 2. Any packet that was sent through SNAT zone in the | |
2688 | * previous table automatically gets re-circulated to get | |
2689 | * back the new destination IP address that is needed for | |
2690 | * routing in the openflow pipeline. */ | |
2691 | ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 50, | |
2692 | "ip", "inport = \"\"; ct_dnat;"); | |
2693 | } | |
2694 | ||
94300e09 | 2695 | /* Logical router ingress table 4: IP Routing. |
9975d7be BP |
2696 | * |
2697 | * A packet that arrives at this table is an IP packet that should be | |
0bac7164 BP |
2698 | * routed to the address in ip4.dst. This table sets outport to the correct |
2699 | * output port, eth.src to the output port's MAC address, and reg0 to the | |
2700 | * next-hop IP address (leaving ip4.dst, the packet’s final destination, | |
2701 | * unchanged), and advances to the next table for ARP resolution. */ | |
9975d7be | 2702 | HMAP_FOR_EACH (op, key_node, ports) { |
0ee00741 | 2703 | if (!op->nbrp) { |
9975d7be BP |
2704 | continue; |
2705 | } | |
2706 | ||
4685e523 JP |
2707 | for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) { |
2708 | add_route(lflows, op, op->lrp_networks.ipv4_addrs[i].addr_s, | |
2709 | op->lrp_networks.ipv4_addrs[i].network_s, | |
2710 | op->lrp_networks.ipv4_addrs[i].plen, NULL); | |
2711 | } | |
9975d7be | 2712 | } |
4685e523 | 2713 | |
9975d7be BP |
2714 | HMAP_FOR_EACH (od, key_node, datapaths) { |
2715 | if (!od->nbr) { | |
2716 | continue; | |
2717 | } | |
2718 | ||
28dc3fe9 SR |
2719 | /* Convert the static routes to flows. */ |
2720 | for (int i = 0; i < od->nbr->n_static_routes; i++) { | |
2721 | const struct nbrec_logical_router_static_route *route; | |
2722 | ||
2723 | route = od->nbr->static_routes[i]; | |
2724 | build_static_route_flow(lflows, od, ports, route); | |
2725 | } | |
9975d7be BP |
2726 | } |
2727 | /* XXX destination unreachable */ | |
2728 | ||
94300e09 | 2729 | /* Local router ingress table 5: ARP Resolution. |
9975d7be BP |
2730 | * |
2731 | * Any packet that reaches this table is an IP packet whose next-hop IP | |
2732 | * address is in reg0. (ip4.dst is the final destination.) This table | |
2733 | * resolves the IP address in reg0 into an output port in outport and an | |
2734 | * Ethernet address in eth.dst. */ | |
2735 | HMAP_FOR_EACH (op, key_node, ports) { | |
0ee00741 | 2736 | if (op->nbrp) { |
509afdc3 GS |
2737 | /* This is a logical router port. If next-hop IP address in 'reg0' |
2738 | * matches ip address of this router port, then the packet is | |
2739 | * intended to eventually be sent to this logical port. Set the | |
2740 | * destination mac address using this port's mac address. | |
2741 | * | |
2742 | * The packet is still in peer's logical pipeline. So the match | |
2743 | * should be on peer's outport. */ | |
cd150899 | 2744 | if (op->peer && op->peer->nbrp) { |
09b39248 | 2745 | ds_clear(&match); |
4685e523 | 2746 | ds_put_format(&match, "outport == %s && reg0 == ", |
cd150899 | 2747 | op->peer->json_key); |
4685e523 JP |
2748 | op_put_networks(&match, op, false); |
2749 | ||
09b39248 | 2750 | ds_clear(&actions); |
4685e523 JP |
2751 | ds_put_format(&actions, "eth.dst = %s; next;", |
2752 | op->lrp_networks.ea_s); | |
cd150899 | 2753 | ovn_lflow_add(lflows, op->peer->od, S_ROUTER_IN_ARP_RESOLVE, |
09b39248 | 2754 | 100, ds_cstr(&match), ds_cstr(&actions)); |
509afdc3 | 2755 | } |
0ee00741 | 2756 | } else if (op->od->n_router_ports && strcmp(op->nbsp->type, "router")) { |
75cf9d2b GS |
2757 | /* This is a logical switch port that backs a VM or a container. |
2758 | * Extract its addresses. For each of the address, go through all | |
2759 | * the router ports attached to the switch (to which this port | |
2760 | * connects) and if the address in question is reachable from the | |
2761 | * router port, add an ARP entry in that router's pipeline. */ | |
2762 | ||
e93b43d6 | 2763 | for (size_t i = 0; i < op->n_lsp_addrs; i++) { |
4685e523 | 2764 | const char *ea_s = op->lsp_addrs[i].ea_s; |
e93b43d6 | 2765 | for (size_t j = 0; j < op->lsp_addrs[i].n_ipv4_addrs; j++) { |
4685e523 | 2766 | const char *ip_s = op->lsp_addrs[i].ipv4_addrs[j].addr_s; |
e93b43d6 | 2767 | for (size_t k = 0; k < op->od->n_router_ports; k++) { |
80f408f4 JP |
2768 | /* Get the Logical_Router_Port that the |
2769 | * Logical_Switch_Port is connected to, as | |
2770 | * 'peer'. */ | |
86e98048 | 2771 | const char *peer_name = smap_get( |
0ee00741 | 2772 | &op->od->router_ports[k]->nbsp->options, |
86e98048 BP |
2773 | "router-port"); |
2774 | if (!peer_name) { | |
2775 | continue; | |
2776 | } | |
2777 | ||
e93b43d6 | 2778 | struct ovn_port *peer = ovn_port_find(ports, peer_name); |
0ee00741 | 2779 | if (!peer || !peer->nbrp) { |
86e98048 BP |
2780 | continue; |
2781 | } | |
2782 | ||
4685e523 | 2783 | if (!find_lrp_member_ip(peer, ip_s)) { |
86e98048 BP |
2784 | continue; |
2785 | } | |
2786 | ||
09b39248 | 2787 | ds_clear(&match); |
e93b43d6 | 2788 | ds_put_format(&match, "outport == %s && reg0 == %s", |
4685e523 JP |
2789 | peer->json_key, ip_s); |
2790 | ||
09b39248 | 2791 | ds_clear(&actions); |
4685e523 | 2792 | ds_put_format(&actions, "eth.dst = %s; next;", ea_s); |
86e98048 | 2793 | ovn_lflow_add(lflows, peer->od, |
09b39248 JP |
2794 | S_ROUTER_IN_ARP_RESOLVE, 100, |
2795 | ds_cstr(&match), ds_cstr(&actions)); | |
86e98048 | 2796 | } |
9975d7be BP |
2797 | } |
2798 | } | |
0ee00741 | 2799 | } else if (!strcmp(op->nbsp->type, "router")) { |
75cf9d2b GS |
2800 | /* This is a logical switch port that connects to a router. */ |
2801 | ||
2802 | /* The peer of this switch port is the router port for which | |
2803 | * we need to add logical flows such that it can resolve | |
2804 | * ARP entries for all the other router ports connected to | |
2805 | * the switch in question. */ | |
2806 | ||
0ee00741 | 2807 | const char *peer_name = smap_get(&op->nbsp->options, |
75cf9d2b GS |
2808 | "router-port"); |
2809 | if (!peer_name) { | |
2810 | continue; | |
2811 | } | |
2812 | ||
2813 | struct ovn_port *peer = ovn_port_find(ports, peer_name); | |
0ee00741 | 2814 | if (!peer || !peer->nbrp) { |
75cf9d2b GS |
2815 | continue; |
2816 | } | |
2817 | ||
4685e523 | 2818 | for (size_t i = 0; i < op->od->n_router_ports; i++) { |
75cf9d2b | 2819 | const char *router_port_name = smap_get( |
0ee00741 | 2820 | &op->od->router_ports[i]->nbsp->options, |
75cf9d2b GS |
2821 | "router-port"); |
2822 | struct ovn_port *router_port = ovn_port_find(ports, | |
2823 | router_port_name); | |
0ee00741 | 2824 | if (!router_port || !router_port->nbrp) { |
75cf9d2b GS |
2825 | continue; |
2826 | } | |
2827 | ||
2828 | /* Skip the router port under consideration. */ | |
2829 | if (router_port == peer) { | |
2830 | continue; | |
2831 | } | |
2832 | ||
09b39248 | 2833 | ds_clear(&match); |
4685e523 JP |
2834 | ds_put_format(&match, "outport == %s && reg0 == ", |
2835 | peer->json_key); | |
2836 | op_put_networks(&match, router_port, false); | |
2837 | ||
09b39248 | 2838 | ds_clear(&actions); |
4685e523 JP |
2839 | ds_put_format(&actions, "eth.dst = %s; next;", |
2840 | router_port->lrp_networks.ea_s); | |
75cf9d2b | 2841 | ovn_lflow_add(lflows, peer->od, S_ROUTER_IN_ARP_RESOLVE, |
09b39248 | 2842 | 100, ds_cstr(&match), ds_cstr(&actions)); |
75cf9d2b | 2843 | } |
9975d7be BP |
2844 | } |
2845 | } | |
75cf9d2b | 2846 | |
0bac7164 BP |
2847 | HMAP_FOR_EACH (od, key_node, datapaths) { |
2848 | if (!od->nbr) { | |
2849 | continue; | |
2850 | } | |
2851 | ||
2852 | ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_RESOLVE, 0, "1", | |
2853 | "get_arp(outport, reg0); next;"); | |
2854 | } | |
2855 | ||
94300e09 | 2856 | /* Local router ingress table 6: ARP request. |
0bac7164 BP |
2857 | * |
2858 | * In the common case where the Ethernet destination has been resolved, | |
94300e09 JP |
2859 | * this table outputs the packet (priority 0). Otherwise, it composes |
2860 | * and sends an ARP request (priority 100). */ | |
0bac7164 BP |
2861 | HMAP_FOR_EACH (od, key_node, datapaths) { |
2862 | if (!od->nbr) { | |
2863 | continue; | |
2864 | } | |
2865 | ||
2866 | ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_REQUEST, 100, | |
2867 | "eth.dst == 00:00:00:00:00:00", | |
2868 | "arp { " | |
2869 | "eth.dst = ff:ff:ff:ff:ff:ff; " | |
2870 | "arp.spa = reg1; " | |
2871 | "arp.op = 1; " /* ARP request */ | |
2872 | "output; " | |
2873 | "};"); | |
2874 | ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_REQUEST, 0, "1", "output;"); | |
2875 | } | |
9975d7be | 2876 | |
de297547 | 2877 | /* Logical router egress table 1: Delivery (priority 100). |
9975d7be BP |
2878 | * |
2879 | * Priority 100 rules deliver packets to enabled logical ports. */ | |
2880 | HMAP_FOR_EACH (op, key_node, ports) { | |
0ee00741 | 2881 | if (!op->nbrp) { |
9975d7be BP |
2882 | continue; |
2883 | } | |
2884 | ||
0ee00741 | 2885 | if (!lrport_is_enabled(op->nbrp)) { |
9975d7be BP |
2886 | /* Drop packets to disabled logical ports (since logical flow |
2887 | * tables are default-drop). */ | |
2888 | continue; | |
2889 | } | |
2890 | ||
09b39248 JP |
2891 | ds_clear(&match); |
2892 | ds_put_format(&match, "outport == %s", op->json_key); | |
9975d7be | 2893 | ovn_lflow_add(lflows, op->od, S_ROUTER_OUT_DELIVERY, 100, |
09b39248 | 2894 | ds_cstr(&match), "output;"); |
9975d7be | 2895 | } |
09b39248 JP |
2896 | |
2897 | ds_destroy(&match); | |
2898 | ds_destroy(&actions); | |
9975d7be BP |
2899 | } |
2900 | ||
2901 | /* Updates the Logical_Flow and Multicast_Group tables in the OVN_SB database, | |
2902 | * constructing their contents based on the OVN_NB database. */ | |
2903 | static void | |
2904 | build_lflows(struct northd_context *ctx, struct hmap *datapaths, | |
2905 | struct hmap *ports) | |
2906 | { | |
2907 | struct hmap lflows = HMAP_INITIALIZER(&lflows); | |
2908 | struct hmap mcgroups = HMAP_INITIALIZER(&mcgroups); | |
2909 | ||
2910 | build_lswitch_flows(datapaths, ports, &lflows, &mcgroups); | |
2911 | build_lrouter_flows(datapaths, ports, &lflows); | |
2912 | ||
5868eb24 BP |
2913 | /* Push changes to the Logical_Flow table to database. */ |
2914 | const struct sbrec_logical_flow *sbflow, *next_sbflow; | |
2915 | SBREC_LOGICAL_FLOW_FOR_EACH_SAFE (sbflow, next_sbflow, ctx->ovnsb_idl) { | |
2916 | struct ovn_datapath *od | |
2917 | = ovn_datapath_from_sbrec(datapaths, sbflow->logical_datapath); | |
2918 | if (!od) { | |
2919 | sbrec_logical_flow_delete(sbflow); | |
2920 | continue; | |
eb00399e | 2921 | } |
eb00399e | 2922 | |
9975d7be | 2923 | enum ovn_datapath_type dp_type = od->nbs ? DP_SWITCH : DP_ROUTER; |
880fcd14 BP |
2924 | enum ovn_pipeline pipeline |
2925 | = !strcmp(sbflow->pipeline, "ingress") ? P_IN : P_OUT; | |
5868eb24 | 2926 | struct ovn_lflow *lflow = ovn_lflow_find( |
880fcd14 BP |
2927 | &lflows, od, ovn_stage_build(dp_type, pipeline, sbflow->table_id), |
2928 | sbflow->priority, sbflow->match, sbflow->actions); | |
5868eb24 BP |
2929 | if (lflow) { |
2930 | ovn_lflow_destroy(&lflows, lflow); | |
2931 | } else { | |
2932 | sbrec_logical_flow_delete(sbflow); | |
4edcdcf4 RB |
2933 | } |
2934 | } | |
5868eb24 BP |
2935 | struct ovn_lflow *lflow, *next_lflow; |
2936 | HMAP_FOR_EACH_SAFE (lflow, next_lflow, hmap_node, &lflows) { | |
880fcd14 BP |
2937 | enum ovn_pipeline pipeline = ovn_stage_get_pipeline(lflow->stage); |
2938 | uint8_t table = ovn_stage_get_table(lflow->stage); | |
2939 | ||
5868eb24 BP |
2940 | sbflow = sbrec_logical_flow_insert(ctx->ovnsb_txn); |
2941 | sbrec_logical_flow_set_logical_datapath(sbflow, lflow->od->sb); | |
9975d7be BP |
2942 | sbrec_logical_flow_set_pipeline( |
2943 | sbflow, pipeline == P_IN ? "ingress" : "egress"); | |
880fcd14 | 2944 | sbrec_logical_flow_set_table_id(sbflow, table); |
5868eb24 BP |
2945 | sbrec_logical_flow_set_priority(sbflow, lflow->priority); |
2946 | sbrec_logical_flow_set_match(sbflow, lflow->match); | |
2947 | sbrec_logical_flow_set_actions(sbflow, lflow->actions); | |
091e3af9 | 2948 | |
880fcd14 BP |
2949 | const struct smap ids = SMAP_CONST1(&ids, "stage-name", |
2950 | ovn_stage_to_str(lflow->stage)); | |
aaf881c6 | 2951 | sbrec_logical_flow_set_external_ids(sbflow, &ids); |
091e3af9 | 2952 | |
5868eb24 | 2953 | ovn_lflow_destroy(&lflows, lflow); |
eb00399e | 2954 | } |
5868eb24 BP |
2955 | hmap_destroy(&lflows); |
2956 | ||
2957 | /* Push changes to the Multicast_Group table to database. */ | |
2958 | const struct sbrec_multicast_group *sbmc, *next_sbmc; | |
2959 | SBREC_MULTICAST_GROUP_FOR_EACH_SAFE (sbmc, next_sbmc, ctx->ovnsb_idl) { | |
2960 | struct ovn_datapath *od = ovn_datapath_from_sbrec(datapaths, | |
2961 | sbmc->datapath); | |
2962 | if (!od) { | |
2963 | sbrec_multicast_group_delete(sbmc); | |
2964 | continue; | |
2965 | } | |
eb00399e | 2966 | |
5868eb24 BP |
2967 | struct multicast_group group = { .name = sbmc->name, |
2968 | .key = sbmc->tunnel_key }; | |
2969 | struct ovn_multicast *mc = ovn_multicast_find(&mcgroups, od, &group); | |
2970 | if (mc) { | |
2971 | ovn_multicast_update_sbrec(mc, sbmc); | |
2972 | ovn_multicast_destroy(&mcgroups, mc); | |
2973 | } else { | |
2974 | sbrec_multicast_group_delete(sbmc); | |
2975 | } | |
2976 | } | |
2977 | struct ovn_multicast *mc, *next_mc; | |
2978 | HMAP_FOR_EACH_SAFE (mc, next_mc, hmap_node, &mcgroups) { | |
2979 | sbmc = sbrec_multicast_group_insert(ctx->ovnsb_txn); | |
2980 | sbrec_multicast_group_set_datapath(sbmc, mc->datapath->sb); | |
2981 | sbrec_multicast_group_set_name(sbmc, mc->group->name); | |
2982 | sbrec_multicast_group_set_tunnel_key(sbmc, mc->group->key); | |
2983 | ovn_multicast_update_sbrec(mc, sbmc); | |
2984 | ovn_multicast_destroy(&mcgroups, mc); | |
4edcdcf4 | 2985 | } |
5868eb24 | 2986 | hmap_destroy(&mcgroups); |
4edcdcf4 | 2987 | } |
ea382567 RB |
2988 | |
2989 | /* OVN_Northbound and OVN_Southbound have an identical Address_Set table. | |
2990 | * We always update OVN_Southbound to match the current data in | |
2991 | * OVN_Northbound, so that the address sets used in Logical_Flows in | |
2992 | * OVN_Southbound is checked against the proper set.*/ | |
2993 | static void | |
2994 | sync_address_sets(struct northd_context *ctx) | |
2995 | { | |
2996 | struct shash sb_address_sets = SHASH_INITIALIZER(&sb_address_sets); | |
2997 | ||
2998 | const struct sbrec_address_set *sb_address_set; | |
2999 | SBREC_ADDRESS_SET_FOR_EACH (sb_address_set, ctx->ovnsb_idl) { | |
3000 | shash_add(&sb_address_sets, sb_address_set->name, sb_address_set); | |
3001 | } | |
3002 | ||
3003 | const struct nbrec_address_set *nb_address_set; | |
3004 | NBREC_ADDRESS_SET_FOR_EACH (nb_address_set, ctx->ovnnb_idl) { | |
3005 | sb_address_set = shash_find_and_delete(&sb_address_sets, | |
3006 | nb_address_set->name); | |
3007 | if (!sb_address_set) { | |
3008 | sb_address_set = sbrec_address_set_insert(ctx->ovnsb_txn); | |
3009 | sbrec_address_set_set_name(sb_address_set, nb_address_set->name); | |
3010 | } | |
3011 | ||
3012 | sbrec_address_set_set_addresses(sb_address_set, | |
3013 | /* "char **" is not compatible with "const char **" */ | |
3014 | (const char **) nb_address_set->addresses, | |
3015 | nb_address_set->n_addresses); | |
3016 | } | |
3017 | ||
3018 | struct shash_node *node, *next; | |
3019 | SHASH_FOR_EACH_SAFE (node, next, &sb_address_sets) { | |
3020 | sbrec_address_set_delete(node->data); | |
3021 | shash_delete(&sb_address_sets, node); | |
3022 | } | |
3023 | shash_destroy(&sb_address_sets); | |
3024 | } | |
5868eb24 | 3025 | \f |
4edcdcf4 | 3026 | static void |
331e7aef | 3027 | ovnnb_db_run(struct northd_context *ctx) |
4edcdcf4 | 3028 | { |
331e7aef NS |
3029 | if (!ctx->ovnsb_txn) { |
3030 | return; | |
3031 | } | |
5868eb24 BP |
3032 | struct hmap datapaths, ports; |
3033 | build_datapaths(ctx, &datapaths); | |
3034 | build_ports(ctx, &datapaths, &ports); | |
3035 | build_lflows(ctx, &datapaths, &ports); | |
3036 | ||
ea382567 RB |
3037 | sync_address_sets(ctx); |
3038 | ||
5868eb24 BP |
3039 | struct ovn_datapath *dp, *next_dp; |
3040 | HMAP_FOR_EACH_SAFE (dp, next_dp, key_node, &datapaths) { | |
3041 | ovn_datapath_destroy(&datapaths, dp); | |
3042 | } | |
3043 | hmap_destroy(&datapaths); | |
3044 | ||
3045 | struct ovn_port *port, *next_port; | |
3046 | HMAP_FOR_EACH_SAFE (port, next_port, key_node, &ports) { | |
3047 | ovn_port_destroy(&ports, port); | |
3048 | } | |
3049 | hmap_destroy(&ports); | |
ac0630a2 RB |
3050 | } |
3051 | ||
f93818dd RB |
3052 | /* |
3053 | * The only change we get notified about is if the 'chassis' column of the | |
dcda6e0d BP |
3054 | * 'Port_Binding' table changes. When this column is not empty, it means we |
3055 | * need to set the corresponding logical port as 'up' in the northbound DB. | |
f93818dd | 3056 | */ |
ac0630a2 | 3057 | static void |
331e7aef | 3058 | ovnsb_db_run(struct northd_context *ctx) |
ac0630a2 | 3059 | { |
331e7aef NS |
3060 | if (!ctx->ovnnb_txn) { |
3061 | return; | |
3062 | } | |
fc3113bc | 3063 | struct hmap lports_hmap; |
5868eb24 | 3064 | const struct sbrec_port_binding *sb; |
0ee00741 | 3065 | const struct nbrec_logical_switch_port *nbsp; |
fc3113bc RB |
3066 | |
3067 | struct lport_hash_node { | |
3068 | struct hmap_node node; | |
0ee00741 | 3069 | const struct nbrec_logical_switch_port *nbsp; |
4ec3d7c7 | 3070 | } *hash_node; |
f93818dd | 3071 | |
fc3113bc | 3072 | hmap_init(&lports_hmap); |
f93818dd | 3073 | |
0ee00741 | 3074 | NBREC_LOGICAL_SWITCH_PORT_FOR_EACH(nbsp, ctx->ovnnb_idl) { |
fc3113bc | 3075 | hash_node = xzalloc(sizeof *hash_node); |
0ee00741 HK |
3076 | hash_node->nbsp = nbsp; |
3077 | hmap_insert(&lports_hmap, &hash_node->node, hash_string(nbsp->name, 0)); | |
fc3113bc RB |
3078 | } |
3079 | ||
5868eb24 | 3080 | SBREC_PORT_BINDING_FOR_EACH(sb, ctx->ovnsb_idl) { |
0ee00741 | 3081 | nbsp = NULL; |
fc3113bc | 3082 | HMAP_FOR_EACH_WITH_HASH(hash_node, node, |
5868eb24 BP |
3083 | hash_string(sb->logical_port, 0), |
3084 | &lports_hmap) { | |
0ee00741 HK |
3085 | if (!strcmp(sb->logical_port, hash_node->nbsp->name)) { |
3086 | nbsp = hash_node->nbsp; | |
fc3113bc RB |
3087 | break; |
3088 | } | |
f93818dd RB |
3089 | } |
3090 | ||
0ee00741 | 3091 | if (!nbsp) { |
dcda6e0d | 3092 | /* The logical port doesn't exist for this port binding. This can |
2e2762d4 | 3093 | * happen under normal circumstances when ovn-northd hasn't gotten |
dcda6e0d | 3094 | * around to pruning the Port_Binding yet. */ |
f93818dd RB |
3095 | continue; |
3096 | } | |
3097 | ||
0ee00741 | 3098 | if (sb->chassis && (!nbsp->up || !*nbsp->up)) { |
f93818dd | 3099 | bool up = true; |
0ee00741 HK |
3100 | nbrec_logical_switch_port_set_up(nbsp, &up, 1); |
3101 | } else if (!sb->chassis && (!nbsp->up || *nbsp->up)) { | |
f93818dd | 3102 | bool up = false; |
0ee00741 | 3103 | nbrec_logical_switch_port_set_up(nbsp, &up, 1); |
f93818dd RB |
3104 | } |
3105 | } | |
fc3113bc | 3106 | |
4ec3d7c7 | 3107 | HMAP_FOR_EACH_POP(hash_node, node, &lports_hmap) { |
fc3113bc RB |
3108 | free(hash_node); |
3109 | } | |
3110 | hmap_destroy(&lports_hmap); | |
ac0630a2 RB |
3111 | } |
3112 | \f | |
45f98d4c | 3113 | |
/* Cached result of default_nb_db(); NULL until the first call. */
static char *default_nb_db_;

/* Returns the default northbound database remote, of the form
 * "unix:<ovs_rundir()>/ovnnb_db.sock".  The string is built on the first call
 * and the same pointer is returned on every later call. */
static const char *
default_nb_db(void)
{
    if (default_nb_db_) {
        return default_nb_db_;
    }

    default_nb_db_ = xasprintf("unix:%s/ovnnb_db.sock", ovs_rundir());
    return default_nb_db_;
}
3124 | ||
/* Cached result of default_sb_db(); NULL until the first call. */
static char *default_sb_db_;

/* Returns the default southbound database remote, of the form
 * "unix:<ovs_rundir()>/ovnsb_db.sock".  The string is built on the first call
 * and the same pointer is returned on every later call. */
static const char *
default_sb_db(void)
{
    if (default_sb_db_) {
        return default_sb_db_;
    }

    default_sb_db_ = xasprintf("unix:%s/ovnsb_db.sock", ovs_rundir());
    return default_sb_db_;
}
3135 | ||
3136 | static void | |
3137 | parse_options(int argc OVS_UNUSED, char *argv[] OVS_UNUSED) | |
3138 | { | |
3139 | enum { | |
67d9b930 | 3140 | DAEMON_OPTION_ENUMS, |
ac0630a2 RB |
3141 | VLOG_OPTION_ENUMS, |
3142 | }; | |
3143 | static const struct option long_options[] = { | |
ec78987f | 3144 | {"ovnsb-db", required_argument, NULL, 'd'}, |
ac0630a2 RB |
3145 | {"ovnnb-db", required_argument, NULL, 'D'}, |
3146 | {"help", no_argument, NULL, 'h'}, | |
3147 | {"options", no_argument, NULL, 'o'}, | |
3148 | {"version", no_argument, NULL, 'V'}, | |
67d9b930 | 3149 | DAEMON_LONG_OPTIONS, |
ac0630a2 RB |
3150 | VLOG_LONG_OPTIONS, |
3151 | STREAM_SSL_LONG_OPTIONS, | |
3152 | {NULL, 0, NULL, 0}, | |
3153 | }; | |
3154 | char *short_options = ovs_cmdl_long_options_to_short_options(long_options); | |
3155 | ||
3156 | for (;;) { | |
3157 | int c; | |
3158 | ||
3159 | c = getopt_long(argc, argv, short_options, long_options, NULL); | |
3160 | if (c == -1) { | |
3161 | break; | |
3162 | } | |
3163 | ||
3164 | switch (c) { | |
67d9b930 | 3165 | DAEMON_OPTION_HANDLERS; |
ac0630a2 RB |
3166 | VLOG_OPTION_HANDLERS; |
3167 | STREAM_SSL_OPTION_HANDLERS; | |
3168 | ||
3169 | case 'd': | |
ec78987f | 3170 | ovnsb_db = optarg; |
ac0630a2 RB |
3171 | break; |
3172 | ||
3173 | case 'D': | |
3174 | ovnnb_db = optarg; | |
3175 | break; | |
3176 | ||
3177 | case 'h': | |
3178 | usage(); | |
3179 | exit(EXIT_SUCCESS); | |
3180 | ||
3181 | case 'o': | |
3182 | ovs_cmdl_print_options(long_options); | |
3183 | exit(EXIT_SUCCESS); | |
3184 | ||
3185 | case 'V': | |
3186 | ovs_print_version(0, 0); | |
3187 | exit(EXIT_SUCCESS); | |
3188 | ||
3189 | default: | |
3190 | break; | |
3191 | } | |
3192 | } | |
3193 | ||
ec78987f | 3194 | if (!ovnsb_db) { |
60bdd011 | 3195 | ovnsb_db = default_sb_db(); |
ac0630a2 RB |
3196 | } |
3197 | ||
3198 | if (!ovnnb_db) { | |
60bdd011 | 3199 | ovnnb_db = default_nb_db(); |
ac0630a2 RB |
3200 | } |
3201 | ||
3202 | free(short_options); | |
3203 | } | |
3204 | ||
5868eb24 BP |
/* Registers 'column' with 'idl' through ovsdb_idl_add_column() and then calls
 * ovsdb_idl_omit_alert() for the same column.
 *
 * NOTE(review): the intent appears to be that the column is replicated but
 * changes to it do not count as database changes for the main loop; confirm
 * against the ovsdb-idl documentation. */
static void
add_column_noalert(struct ovsdb_idl *idl,
                   const struct ovsdb_idl_column *column)
{
    ovsdb_idl_add_column(idl, column);
    ovsdb_idl_omit_alert(idl, column);
}
3212 | ||
ac0630a2 RB |
3213 | int |
3214 | main(int argc, char *argv[]) | |
3215 | { | |
ac0630a2 | 3216 | int res = EXIT_SUCCESS; |
7b303ff9 AW |
3217 | struct unixctl_server *unixctl; |
3218 | int retval; | |
3219 | bool exiting; | |
ac0630a2 RB |
3220 | |
3221 | fatal_ignore_sigpipe(); | |
3222 | set_program_name(argv[0]); | |
485f0696 | 3223 | service_start(&argc, &argv); |
ac0630a2 | 3224 | parse_options(argc, argv); |
67d9b930 | 3225 | |
e91b927d | 3226 | daemonize_start(false); |
7b303ff9 AW |
3227 | |
3228 | retval = unixctl_server_create(NULL, &unixctl); | |
3229 | if (retval) { | |
3230 | exit(EXIT_FAILURE); | |
3231 | } | |
3232 | unixctl_command_register("exit", "", 0, 0, ovn_northd_exit, &exiting); | |
3233 | ||
3234 | daemonize_complete(); | |
67d9b930 | 3235 | |
ac0630a2 | 3236 | nbrec_init(); |
ec78987f | 3237 | sbrec_init(); |
ac0630a2 RB |
3238 | |
3239 | /* We want to detect all changes to the ovn-nb db. */ | |
331e7aef NS |
3240 | struct ovsdb_idl_loop ovnnb_idl_loop = OVSDB_IDL_LOOP_INITIALIZER( |
3241 | ovsdb_idl_create(ovnnb_db, &nbrec_idl_class, true, true)); | |
3242 | ||
3243 | struct ovsdb_idl_loop ovnsb_idl_loop = OVSDB_IDL_LOOP_INITIALIZER( | |
3244 | ovsdb_idl_create(ovnsb_db, &sbrec_idl_class, false, true)); | |
3245 | ||
3246 | ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_logical_flow); | |
3247 | add_column_noalert(ovnsb_idl_loop.idl, | |
3248 | &sbrec_logical_flow_col_logical_datapath); | |
3249 | add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_pipeline); | |
3250 | add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_table_id); | |
3251 | add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_priority); | |
3252 | add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_match); | |
3253 | add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_actions); | |
3254 | ||
3255 | ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_multicast_group); | |
3256 | add_column_noalert(ovnsb_idl_loop.idl, | |
3257 | &sbrec_multicast_group_col_datapath); | |
3258 | add_column_noalert(ovnsb_idl_loop.idl, | |
3259 | &sbrec_multicast_group_col_tunnel_key); | |
3260 | add_column_noalert(ovnsb_idl_loop.idl, &sbrec_multicast_group_col_name); | |
3261 | add_column_noalert(ovnsb_idl_loop.idl, &sbrec_multicast_group_col_ports); | |
3262 | ||
3263 | ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_datapath_binding); | |
3264 | add_column_noalert(ovnsb_idl_loop.idl, | |
3265 | &sbrec_datapath_binding_col_tunnel_key); | |
3266 | add_column_noalert(ovnsb_idl_loop.idl, | |
3267 | &sbrec_datapath_binding_col_external_ids); | |
3268 | ||
3269 | ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_port_binding); | |
3270 | add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_datapath); | |
3271 | add_column_noalert(ovnsb_idl_loop.idl, | |
3272 | &sbrec_port_binding_col_logical_port); | |
3273 | add_column_noalert(ovnsb_idl_loop.idl, | |
3274 | &sbrec_port_binding_col_tunnel_key); | |
3275 | add_column_noalert(ovnsb_idl_loop.idl, | |
3276 | &sbrec_port_binding_col_parent_port); | |
3277 | add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_tag); | |
3278 | add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_type); | |
3279 | add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_options); | |
3280 | add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_mac); | |
3281 | ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_port_binding_col_chassis); | |
3282 | ||
ea382567 RB |
3283 | ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_address_set); |
3284 | add_column_noalert(ovnsb_idl_loop.idl, &sbrec_address_set_col_name); | |
3285 | add_column_noalert(ovnsb_idl_loop.idl, &sbrec_address_set_col_addresses); | |
3286 | ||
331e7aef | 3287 | /* Main loop. */ |
7b303ff9 AW |
3288 | exiting = false; |
3289 | while (!exiting) { | |
331e7aef NS |
3290 | struct northd_context ctx = { |
3291 | .ovnnb_idl = ovnnb_idl_loop.idl, | |
3292 | .ovnnb_txn = ovsdb_idl_loop_run(&ovnnb_idl_loop), | |
3293 | .ovnsb_idl = ovnsb_idl_loop.idl, | |
3294 | .ovnsb_txn = ovsdb_idl_loop_run(&ovnsb_idl_loop), | |
3295 | }; | |
ac0630a2 | 3296 | |
8c0fae89 NS |
3297 | ovnnb_db_run(&ctx); |
3298 | ovnsb_db_run(&ctx); | |
f93818dd | 3299 | |
331e7aef NS |
3300 | unixctl_server_run(unixctl); |
3301 | unixctl_server_wait(unixctl); | |
3302 | if (exiting) { | |
3303 | poll_immediate_wake(); | |
ac0630a2 | 3304 | } |
331e7aef NS |
3305 | ovsdb_idl_loop_commit_and_wait(&ovnnb_idl_loop); |
3306 | ovsdb_idl_loop_commit_and_wait(&ovnsb_idl_loop); | |
ac0630a2 | 3307 | |
331e7aef | 3308 | poll_block(); |
485f0696 GS |
3309 | if (should_service_stop()) { |
3310 | exiting = true; | |
3311 | } | |
ac0630a2 RB |
3312 | } |
3313 | ||
7b303ff9 | 3314 | unixctl_server_destroy(unixctl); |
331e7aef NS |
3315 | ovsdb_idl_loop_destroy(&ovnnb_idl_loop); |
3316 | ovsdb_idl_loop_destroy(&ovnsb_idl_loop); | |
485f0696 | 3317 | service_stop(); |
ac0630a2 | 3318 | |
60bdd011 RM |
3319 | free(default_nb_db_); |
3320 | free(default_sb_db_); | |
ac0630a2 RB |
3321 | exit(res); |
3322 | } | |
7b303ff9 AW |
3323 | |
3324 | static void | |
3325 | ovn_northd_exit(struct unixctl_conn *conn, int argc OVS_UNUSED, | |
3326 | const char *argv[] OVS_UNUSED, void *exiting_) | |
3327 | { | |
3328 | bool *exiting = exiting_; | |
3329 | *exiting = true; | |
3330 | ||
3331 | unixctl_command_reply(conn, NULL); | |
3332 | } |