]>
Commit | Line | Data |
---|---|---|
ccc09689 EJ |
1 | /* Copyright (c) 2013 Nicira, Inc. |
2 | * | |
3 | * Licensed under the Apache License, Version 2.0 (the "License"); | |
4 | * you may not use this file except in compliance with the License. | |
5 | * You may obtain a copy of the License at: | |
6 | * | |
7 | * http://www.apache.org/licenses/LICENSE-2.0 | |
8 | * | |
9 | * Unless required by applicable law or agreed to in writing, software | |
10 | * distributed under the License is distributed on an "AS IS" BASIS, | |
11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
12 | * See the License for the specific language governing permissions and | |
13 | * limitations under the License. */ | |
14 | ||
15 | #include <config.h> | |
16 | #include "bfd.h" | |
17 | ||
18 | #include <arpa/inet.h> | |
19 | ||
20 | #include "csum.h" | |
21 | #include "dpif.h" | |
22 | #include "dynamic-string.h" | |
23 | #include "flow.h" | |
24 | #include "hash.h" | |
25 | #include "hmap.h" | |
26 | #include "list.h" | |
27 | #include "netlink.h" | |
28 | #include "odp-util.h" | |
29 | #include "ofpbuf.h" | |
30 | #include "openvswitch/types.h" | |
31 | #include "packets.h" | |
32 | #include "poll-loop.h" | |
33 | #include "random.h" | |
34 | #include "smap.h" | |
35 | #include "timeval.h" | |
36 | #include "unixctl.h" | |
37 | #include "util.h" | |
38 | #include "vlog.h" | |
39 | ||
40 | VLOG_DEFINE_THIS_MODULE(bfd); | |
41 | ||
42 | /* XXX Finish BFD. | |
43 | * | |
44 | * The goal of this module is to replace CFM with something both more flexible | |
45 | * and standards compliant. In service of this goal, the following needs to be | |
46 | * done. | |
47 | * | |
48 | * - Compliance | |
49 | * * Implement Demand mode. | |
50 | * * Go through the RFC line by line and verify we comply. | |
51 | * * Test against a hardware implementation. Preferably a popular one. | |
52 | * * Delete BFD packets with nw_ttl != 255 in the datapath to prevent DOS | |
53 | * attacks. | |
54 | * | |
55 | * - Unit tests. | |
56 | * | |
57 | * - BFD show into ovs-bugtool. | |
58 | * | |
59 | * - Set TOS/PCP on inner BFD frame, and outer tunnel header when encapped. | |
60 | * | |
61 | * - CFM "check_tnl_key" option equivalent. | |
62 | * | |
63 | * - CFM "fault override" equivalent. | |
64 | * | |
65 | * - Sending BFD messages should be in its own thread/process. | |
66 | * | |
67 | * - Scale testing. How does it operate when there are a large number of bfd | |
68 | * sessions? Do we ever have random flaps? What's the CPU utilization? | |
69 | * | |
70 | * - Rely on data traffic for liveness by using BFD demand mode. | |
71 | * If we're receiving traffic on a port, we can safely assume it's up (modulo | |
72 | * unidirectional failures). BFD has a demand mode in which it can stay quiet | |
73 | * unless it feels the need to check the status of the port. Using this, we | |
74 | * can implement a strategy in which BFD only sends control messages on dark | |
75 | * interfaces. | |
76 | * | |
77 | * - Depending on how one interprets the spec, it appears that a BFD session | |
78 | * can never change bfd.LocalDiag to "No Diagnostic". We should verify that | |
79 | * this is what hardware implementations actually do. Seems like "No | |
80 | * Diagnostic" should be set once a BFD session state goes UP. */ | |
81 | ||
/* BFD protocol version.  It is written into the version bits of every message
 * this module sends, and received messages carrying any other version are
 * rejected. */
#define BFD_VERSION 1
83 | ||
/* Flag bits of a BFD control message.  They occupy the low six bits of the
 * second header byte (the "P F C A D M" columns of the RFC 5880 Section 4.1
 * diagram); the remaining two bits of that byte hold the session state. */
enum flags {
    FLAG_MULTIPOINT = 0x01,
    FLAG_DEMAND     = 0x02,
    FLAG_AUTH       = 0x04,
    FLAG_CTL        = 0x08,
    FLAG_FINAL      = 0x10,
    FLAG_POLL       = 0x20
};
92 | ||
/* BFD session states.  Each value is already shifted into the two high bits
 * of the flags byte, so a state can be OR'd with 'enum flags' bits to form
 * that byte and recovered again by masking. */
enum state {
    STATE_ADMIN_DOWN = 0x00,
    STATE_DOWN       = 0x40,
    STATE_INIT       = 0x80,
    STATE_UP         = 0xC0
};
99 | ||
/* Diagnostic codes carried in the low bits of the first header byte.  The
 * enumerators are consecutive, starting from zero. */
enum diag {
    DIAG_NONE = 0,      /* No Diagnostic. */
    DIAG_EXPIRED,       /* Control Detection Time Expired. */
    DIAG_ECHO_FAILED,   /* Echo Function Failed. */
    DIAG_RMT_DOWN,      /* Neighbor Signaled Session Down. */
    DIAG_FWD_RESET,     /* Forwarding Plane Reset. */
    DIAG_PATH_DOWN,     /* Path Down. */
    DIAG_CPATH_DOWN,    /* Concatenated Path Down. */
    DIAG_ADMIN_DOWN,    /* Administratively Down. */
    DIAG_RCPATH_DOWN    /* Reverse Concatenated Path Down. */
};
111 | ||
/* RFC 5880 Section 4.1
 *   0                   1                   2                   3
 *   0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
 *  +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 *  |Vers |  Diag   |Sta|P|F|C|A|D|M|  Detect Mult  |    Length     |
 *  +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 *  |                       My Discriminator                        |
 *  +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 *  |                      Your Discriminator                       |
 *  +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 *  |                    Desired Min TX Interval                    |
 *  +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 *  |                   Required Min RX Interval                    |
 *  +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 *  |                 Required Min Echo RX Interval                 |
 *  +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 *
 * On-the-wire layout of a BFD control message without an authentication
 * section.  The 32-bit fields are in network byte order; the three interval
 * fields are expressed in microseconds on the wire, whereas the rest of this
 * file keeps intervals in milliseconds. */
struct msg {
    uint8_t vers_diag;    /* Version and diagnostic. */
    uint8_t flags;        /* 2bit State field followed by flags. */
    uint8_t mult;         /* Fault detection multiplier. */
    uint8_t length;       /* Length of this BFD message. */
    ovs_be32 my_disc;     /* My discriminator. */
    ovs_be32 your_disc;   /* Your discriminator. */
    ovs_be32 min_tx;      /* Desired minimum tx interval. */
    ovs_be32 min_rx;      /* Required minimum rx interval. */
    ovs_be32 min_rx_echo; /* Required minimum echo rx interval. */
};
/* The structure must match the wire size exactly, with no padding. */
BUILD_ASSERT_DECL(BFD_PACKET_LEN == sizeof(struct msg));
140 | ||
/* Helpers for picking apart the first two bytes of 'struct msg'. */
#define DIAG_MASK 0x1f  /* Diagnostic bits within 'vers_diag'. */
#define VERS_SHIFT 5    /* Right shift that extracts the version from 'vers_diag'. */
#define STATE_MASK 0xC0 /* Session state bits within 'flags'. */
#define FLAGS_MASK 0x3f /* 'enum flags' bits within 'flags'. */
145 | ||
/* State of a single BFD session.  Comments of the form "bfd.Xxx" name the
 * RFC 5880 state variable that the member corresponds to. */
struct bfd {
    struct hmap_node node;        /* In 'all_bfds'. */
    uint32_t disc;                /* bfd.LocalDiscr. Key in 'all_bfds' hmap. */

    char *name;                   /* Name used for logging. */

    bool cpath_down;              /* Concatenated Path Down. */
    uint8_t mult;                 /* bfd.DetectMult. */

    enum state state;             /* bfd.SessionState. */
    enum state rmt_state;         /* bfd.RemoteSessionState. */

    enum diag diag;               /* bfd.LocalDiag. */
    enum diag rmt_diag;           /* Remote diagnostic. */

    enum flags flags;             /* Flags sent on messages. */
    enum flags rmt_flags;         /* Flags last received. */

    uint32_t rmt_disc;            /* bfd.RemoteDiscr. */

    uint16_t udp_src;             /* UDP source port. */

    /* All timers in milliseconds. */
    long long int rmt_min_rx;     /* bfd.RemoteMinRxInterval. */
    long long int rmt_min_tx;     /* Remote minimum TX interval. */

    long long int cfg_min_tx;     /* Configured minimum TX rate. */
    long long int cfg_min_rx;     /* Configured required minimum RX rate. */
    long long int poll_min_tx;    /* Min TX negotiating in a poll sequence. */
    long long int poll_min_rx;    /* Min RX negotiating in a poll sequence. */
    long long int min_tx;         /* bfd.DesiredMinTxInterval. */
    long long int min_rx;         /* bfd.RequiredMinRxInterval. */

    long long int last_tx;        /* Last TX time. */
    long long int next_tx;        /* Next TX time. */
    long long int detect_time;    /* RFC 5880 6.8.4 Detection time. */
};
183 | ||
/* Forward declarations of helpers that are defined further down in this
 * file. */
static bool bfd_in_poll(const struct bfd *);
static void bfd_poll(struct bfd *bfd);
static const char *bfd_diag_str(enum diag);
static const char *bfd_state_str(enum state);
static long long int bfd_min_tx(const struct bfd *);
static long long int bfd_tx_interval(const struct bfd *);
static long long int bfd_rx_interval(const struct bfd *);
static void bfd_set_next_tx(struct bfd *);
static void bfd_set_state(struct bfd *, enum state, enum diag);
static uint32_t generate_discriminator(void);
static void bfd_put_details(struct ds *, const struct bfd *);
static void bfd_unixctl_show(struct unixctl_conn *, int argc,
                             const char *argv[], void *aux OVS_UNUSED);
static void log_msg(enum vlog_level, const struct msg *, const char *message,
                    const struct bfd *);

/* Rate limiter shared by the rate-limited log statements in this file. */
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(20, 20);
/* All sessions created by bfd_configure(), hashed on their local
 * discriminator ('disc'). */
static struct hmap all_bfds = HMAP_INITIALIZER(&all_bfds);
202 | ||
203 | /* Returns true if the interface on which 'bfd' is running may be used to | |
204 | * forward traffic according to the BFD session state. */ | |
205 | bool | |
206 | bfd_forwarding(const struct bfd *bfd) | |
207 | { | |
208 | return bfd->state == STATE_UP | |
209 | && bfd->rmt_diag != DIAG_PATH_DOWN | |
210 | && bfd->rmt_diag != DIAG_CPATH_DOWN | |
211 | && bfd->rmt_diag != DIAG_RCPATH_DOWN; | |
212 | } | |
213 | ||
214 | /* Returns a 'smap' of key value pairs representing the status of 'bfd' | |
215 | * intended for the OVS database. */ | |
216 | void | |
217 | bfd_get_status(const struct bfd *bfd, struct smap *smap) | |
218 | { | |
219 | smap_add(smap, "forwarding", bfd_forwarding(bfd) ? "true" : "false"); | |
220 | smap_add(smap, "state", bfd_state_str(bfd->state)); | |
221 | smap_add(smap, "diagnostic", bfd_diag_str(bfd->diag)); | |
222 | ||
223 | if (bfd->state != STATE_DOWN) { | |
224 | smap_add(smap, "remote_state", bfd_state_str(bfd->rmt_state)); | |
225 | smap_add(smap, "remote_diagnostic", bfd_diag_str(bfd->rmt_diag)); | |
226 | } | |
227 | } | |
228 | ||
229 | /* Initializes, destroys, or reconfigures the BFD session 'bfd' (named 'name'), | |
230 | * according to the database configuration contained in 'cfg'. Takes ownership | |
231 | * of 'bfd', which may be NULL. Returns a BFD object which may be used as a | |
8aee94b6 PR |
232 | * handle for the session, or NULL if BFD is not enabled according to 'cfg'. |
233 | * Also returns NULL if cfg is NULL. */ | |
ccc09689 EJ |
234 | struct bfd * |
235 | bfd_configure(struct bfd *bfd, const char *name, | |
236 | const struct smap *cfg) | |
237 | { | |
238 | static uint16_t udp_src = 0; | |
239 | static bool init = false; | |
240 | ||
241 | long long int min_tx, min_rx; | |
242 | bool cpath_down; | |
243 | ||
244 | if (!init) { | |
245 | unixctl_command_register("bfd/show", "[interface]", 0, 1, | |
246 | bfd_unixctl_show, NULL); | |
247 | init = true; | |
248 | } | |
249 | ||
8aee94b6 | 250 | if (!cfg || !smap_get_bool(cfg, "enable", false)) { |
ccc09689 EJ |
251 | if (bfd) { |
252 | hmap_remove(&all_bfds, &bfd->node); | |
253 | free(bfd->name); | |
254 | free(bfd); | |
255 | } | |
256 | return NULL; | |
257 | } | |
258 | ||
259 | if (!bfd) { | |
260 | bfd = xzalloc(sizeof *bfd); | |
261 | bfd->name = xstrdup(name); | |
262 | bfd->disc = generate_discriminator(); | |
263 | hmap_insert(&all_bfds, &bfd->node, bfd->disc); | |
264 | ||
265 | bfd->diag = DIAG_NONE; | |
266 | bfd->min_tx = 1000; | |
267 | bfd->mult = 3; | |
268 | ||
269 | /* RFC 5881 section 4 | |
270 | * The source port MUST be in the range 49152 through 65535. The same | |
271 | * UDP source port number MUST be used for all BFD Control packets | |
272 | * associated with a particular session. The source port number SHOULD | |
273 | * be unique among all BFD sessions on the system. */ | |
274 | bfd->udp_src = (udp_src++ % 16384) + 49152; | |
275 | ||
276 | bfd_set_state(bfd, STATE_DOWN, DIAG_NONE); | |
277 | } | |
278 | ||
279 | min_tx = smap_get_int(cfg, "min_tx", 100); | |
280 | min_tx = MAX(min_tx, 100); | |
281 | if (bfd->cfg_min_tx != min_tx) { | |
282 | bfd->cfg_min_tx = min_tx; | |
283 | if (bfd->state != STATE_UP | |
284 | || (!bfd_in_poll(bfd) && bfd->cfg_min_tx < bfd->min_tx)) { | |
285 | bfd->min_tx = bfd->cfg_min_tx; | |
286 | } | |
287 | bfd_poll(bfd); | |
288 | } | |
289 | ||
290 | min_rx = smap_get_int(cfg, "min_rx", 1000); | |
291 | min_rx = MAX(min_rx, 100); | |
292 | if (bfd->cfg_min_rx != min_rx) { | |
293 | bfd->cfg_min_rx = min_rx; | |
294 | if (bfd->state != STATE_UP | |
295 | || (!bfd_in_poll(bfd) && bfd->cfg_min_rx > bfd->min_rx)) { | |
296 | bfd->min_rx = bfd->cfg_min_rx; | |
297 | } | |
298 | bfd_poll(bfd); | |
299 | } | |
300 | ||
301 | cpath_down = smap_get_bool(cfg, "cpath_down", false); | |
302 | if (bfd->cpath_down != cpath_down) { | |
303 | bfd->cpath_down = cpath_down; | |
304 | if (bfd->diag == DIAG_NONE || bfd->diag == DIAG_CPATH_DOWN) { | |
305 | bfd_set_state(bfd, bfd->state, DIAG_NONE); | |
306 | } | |
307 | bfd_poll(bfd); | |
308 | } | |
309 | return bfd; | |
310 | } | |
311 | ||
312 | void | |
313 | bfd_wait(const struct bfd *bfd) | |
314 | { | |
315 | if (bfd->flags & FLAG_FINAL) { | |
316 | poll_immediate_wake(); | |
317 | } | |
318 | ||
319 | poll_timer_wait_until(bfd->next_tx); | |
320 | if (bfd->state > STATE_DOWN) { | |
321 | poll_timer_wait_until(bfd->detect_time); | |
322 | } | |
323 | } | |
324 | ||
325 | void | |
326 | bfd_run(struct bfd *bfd) | |
327 | { | |
328 | if (bfd->state > STATE_DOWN && time_msec() >= bfd->detect_time) { | |
329 | bfd_set_state(bfd, STATE_DOWN, DIAG_EXPIRED); | |
330 | } | |
331 | ||
332 | if (bfd->min_tx != bfd->cfg_min_tx || bfd->min_rx != bfd->cfg_min_rx) { | |
333 | bfd_poll(bfd); | |
334 | } | |
335 | } | |
336 | ||
337 | bool | |
338 | bfd_should_send_packet(const struct bfd *bfd) | |
339 | { | |
340 | return bfd->flags & FLAG_FINAL || time_msec() >= bfd->next_tx; | |
341 | } | |
342 | ||
343 | void | |
344 | bfd_put_packet(struct bfd *bfd, struct ofpbuf *p, | |
345 | uint8_t eth_src[ETH_ADDR_LEN]) | |
346 | { | |
347 | long long int min_tx, min_rx; | |
348 | struct udp_header *udp; | |
349 | struct eth_header *eth; | |
350 | struct ip_header *ip; | |
351 | struct msg *msg; | |
352 | ||
353 | if (bfd->next_tx) { | |
354 | long long int delay = time_msec() - bfd->next_tx; | |
355 | long long int interval = bfd_tx_interval(bfd); | |
356 | if (delay > interval * 3 / 2) { | |
357 | VLOG_WARN("%s: long delay of %lldms (expected %lldms) sending BFD" | |
358 | " control message", bfd->name, delay, interval); | |
359 | } | |
360 | } | |
361 | ||
362 | /* RFC 5880 Section 6.5 | |
363 | * A BFD Control packet MUST NOT have both the Poll (P) and Final (F) bits | |
364 | * set. */ | |
365 | ovs_assert(!(bfd->flags & FLAG_POLL) || !(bfd->flags & FLAG_FINAL)); | |
366 | ||
367 | ofpbuf_reserve(p, 2); /* Properly align after the ethernet header. */ | |
368 | eth = ofpbuf_put_uninit(p, sizeof *eth); | |
369 | memcpy(eth->eth_dst, eth_addr_broadcast, ETH_ADDR_LEN); | |
370 | memcpy(eth->eth_src, eth_src, ETH_ADDR_LEN); | |
371 | eth->eth_type = htons(ETH_TYPE_IP); | |
372 | ||
373 | ip = ofpbuf_put_zeros(p, sizeof *ip); | |
374 | ip->ip_ihl_ver = IP_IHL_VER(5, 4); | |
375 | ip->ip_tot_len = htons(sizeof *ip + sizeof *udp + sizeof *msg); | |
376 | ip->ip_ttl = 255; | |
377 | ip->ip_proto = IPPROTO_UDP; | |
378 | ip->ip_src = htonl(0xA9FE0100); /* 169.254.1.0 Link Local. */ | |
379 | ip->ip_dst = htonl(0xA9FE0101); /* 169.254.1.1 Link Local. */ | |
380 | ip->ip_csum = csum(ip, sizeof *ip); | |
381 | ||
382 | udp = ofpbuf_put_zeros(p, sizeof *udp); | |
383 | udp->udp_src = htons(bfd->udp_src); | |
384 | udp->udp_dst = htons(BFD_DEST_PORT); | |
385 | udp->udp_len = htons(sizeof *udp + sizeof *msg); | |
386 | ||
387 | msg = ofpbuf_put_uninit(p, sizeof *msg); | |
388 | msg->vers_diag = (BFD_VERSION << 5) | bfd->diag; | |
389 | msg->flags = (bfd->state & STATE_MASK) | bfd->flags; | |
390 | ||
391 | msg->mult = bfd->mult; | |
392 | msg->length = BFD_PACKET_LEN; | |
393 | msg->my_disc = htonl(bfd->disc); | |
394 | msg->your_disc = htonl(bfd->rmt_disc); | |
395 | msg->min_rx_echo = htonl(0); | |
396 | ||
397 | if (bfd_in_poll(bfd)) { | |
398 | min_tx = bfd->poll_min_tx; | |
399 | min_rx = bfd->poll_min_rx; | |
400 | } else { | |
401 | min_tx = bfd_min_tx(bfd); | |
402 | min_rx = bfd->min_rx; | |
403 | } | |
404 | ||
405 | msg->min_tx = htonl(min_tx * 1000); | |
406 | msg->min_rx = htonl(min_rx * 1000); | |
407 | ||
408 | bfd->flags &= ~FLAG_FINAL; | |
409 | ||
410 | log_msg(VLL_DBG, msg, "Sending BFD Message", bfd); | |
411 | ||
412 | bfd->last_tx = time_msec(); | |
413 | bfd_set_next_tx(bfd); | |
414 | } | |
415 | ||
416 | bool | |
642dc74d | 417 | bfd_should_process_flow(const struct flow *flow, struct flow_wildcards *wc) |
ccc09689 | 418 | { |
642dc74d JP |
419 | memset(&wc->masks.nw_proto, 0xff, sizeof wc->masks.nw_proto); |
420 | memset(&wc->masks.tp_dst, 0xff, sizeof wc->masks.tp_dst); | |
ccc09689 EJ |
421 | return (flow->dl_type == htons(ETH_TYPE_IP) |
422 | && flow->nw_proto == IPPROTO_UDP | |
423 | && flow->tp_dst == htons(3784)); | |
424 | } | |
425 | ||
426 | void | |
427 | bfd_process_packet(struct bfd *bfd, const struct flow *flow, | |
428 | const struct ofpbuf *p) | |
429 | { | |
430 | uint32_t rmt_min_rx, pkt_your_disc; | |
431 | enum state rmt_state; | |
432 | enum flags flags; | |
433 | uint8_t version; | |
434 | struct msg *msg; | |
435 | ||
436 | /* This function is designed to follow section RFC 5880 6.8.6 closely. */ | |
437 | ||
438 | if (flow->nw_ttl != 255) { | |
439 | /* XXX Should drop in the kernel to prevent DOS. */ | |
440 | return; | |
441 | } | |
442 | ||
443 | msg = ofpbuf_at(p, (uint8_t *)p->l7 - (uint8_t *)p->data, BFD_PACKET_LEN); | |
444 | if (!msg) { | |
445 | VLOG_INFO_RL(&rl, "%s: Received unparseable BFD control message.", | |
446 | bfd->name); | |
447 | return; | |
448 | } | |
449 | ||
450 | /* RFC 5880 Section 6.8.6 | |
451 | * If the Length field is greater than the payload of the encapsulating | |
452 | * protocol, the packet MUST be discarded. | |
453 | * | |
454 | * Note that we make this check implicity. Above we use ofpbuf_at() to | |
455 | * ensure that there are at least BFD_PACKET_LEN bytes in the payload of | |
456 | * the encapsulating protocol. Below we require msg->length to be exactly | |
457 | * BFD_PACKET_LEN bytes. */ | |
458 | ||
459 | flags = msg->flags & FLAGS_MASK; | |
460 | rmt_state = msg->flags & STATE_MASK; | |
461 | version = msg->vers_diag >> VERS_SHIFT; | |
462 | ||
463 | log_msg(VLL_DBG, msg, "Received BFD control message", bfd); | |
464 | ||
465 | if (version != BFD_VERSION) { | |
466 | log_msg(VLL_WARN, msg, "Incorrect version", bfd); | |
467 | return; | |
468 | } | |
469 | ||
470 | /* Technically this should happen after the length check. We don't support | |
471 | * authentication however, so it's simpler to do the check first. */ | |
472 | if (flags & FLAG_AUTH) { | |
473 | log_msg(VLL_WARN, msg, "Authenticated control message with" | |
474 | " authentication disabled", bfd); | |
475 | return; | |
476 | } | |
477 | ||
478 | if (msg->length != BFD_PACKET_LEN) { | |
479 | log_msg(VLL_WARN, msg, "Unexpected length", bfd); | |
480 | if (msg->length < BFD_PACKET_LEN) { | |
481 | return; | |
482 | } | |
483 | } | |
484 | ||
485 | if (!msg->mult) { | |
486 | log_msg(VLL_WARN, msg, "Zero multiplier", bfd); | |
487 | return; | |
488 | } | |
489 | ||
490 | if (flags & FLAG_MULTIPOINT) { | |
491 | log_msg(VLL_WARN, msg, "Unsupported multipoint flag", bfd); | |
492 | return; | |
493 | } | |
494 | ||
495 | if (!msg->my_disc) { | |
496 | log_msg(VLL_WARN, msg, "NULL my_disc", bfd); | |
497 | return; | |
498 | } | |
499 | ||
500 | pkt_your_disc = ntohl(msg->your_disc); | |
501 | if (pkt_your_disc) { | |
502 | /* Technically, we should use the your discriminator field to figure | |
503 | * out which 'struct bfd' this packet is destined towards. That way a | |
504 | * bfd session could migrate from one interface to another | |
505 | * transparently. This doesn't fit in with the OVS structure very | |
506 | * well, so in this respect, we are not compliant. */ | |
507 | if (pkt_your_disc != bfd->disc) { | |
508 | log_msg(VLL_WARN, msg, "Incorrect your_disc", bfd); | |
509 | return; | |
510 | } | |
511 | } else if (rmt_state > STATE_DOWN) { | |
512 | log_msg(VLL_WARN, msg, "Null your_disc", bfd); | |
513 | return; | |
514 | } | |
515 | ||
516 | bfd->rmt_disc = ntohl(msg->my_disc); | |
517 | bfd->rmt_state = rmt_state; | |
518 | bfd->rmt_flags = flags; | |
519 | bfd->rmt_diag = msg->vers_diag & DIAG_MASK; | |
520 | ||
521 | if (flags & FLAG_FINAL && bfd_in_poll(bfd)) { | |
522 | bfd->min_tx = bfd->poll_min_tx; | |
523 | bfd->min_rx = bfd->poll_min_rx; | |
524 | bfd->flags &= ~FLAG_POLL; | |
525 | log_msg(VLL_INFO, msg, "Poll sequence terminated", bfd); | |
526 | } | |
527 | ||
528 | if (flags & FLAG_POLL) { | |
529 | /* RFC 5880 Section 6.5 | |
530 | * When the other system receives a Poll, it immediately transmits a | |
531 | * BFD Control packet with the Final (F) bit set, independent of any | |
532 | * periodic BFD Control packets it may be sending | |
533 | * (see section 6.8.7). */ | |
534 | bfd->flags &= ~FLAG_POLL; | |
535 | bfd->flags |= FLAG_FINAL; | |
536 | } | |
537 | ||
538 | rmt_min_rx = MAX(ntohl(msg->min_rx) / 1000, 1); | |
539 | if (bfd->rmt_min_rx != rmt_min_rx) { | |
540 | bfd->rmt_min_rx = rmt_min_rx; | |
541 | bfd_set_next_tx(bfd); | |
542 | log_msg(VLL_INFO, msg, "New remote min_rx", bfd); | |
543 | } | |
544 | ||
545 | bfd->rmt_min_tx = MAX(ntohl(msg->min_tx) / 1000, 1); | |
546 | bfd->detect_time = bfd_rx_interval(bfd) * bfd->mult + time_msec(); | |
547 | ||
548 | if (bfd->state == STATE_ADMIN_DOWN) { | |
549 | VLOG_DBG_RL(&rl, "Administratively down, dropping control message."); | |
550 | return; | |
551 | } | |
552 | ||
553 | if (rmt_state == STATE_ADMIN_DOWN) { | |
554 | if (bfd->state != STATE_DOWN) { | |
555 | bfd_set_state(bfd, STATE_DOWN, DIAG_RMT_DOWN); | |
556 | } | |
557 | } else { | |
558 | switch (bfd->state) { | |
559 | case STATE_DOWN: | |
560 | if (rmt_state == STATE_DOWN) { | |
561 | bfd_set_state(bfd, STATE_INIT, bfd->diag); | |
562 | } else if (rmt_state == STATE_INIT) { | |
563 | bfd_set_state(bfd, STATE_UP, bfd->diag); | |
564 | } | |
565 | break; | |
566 | case STATE_INIT: | |
567 | if (rmt_state > STATE_DOWN) { | |
568 | bfd_set_state(bfd, STATE_UP, bfd->diag); | |
569 | } | |
570 | break; | |
571 | case STATE_UP: | |
572 | if (rmt_state <= STATE_DOWN) { | |
573 | bfd_set_state(bfd, STATE_DOWN, DIAG_RMT_DOWN); | |
574 | log_msg(VLL_INFO, msg, "Remote signaled STATE_DOWN", bfd); | |
575 | } | |
576 | break; | |
577 | case STATE_ADMIN_DOWN: | |
578 | default: | |
579 | NOT_REACHED(); | |
580 | } | |
581 | } | |
582 | /* XXX: RFC 5880 Section 6.8.6 Demand mode related calculations here. */ | |
583 | } | |
584 | \f | |
585 | /* Helpers. */ | |
586 | static bool | |
587 | bfd_in_poll(const struct bfd *bfd) | |
588 | { | |
589 | return (bfd->flags & FLAG_POLL) != 0; | |
590 | } | |
591 | ||
592 | static void | |
593 | bfd_poll(struct bfd *bfd) | |
594 | { | |
595 | if (bfd->state > STATE_DOWN && !bfd_in_poll(bfd) | |
596 | && !(bfd->flags & FLAG_FINAL)) { | |
597 | bfd->poll_min_tx = bfd->cfg_min_tx; | |
598 | bfd->poll_min_rx = bfd->cfg_min_rx; | |
599 | bfd->flags |= FLAG_POLL; | |
600 | bfd->next_tx = 0; | |
601 | VLOG_INFO_RL(&rl, "%s: Initiating poll sequence", bfd->name); | |
602 | } | |
603 | } | |
604 | ||
605 | static long long int | |
606 | bfd_min_tx(const struct bfd *bfd) | |
607 | { | |
608 | /* RFC 5880 Section 6.8.3 | |
609 | * When bfd.SessionState is not Up, the system MUST set | |
610 | * bfd.DesiredMinTxInterval to a value of not less than one second | |
611 | * (1,000,000 microseconds). This is intended to ensure that the | |
612 | * bandwidth consumed by BFD sessions that are not Up is negligible, | |
613 | * particularly in the case where a neighbor may not be running BFD. */ | |
614 | return (bfd->state == STATE_UP ? bfd->min_tx : MAX(bfd->min_tx, 1000)); | |
615 | } | |
616 | ||
617 | static long long int | |
618 | bfd_tx_interval(const struct bfd *bfd) | |
619 | { | |
620 | long long int interval = bfd_min_tx(bfd); | |
621 | return MAX(interval, bfd->rmt_min_rx); | |
622 | } | |
623 | ||
624 | static long long int | |
625 | bfd_rx_interval(const struct bfd *bfd) | |
626 | { | |
627 | return MAX(bfd->min_rx, bfd->rmt_min_tx); | |
628 | } | |
629 | ||
630 | static void | |
631 | bfd_set_next_tx(struct bfd *bfd) | |
632 | { | |
633 | long long int interval = bfd_tx_interval(bfd); | |
634 | interval -= interval * random_range(26) / 100; | |
635 | bfd->next_tx = bfd->last_tx + interval; | |
636 | } | |
637 | ||
638 | static const char * | |
639 | bfd_flag_str(enum flags flags) | |
640 | { | |
641 | struct ds ds = DS_EMPTY_INITIALIZER; | |
642 | static char flag_str[128]; | |
643 | ||
644 | if (!flags) { | |
645 | return "none"; | |
646 | } | |
647 | ||
648 | if (flags & FLAG_MULTIPOINT) { | |
649 | ds_put_cstr(&ds, "multipoint "); | |
650 | } | |
651 | ||
652 | if (flags & FLAG_DEMAND) { | |
653 | ds_put_cstr(&ds, "demand "); | |
654 | } | |
655 | ||
656 | if (flags & FLAG_AUTH) { | |
657 | ds_put_cstr(&ds, "auth "); | |
658 | } | |
659 | ||
660 | if (flags & FLAG_CTL) { | |
661 | ds_put_cstr(&ds, "ctl "); | |
662 | } | |
663 | ||
664 | if (flags & FLAG_FINAL) { | |
665 | ds_put_cstr(&ds, "final "); | |
666 | } | |
667 | ||
668 | if (flags & FLAG_POLL) { | |
669 | ds_put_cstr(&ds, "poll "); | |
670 | } | |
671 | ||
672 | ovs_strlcpy(flag_str, ds_cstr(&ds), sizeof flag_str); | |
673 | ds_destroy(&ds); | |
674 | return flag_str; | |
675 | } | |
676 | ||
677 | static const char * | |
678 | bfd_state_str(enum state state) | |
679 | { | |
680 | switch (state) { | |
681 | case STATE_ADMIN_DOWN: return "admin_down"; | |
682 | case STATE_DOWN: return "down"; | |
683 | case STATE_INIT: return "init"; | |
684 | case STATE_UP: return "up"; | |
685 | default: return "invalid"; | |
686 | } | |
687 | } | |
688 | ||
689 | static const char * | |
690 | bfd_diag_str(enum diag diag) { | |
691 | switch (diag) { | |
692 | case DIAG_NONE: return "No Diagnostic"; | |
693 | case DIAG_EXPIRED: return "Control Detection Time Expired"; | |
694 | case DIAG_ECHO_FAILED: return "Echo Function Failed"; | |
695 | case DIAG_RMT_DOWN: return "Neighbor Signaled Session Down"; | |
696 | case DIAG_FWD_RESET: return "Forwarding Plane Reset"; | |
697 | case DIAG_PATH_DOWN: return "Path Down"; | |
698 | case DIAG_CPATH_DOWN: return "Concatenated Path Down"; | |
699 | case DIAG_ADMIN_DOWN: return "Administratively Down"; | |
700 | case DIAG_RCPATH_DOWN: return "Reverse Concatenated Path Down"; | |
701 | default: return "Invalid Diagnostic"; | |
702 | } | |
703 | }; | |
704 | ||
705 | static void | |
706 | log_msg(enum vlog_level level, const struct msg *p, const char *message, | |
707 | const struct bfd *bfd) | |
708 | { | |
709 | struct ds ds = DS_EMPTY_INITIALIZER; | |
710 | ||
711 | if (vlog_should_drop(THIS_MODULE, level, &rl)) { | |
712 | return; | |
713 | } | |
714 | ||
715 | ds_put_format(&ds, | |
716 | "%s: %s." | |
717 | "\n\tvers:%"PRIu8" diag:\"%s\" state:%s mult:%"PRIu8 | |
718 | " length:%"PRIu8 | |
719 | "\n\tflags: %s" | |
720 | "\n\tmy_disc:0x%"PRIx32" your_disc:0x%"PRIx32 | |
721 | "\n\tmin_tx:%"PRIu32"us (%"PRIu32"ms)" | |
722 | "\n\tmin_rx:%"PRIu32"us (%"PRIu32"ms)" | |
723 | "\n\tmin_rx_echo:%"PRIu32"us (%"PRIu32"ms)", | |
724 | bfd->name, message, p->vers_diag >> VERS_SHIFT, | |
725 | bfd_diag_str(p->vers_diag & DIAG_MASK), | |
726 | bfd_state_str(p->flags & STATE_MASK), | |
727 | p->mult, p->length, bfd_flag_str(p->flags & FLAGS_MASK), | |
728 | ntohl(p->my_disc), ntohl(p->your_disc), | |
729 | ntohl(p->min_tx), ntohl(p->min_tx) / 1000, | |
730 | ntohl(p->min_rx), ntohl(p->min_rx) / 1000, | |
731 | ntohl(p->min_rx_echo), ntohl(p->min_rx_echo) / 1000); | |
732 | bfd_put_details(&ds, bfd); | |
733 | VLOG(level, "%s", ds_cstr(&ds)); | |
734 | ds_destroy(&ds); | |
735 | } | |
736 | ||
737 | static void | |
738 | bfd_set_state(struct bfd *bfd, enum state state, enum diag diag) | |
739 | { | |
740 | if (diag == DIAG_NONE && bfd->cpath_down) { | |
741 | diag = DIAG_CPATH_DOWN; | |
742 | } | |
743 | ||
744 | if (bfd->state != state || bfd->diag != diag) { | |
745 | if (!VLOG_DROP_INFO(&rl)) { | |
746 | struct ds ds = DS_EMPTY_INITIALIZER; | |
747 | ||
748 | ds_put_format(&ds, "%s: BFD state change: %s->%s" | |
749 | " \"%s\"->\"%s\".\n", | |
750 | bfd->name, bfd_state_str(bfd->state), | |
751 | bfd_state_str(state), bfd_diag_str(bfd->diag), | |
752 | bfd_diag_str(diag)); | |
753 | bfd_put_details(&ds, bfd); | |
754 | VLOG_INFO("%s", ds_cstr(&ds)); | |
755 | ds_destroy(&ds); | |
756 | } | |
757 | ||
758 | bfd->state = state; | |
759 | bfd->diag = diag; | |
760 | ||
761 | if (bfd->state <= STATE_DOWN) { | |
762 | bfd->rmt_state = STATE_DOWN; | |
763 | bfd->rmt_diag = DIAG_NONE; | |
764 | bfd->rmt_min_rx = 1; | |
765 | bfd->rmt_flags = 0; | |
766 | bfd->rmt_disc = 0; | |
767 | bfd->rmt_min_tx = 0; | |
768 | } | |
769 | } | |
770 | } | |
771 | ||
772 | static uint32_t | |
773 | generate_discriminator(void) | |
774 | { | |
775 | uint32_t disc = 0; | |
776 | ||
777 | /* RFC 5880 Section 6.8.1 | |
778 | * It SHOULD be set to a random (but still unique) value to improve | |
779 | * security. The value is otherwise outside the scope of this | |
780 | * specification. */ | |
781 | ||
782 | while (!disc) { | |
783 | struct bfd *bfd; | |
784 | ||
785 | /* 'disc' is by defnition random, so there's no reason to waste time | |
786 | * hashing it. */ | |
787 | disc = random_uint32(); | |
788 | HMAP_FOR_EACH_IN_BUCKET (bfd, node, disc, &all_bfds) { | |
789 | if (bfd->disc == disc) { | |
790 | disc = 0; | |
791 | break; | |
792 | } | |
793 | } | |
794 | } | |
795 | ||
796 | return disc; | |
797 | } | |
798 | ||
799 | static struct bfd * | |
800 | bfd_find_by_name(const char *name) | |
801 | { | |
802 | struct bfd *bfd; | |
803 | ||
804 | HMAP_FOR_EACH (bfd, node, &all_bfds) { | |
805 | if (!strcmp(bfd->name, name)) { | |
806 | return bfd; | |
807 | } | |
808 | } | |
809 | return NULL; | |
810 | } | |
811 | ||
812 | static void | |
813 | bfd_put_details(struct ds *ds, const struct bfd *bfd) | |
814 | { | |
815 | ds_put_format(ds, "\tForwarding: %s\n", | |
816 | bfd_forwarding(bfd) ? "true" : "false"); | |
817 | ds_put_format(ds, "\tDetect Multiplier: %d\n", bfd->mult); | |
818 | ds_put_format(ds, "\tConcatenated Path Down: %s\n", | |
819 | bfd->cpath_down ? "true" : "false"); | |
820 | ds_put_format(ds, "\tTX Interval: Approx %lldms\n", bfd_tx_interval(bfd)); | |
821 | ds_put_format(ds, "\tRX Interval: Approx %lldms\n", bfd_rx_interval(bfd)); | |
822 | ds_put_format(ds, "\tDetect Time: now %+lldms\n", | |
823 | time_msec() - bfd->detect_time); | |
824 | ds_put_format(ds, "\tNext TX Time: now %+lldms\n", | |
825 | time_msec() - bfd->next_tx); | |
826 | ds_put_format(ds, "\tLast TX Time: now %+lldms\n", | |
827 | time_msec() - bfd->last_tx); | |
828 | ||
829 | ds_put_cstr(ds, "\n"); | |
830 | ||
831 | ds_put_format(ds, "\tLocal Flags: %s\n", bfd_flag_str(bfd->flags)); | |
832 | ds_put_format(ds, "\tLocal Session State: %s\n", | |
833 | bfd_state_str(bfd->state)); | |
834 | ds_put_format(ds, "\tLocal Diagnostic: %s\n", bfd_diag_str(bfd->diag)); | |
835 | ds_put_format(ds, "\tLocal Discriminator: 0x%"PRIx32"\n", bfd->disc); | |
836 | ds_put_format(ds, "\tLocal Minimum TX Interval: %lldms\n", | |
837 | bfd_min_tx(bfd)); | |
838 | ds_put_format(ds, "\tLocal Minimum RX Interval: %lldms\n", bfd->min_rx); | |
839 | ||
840 | ds_put_cstr(ds, "\n"); | |
841 | ||
842 | ds_put_format(ds, "\tRemote Flags: %s\n", bfd_flag_str(bfd->rmt_flags)); | |
843 | ds_put_format(ds, "\tRemote Session State: %s\n", | |
844 | bfd_state_str(bfd->rmt_state)); | |
845 | ds_put_format(ds, "\tRemote Diagnostic: %s\n", | |
846 | bfd_diag_str(bfd->rmt_diag)); | |
847 | ds_put_format(ds, "\tRemote Discriminator: 0x%"PRIx32"\n", bfd->rmt_disc); | |
848 | ds_put_format(ds, "\tRemote Minimum TX Interval: %lldms\n", | |
849 | bfd->rmt_min_tx); | |
850 | ds_put_format(ds, "\tRemote Minimum RX Interval: %lldms\n", | |
851 | bfd->rmt_min_rx); | |
852 | } | |
853 | ||
854 | static void | |
855 | bfd_unixctl_show(struct unixctl_conn *conn, int argc, const char *argv[], | |
856 | void *aux OVS_UNUSED) | |
857 | { | |
858 | struct ds ds = DS_EMPTY_INITIALIZER; | |
859 | struct bfd *bfd; | |
860 | ||
861 | if (argc > 1) { | |
862 | bfd = bfd_find_by_name(argv[1]); | |
863 | if (!bfd) { | |
864 | unixctl_command_reply_error(conn, "no such bfd object"); | |
865 | return; | |
866 | } | |
867 | bfd_put_details(&ds, bfd); | |
868 | } else { | |
869 | HMAP_FOR_EACH (bfd, node, &all_bfds) { | |
870 | ds_put_format(&ds, "---- %s ----\n", bfd->name); | |
871 | bfd_put_details(&ds, bfd); | |
872 | } | |
873 | } | |
874 | unixctl_command_reply(conn, ds_cstr(&ds)); | |
875 | ds_destroy(&ds); | |
876 | } |