]>
Commit | Line | Data |
---|---|---|
ccc09689 EJ |
1 | /* Copyright (c) 2013 Nicira, Inc. |
2 | * | |
3 | * Licensed under the Apache License, Version 2.0 (the "License"); | |
4 | * you may not use this file except in compliance with the License. | |
5 | * You may obtain a copy of the License at: | |
6 | * | |
7 | * http://www.apache.org/licenses/LICENSE-2.0 | |
8 | * | |
9 | * Unless required by applicable law or agreed to in writing, software | |
10 | * distributed under the License is distributed on an "AS IS" BASIS, | |
11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
12 | * See the License for the specific language governing permissions and | |
13 | * limitations under the License. */ | |
14 | ||
15 | #include <config.h> | |
16 | #include "bfd.h" | |
17 | ||
b532f3f0 | 18 | #include <sys/types.h> |
ccc09689 | 19 | #include <arpa/inet.h> |
2fbf137d | 20 | #include <netinet/in_systm.h> |
b644259f | 21 | #include <netinet/ip.h> |
ccc09689 | 22 | |
f645ee9c | 23 | #include "byte-order.h" |
f23d157c | 24 | #include "connectivity.h" |
ccc09689 EJ |
25 | #include "csum.h" |
26 | #include "dpif.h" | |
27 | #include "dynamic-string.h" | |
28 | #include "flow.h" | |
29 | #include "hash.h" | |
30 | #include "hmap.h" | |
31 | #include "list.h" | |
c1c4e8c7 | 32 | #include "netdev.h" |
ccc09689 EJ |
33 | #include "netlink.h" |
34 | #include "odp-util.h" | |
35 | #include "ofpbuf.h" | |
26131299 | 36 | #include "ovs-thread.h" |
ccc09689 EJ |
37 | #include "openvswitch/types.h" |
38 | #include "packets.h" | |
39 | #include "poll-loop.h" | |
40 | #include "random.h" | |
f23d157c | 41 | #include "seq.h" |
ccc09689 EJ |
42 | #include "smap.h" |
43 | #include "timeval.h" | |
7c457c33 | 44 | #include "unaligned.h" |
ccc09689 EJ |
45 | #include "unixctl.h" |
46 | #include "util.h" | |
47 | #include "vlog.h" | |
48 | ||
49 | VLOG_DEFINE_THIS_MODULE(bfd); | |
50 | ||
51 | /* XXX Finish BFD. | |
52 | * | |
53 | * The goal of this module is to replace CFM with something both more flexible | |
54 | * and standards compliant. In service of this goal, the following needs to be | |
55 | * done. | |
56 | * | |
57 | * - Compliance | |
58 | * * Implement Demand mode. | |
59 | * * Go through the RFC line by line and verify we comply. | |
60 | * * Test against a hardware implementation. Preferably a popular one. | |
61 | * * Delete BFD packets with nw_ttl != 255 in the datapath to prevent DOS | |
62 | * attacks. | |
63 | * | |
64 | * - Unit tests. | |
65 | * | |
b644259f | 66 | * - Set TOS/PCP on the outer tunnel header when encapped. |
ccc09689 | 67 | * |
ccc09689 EJ |
68 | * - Sending BFD messages should be in its own thread/process. |
69 | * | |
70 | * - Scale testing. How does it operate when there are a large number of bfd | |
71 | * sessions? Do we ever have random flaps? What's the CPU utilization? | |
72 | * | |
73 | * - Rely on data traffic for liveness by using BFD demand mode. | |
74 | * If we're receiving traffic on a port, we can safely assume it's up (modulo | |
75 | * unidirectional failures). BFD has a demand mode in which it can stay quiet | |
76 | * unless it feels the need to check the status of the port. Using this, we | |
77 | * can implement a strategy in which BFD only sends control messages on dark | |
78 | * interfaces. | |
79 | * | |
80 | * - Depending on how one interprets the spec, it appears that a BFD session | |
81 | * can never change bfd.LocalDiag to "No Diagnostic". We should verify that | |
82 | * this is what hardware implementations actually do. Seems like "No | |
83 | * Diagnostic" should be set once a BFD session state goes UP. */ | |
84 | ||
/* Version number written into the 'Vers' bits of every control message this
 * module sends, and required of every control message it accepts. */
#define BFD_VERSION 1
86 | ||
/* Flag bits carried in the low six bits of the second byte of a BFD control
 * message (the bits selected by FLAGS_MASK). */
enum flags {
    FLAG_MULTIPOINT = 0x01,     /* Multipoint. */
    FLAG_DEMAND     = 0x02,     /* Demand. */
    FLAG_AUTH       = 0x04,     /* Authentication section present. */
    FLAG_CTL        = 0x08,     /* Control bit. */
    FLAG_FINAL      = 0x10,     /* Final: answers a Poll. */
    FLAG_POLL       = 0x20      /* Poll: requests a Final. */
};
95 | ||
/* Session states.  The values are already positioned in the top two bits of
 * a byte so that they can be OR-ed straight into, or masked straight out of,
 * the flags byte of a control message. */
enum state {
    STATE_ADMIN_DOWN = 0x00,
    STATE_DOWN       = 0x40,
    STATE_INIT       = 0x80,
    STATE_UP         = 0xC0
};
102 | ||
/* Diagnostic codes.  Each enumerator is one greater than the previous one,
 * starting from zero; the numeric value is noted beside each name. */
enum diag {
    DIAG_NONE = 0,      /* 0: No Diagnostic. */
    DIAG_EXPIRED,       /* 1: Control Detection Time Expired. */
    DIAG_ECHO_FAILED,   /* 2: Echo Function Failed. */
    DIAG_RMT_DOWN,      /* 3: Neighbor Signaled Session Down. */
    DIAG_FWD_RESET,     /* 4: Forwarding Plane Reset. */
    DIAG_PATH_DOWN,     /* 5: Path Down. */
    DIAG_CPATH_DOWN,    /* 6: Concatenated Path Down. */
    DIAG_ADMIN_DOWN,    /* 7: Administratively Down. */
    DIAG_RCPATH_DOWN    /* 8: Reverse Concatenated Path Down. */
};
114 | ||
115 | /* RFC 5880 Section 4.1 | |
116 | * 0 1 2 3 | |
117 | * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 | |
118 | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |
119 | * |Vers | Diag |Sta|P|F|C|A|D|M| Detect Mult | Length | | |
120 | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |
121 | * | My Discriminator | | |
122 | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |
123 | * | Your Discriminator | | |
124 | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |
125 | * | Desired Min TX Interval | | |
126 | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |
127 | * | Required Min RX Interval | | |
128 | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |
129 | * | Required Min Echo RX Interval | | |
130 | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ */ | |
131 | struct msg { | |
132 | uint8_t vers_diag; /* Version and diagnostic. */ | |
133 | uint8_t flags; /* 2bit State field followed by flags. */ | |
134 | uint8_t mult; /* Fault detection multiplier. */ | |
135 | uint8_t length; /* Length of this BFD message. */ | |
136 | ovs_be32 my_disc; /* My discriminator. */ | |
137 | ovs_be32 your_disc; /* Your discriminator. */ | |
138 | ovs_be32 min_tx; /* Desired minimum tx interval. */ | |
139 | ovs_be32 min_rx; /* Required minimum rx interval. */ | |
140 | ovs_be32 min_rx_echo; /* Required minimum echo rx interval. */ | |
141 | }; | |
142 | BUILD_ASSERT_DECL(BFD_PACKET_LEN == sizeof(struct msg)); | |
143 | ||
/* Field extraction helpers for the first two bytes of 'struct msg'. */
#define DIAG_MASK   0x1f    /* Diagnostic bits within 'vers_diag'. */
#define VERS_SHIFT  5       /* Right shift that yields the version. */
#define STATE_MASK  0xC0    /* State bits within 'flags'. */
#define FLAGS_MASK  0x3f    /* Flag bits within 'flags'. */
148 | ||
149 | struct bfd { | |
150 | struct hmap_node node; /* In 'all_bfds'. */ | |
151 | uint32_t disc; /* bfd.LocalDiscr. Key in 'all_bfds' hmap. */ | |
152 | ||
153 | char *name; /* Name used for logging. */ | |
154 | ||
155 | bool cpath_down; /* Concatenated Path Down. */ | |
156 | uint8_t mult; /* bfd.DetectMult. */ | |
157 | ||
c1c4e8c7 AW |
158 | struct netdev *netdev; |
159 | uint64_t rx_packets; /* Packets received by 'netdev'. */ | |
160 | ||
ccc09689 EJ |
161 | enum state state; /* bfd.SessionState. */ |
162 | enum state rmt_state; /* bfd.RemoteSessionState. */ | |
163 | ||
164 | enum diag diag; /* bfd.LocalDiag. */ | |
165 | enum diag rmt_diag; /* Remote diagnostic. */ | |
166 | ||
167 | enum flags flags; /* Flags sent on messages. */ | |
168 | enum flags rmt_flags; /* Flags last received. */ | |
169 | ||
170 | uint32_t rmt_disc; /* bfd.RemoteDiscr. */ | |
171 | ||
de8d2ef9 GS |
172 | uint8_t eth_dst[ETH_ADDR_LEN];/* Ethernet destination address. */ |
173 | bool eth_dst_set; /* 'eth_dst' set through database. */ | |
174 | ||
ccc09689 EJ |
175 | uint16_t udp_src; /* UDP source port. */ |
176 | ||
177 | /* All timers in milliseconds. */ | |
178 | long long int rmt_min_rx; /* bfd.RemoteMinRxInterval. */ | |
179 | long long int rmt_min_tx; /* Remote minimum TX interval. */ | |
180 | ||
181 | long long int cfg_min_tx; /* Configured minimum TX rate. */ | |
182 | long long int cfg_min_rx; /* Configured required minimum RX rate. */ | |
183 | long long int poll_min_tx; /* Min TX negotating in a poll sequence. */ | |
184 | long long int poll_min_rx; /* Min RX negotating in a poll sequence. */ | |
185 | long long int min_tx; /* bfd.DesiredMinTxInterval. */ | |
186 | long long int min_rx; /* bfd.RequiredMinRxInterval. */ | |
187 | ||
188 | long long int last_tx; /* Last TX time. */ | |
189 | long long int next_tx; /* Next TX time. */ | |
190 | long long int detect_time; /* RFC 5880 6.8.4 Detection time. */ | |
92cfab82 | 191 | |
a1aeea86 | 192 | bool last_forwarding; /* Last calculation of forwarding flag. */ |
91aaf124 | 193 | int forwarding_override; /* Manual override of 'forwarding' status. */ |
26131299 EJ |
194 | |
195 | atomic_bool check_tnl_key; /* Verify tunnel key of inbound packets? */ | |
196 | atomic_int ref_cnt; | |
c1c4e8c7 | 197 | |
01d18a3a AW |
198 | /* When forward_if_rx is true, bfd_forwarding() will return |
199 | * true as long as there are incoming packets received. | |
200 | * Note, forwarding_override still has higher priority. */ | |
201 | bool forwarding_if_rx; | |
202 | long long int forwarding_if_rx_detect_time; | |
203 | ||
c1c4e8c7 AW |
204 | /* BFD decay related variables. */ |
205 | bool in_decay; /* True when bfd is in decay. */ | |
206 | int decay_min_rx; /* min_rx is set to decay_min_rx when */ | |
207 | /* in decay. */ | |
208 | int decay_rx_ctl; /* Count bfd packets received within decay */ | |
209 | /* detect interval. */ | |
01d18a3a | 210 | uint64_t decay_rx_packets; /* Packets received by 'netdev'. */ |
c1c4e8c7 | 211 | long long int decay_detect_time; /* Decay detection time. */ |
4905e2df AW |
212 | |
213 | uint64_t flap_count; /* Counts bfd forwarding flaps. */ | |
ccc09689 EJ |
214 | }; |
215 | ||
26131299 EJ |
216 | static struct ovs_mutex mutex = OVS_MUTEX_INITIALIZER; |
217 | static struct hmap all_bfds__ = HMAP_INITIALIZER(&all_bfds__); | |
218 | static struct hmap *const all_bfds OVS_GUARDED_BY(mutex) = &all_bfds__; | |
219 | ||
9cdc68a1 | 220 | static bool bfd_forwarding__(struct bfd *) OVS_REQUIRES(mutex); |
344e21d4 AW |
221 | static bool bfd_in_poll(const struct bfd *) OVS_REQUIRES(mutex); |
222 | static void bfd_poll(struct bfd *bfd) OVS_REQUIRES(mutex); | |
223 | static const char *bfd_diag_str(enum diag) OVS_REQUIRES(mutex); | |
224 | static const char *bfd_state_str(enum state) OVS_REQUIRES(mutex); | |
225 | static long long int bfd_min_tx(const struct bfd *) OVS_REQUIRES(mutex); | |
26131299 | 226 | static long long int bfd_tx_interval(const struct bfd *) |
344e21d4 | 227 | OVS_REQUIRES(mutex); |
26131299 | 228 | static long long int bfd_rx_interval(const struct bfd *) |
344e21d4 AW |
229 | OVS_REQUIRES(mutex); |
230 | static void bfd_set_next_tx(struct bfd *) OVS_REQUIRES(mutex); | |
26131299 | 231 | static void bfd_set_state(struct bfd *, enum state, enum diag) |
344e21d4 AW |
232 | OVS_REQUIRES(mutex); |
233 | static uint32_t generate_discriminator(void) OVS_REQUIRES(mutex); | |
26131299 | 234 | static void bfd_put_details(struct ds *, const struct bfd *) |
344e21d4 | 235 | OVS_REQUIRES(mutex); |
c1c4e8c7 AW |
236 | static uint64_t bfd_rx_packets(const struct bfd *) OVS_REQUIRES(mutex); |
237 | static void bfd_try_decay(struct bfd *) OVS_REQUIRES(mutex); | |
238 | static void bfd_decay_update(struct bfd *) OVS_REQUIRES(mutex); | |
a1aeea86 | 239 | |
01d18a3a | 240 | static void bfd_forwarding_if_rx_update(struct bfd *) OVS_REQUIRES(mutex); |
ccc09689 EJ |
241 | static void bfd_unixctl_show(struct unixctl_conn *, int argc, |
242 | const char *argv[], void *aux OVS_UNUSED); | |
91aaf124 PR |
243 | static void bfd_unixctl_set_forwarding_override(struct unixctl_conn *, |
244 | int argc, const char *argv[], | |
245 | void *aux OVS_UNUSED); | |
ccc09689 | 246 | static void log_msg(enum vlog_level, const struct msg *, const char *message, |
344e21d4 | 247 | const struct bfd *) OVS_REQUIRES(mutex); |
ccc09689 EJ |
248 | |
249 | static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(20, 20); | |
ccc09689 EJ |
250 | |
251 | /* Returns true if the interface on which 'bfd' is running may be used to | |
252 | * forward traffic according to the BFD session state. */ | |
253 | bool | |
9cdc68a1 | 254 | bfd_forwarding(struct bfd *bfd) OVS_EXCLUDED(mutex) |
ccc09689 | 255 | { |
26131299 | 256 | bool ret; |
91aaf124 | 257 | |
26131299 EJ |
258 | ovs_mutex_lock(&mutex); |
259 | ret = bfd_forwarding__(bfd); | |
260 | ovs_mutex_unlock(&mutex); | |
261 | return ret; | |
ccc09689 EJ |
262 | } |
263 | ||
a1aeea86 AW |
264 | /* When forwarding_if_rx is enabled, if there are packets received, |
265 | * updates forwarding_if_rx_detect_time. */ | |
266 | void | |
267 | bfd_account_rx(struct bfd *bfd, const struct dpif_flow_stats *stats) | |
268 | { | |
269 | if (stats->n_packets && bfd->forwarding_if_rx) { | |
270 | ovs_mutex_lock(&mutex); | |
271 | bfd_forwarding__(bfd); | |
272 | bfd_forwarding_if_rx_update(bfd); | |
273 | bfd_forwarding__(bfd); | |
274 | ovs_mutex_unlock(&mutex); | |
275 | } | |
276 | } | |
277 | ||
ccc09689 EJ |
278 | /* Returns a 'smap' of key value pairs representing the status of 'bfd' |
279 | * intended for the OVS database. */ | |
280 | void | |
281 | bfd_get_status(const struct bfd *bfd, struct smap *smap) | |
26131299 | 282 | OVS_EXCLUDED(mutex) |
ccc09689 | 283 | { |
26131299 | 284 | ovs_mutex_lock(&mutex); |
a1aeea86 AW |
285 | smap_add(smap, "forwarding", |
286 | bfd_forwarding__(CONST_CAST(struct bfd *, bfd)) | |
287 | ? "true" : "false"); | |
ccc09689 EJ |
288 | smap_add(smap, "state", bfd_state_str(bfd->state)); |
289 | smap_add(smap, "diagnostic", bfd_diag_str(bfd->diag)); | |
4905e2df | 290 | smap_add_format(smap, "flap_count", "%"PRIu64, bfd->flap_count); |
ccc09689 EJ |
291 | |
292 | if (bfd->state != STATE_DOWN) { | |
293 | smap_add(smap, "remote_state", bfd_state_str(bfd->rmt_state)); | |
294 | smap_add(smap, "remote_diagnostic", bfd_diag_str(bfd->rmt_diag)); | |
295 | } | |
26131299 | 296 | ovs_mutex_unlock(&mutex); |
ccc09689 EJ |
297 | } |
298 | ||
299 | /* Initializes, destroys, or reconfigures the BFD session 'bfd' (named 'name'), | |
300 | * according to the database configuration contained in 'cfg'. Takes ownership | |
301 | * of 'bfd', which may be NULL. Returns a BFD object which may be used as a | |
8aee94b6 PR |
302 | * handle for the session, or NULL if BFD is not enabled according to 'cfg'. |
303 | * Also returns NULL if cfg is NULL. */ | |
ccc09689 | 304 | struct bfd * |
c1c4e8c7 AW |
305 | bfd_configure(struct bfd *bfd, const char *name, const struct smap *cfg, |
306 | struct netdev *netdev) OVS_EXCLUDED(mutex) | |
ccc09689 | 307 | { |
26131299 EJ |
308 | static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER; |
309 | static atomic_uint16_t udp_src = ATOMIC_VAR_INIT(0); | |
ccc09689 | 310 | |
c1c4e8c7 | 311 | int decay_min_rx; |
ccc09689 | 312 | long long int min_tx, min_rx; |
2a833280 | 313 | bool need_poll = false; |
c1c4e8c7 | 314 | bool cfg_min_rx_changed = false; |
01d18a3a | 315 | bool cpath_down, forwarding_if_rx; |
de8d2ef9 GS |
316 | const char *hwaddr; |
317 | uint8_t ea[ETH_ADDR_LEN]; | |
ccc09689 | 318 | |
26131299 | 319 | if (ovsthread_once_start(&once)) { |
ccc09689 EJ |
320 | unixctl_command_register("bfd/show", "[interface]", 0, 1, |
321 | bfd_unixctl_show, NULL); | |
91aaf124 PR |
322 | unixctl_command_register("bfd/set-forwarding", |
323 | "[interface] normal|false|true", 1, 2, | |
324 | bfd_unixctl_set_forwarding_override, NULL); | |
26131299 | 325 | ovsthread_once_done(&once); |
ccc09689 EJ |
326 | } |
327 | ||
8aee94b6 | 328 | if (!cfg || !smap_get_bool(cfg, "enable", false)) { |
92cfab82 | 329 | bfd_unref(bfd); |
ccc09689 EJ |
330 | return NULL; |
331 | } | |
332 | ||
26131299 | 333 | ovs_mutex_lock(&mutex); |
ccc09689 EJ |
334 | if (!bfd) { |
335 | bfd = xzalloc(sizeof *bfd); | |
336 | bfd->name = xstrdup(name); | |
91aaf124 | 337 | bfd->forwarding_override = -1; |
ccc09689 | 338 | bfd->disc = generate_discriminator(); |
26131299 | 339 | hmap_insert(all_bfds, &bfd->node, bfd->disc); |
ccc09689 EJ |
340 | |
341 | bfd->diag = DIAG_NONE; | |
342 | bfd->min_tx = 1000; | |
343 | bfd->mult = 3; | |
26131299 | 344 | atomic_init(&bfd->ref_cnt, 1); |
c1c4e8c7 | 345 | bfd->netdev = netdev_ref(netdev); |
01d18a3a | 346 | bfd->rx_packets = bfd_rx_packets(bfd); |
c1c4e8c7 | 347 | bfd->in_decay = false; |
4905e2df | 348 | bfd->flap_count = 0; |
ccc09689 EJ |
349 | |
350 | /* RFC 5881 section 4 | |
351 | * The source port MUST be in the range 49152 through 65535. The same | |
352 | * UDP source port number MUST be used for all BFD Control packets | |
353 | * associated with a particular session. The source port number SHOULD | |
354 | * be unique among all BFD sessions on the system. */ | |
26131299 EJ |
355 | atomic_add(&udp_src, 1, &bfd->udp_src); |
356 | bfd->udp_src = (bfd->udp_src % 16384) + 49152; | |
ccc09689 EJ |
357 | |
358 | bfd_set_state(bfd, STATE_DOWN, DIAG_NONE); | |
de8d2ef9 GS |
359 | |
360 | memcpy(bfd->eth_dst, eth_addr_bfd, ETH_ADDR_LEN); | |
ccc09689 EJ |
361 | } |
362 | ||
26131299 EJ |
363 | atomic_store(&bfd->check_tnl_key, |
364 | smap_get_bool(cfg, "check_tnl_key", false)); | |
ccc09689 EJ |
365 | min_tx = smap_get_int(cfg, "min_tx", 100); |
366 | min_tx = MAX(min_tx, 100); | |
367 | if (bfd->cfg_min_tx != min_tx) { | |
368 | bfd->cfg_min_tx = min_tx; | |
369 | if (bfd->state != STATE_UP | |
370 | || (!bfd_in_poll(bfd) && bfd->cfg_min_tx < bfd->min_tx)) { | |
371 | bfd->min_tx = bfd->cfg_min_tx; | |
372 | } | |
2a833280 | 373 | need_poll = true; |
ccc09689 EJ |
374 | } |
375 | ||
376 | min_rx = smap_get_int(cfg, "min_rx", 1000); | |
377 | min_rx = MAX(min_rx, 100); | |
378 | if (bfd->cfg_min_rx != min_rx) { | |
379 | bfd->cfg_min_rx = min_rx; | |
380 | if (bfd->state != STATE_UP | |
381 | || (!bfd_in_poll(bfd) && bfd->cfg_min_rx > bfd->min_rx)) { | |
382 | bfd->min_rx = bfd->cfg_min_rx; | |
383 | } | |
c1c4e8c7 AW |
384 | cfg_min_rx_changed = true; |
385 | need_poll = true; | |
386 | } | |
387 | ||
388 | decay_min_rx = smap_get_int(cfg, "decay_min_rx", 0); | |
389 | if (bfd->decay_min_rx != decay_min_rx || cfg_min_rx_changed) { | |
390 | if (decay_min_rx > 0 && decay_min_rx < bfd->cfg_min_rx) { | |
391 | VLOG_WARN("%s: decay_min_rx cannot be less than %lld ms", | |
392 | bfd->name, bfd->cfg_min_rx); | |
393 | bfd->decay_min_rx = 0; | |
394 | } else { | |
395 | bfd->decay_min_rx = decay_min_rx; | |
396 | } | |
397 | /* Resets decay. */ | |
398 | bfd->in_decay = false; | |
399 | bfd_decay_update(bfd); | |
2a833280 | 400 | need_poll = true; |
ccc09689 EJ |
401 | } |
402 | ||
403 | cpath_down = smap_get_bool(cfg, "cpath_down", false); | |
404 | if (bfd->cpath_down != cpath_down) { | |
405 | bfd->cpath_down = cpath_down; | |
406 | if (bfd->diag == DIAG_NONE || bfd->diag == DIAG_CPATH_DOWN) { | |
407 | bfd_set_state(bfd, bfd->state, DIAG_NONE); | |
408 | } | |
2a833280 | 409 | need_poll = true; |
ccc09689 | 410 | } |
de8d2ef9 GS |
411 | |
412 | hwaddr = smap_get(cfg, "bfd_dst_mac"); | |
413 | if (hwaddr && eth_addr_from_string(hwaddr, ea) && !eth_addr_is_zero(ea)) { | |
414 | memcpy(bfd->eth_dst, ea, ETH_ADDR_LEN); | |
415 | bfd->eth_dst_set = true; | |
416 | } else if (bfd->eth_dst_set) { | |
417 | memcpy(bfd->eth_dst, eth_addr_bfd, ETH_ADDR_LEN); | |
418 | bfd->eth_dst_set = false; | |
419 | } | |
420 | ||
01d18a3a AW |
421 | forwarding_if_rx = smap_get_bool(cfg, "forwarding_if_rx", false); |
422 | if (bfd->forwarding_if_rx != forwarding_if_rx) { | |
423 | bfd->forwarding_if_rx = forwarding_if_rx; | |
424 | if (bfd->state == STATE_UP && bfd->forwarding_if_rx) { | |
425 | bfd_forwarding_if_rx_update(bfd); | |
426 | } else { | |
427 | bfd->forwarding_if_rx_detect_time = 0; | |
428 | } | |
429 | } | |
430 | ||
2a833280 AW |
431 | if (need_poll) { |
432 | bfd_poll(bfd); | |
433 | } | |
26131299 | 434 | ovs_mutex_unlock(&mutex); |
ccc09689 EJ |
435 | return bfd; |
436 | } | |
437 | ||
92cfab82 EJ |
438 | struct bfd * |
439 | bfd_ref(const struct bfd *bfd_) | |
440 | { | |
441 | struct bfd *bfd = CONST_CAST(struct bfd *, bfd_); | |
442 | if (bfd) { | |
26131299 EJ |
443 | int orig; |
444 | atomic_add(&bfd->ref_cnt, 1, &orig); | |
445 | ovs_assert(orig > 0); | |
92cfab82 EJ |
446 | } |
447 | return bfd; | |
448 | } | |
449 | ||
450 | void | |
26131299 | 451 | bfd_unref(struct bfd *bfd) OVS_EXCLUDED(mutex) |
92cfab82 EJ |
452 | { |
453 | if (bfd) { | |
26131299 EJ |
454 | int orig; |
455 | ||
456 | atomic_sub(&bfd->ref_cnt, 1, &orig); | |
457 | ovs_assert(orig > 0); | |
458 | if (orig == 1) { | |
459 | ovs_mutex_lock(&mutex); | |
460 | hmap_remove(all_bfds, &bfd->node); | |
c1c4e8c7 | 461 | netdev_close(bfd->netdev); |
92cfab82 EJ |
462 | free(bfd->name); |
463 | free(bfd); | |
26131299 | 464 | ovs_mutex_unlock(&mutex); |
92cfab82 EJ |
465 | } |
466 | } | |
467 | } | |
468 | ||
ccc09689 | 469 | void |
26131299 | 470 | bfd_wait(const struct bfd *bfd) OVS_EXCLUDED(mutex) |
ccc09689 | 471 | { |
88e4462e AW |
472 | poll_timer_wait_until(bfd_wake_time(bfd)); |
473 | } | |
474 | ||
475 | /* Returns the next wake up time. */ | |
476 | long long int | |
477 | bfd_wake_time(const struct bfd *bfd) OVS_EXCLUDED(mutex) | |
478 | { | |
479 | long long int retval; | |
480 | ||
481 | if (!bfd) { | |
482 | return LLONG_MAX; | |
ccc09689 EJ |
483 | } |
484 | ||
88e4462e AW |
485 | ovs_mutex_lock(&mutex); |
486 | if (bfd->flags & FLAG_FINAL) { | |
487 | retval = 0; | |
488 | } else { | |
489 | retval = bfd->next_tx; | |
490 | if (bfd->state > STATE_DOWN) { | |
491 | retval = MIN(bfd->detect_time, retval); | |
492 | } | |
ccc09689 | 493 | } |
26131299 | 494 | ovs_mutex_unlock(&mutex); |
88e4462e | 495 | return retval; |
ccc09689 EJ |
496 | } |
497 | ||
498 | void | |
26131299 | 499 | bfd_run(struct bfd *bfd) OVS_EXCLUDED(mutex) |
ccc09689 | 500 | { |
c1c4e8c7 AW |
501 | long long int now; |
502 | bool old_in_decay; | |
503 | ||
26131299 | 504 | ovs_mutex_lock(&mutex); |
c1c4e8c7 AW |
505 | now = time_msec(); |
506 | old_in_decay = bfd->in_decay; | |
507 | ||
508 | if (bfd->state > STATE_DOWN && now >= bfd->detect_time) { | |
ccc09689 EJ |
509 | bfd_set_state(bfd, STATE_DOWN, DIAG_EXPIRED); |
510 | } | |
f23d157c | 511 | bfd_forwarding__(bfd); |
ccc09689 | 512 | |
c1c4e8c7 AW |
513 | /* Decay may only happen when state is STATE_UP, bfd->decay_min_rx is |
514 | * configured, and decay_detect_time is reached. */ | |
515 | if (bfd->state == STATE_UP && bfd->decay_min_rx > 0 | |
516 | && now >= bfd->decay_detect_time) { | |
517 | bfd_try_decay(bfd); | |
518 | } | |
519 | ||
520 | if (bfd->min_tx != bfd->cfg_min_tx | |
521 | || (bfd->min_rx != bfd->cfg_min_rx && bfd->min_rx != bfd->decay_min_rx) | |
522 | || bfd->in_decay != old_in_decay) { | |
ccc09689 EJ |
523 | bfd_poll(bfd); |
524 | } | |
26131299 | 525 | ovs_mutex_unlock(&mutex); |
ccc09689 EJ |
526 | } |
527 | ||
528 | bool | |
26131299 | 529 | bfd_should_send_packet(const struct bfd *bfd) OVS_EXCLUDED(mutex) |
ccc09689 | 530 | { |
26131299 EJ |
531 | bool ret; |
532 | ovs_mutex_lock(&mutex); | |
533 | ret = bfd->flags & FLAG_FINAL || time_msec() >= bfd->next_tx; | |
534 | ovs_mutex_unlock(&mutex); | |
535 | return ret; | |
ccc09689 EJ |
536 | } |
537 | ||
538 | void | |
539 | bfd_put_packet(struct bfd *bfd, struct ofpbuf *p, | |
26131299 | 540 | uint8_t eth_src[ETH_ADDR_LEN]) OVS_EXCLUDED(mutex) |
ccc09689 EJ |
541 | { |
542 | long long int min_tx, min_rx; | |
543 | struct udp_header *udp; | |
544 | struct eth_header *eth; | |
545 | struct ip_header *ip; | |
546 | struct msg *msg; | |
547 | ||
26131299 | 548 | ovs_mutex_lock(&mutex); |
ccc09689 EJ |
549 | if (bfd->next_tx) { |
550 | long long int delay = time_msec() - bfd->next_tx; | |
551 | long long int interval = bfd_tx_interval(bfd); | |
552 | if (delay > interval * 3 / 2) { | |
6a690106 | 553 | VLOG_INFO("%s: long delay of %lldms (expected %lldms) sending BFD" |
ccc09689 EJ |
554 | " control message", bfd->name, delay, interval); |
555 | } | |
556 | } | |
557 | ||
558 | /* RFC 5880 Section 6.5 | |
559 | * A BFD Control packet MUST NOT have both the Poll (P) and Final (F) bits | |
560 | * set. */ | |
561 | ovs_assert(!(bfd->flags & FLAG_POLL) || !(bfd->flags & FLAG_FINAL)); | |
562 | ||
563 | ofpbuf_reserve(p, 2); /* Properly align after the ethernet header. */ | |
564 | eth = ofpbuf_put_uninit(p, sizeof *eth); | |
ccc09689 | 565 | memcpy(eth->eth_src, eth_src, ETH_ADDR_LEN); |
de8d2ef9 | 566 | memcpy(eth->eth_dst, bfd->eth_dst, ETH_ADDR_LEN); |
ccc09689 EJ |
567 | eth->eth_type = htons(ETH_TYPE_IP); |
568 | ||
569 | ip = ofpbuf_put_zeros(p, sizeof *ip); | |
570 | ip->ip_ihl_ver = IP_IHL_VER(5, 4); | |
571 | ip->ip_tot_len = htons(sizeof *ip + sizeof *udp + sizeof *msg); | |
b644259f PR |
572 | ip->ip_ttl = MAXTTL; |
573 | ip->ip_tos = IPTOS_LOWDELAY | IPTOS_THROUGHPUT; | |
ccc09689 | 574 | ip->ip_proto = IPPROTO_UDP; |
7c457c33 BP |
575 | /* Use link local addresses: */ |
576 | put_16aligned_be32(&ip->ip_src, htonl(0xA9FE0100)); /* 169.254.1.0. */ | |
577 | put_16aligned_be32(&ip->ip_dst, htonl(0xA9FE0101)); /* 169.254.1.1. */ | |
ccc09689 EJ |
578 | ip->ip_csum = csum(ip, sizeof *ip); |
579 | ||
580 | udp = ofpbuf_put_zeros(p, sizeof *udp); | |
581 | udp->udp_src = htons(bfd->udp_src); | |
582 | udp->udp_dst = htons(BFD_DEST_PORT); | |
583 | udp->udp_len = htons(sizeof *udp + sizeof *msg); | |
584 | ||
585 | msg = ofpbuf_put_uninit(p, sizeof *msg); | |
586 | msg->vers_diag = (BFD_VERSION << 5) | bfd->diag; | |
587 | msg->flags = (bfd->state & STATE_MASK) | bfd->flags; | |
588 | ||
589 | msg->mult = bfd->mult; | |
590 | msg->length = BFD_PACKET_LEN; | |
591 | msg->my_disc = htonl(bfd->disc); | |
592 | msg->your_disc = htonl(bfd->rmt_disc); | |
593 | msg->min_rx_echo = htonl(0); | |
594 | ||
595 | if (bfd_in_poll(bfd)) { | |
596 | min_tx = bfd->poll_min_tx; | |
597 | min_rx = bfd->poll_min_rx; | |
598 | } else { | |
599 | min_tx = bfd_min_tx(bfd); | |
600 | min_rx = bfd->min_rx; | |
601 | } | |
602 | ||
603 | msg->min_tx = htonl(min_tx * 1000); | |
604 | msg->min_rx = htonl(min_rx * 1000); | |
605 | ||
606 | bfd->flags &= ~FLAG_FINAL; | |
607 | ||
608 | log_msg(VLL_DBG, msg, "Sending BFD Message", bfd); | |
609 | ||
610 | bfd->last_tx = time_msec(); | |
611 | bfd_set_next_tx(bfd); | |
26131299 | 612 | ovs_mutex_unlock(&mutex); |
ccc09689 EJ |
613 | } |
614 | ||
615 | bool | |
5675cb4c | 616 | bfd_should_process_flow(const struct bfd *bfd_, const struct flow *flow, |
fab52e16 | 617 | struct flow_wildcards *wc) |
ccc09689 | 618 | { |
5675cb4c | 619 | struct bfd *bfd = CONST_CAST(struct bfd *, bfd_); |
26131299 | 620 | bool check_tnl_key; |
5675cb4c | 621 | |
de8d2ef9 GS |
622 | memset(&wc->masks.dl_dst, 0xff, sizeof wc->masks.dl_dst); |
623 | if (bfd->eth_dst_set && memcmp(bfd->eth_dst, flow->dl_dst, ETH_ADDR_LEN)) { | |
624 | return false; | |
625 | } | |
26131299 | 626 | |
642dc74d JP |
627 | memset(&wc->masks.nw_proto, 0xff, sizeof wc->masks.nw_proto); |
628 | memset(&wc->masks.tp_dst, 0xff, sizeof wc->masks.tp_dst); | |
26131299 EJ |
629 | |
630 | atomic_read(&bfd->check_tnl_key, &check_tnl_key); | |
631 | if (check_tnl_key) { | |
fab52e16 PR |
632 | memset(&wc->masks.tunnel.tun_id, 0xff, sizeof wc->masks.tunnel.tun_id); |
633 | } | |
ccc09689 EJ |
634 | return (flow->dl_type == htons(ETH_TYPE_IP) |
635 | && flow->nw_proto == IPPROTO_UDP | |
de8d2ef9 | 636 | && flow->tp_dst == htons(BFD_DEST_PORT) |
2d21cf72 | 637 | && (!check_tnl_key || flow->tunnel.tun_id == htonll(0))); |
ccc09689 EJ |
638 | } |
639 | ||
640 | void | |
641 | bfd_process_packet(struct bfd *bfd, const struct flow *flow, | |
26131299 | 642 | const struct ofpbuf *p) OVS_EXCLUDED(mutex) |
ccc09689 EJ |
643 | { |
644 | uint32_t rmt_min_rx, pkt_your_disc; | |
645 | enum state rmt_state; | |
646 | enum flags flags; | |
647 | uint8_t version; | |
648 | struct msg *msg; | |
649 | ||
650 | /* This function is designed to follow section RFC 5880 6.8.6 closely. */ | |
651 | ||
26131299 | 652 | ovs_mutex_lock(&mutex); |
c1c4e8c7 AW |
653 | /* Increments the decay rx counter. */ |
654 | bfd->decay_rx_ctl++; | |
655 | ||
a1aeea86 AW |
656 | bfd_forwarding__(bfd); |
657 | ||
ccc09689 EJ |
658 | if (flow->nw_ttl != 255) { |
659 | /* XXX Should drop in the kernel to prevent DOS. */ | |
26131299 | 660 | goto out; |
ccc09689 EJ |
661 | } |
662 | ||
663 | msg = ofpbuf_at(p, (uint8_t *)p->l7 - (uint8_t *)p->data, BFD_PACKET_LEN); | |
664 | if (!msg) { | |
bc47dcfd | 665 | VLOG_INFO_RL(&rl, "%s: Received too-short BFD control message (only " |
34582733 | 666 | "%"PRIdPTR" bytes long, at least %d required).", |
bc47dcfd BP |
667 | bfd->name, (uint8_t *) ofpbuf_tail(p) - (uint8_t *) p->l7, |
668 | BFD_PACKET_LEN); | |
26131299 | 669 | goto out; |
ccc09689 EJ |
670 | } |
671 | ||
672 | /* RFC 5880 Section 6.8.6 | |
673 | * If the Length field is greater than the payload of the encapsulating | |
674 | * protocol, the packet MUST be discarded. | |
675 | * | |
676 | * Note that we make this check implicity. Above we use ofpbuf_at() to | |
677 | * ensure that there are at least BFD_PACKET_LEN bytes in the payload of | |
678 | * the encapsulating protocol. Below we require msg->length to be exactly | |
679 | * BFD_PACKET_LEN bytes. */ | |
680 | ||
681 | flags = msg->flags & FLAGS_MASK; | |
682 | rmt_state = msg->flags & STATE_MASK; | |
683 | version = msg->vers_diag >> VERS_SHIFT; | |
684 | ||
685 | log_msg(VLL_DBG, msg, "Received BFD control message", bfd); | |
686 | ||
687 | if (version != BFD_VERSION) { | |
688 | log_msg(VLL_WARN, msg, "Incorrect version", bfd); | |
26131299 | 689 | goto out; |
ccc09689 EJ |
690 | } |
691 | ||
692 | /* Technically this should happen after the length check. We don't support | |
693 | * authentication however, so it's simpler to do the check first. */ | |
694 | if (flags & FLAG_AUTH) { | |
695 | log_msg(VLL_WARN, msg, "Authenticated control message with" | |
696 | " authentication disabled", bfd); | |
26131299 | 697 | goto out; |
ccc09689 EJ |
698 | } |
699 | ||
700 | if (msg->length != BFD_PACKET_LEN) { | |
701 | log_msg(VLL_WARN, msg, "Unexpected length", bfd); | |
702 | if (msg->length < BFD_PACKET_LEN) { | |
26131299 | 703 | goto out; |
ccc09689 EJ |
704 | } |
705 | } | |
706 | ||
707 | if (!msg->mult) { | |
708 | log_msg(VLL_WARN, msg, "Zero multiplier", bfd); | |
26131299 | 709 | goto out; |
ccc09689 EJ |
710 | } |
711 | ||
712 | if (flags & FLAG_MULTIPOINT) { | |
713 | log_msg(VLL_WARN, msg, "Unsupported multipoint flag", bfd); | |
26131299 | 714 | goto out; |
ccc09689 EJ |
715 | } |
716 | ||
717 | if (!msg->my_disc) { | |
718 | log_msg(VLL_WARN, msg, "NULL my_disc", bfd); | |
26131299 | 719 | goto out; |
ccc09689 EJ |
720 | } |
721 | ||
722 | pkt_your_disc = ntohl(msg->your_disc); | |
723 | if (pkt_your_disc) { | |
724 | /* Technically, we should use the your discriminator field to figure | |
725 | * out which 'struct bfd' this packet is destined towards. That way a | |
726 | * bfd session could migrate from one interface to another | |
727 | * transparently. This doesn't fit in with the OVS structure very | |
728 | * well, so in this respect, we are not compliant. */ | |
729 | if (pkt_your_disc != bfd->disc) { | |
730 | log_msg(VLL_WARN, msg, "Incorrect your_disc", bfd); | |
26131299 | 731 | goto out; |
ccc09689 EJ |
732 | } |
733 | } else if (rmt_state > STATE_DOWN) { | |
734 | log_msg(VLL_WARN, msg, "Null your_disc", bfd); | |
26131299 | 735 | goto out; |
ccc09689 EJ |
736 | } |
737 | ||
738 | bfd->rmt_disc = ntohl(msg->my_disc); | |
739 | bfd->rmt_state = rmt_state; | |
740 | bfd->rmt_flags = flags; | |
741 | bfd->rmt_diag = msg->vers_diag & DIAG_MASK; | |
742 | ||
743 | if (flags & FLAG_FINAL && bfd_in_poll(bfd)) { | |
744 | bfd->min_tx = bfd->poll_min_tx; | |
745 | bfd->min_rx = bfd->poll_min_rx; | |
746 | bfd->flags &= ~FLAG_POLL; | |
747 | log_msg(VLL_INFO, msg, "Poll sequence terminated", bfd); | |
748 | } | |
749 | ||
750 | if (flags & FLAG_POLL) { | |
751 | /* RFC 5880 Section 6.5 | |
752 | * When the other system receives a Poll, it immediately transmits a | |
753 | * BFD Control packet with the Final (F) bit set, independent of any | |
754 | * periodic BFD Control packets it may be sending | |
755 | * (see section 6.8.7). */ | |
756 | bfd->flags &= ~FLAG_POLL; | |
757 | bfd->flags |= FLAG_FINAL; | |
758 | } | |
759 | ||
760 | rmt_min_rx = MAX(ntohl(msg->min_rx) / 1000, 1); | |
761 | if (bfd->rmt_min_rx != rmt_min_rx) { | |
762 | bfd->rmt_min_rx = rmt_min_rx; | |
122bbcc4 JS |
763 | if (bfd->next_tx) { |
764 | bfd_set_next_tx(bfd); | |
765 | } | |
ccc09689 EJ |
766 | log_msg(VLL_INFO, msg, "New remote min_rx", bfd); |
767 | } | |
768 | ||
769 | bfd->rmt_min_tx = MAX(ntohl(msg->min_tx) / 1000, 1); | |
770 | bfd->detect_time = bfd_rx_interval(bfd) * bfd->mult + time_msec(); | |
771 | ||
772 | if (bfd->state == STATE_ADMIN_DOWN) { | |
773 | VLOG_DBG_RL(&rl, "Administratively down, dropping control message."); | |
26131299 | 774 | goto out; |
ccc09689 EJ |
775 | } |
776 | ||
777 | if (rmt_state == STATE_ADMIN_DOWN) { | |
778 | if (bfd->state != STATE_DOWN) { | |
779 | bfd_set_state(bfd, STATE_DOWN, DIAG_RMT_DOWN); | |
780 | } | |
781 | } else { | |
782 | switch (bfd->state) { | |
783 | case STATE_DOWN: | |
784 | if (rmt_state == STATE_DOWN) { | |
785 | bfd_set_state(bfd, STATE_INIT, bfd->diag); | |
786 | } else if (rmt_state == STATE_INIT) { | |
787 | bfd_set_state(bfd, STATE_UP, bfd->diag); | |
788 | } | |
789 | break; | |
790 | case STATE_INIT: | |
791 | if (rmt_state > STATE_DOWN) { | |
792 | bfd_set_state(bfd, STATE_UP, bfd->diag); | |
793 | } | |
794 | break; | |
795 | case STATE_UP: | |
796 | if (rmt_state <= STATE_DOWN) { | |
797 | bfd_set_state(bfd, STATE_DOWN, DIAG_RMT_DOWN); | |
798 | log_msg(VLL_INFO, msg, "Remote signaled STATE_DOWN", bfd); | |
799 | } | |
800 | break; | |
801 | case STATE_ADMIN_DOWN: | |
802 | default: | |
428b2edd | 803 | OVS_NOT_REACHED(); |
ccc09689 EJ |
804 | } |
805 | } | |
806 | /* XXX: RFC 5880 Section 6.8.6 Demand mode related calculations here. */ | |
26131299 EJ |
807 | |
808 | out: | |
a1aeea86 | 809 | bfd_forwarding__(bfd); |
26131299 | 810 | ovs_mutex_unlock(&mutex); |
ccc09689 | 811 | } |
c1c4e8c7 AW |
812 | |
813 | /* Must be called when the netdev owned by 'bfd' should change. */ | |
814 | void | |
815 | bfd_set_netdev(struct bfd *bfd, const struct netdev *netdev) | |
816 | OVS_EXCLUDED(mutex) | |
817 | { | |
818 | ovs_mutex_lock(&mutex); | |
819 | if (bfd->netdev != netdev) { | |
820 | netdev_close(bfd->netdev); | |
821 | bfd->netdev = netdev_ref(netdev); | |
01d18a3a | 822 | if (bfd->decay_min_rx && bfd->state == STATE_UP) { |
c1c4e8c7 AW |
823 | bfd_decay_update(bfd); |
824 | } | |
01d18a3a AW |
825 | if (bfd->forwarding_if_rx && bfd->state == STATE_UP) { |
826 | bfd_forwarding_if_rx_update(bfd); | |
827 | } | |
828 | bfd->rx_packets = bfd_rx_packets(bfd); | |
c1c4e8c7 AW |
829 | } |
830 | ovs_mutex_unlock(&mutex); | |
831 | } | |
832 | ||
ccc09689 | 833 | \f |
4905e2df | 834 | /* Updates the forwarding flag. If override is not configured and |
a1aeea86 AW |
835 | * the forwarding flag value changes, increments the flap count. |
836 | * | |
837 | * Note this function may be called multiple times in a function | |
838 | * (e.g. bfd_account_rx) before and after the bfd state or status | |
839 | * change. This is to capture any forwarding flag flap. */ | |
26131299 | 840 | static bool |
9cdc68a1 | 841 | bfd_forwarding__(struct bfd *bfd) OVS_REQUIRES(mutex) |
26131299 | 842 | { |
01d18a3a | 843 | long long int time; |
a1aeea86 | 844 | bool last_forwarding = bfd->last_forwarding; |
01d18a3a | 845 | |
26131299 EJ |
846 | if (bfd->forwarding_override != -1) { |
847 | return bfd->forwarding_override == 1; | |
848 | } | |
849 | ||
01d18a3a | 850 | time = bfd->forwarding_if_rx_detect_time; |
a1aeea86 AW |
851 | bfd->last_forwarding = (bfd->state == STATE_UP |
852 | || (bfd->forwarding_if_rx && time > time_msec())) | |
853 | && bfd->rmt_diag != DIAG_PATH_DOWN | |
854 | && bfd->rmt_diag != DIAG_CPATH_DOWN | |
855 | && bfd->rmt_diag != DIAG_RCPATH_DOWN; | |
856 | if (bfd->last_forwarding != last_forwarding) { | |
4905e2df | 857 | bfd->flap_count++; |
f23d157c | 858 | seq_change(connectivity_seq_get()); |
4905e2df | 859 | } |
a1aeea86 | 860 | return bfd->last_forwarding; |
26131299 EJ |
861 | } |
862 | ||
ccc09689 EJ |
863 | /* Helpers. */ |
864 | static bool | |
bd3950dd | 865 | bfd_in_poll(const struct bfd *bfd) OVS_REQUIRES(mutex) |
ccc09689 EJ |
866 | { |
867 | return (bfd->flags & FLAG_POLL) != 0; | |
868 | } | |
869 | ||
870 | static void | |
bd3950dd | 871 | bfd_poll(struct bfd *bfd) OVS_REQUIRES(mutex) |
ccc09689 EJ |
872 | { |
873 | if (bfd->state > STATE_DOWN && !bfd_in_poll(bfd) | |
874 | && !(bfd->flags & FLAG_FINAL)) { | |
875 | bfd->poll_min_tx = bfd->cfg_min_tx; | |
c1c4e8c7 | 876 | bfd->poll_min_rx = bfd->in_decay ? bfd->decay_min_rx : bfd->cfg_min_rx; |
ccc09689 EJ |
877 | bfd->flags |= FLAG_POLL; |
878 | bfd->next_tx = 0; | |
879 | VLOG_INFO_RL(&rl, "%s: Initiating poll sequence", bfd->name); | |
880 | } | |
881 | } | |
882 | ||
883 | static long long int | |
bd3950dd | 884 | bfd_min_tx(const struct bfd *bfd) OVS_REQUIRES(mutex) |
ccc09689 EJ |
885 | { |
886 | /* RFC 5880 Section 6.8.3 | |
887 | * When bfd.SessionState is not Up, the system MUST set | |
888 | * bfd.DesiredMinTxInterval to a value of not less than one second | |
889 | * (1,000,000 microseconds). This is intended to ensure that the | |
890 | * bandwidth consumed by BFD sessions that are not Up is negligible, | |
891 | * particularly in the case where a neighbor may not be running BFD. */ | |
892 | return (bfd->state == STATE_UP ? bfd->min_tx : MAX(bfd->min_tx, 1000)); | |
893 | } | |
894 | ||
895 | static long long int | |
bd3950dd | 896 | bfd_tx_interval(const struct bfd *bfd) OVS_REQUIRES(mutex) |
ccc09689 EJ |
897 | { |
898 | long long int interval = bfd_min_tx(bfd); | |
899 | return MAX(interval, bfd->rmt_min_rx); | |
900 | } | |
901 | ||
902 | static long long int | |
bd3950dd | 903 | bfd_rx_interval(const struct bfd *bfd) OVS_REQUIRES(mutex) |
ccc09689 EJ |
904 | { |
905 | return MAX(bfd->min_rx, bfd->rmt_min_tx); | |
906 | } | |
907 | ||
908 | static void | |
bd3950dd | 909 | bfd_set_next_tx(struct bfd *bfd) OVS_REQUIRES(mutex) |
ccc09689 EJ |
910 | { |
911 | long long int interval = bfd_tx_interval(bfd); | |
912 | interval -= interval * random_range(26) / 100; | |
913 | bfd->next_tx = bfd->last_tx + interval; | |
914 | } | |
915 | ||
916 | static const char * | |
917 | bfd_flag_str(enum flags flags) | |
918 | { | |
919 | struct ds ds = DS_EMPTY_INITIALIZER; | |
920 | static char flag_str[128]; | |
921 | ||
922 | if (!flags) { | |
923 | return "none"; | |
924 | } | |
925 | ||
926 | if (flags & FLAG_MULTIPOINT) { | |
927 | ds_put_cstr(&ds, "multipoint "); | |
928 | } | |
929 | ||
930 | if (flags & FLAG_DEMAND) { | |
931 | ds_put_cstr(&ds, "demand "); | |
932 | } | |
933 | ||
934 | if (flags & FLAG_AUTH) { | |
935 | ds_put_cstr(&ds, "auth "); | |
936 | } | |
937 | ||
938 | if (flags & FLAG_CTL) { | |
939 | ds_put_cstr(&ds, "ctl "); | |
940 | } | |
941 | ||
942 | if (flags & FLAG_FINAL) { | |
943 | ds_put_cstr(&ds, "final "); | |
944 | } | |
945 | ||
946 | if (flags & FLAG_POLL) { | |
947 | ds_put_cstr(&ds, "poll "); | |
948 | } | |
949 | ||
70e575d9 AW |
950 | /* Do not copy the trailing whitespace. */ |
951 | ds_chomp(&ds, ' '); | |
ccc09689 EJ |
952 | ovs_strlcpy(flag_str, ds_cstr(&ds), sizeof flag_str); |
953 | ds_destroy(&ds); | |
954 | return flag_str; | |
955 | } | |
956 | ||
957 | static const char * | |
958 | bfd_state_str(enum state state) | |
959 | { | |
960 | switch (state) { | |
961 | case STATE_ADMIN_DOWN: return "admin_down"; | |
962 | case STATE_DOWN: return "down"; | |
963 | case STATE_INIT: return "init"; | |
964 | case STATE_UP: return "up"; | |
965 | default: return "invalid"; | |
966 | } | |
967 | } | |
968 | ||
969 | static const char * | |
970 | bfd_diag_str(enum diag diag) { | |
971 | switch (diag) { | |
972 | case DIAG_NONE: return "No Diagnostic"; | |
973 | case DIAG_EXPIRED: return "Control Detection Time Expired"; | |
974 | case DIAG_ECHO_FAILED: return "Echo Function Failed"; | |
975 | case DIAG_RMT_DOWN: return "Neighbor Signaled Session Down"; | |
976 | case DIAG_FWD_RESET: return "Forwarding Plane Reset"; | |
977 | case DIAG_PATH_DOWN: return "Path Down"; | |
978 | case DIAG_CPATH_DOWN: return "Concatenated Path Down"; | |
979 | case DIAG_ADMIN_DOWN: return "Administratively Down"; | |
980 | case DIAG_RCPATH_DOWN: return "Reverse Concatenated Path Down"; | |
981 | default: return "Invalid Diagnostic"; | |
982 | } | |
983 | }; | |
984 | ||
985 | static void | |
986 | log_msg(enum vlog_level level, const struct msg *p, const char *message, | |
bd3950dd | 987 | const struct bfd *bfd) OVS_REQUIRES(mutex) |
ccc09689 EJ |
988 | { |
989 | struct ds ds = DS_EMPTY_INITIALIZER; | |
990 | ||
991 | if (vlog_should_drop(THIS_MODULE, level, &rl)) { | |
992 | return; | |
993 | } | |
994 | ||
995 | ds_put_format(&ds, | |
996 | "%s: %s." | |
997 | "\n\tvers:%"PRIu8" diag:\"%s\" state:%s mult:%"PRIu8 | |
998 | " length:%"PRIu8 | |
999 | "\n\tflags: %s" | |
1000 | "\n\tmy_disc:0x%"PRIx32" your_disc:0x%"PRIx32 | |
1001 | "\n\tmin_tx:%"PRIu32"us (%"PRIu32"ms)" | |
1002 | "\n\tmin_rx:%"PRIu32"us (%"PRIu32"ms)" | |
1003 | "\n\tmin_rx_echo:%"PRIu32"us (%"PRIu32"ms)", | |
1004 | bfd->name, message, p->vers_diag >> VERS_SHIFT, | |
1005 | bfd_diag_str(p->vers_diag & DIAG_MASK), | |
1006 | bfd_state_str(p->flags & STATE_MASK), | |
1007 | p->mult, p->length, bfd_flag_str(p->flags & FLAGS_MASK), | |
1008 | ntohl(p->my_disc), ntohl(p->your_disc), | |
1009 | ntohl(p->min_tx), ntohl(p->min_tx) / 1000, | |
1010 | ntohl(p->min_rx), ntohl(p->min_rx) / 1000, | |
1011 | ntohl(p->min_rx_echo), ntohl(p->min_rx_echo) / 1000); | |
1012 | bfd_put_details(&ds, bfd); | |
1013 | VLOG(level, "%s", ds_cstr(&ds)); | |
1014 | ds_destroy(&ds); | |
1015 | } | |
1016 | ||
1017 | static void | |
1018 | bfd_set_state(struct bfd *bfd, enum state state, enum diag diag) | |
bd3950dd | 1019 | OVS_REQUIRES(mutex) |
ccc09689 EJ |
1020 | { |
1021 | if (diag == DIAG_NONE && bfd->cpath_down) { | |
1022 | diag = DIAG_CPATH_DOWN; | |
1023 | } | |
1024 | ||
1025 | if (bfd->state != state || bfd->diag != diag) { | |
1026 | if (!VLOG_DROP_INFO(&rl)) { | |
1027 | struct ds ds = DS_EMPTY_INITIALIZER; | |
1028 | ||
1029 | ds_put_format(&ds, "%s: BFD state change: %s->%s" | |
1030 | " \"%s\"->\"%s\".\n", | |
1031 | bfd->name, bfd_state_str(bfd->state), | |
1032 | bfd_state_str(state), bfd_diag_str(bfd->diag), | |
1033 | bfd_diag_str(diag)); | |
1034 | bfd_put_details(&ds, bfd); | |
1035 | VLOG_INFO("%s", ds_cstr(&ds)); | |
1036 | ds_destroy(&ds); | |
1037 | } | |
1038 | ||
1039 | bfd->state = state; | |
1040 | bfd->diag = diag; | |
1041 | ||
1042 | if (bfd->state <= STATE_DOWN) { | |
1043 | bfd->rmt_state = STATE_DOWN; | |
1044 | bfd->rmt_diag = DIAG_NONE; | |
1045 | bfd->rmt_min_rx = 1; | |
1046 | bfd->rmt_flags = 0; | |
1047 | bfd->rmt_disc = 0; | |
1048 | bfd->rmt_min_tx = 0; | |
c1c4e8c7 AW |
1049 | /* Resets the min_rx if in_decay. */ |
1050 | if (bfd->in_decay) { | |
1051 | bfd->min_rx = bfd->cfg_min_rx; | |
1052 | bfd->in_decay = false; | |
1053 | } | |
ccc09689 | 1054 | } |
c1c4e8c7 AW |
1055 | /* Resets the decay when state changes to STATE_UP |
1056 | * and decay_min_rx is configured. */ | |
1057 | if (bfd->state == STATE_UP && bfd->decay_min_rx) { | |
1058 | bfd_decay_update(bfd); | |
1059 | } | |
f23d157c JS |
1060 | |
1061 | seq_change(connectivity_seq_get()); | |
c1c4e8c7 AW |
1062 | } |
1063 | } | |
1064 | ||
1065 | static uint64_t | |
1066 | bfd_rx_packets(const struct bfd *bfd) OVS_REQUIRES(mutex) | |
1067 | { | |
1068 | struct netdev_stats stats; | |
1069 | ||
1070 | if (!netdev_get_stats(bfd->netdev, &stats)) { | |
1071 | return stats.rx_packets; | |
1072 | } else { | |
1073 | return 0; | |
ccc09689 EJ |
1074 | } |
1075 | } | |
1076 | ||
c1c4e8c7 AW |
1077 | /* Decays the bfd->min_rx to bfd->decay_min_rx when 'diff' is less than |
1078 | * the 'expect' value. */ | |
1079 | static void | |
1080 | bfd_try_decay(struct bfd *bfd) OVS_REQUIRES(mutex) | |
1081 | { | |
1082 | int64_t diff, expect; | |
1083 | ||
1084 | /* The 'diff' is the difference between current interface rx_packets | |
1085 | * stats and last-time check. The 'expect' is the recorded number of | |
1086 | * bfd control packets received within an approximately decay_min_rx | |
1087 | * (2000 ms if decay_min_rx is less than 2000 ms) interval. | |
1088 | * | |
1089 | * Since the update of rx_packets stats at interface happens | |
1090 | * asynchronously to the bfd_rx_packets() function, the 'diff' value | |
1091 | * can be jittered. Thusly, we double the decay_rx_ctl to provide | |
1092 | * more wiggle room. */ | |
01d18a3a | 1093 | diff = bfd_rx_packets(bfd) - bfd->decay_rx_packets; |
c1c4e8c7 AW |
1094 | expect = 2 * MAX(bfd->decay_rx_ctl, 1); |
1095 | bfd->in_decay = diff <= expect ? true : false; | |
1096 | bfd_decay_update(bfd); | |
1097 | } | |
1098 | ||
1099 | /* Updates the rx_packets, decay_rx_ctl and decay_detect_time. */ | |
1100 | static void | |
1101 | bfd_decay_update(struct bfd * bfd) OVS_REQUIRES(mutex) | |
1102 | { | |
01d18a3a | 1103 | bfd->decay_rx_packets = bfd_rx_packets(bfd); |
c1c4e8c7 AW |
1104 | bfd->decay_rx_ctl = 0; |
1105 | bfd->decay_detect_time = MAX(bfd->decay_min_rx, 2000) + time_msec(); | |
1106 | } | |
1107 | ||
01d18a3a AW |
1108 | static void |
1109 | bfd_forwarding_if_rx_update(struct bfd *bfd) OVS_REQUIRES(mutex) | |
1110 | { | |
1111 | int64_t incr = bfd_rx_interval(bfd) * bfd->mult; | |
1112 | bfd->forwarding_if_rx_detect_time = MAX(incr, 2000) + time_msec(); | |
1113 | } | |
1114 | ||
ccc09689 EJ |
1115 | static uint32_t |
1116 | generate_discriminator(void) | |
1117 | { | |
1118 | uint32_t disc = 0; | |
1119 | ||
1120 | /* RFC 5880 Section 6.8.1 | |
1121 | * It SHOULD be set to a random (but still unique) value to improve | |
1122 | * security. The value is otherwise outside the scope of this | |
1123 | * specification. */ | |
1124 | ||
1125 | while (!disc) { | |
1126 | struct bfd *bfd; | |
1127 | ||
5798ed6d | 1128 | /* 'disc' is by definition random, so there's no reason to waste time |
ccc09689 EJ |
1129 | * hashing it. */ |
1130 | disc = random_uint32(); | |
26131299 | 1131 | HMAP_FOR_EACH_IN_BUCKET (bfd, node, disc, all_bfds) { |
ccc09689 EJ |
1132 | if (bfd->disc == disc) { |
1133 | disc = 0; | |
1134 | break; | |
1135 | } | |
1136 | } | |
1137 | } | |
1138 | ||
1139 | return disc; | |
1140 | } | |
1141 | ||
1142 | static struct bfd * | |
bd3950dd | 1143 | bfd_find_by_name(const char *name) OVS_REQUIRES(mutex) |
ccc09689 EJ |
1144 | { |
1145 | struct bfd *bfd; | |
1146 | ||
26131299 | 1147 | HMAP_FOR_EACH (bfd, node, all_bfds) { |
ccc09689 EJ |
1148 | if (!strcmp(bfd->name, name)) { |
1149 | return bfd; | |
1150 | } | |
1151 | } | |
1152 | return NULL; | |
1153 | } | |
1154 | ||
1155 | static void | |
bd3950dd | 1156 | bfd_put_details(struct ds *ds, const struct bfd *bfd) OVS_REQUIRES(mutex) |
ccc09689 | 1157 | { |
a1aeea86 AW |
1158 | ds_put_format(ds, "\tForwarding: %s\n", |
1159 | bfd_forwarding__(CONST_CAST(struct bfd *, bfd)) | |
1160 | ? "true" : "false"); | |
ccc09689 EJ |
1161 | ds_put_format(ds, "\tDetect Multiplier: %d\n", bfd->mult); |
1162 | ds_put_format(ds, "\tConcatenated Path Down: %s\n", | |
1163 | bfd->cpath_down ? "true" : "false"); | |
1164 | ds_put_format(ds, "\tTX Interval: Approx %lldms\n", bfd_tx_interval(bfd)); | |
1165 | ds_put_format(ds, "\tRX Interval: Approx %lldms\n", bfd_rx_interval(bfd)); | |
1166 | ds_put_format(ds, "\tDetect Time: now %+lldms\n", | |
1167 | time_msec() - bfd->detect_time); | |
1168 | ds_put_format(ds, "\tNext TX Time: now %+lldms\n", | |
1169 | time_msec() - bfd->next_tx); | |
1170 | ds_put_format(ds, "\tLast TX Time: now %+lldms\n", | |
1171 | time_msec() - bfd->last_tx); | |
1172 | ||
1173 | ds_put_cstr(ds, "\n"); | |
1174 | ||
1175 | ds_put_format(ds, "\tLocal Flags: %s\n", bfd_flag_str(bfd->flags)); | |
1176 | ds_put_format(ds, "\tLocal Session State: %s\n", | |
1177 | bfd_state_str(bfd->state)); | |
1178 | ds_put_format(ds, "\tLocal Diagnostic: %s\n", bfd_diag_str(bfd->diag)); | |
1179 | ds_put_format(ds, "\tLocal Discriminator: 0x%"PRIx32"\n", bfd->disc); | |
1180 | ds_put_format(ds, "\tLocal Minimum TX Interval: %lldms\n", | |
1181 | bfd_min_tx(bfd)); | |
1182 | ds_put_format(ds, "\tLocal Minimum RX Interval: %lldms\n", bfd->min_rx); | |
1183 | ||
1184 | ds_put_cstr(ds, "\n"); | |
1185 | ||
1186 | ds_put_format(ds, "\tRemote Flags: %s\n", bfd_flag_str(bfd->rmt_flags)); | |
1187 | ds_put_format(ds, "\tRemote Session State: %s\n", | |
1188 | bfd_state_str(bfd->rmt_state)); | |
1189 | ds_put_format(ds, "\tRemote Diagnostic: %s\n", | |
1190 | bfd_diag_str(bfd->rmt_diag)); | |
1191 | ds_put_format(ds, "\tRemote Discriminator: 0x%"PRIx32"\n", bfd->rmt_disc); | |
1192 | ds_put_format(ds, "\tRemote Minimum TX Interval: %lldms\n", | |
1193 | bfd->rmt_min_tx); | |
1194 | ds_put_format(ds, "\tRemote Minimum RX Interval: %lldms\n", | |
1195 | bfd->rmt_min_rx); | |
1196 | } | |
1197 | ||
1198 | static void | |
1199 | bfd_unixctl_show(struct unixctl_conn *conn, int argc, const char *argv[], | |
26131299 | 1200 | void *aux OVS_UNUSED) OVS_EXCLUDED(mutex) |
ccc09689 EJ |
1201 | { |
1202 | struct ds ds = DS_EMPTY_INITIALIZER; | |
1203 | struct bfd *bfd; | |
1204 | ||
26131299 | 1205 | ovs_mutex_lock(&mutex); |
ccc09689 EJ |
1206 | if (argc > 1) { |
1207 | bfd = bfd_find_by_name(argv[1]); | |
1208 | if (!bfd) { | |
1209 | unixctl_command_reply_error(conn, "no such bfd object"); | |
26131299 | 1210 | goto out; |
ccc09689 EJ |
1211 | } |
1212 | bfd_put_details(&ds, bfd); | |
1213 | } else { | |
26131299 | 1214 | HMAP_FOR_EACH (bfd, node, all_bfds) { |
ccc09689 EJ |
1215 | ds_put_format(&ds, "---- %s ----\n", bfd->name); |
1216 | bfd_put_details(&ds, bfd); | |
1217 | } | |
1218 | } | |
1219 | unixctl_command_reply(conn, ds_cstr(&ds)); | |
1220 | ds_destroy(&ds); | |
26131299 EJ |
1221 | |
1222 | out: | |
1223 | ovs_mutex_unlock(&mutex); | |
ccc09689 | 1224 | } |
91aaf124 PR |
1225 | |
1226 | ||
1227 | static void | |
1228 | bfd_unixctl_set_forwarding_override(struct unixctl_conn *conn, int argc, | |
1229 | const char *argv[], void *aux OVS_UNUSED) | |
26131299 | 1230 | OVS_EXCLUDED(mutex) |
91aaf124 PR |
1231 | { |
1232 | const char *forward_str = argv[argc - 1]; | |
1233 | int forwarding_override; | |
1234 | struct bfd *bfd; | |
1235 | ||
26131299 | 1236 | ovs_mutex_lock(&mutex); |
91aaf124 PR |
1237 | if (!strcasecmp("true", forward_str)) { |
1238 | forwarding_override = 1; | |
1239 | } else if (!strcasecmp("false", forward_str)) { | |
1240 | forwarding_override = 0; | |
1241 | } else if (!strcasecmp("normal", forward_str)) { | |
1242 | forwarding_override = -1; | |
1243 | } else { | |
1244 | unixctl_command_reply_error(conn, "unknown fault string"); | |
26131299 | 1245 | goto out; |
91aaf124 PR |
1246 | } |
1247 | ||
1248 | if (argc > 2) { | |
1249 | bfd = bfd_find_by_name(argv[1]); | |
1250 | if (!bfd) { | |
1251 | unixctl_command_reply_error(conn, "no such BFD object"); | |
26131299 | 1252 | goto out; |
91aaf124 PR |
1253 | } |
1254 | bfd->forwarding_override = forwarding_override; | |
1255 | } else { | |
26131299 | 1256 | HMAP_FOR_EACH (bfd, node, all_bfds) { |
91aaf124 PR |
1257 | bfd->forwarding_override = forwarding_override; |
1258 | } | |
1259 | } | |
1260 | ||
1261 | unixctl_command_reply(conn, "OK"); | |
26131299 EJ |
1262 | |
1263 | out: | |
1264 | ovs_mutex_unlock(&mutex); | |
91aaf124 | 1265 | } |