]> git.proxmox.com Git - mirror_frr.git/blob - bgpd/bgp_io.c
lib: add frr_with_mutex() block-wrapper
[mirror_frr.git] / bgpd / bgp_io.c
1 /* BGP I/O.
2 * Implements packet I/O in a pthread.
3 * Copyright (C) 2017 Cumulus Networks
4 * Quentin Young
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; see the file COPYING; if not, write to the
18 * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
19 * MA 02110-1301 USA
20 */
21
22 /* clang-format off */
23 #include <zebra.h>
24 #include <pthread.h> // for pthread_mutex_unlock, pthread_mutex_lock
25
26 #include "frr_pthread.h"
27 #include "linklist.h" // for list_delete, list_delete_all_node, lis...
28 #include "log.h" // for zlog_debug, safe_strerror, zlog_err
29 #include "memory.h" // for MTYPE_TMP, XCALLOC, XFREE
30 #include "network.h" // for ERRNO_IO_RETRY
31 #include "stream.h" // for stream_get_endp, stream_getw_from, str...
32 #include "ringbuf.h" // for ringbuf_remain, ringbuf_peek, ringbuf_...
33 #include "thread.h" // for THREAD_OFF, THREAD_ARG, thread, thread...
34 #include "zassert.h" // for assert
35
36 #include "bgpd/bgp_io.h"
37 #include "bgpd/bgp_debug.h" // for bgp_debug_neighbor_events, bgp_type_str
38 #include "bgpd/bgp_errors.h" // for expanded error reference information
39 #include "bgpd/bgp_fsm.h" // for BGP_EVENT_ADD, bgp_event
40 #include "bgpd/bgp_packet.h" // for bgp_notify_send_with_data, bgp_notify...
41 #include "bgpd/bgpd.h" // for peer, BGP_MARKER_SIZE, bgp_master, bm
42 /* clang-format on */
43
/* Forward declarations of the file-local I/O routines defined below. */
static uint16_t bgp_write(struct peer *peer);
static uint16_t bgp_read(struct peer *peer);
static int bgp_process_writes(struct thread *thread);
static int bgp_process_reads(struct thread *thread);
static bool validate_header(struct peer *peer);

/*
 * Generic I/O status codes.
 *
 * These are bit flags that bgp_read() and bgp_write() set in the status
 * value they return.
 */
#define BGP_IO_TRANS_ERR (1 << 0) /* EAGAIN or similar occurred */
#define BGP_IO_FATAL_ERR (1 << 1) /* some kind of fatal TCP error */
54
55 /* Thread external API ----------------------------------------------------- */
56
57 void bgp_writes_on(struct peer *peer)
58 {
59 struct frr_pthread *fpt = bgp_pth_io;
60 assert(fpt->running);
61
62 assert(peer->status != Deleted);
63 assert(peer->obuf);
64 assert(peer->ibuf);
65 assert(peer->ibuf_work);
66 assert(!peer->t_connect_check_r);
67 assert(!peer->t_connect_check_w);
68 assert(peer->fd);
69
70 thread_add_write(fpt->master, bgp_process_writes, peer, peer->fd,
71 &peer->t_write);
72 SET_FLAG(peer->thread_flags, PEER_THREAD_WRITES_ON);
73 }
74
75 void bgp_writes_off(struct peer *peer)
76 {
77 struct frr_pthread *fpt = bgp_pth_io;
78 assert(fpt->running);
79
80 thread_cancel_async(fpt->master, &peer->t_write, NULL);
81 THREAD_OFF(peer->t_generate_updgrp_packets);
82
83 UNSET_FLAG(peer->thread_flags, PEER_THREAD_WRITES_ON);
84 }
85
86 void bgp_reads_on(struct peer *peer)
87 {
88 struct frr_pthread *fpt = bgp_pth_io;
89 assert(fpt->running);
90
91 assert(peer->status != Deleted);
92 assert(peer->ibuf);
93 assert(peer->fd);
94 assert(peer->ibuf_work);
95 assert(peer->obuf);
96 assert(!peer->t_connect_check_r);
97 assert(!peer->t_connect_check_w);
98 assert(peer->fd);
99
100 thread_add_read(fpt->master, bgp_process_reads, peer, peer->fd,
101 &peer->t_read);
102
103 SET_FLAG(peer->thread_flags, PEER_THREAD_READS_ON);
104 }
105
106 void bgp_reads_off(struct peer *peer)
107 {
108 struct frr_pthread *fpt = bgp_pth_io;
109 assert(fpt->running);
110
111 thread_cancel_async(fpt->master, &peer->t_read, NULL);
112 THREAD_OFF(peer->t_process_packet);
113
114 UNSET_FLAG(peer->thread_flags, PEER_THREAD_READS_ON);
115 }
116
117 /* Thread internal functions ----------------------------------------------- */
118
119 /*
120 * Called from I/O pthread when a file descriptor has become ready for writing.
121 */
122 static int bgp_process_writes(struct thread *thread)
123 {
124 static struct peer *peer;
125 peer = THREAD_ARG(thread);
126 uint16_t status;
127 bool reschedule;
128 bool fatal = false;
129
130 if (peer->fd < 0)
131 return -1;
132
133 struct frr_pthread *fpt = bgp_pth_io;
134
135 frr_with_mutex(&peer->io_mtx) {
136 status = bgp_write(peer);
137 reschedule = (stream_fifo_head(peer->obuf) != NULL);
138 }
139
140 /* no problem */
141 if (CHECK_FLAG(status, BGP_IO_TRANS_ERR)) {
142 }
143
144 /* problem */
145 if (CHECK_FLAG(status, BGP_IO_FATAL_ERR)) {
146 reschedule = false;
147 fatal = true;
148 }
149
150 if (reschedule) {
151 thread_add_write(fpt->master, bgp_process_writes, peer,
152 peer->fd, &peer->t_write);
153 } else if (!fatal) {
154 BGP_TIMER_ON(peer->t_generate_updgrp_packets,
155 bgp_generate_updgrp_packets, 0);
156 }
157
158 return 0;
159 }
160
161 /*
162 * Called from I/O pthread when a file descriptor has become ready for reading,
163 * or has hung up.
164 *
165 * We read as much data as possible, process as many packets as we can and
166 * place them on peer->ibuf for secondary processing by the main thread.
167 */
168 static int bgp_process_reads(struct thread *thread)
169 {
170 /* clang-format off */
171 static struct peer *peer; // peer to read from
172 uint16_t status; // bgp_read status code
173 bool more = true; // whether we got more data
174 bool fatal = false; // whether fatal error occurred
175 bool added_pkt = false; // whether we pushed onto ->ibuf
176 /* clang-format on */
177
178 peer = THREAD_ARG(thread);
179
180 if (peer->fd < 0 || bm->terminating)
181 return -1;
182
183 struct frr_pthread *fpt = bgp_pth_io;
184
185 frr_with_mutex(&peer->io_mtx) {
186 status = bgp_read(peer);
187 }
188
189 /* error checking phase */
190 if (CHECK_FLAG(status, BGP_IO_TRANS_ERR)) {
191 /* no problem; just don't process packets */
192 more = false;
193 }
194
195 if (CHECK_FLAG(status, BGP_IO_FATAL_ERR)) {
196 /* problem; tear down session */
197 more = false;
198 fatal = true;
199 }
200
201 while (more) {
202 /* static buffer for transferring packets */
203 static unsigned char pktbuf[BGP_MAX_PACKET_SIZE];
204 /* shorter alias to peer's input buffer */
205 struct ringbuf *ibw = peer->ibuf_work;
206 /* packet size as given by header */
207 uint16_t pktsize = 0;
208
209 /* check that we have enough data for a header */
210 if (ringbuf_remain(ibw) < BGP_HEADER_SIZE)
211 break;
212
213 /* check that header is valid */
214 if (!validate_header(peer)) {
215 fatal = true;
216 break;
217 }
218
219 /* header is valid; retrieve packet size */
220 ringbuf_peek(ibw, BGP_MARKER_SIZE, &pktsize, sizeof(pktsize));
221
222 pktsize = ntohs(pktsize);
223
224 /* if this fails we are seriously screwed */
225 assert(pktsize <= BGP_MAX_PACKET_SIZE);
226
227 /*
228 * If we have that much data, chuck it into its own
229 * stream and append to input queue for processing.
230 */
231 if (ringbuf_remain(ibw) >= pktsize) {
232 struct stream *pkt = stream_new(pktsize);
233 assert(ringbuf_get(ibw, pktbuf, pktsize) == pktsize);
234 stream_put(pkt, pktbuf, pktsize);
235
236 frr_with_mutex(&peer->io_mtx) {
237 stream_fifo_push(peer->ibuf, pkt);
238 }
239
240 added_pkt = true;
241 } else
242 break;
243 }
244
245 assert(ringbuf_space(peer->ibuf_work) >= BGP_MAX_PACKET_SIZE);
246
247 /* handle invalid header */
248 if (fatal) {
249 /* wipe buffer just in case someone screwed up */
250 ringbuf_wipe(peer->ibuf_work);
251 } else {
252 thread_add_read(fpt->master, bgp_process_reads, peer, peer->fd,
253 &peer->t_read);
254 if (added_pkt)
255 thread_add_timer_msec(bm->master, bgp_process_packet,
256 peer, 0, &peer->t_process_packet);
257 }
258
259 return 0;
260 }
261
262 /*
263 * Flush peer output buffer.
264 *
265 * This function pops packets off of peer->obuf and writes them to peer->fd.
266 * The amount of packets written is equal to the minimum of peer->wpkt_quanta
267 * and the number of packets on the output buffer, unless an error occurs.
268 *
269 * If write() returns an error, the appropriate FSM event is generated.
270 *
271 * The return value is equal to the number of packets written
272 * (which may be zero).
273 */
274 static uint16_t bgp_write(struct peer *peer)
275 {
276 uint8_t type;
277 struct stream *s;
278 int num;
279 int update_last_write = 0;
280 unsigned int count = 0;
281 uint32_t uo = 0;
282 uint16_t status = 0;
283 uint32_t wpkt_quanta_old;
284
285 wpkt_quanta_old = atomic_load_explicit(&peer->bgp->wpkt_quanta,
286 memory_order_relaxed);
287
288 while (count < wpkt_quanta_old && (s = stream_fifo_head(peer->obuf))) {
289 int writenum;
290 do {
291 writenum = stream_get_endp(s) - stream_get_getp(s);
292 num = write(peer->fd, stream_pnt(s), writenum);
293
294 if (num < 0) {
295 if (!ERRNO_IO_RETRY(errno)) {
296 BGP_EVENT_ADD(peer, TCP_fatal_error);
297 SET_FLAG(status, BGP_IO_FATAL_ERR);
298 } else {
299 SET_FLAG(status, BGP_IO_TRANS_ERR);
300 }
301
302 goto done;
303 } else if (num != writenum)
304 stream_forward_getp(s, num);
305
306 } while (num != writenum);
307
308 /* Retrieve BGP packet type. */
309 stream_set_getp(s, BGP_MARKER_SIZE + 2);
310 type = stream_getc(s);
311
312 switch (type) {
313 case BGP_MSG_OPEN:
314 atomic_fetch_add_explicit(&peer->open_out, 1,
315 memory_order_relaxed);
316 break;
317 case BGP_MSG_UPDATE:
318 atomic_fetch_add_explicit(&peer->update_out, 1,
319 memory_order_relaxed);
320 uo++;
321 break;
322 case BGP_MSG_NOTIFY:
323 atomic_fetch_add_explicit(&peer->notify_out, 1,
324 memory_order_relaxed);
325 /* Double start timer. */
326 peer->v_start *= 2;
327
328 /* Overflow check. */
329 if (peer->v_start >= (60 * 2))
330 peer->v_start = (60 * 2);
331
332 /*
333 * Handle Graceful Restart case where the state changes
334 * to Connect instead of Idle.
335 */
336 BGP_EVENT_ADD(peer, BGP_Stop);
337 goto done;
338
339 case BGP_MSG_KEEPALIVE:
340 atomic_fetch_add_explicit(&peer->keepalive_out, 1,
341 memory_order_relaxed);
342 break;
343 case BGP_MSG_ROUTE_REFRESH_NEW:
344 case BGP_MSG_ROUTE_REFRESH_OLD:
345 atomic_fetch_add_explicit(&peer->refresh_out, 1,
346 memory_order_relaxed);
347 break;
348 case BGP_MSG_CAPABILITY:
349 atomic_fetch_add_explicit(&peer->dynamic_cap_out, 1,
350 memory_order_relaxed);
351 break;
352 }
353
354 count++;
355
356 stream_free(stream_fifo_pop(peer->obuf));
357 update_last_write = 1;
358 }
359
360 done : {
361 /*
362 * Update last_update if UPDATEs were written.
363 * Note: that these are only updated at end,
364 * not per message (i.e., per loop)
365 */
366 if (uo)
367 atomic_store_explicit(&peer->last_update, bgp_clock(),
368 memory_order_relaxed);
369
370 /* If we TXed any flavor of packet */
371 if (update_last_write)
372 atomic_store_explicit(&peer->last_write, bgp_clock(),
373 memory_order_relaxed);
374 }
375
376 return status;
377 }
378
379 /*
380 * Reads a chunk of data from peer->fd into peer->ibuf_work.
381 *
382 * @return status flag (see top-of-file)
383 */
384 static uint16_t bgp_read(struct peer *peer)
385 {
386 size_t readsize; // how many bytes we want to read
387 ssize_t nbytes; // how many bytes we actually read
388 uint16_t status = 0;
389 static uint8_t ibw[BGP_MAX_PACKET_SIZE * BGP_READ_PACKET_MAX];
390
391 readsize = MIN(ringbuf_space(peer->ibuf_work), sizeof(ibw));
392 nbytes = read(peer->fd, ibw, readsize);
393
394 /* EAGAIN or EWOULDBLOCK; come back later */
395 if (nbytes < 0 && ERRNO_IO_RETRY(errno)) {
396 SET_FLAG(status, BGP_IO_TRANS_ERR);
397 /* Fatal error; tear down session */
398 } else if (nbytes < 0) {
399 flog_err(EC_BGP_UPDATE_RCV,
400 "%s [Error] bgp_read_packet error: %s", peer->host,
401 safe_strerror(errno));
402
403 if (peer->status == Established) {
404 if (CHECK_FLAG(peer->sflags, PEER_STATUS_NSF_MODE)) {
405 peer->last_reset = PEER_DOWN_NSF_CLOSE_SESSION;
406 SET_FLAG(peer->sflags, PEER_STATUS_NSF_WAIT);
407 } else
408 peer->last_reset = PEER_DOWN_CLOSE_SESSION;
409 }
410
411 BGP_EVENT_ADD(peer, TCP_fatal_error);
412 SET_FLAG(status, BGP_IO_FATAL_ERR);
413 /* Received EOF / TCP session closed */
414 } else if (nbytes == 0) {
415 if (bgp_debug_neighbor_events(peer))
416 zlog_debug("%s [Event] BGP connection closed fd %d",
417 peer->host, peer->fd);
418
419 if (peer->status == Established) {
420 if (CHECK_FLAG(peer->sflags, PEER_STATUS_NSF_MODE)) {
421 peer->last_reset = PEER_DOWN_NSF_CLOSE_SESSION;
422 SET_FLAG(peer->sflags, PEER_STATUS_NSF_WAIT);
423 } else
424 peer->last_reset = PEER_DOWN_CLOSE_SESSION;
425 }
426
427 BGP_EVENT_ADD(peer, TCP_connection_closed);
428 SET_FLAG(status, BGP_IO_FATAL_ERR);
429 } else {
430 assert(ringbuf_put(peer->ibuf_work, ibw, nbytes)
431 == (size_t)nbytes);
432 }
433
434 return status;
435 }
436
437 /*
438 * Called after we have read a BGP packet header. Validates marker, message
439 * type and packet length. If any of these aren't correct, sends a notify.
440 *
441 * Assumes that there are at least BGP_HEADER_SIZE readable bytes in the input
442 * buffer.
443 */
444 static bool validate_header(struct peer *peer)
445 {
446 uint16_t size;
447 uint8_t type;
448 struct ringbuf *pkt = peer->ibuf_work;
449
450 static uint8_t m_correct[BGP_MARKER_SIZE] = {
451 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
452 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
453 uint8_t m_rx[BGP_MARKER_SIZE] = {0x00};
454
455 if (ringbuf_peek(pkt, 0, m_rx, BGP_MARKER_SIZE) != BGP_MARKER_SIZE)
456 return false;
457
458 if (memcmp(m_correct, m_rx, BGP_MARKER_SIZE) != 0) {
459 bgp_notify_send(peer, BGP_NOTIFY_HEADER_ERR,
460 BGP_NOTIFY_HEADER_NOT_SYNC);
461 return false;
462 }
463
464 /* Get size and type in network byte order. */
465 ringbuf_peek(pkt, BGP_MARKER_SIZE, &size, sizeof(size));
466 ringbuf_peek(pkt, BGP_MARKER_SIZE + 2, &type, sizeof(type));
467
468 size = ntohs(size);
469
470 /* BGP type check. */
471 if (type != BGP_MSG_OPEN && type != BGP_MSG_UPDATE
472 && type != BGP_MSG_NOTIFY && type != BGP_MSG_KEEPALIVE
473 && type != BGP_MSG_ROUTE_REFRESH_NEW
474 && type != BGP_MSG_ROUTE_REFRESH_OLD
475 && type != BGP_MSG_CAPABILITY) {
476 if (bgp_debug_neighbor_events(peer))
477 zlog_debug("%s unknown message type 0x%02x", peer->host,
478 type);
479
480 bgp_notify_send_with_data(peer, BGP_NOTIFY_HEADER_ERR,
481 BGP_NOTIFY_HEADER_BAD_MESTYPE, &type,
482 1);
483 return false;
484 }
485
486 /* Minimum packet length check. */
487 if ((size < BGP_HEADER_SIZE) || (size > BGP_MAX_PACKET_SIZE)
488 || (type == BGP_MSG_OPEN && size < BGP_MSG_OPEN_MIN_SIZE)
489 || (type == BGP_MSG_UPDATE && size < BGP_MSG_UPDATE_MIN_SIZE)
490 || (type == BGP_MSG_NOTIFY && size < BGP_MSG_NOTIFY_MIN_SIZE)
491 || (type == BGP_MSG_KEEPALIVE && size != BGP_MSG_KEEPALIVE_MIN_SIZE)
492 || (type == BGP_MSG_ROUTE_REFRESH_NEW
493 && size < BGP_MSG_ROUTE_REFRESH_MIN_SIZE)
494 || (type == BGP_MSG_ROUTE_REFRESH_OLD
495 && size < BGP_MSG_ROUTE_REFRESH_MIN_SIZE)
496 || (type == BGP_MSG_CAPABILITY
497 && size < BGP_MSG_CAPABILITY_MIN_SIZE)) {
498 if (bgp_debug_neighbor_events(peer)) {
499 zlog_debug("%s bad message length - %d for %s",
500 peer->host, size,
501 type == 128 ? "ROUTE-REFRESH"
502 : bgp_type_str[(int)type]);
503 }
504
505 uint16_t nsize = htons(size);
506
507 bgp_notify_send_with_data(peer, BGP_NOTIFY_HEADER_ERR,
508 BGP_NOTIFY_HEADER_BAD_MESLEN,
509 (unsigned char *)&nsize, 2);
510 return false;
511 }
512
513 return true;
514 }