/* BGP I/O.
 * Implements packet I/O in a consumer pthread.
 * Copyright (C) 2017 Cumulus Networks
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; see the file COPYING; if not, write to the
 * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
 * MA 02110-1301 USA
 */

#include <zebra.h>
#include <sys/time.h>
#include <pthread.h>

#include "thread.h"
#include "hash.h"
#include "stream.h"
#include "memory.h"
#include "log.h"
#include "monotime.h"
#include "network.h"
#include "pqueue.h"

#include "bgpd/bgpd.h"
#include "bgpd/bgp_io.h"
#include "bgpd/bgp_debug.h"
#include "bgpd/bgp_packet.h"
#include "bgpd/bgp_fsm.h"

/* forward declarations */
static uint16_t bgp_write(struct peer *);
static uint16_t bgp_read(struct peer *);
static int bgp_process_writes(struct thread *);
static int bgp_process_reads(struct thread *);
static bool validate_header(struct peer *);

/* generic i/o status codes */
#define BGP_IO_TRANS_ERR (1 << 1) // EAGAIN or similar occurred
#define BGP_IO_FATAL_ERR (1 << 2) // some kind of fatal TCP error

/* bgp_read() status codes */
#define BGP_IO_READ_HEADER (1 << 3)	// read a full packet header
#define BGP_IO_READ_FULLPACKET (1 << 4) // read a full packet

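/*
 * Usage sketch (illustrative): callers test these codes with CHECK_FLAG()
 * on the bitfield returned by bgp_read() / bgp_write(), e.g.
 *
 *	uint16_t status = bgp_read(peer);
 *	if (CHECK_FLAG(status, BGP_IO_FATAL_ERR))
 *		; // tear the session down; do not reschedule
 *	else if (CHECK_FLAG(status, BGP_IO_TRANS_ERR))
 *		; // transient failure; try again later
 */
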
/* Start and stop routines for I/O pthread + control variables
 * ------------------------------------------------------------------------ */
bool bgp_packet_write_thread_run = false;
pthread_mutex_t *work_mtx;

static struct list *read_cancel;
static struct list *write_cancel;

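/*
 * Initializes the mutex and the per-direction cancellation lists used by
 * the I/O pthread. Should be called before the I/O pthread is started.
 */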
void bgp_io_init()
{
	work_mtx = XCALLOC(MTYPE_TMP, sizeof(pthread_mutex_t));
	pthread_mutex_init(work_mtx, NULL);

	read_cancel = list_new();
	write_cancel = list_new();
}

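/*
 * Entry point of the I/O pthread. Fetches tasks scheduled onto its thread
 * master and executes them, skipping any task whose peer has been placed
 * on the matching cancellation list in the meantime.
 */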
void *bgp_io_start(void *arg)
{
	struct frr_pthread *fpt = frr_pthread_get(PTHREAD_IO);

	// we definitely don't want to handle signals
	fpt->master->handle_signals = false;

	bgp_packet_write_thread_run = true;
	struct thread task;

	while (bgp_packet_write_thread_run) {
		if (thread_fetch(fpt->master, &task)) {
			pthread_mutex_lock(work_mtx);
			{
				bool cancel = false;
				struct peer *peer = THREAD_ARG(&task);
				if ((task.func == bgp_process_reads
				     && listnode_lookup(read_cancel, peer))
				    || (task.func == bgp_process_writes
					&& listnode_lookup(write_cancel, peer)))
					cancel = true;

				list_delete_all_node(write_cancel);
				list_delete_all_node(read_cancel);

				if (!cancel)
					thread_call(&task);
			}
			pthread_mutex_unlock(work_mtx);
		}
	}

	return NULL;
}

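/*
 * Signals the I/O pthread to exit, interrupts any blocking wait with
 * SIGINT, joins the thread, then tears down the mutex and cancellation
 * lists created in bgp_io_init().
 */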
int bgp_io_stop(void **result, struct frr_pthread *fpt)
{
	fpt->master->spin = false;
	bgp_packet_write_thread_run = false;
	pthread_kill(fpt->thread, SIGINT);
	pthread_join(fpt->thread, result);

	pthread_mutex_unlock(work_mtx);
	pthread_mutex_destroy(work_mtx);

	list_delete(read_cancel);
	list_delete(write_cancel);
	XFREE(MTYPE_TMP, work_mtx);
	return 0;
}
/* ------------------------------------------------------------------------ */

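/*
 * Schedules a write job for the peer on the I/O pthread, removing any
 * stale entry for it from the write cancellation list. The peer's buffers
 * and socket must already be set up (see the asserts below).
 */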
void bgp_writes_on(struct peer *peer)
{
	assert(peer->status != Deleted);
	assert(peer->obuf);
	assert(peer->ibuf);
	assert(peer->ibuf_work);
	assert(!peer->t_connect_check);
	assert(peer->fd);

	struct frr_pthread *fpt = frr_pthread_get(PTHREAD_IO);

	pthread_mutex_lock(work_mtx);
	{
		listnode_delete(write_cancel, peer);
		thread_add_write(fpt->master, bgp_process_writes, peer,
				 peer->fd, &peer->t_write);
		SET_FLAG(peer->thread_flags, PEER_THREAD_WRITES_ON);
	}
	pthread_mutex_unlock(work_mtx);
}

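/*
 * Removes all write jobs for the peer and places it on the write
 * cancellation list so the I/O pthread discards any job it has already
 * fetched.
 */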
void bgp_writes_off(struct peer *peer)
{
	pthread_mutex_lock(work_mtx);
	{
		THREAD_OFF(peer->t_write);
		THREAD_OFF(peer->t_generate_updgrp_packets);
		listnode_add(write_cancel, peer);

		// peer access by us after this point will result in pain
		UNSET_FLAG(peer->thread_flags, PEER_THREAD_WRITES_ON);
	}
	pthread_mutex_unlock(work_mtx);
	/* upon return, i/o thread must not access the peer */
}

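/*
 * Schedules a read job for the peer on the I/O pthread, removing any
 * stale entry for it from the read cancellation list. The peer's input
 * buffers must be set up and empty.
 */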
void bgp_reads_on(struct peer *peer)
{
	assert(peer->status != Deleted);
	assert(peer->ibuf);
	assert(peer->fd);
	assert(peer->ibuf_work);
	assert(stream_get_endp(peer->ibuf_work) == 0);
	assert(peer->obuf);
	assert(!peer->t_connect_check);
	assert(peer->fd);

	struct frr_pthread *fpt = frr_pthread_get(PTHREAD_IO);

	pthread_mutex_lock(work_mtx);
	{
		listnode_delete(read_cancel, peer);
		thread_add_read(fpt->master, bgp_process_reads, peer, peer->fd,
				&peer->t_read);
		SET_FLAG(peer->thread_flags, PEER_THREAD_READS_ON);
	}
	pthread_mutex_unlock(work_mtx);
}

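/*
 * Removes all read jobs for the peer and places it on the read
 * cancellation list. Upon return the I/O pthread will no longer read
 * from the peer.
 */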
void bgp_reads_off(struct peer *peer)
{
	pthread_mutex_lock(work_mtx);
	{
		THREAD_OFF(peer->t_read);
		THREAD_OFF(peer->t_process_packet);
		listnode_add(read_cancel, peer);

		// peer access by us after this point will result in pain
		UNSET_FLAG(peer->thread_flags, PEER_THREAD_READS_ON);
	}
	pthread_mutex_unlock(work_mtx);
}

/**
 * Called from PTHREAD_IO when select() or poll() determines that the file
 * descriptor is ready to be written to.
 */
static int bgp_process_writes(struct thread *thread)
{
	static struct peer *peer;
	peer = THREAD_ARG(thread);
	uint16_t status;

	if (peer->fd < 0)
		return -1;

	struct frr_pthread *fpt = frr_pthread_get(PTHREAD_IO);

	bool reschedule;
	pthread_mutex_lock(&peer->io_mtx);
	{
		status = bgp_write(peer);
		reschedule = (stream_fifo_head(peer->obuf) != NULL);
	}
	pthread_mutex_unlock(&peer->io_mtx);

	if (CHECK_FLAG(status, BGP_IO_TRANS_ERR)) { /* no problem */
	}

	if (CHECK_FLAG(status, BGP_IO_FATAL_ERR))
		reschedule = false; // problem

	if (reschedule) {
		thread_add_write(fpt->master, bgp_process_writes, peer,
				 peer->fd, &peer->t_write);
		thread_add_background(bm->master, bgp_generate_updgrp_packets,
				      peer, 0,
				      &peer->t_generate_updgrp_packets);
	}

	return 0;
}

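/*
 * Producer-side sketch (illustrative; `packet` is a placeholder stream):
 * the main thread enqueues outbound packets roughly as follows, after
 * which bgp_process_writes() drains peer->obuf on the I/O pthread.
 *
 *	pthread_mutex_lock(&peer->io_mtx);
 *	stream_fifo_push(peer->obuf, packet);
 *	pthread_mutex_unlock(&peer->io_mtx);
 *	bgp_writes_on(peer);
 */
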
/**
 * Called from PTHREAD_IO when select() or poll() determines that the file
 * descriptor is ready to be read from.
 */
static int bgp_process_reads(struct thread *thread)
{
	static struct peer *peer;
	peer = THREAD_ARG(thread);
	uint16_t status;

	if (peer->fd < 0)
		return -1;

	struct frr_pthread *fpt = frr_pthread_get(PTHREAD_IO);

	bool reschedule = true;

	// execute read
	pthread_mutex_lock(&peer->io_mtx);
	{
		status = bgp_read(peer);
	}
	pthread_mutex_unlock(&peer->io_mtx);

	// check results of read
	bool header_valid = true;

	if (CHECK_FLAG(status, BGP_IO_TRANS_ERR)) { /* no problem */
	}

	if (CHECK_FLAG(status, BGP_IO_FATAL_ERR))
		reschedule = false; // problem

	if (CHECK_FLAG(status, BGP_IO_READ_HEADER)) {
		header_valid = validate_header(peer);
		if (!header_valid) {
			bgp_size_t packetsize =
				MIN((int)stream_get_endp(peer->ibuf_work),
				    BGP_MAX_PACKET_SIZE);
			memcpy(peer->last_reset_cause, peer->ibuf_work->data,
			       packetsize);
			peer->last_reset_cause_size = packetsize;
			// We're tearing the session down, no point in
			// rescheduling. Additionally, bgp_read() will use the
			// TLV if it's present to determine how much to read;
			// if this is corrupt, we'll crash the program.
			reschedule = false;
		}
	}

	// if we read a full packet, push it onto peer->ibuf, reset our WiP
	// buffer and schedule a job to process it on the main thread
	if (header_valid && CHECK_FLAG(status, BGP_IO_READ_FULLPACKET)) {
		pthread_mutex_lock(&peer->io_mtx);
		{
			stream_fifo_push(peer->ibuf,
					 stream_dup(peer->ibuf_work));
		}
		pthread_mutex_unlock(&peer->io_mtx);
		stream_reset(peer->ibuf_work);
		assert(stream_get_endp(peer->ibuf_work) == 0);

		thread_add_background(bm->master, bgp_process_packet, peer, 0,
				      &peer->t_process_packet);
	}

	if (reschedule)
		thread_add_read(fpt->master, bgp_process_reads, peer, peer->fd,
				&peer->t_read);

	return 0;
}

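/*
 * Consumer-side note (illustrative): bgp_process_packet(), scheduled above
 * onto the main thread, is expected to pop completed packets off
 * peer->ibuf while holding peer->io_mtx; see bgpd/bgp_packet.c.
 */
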
/**
 * Flush peer output buffer.
 *
 * This function pops packets off of peer->obuf and writes them to peer->fd.
 * The number of packets written is equal to the minimum of peer->wpkt_quanta
 * and the number of packets on the output buffer, unless an error occurs.
 *
 * If write() returns an error, the appropriate FSM event is generated.
 *
 * The return value is a bitfield of the generic i/o status codes defined
 * above (and may be zero).
 */
static uint16_t bgp_write(struct peer *peer)
{
	u_char type;
	struct stream *s;
	int num;
	int update_last_write = 0;
	unsigned int count = 0;
	unsigned int oc = 0;
	uint16_t status = 0;

	while (count < peer->bgp->wpkt_quanta
	       && (s = stream_fifo_head(peer->obuf))) {
		int writenum;
		do {
			writenum = stream_get_endp(s) - stream_get_getp(s);
			num = write(peer->fd, STREAM_PNT(s), writenum);

			if (num < 0) {
				if (!ERRNO_IO_RETRY(errno)) {
					BGP_EVENT_ADD(peer, TCP_fatal_error);
					SET_FLAG(status, BGP_IO_FATAL_ERR);
				} else {
					SET_FLAG(status, BGP_IO_TRANS_ERR);
				}

				goto done;
			} else if (num != writenum) // incomplete write
				stream_forward_getp(s, num);

		} while (num != writenum);

		/* Retrieve BGP packet type. */
		stream_set_getp(s, BGP_MARKER_SIZE + 2);
		type = stream_getc(s);

		switch (type) {
		case BGP_MSG_OPEN:
			peer->open_out++;
			break;
		case BGP_MSG_UPDATE:
			peer->update_out++;
			break;
		case BGP_MSG_NOTIFY:
			peer->notify_out++;
			/* Double start timer. */
			peer->v_start *= 2;

			/* Overflow check. */
			if (peer->v_start >= (60 * 2))
				peer->v_start = (60 * 2);

			/* Handle Graceful Restart case where the state changes
			 * to Connect instead of Idle */
			/* Flush any existing events */
			BGP_EVENT_ADD(peer, BGP_Stop);
			goto done;

		case BGP_MSG_KEEPALIVE:
			peer->keepalive_out++;
			break;
		case BGP_MSG_ROUTE_REFRESH_NEW:
		case BGP_MSG_ROUTE_REFRESH_OLD:
			peer->refresh_out++;
			break;
		case BGP_MSG_CAPABILITY:
			peer->dynamic_cap_out++;
			break;
		}

		count++;

		stream_free(stream_fifo_pop(peer->obuf));
		update_last_write = 1;
	}

done: {
	/* Update last_update if UPDATEs were written. */
	if (peer->update_out > oc)
		peer->last_update = bgp_clock();

	/* If we TXed any flavor of packet update last_write */
	if (update_last_write)
		peer->last_write = bgp_clock();
}

	return status;
}

/**
 * Reads <= 1 packet worth of data from peer->fd into peer->ibuf_work.
 *
 * @return bitfield of i/o status codes; BGP_IO_READ_FULLPACKET is set
 *         when a full packet has been read
 */
static uint16_t bgp_read(struct peer *peer)
{
	int readsize; // how many bytes we want to read
	int nbytes;   // how many bytes we actually read
	bool have_header = false;
	uint16_t status = 0;

	if (stream_get_endp(peer->ibuf_work) < BGP_HEADER_SIZE)
		readsize = BGP_HEADER_SIZE - stream_get_endp(peer->ibuf_work);
	else {
		// retrieve packet length from tlv and compute # bytes we
		// still need
		u_int16_t mlen =
			stream_getw_from(peer->ibuf_work, BGP_MARKER_SIZE);
		readsize = mlen - stream_get_endp(peer->ibuf_work);
		have_header = true;
	}

	nbytes = stream_read_try(peer->ibuf_work, peer->fd, readsize);

	if (nbytes <= 0) // handle errors
	{
		switch (nbytes) {
		case -1: // fatal error; tear down the session
			zlog_err("%s [Error] bgp_read_packet error: %s",
				 peer->host, safe_strerror(errno));

			if (peer->status == Established) {
				if (CHECK_FLAG(peer->sflags,
					       PEER_STATUS_NSF_MODE)) {
					peer->last_reset =
						PEER_DOWN_NSF_CLOSE_SESSION;
					SET_FLAG(peer->sflags,
						 PEER_STATUS_NSF_WAIT);
				} else
					peer->last_reset =
						PEER_DOWN_CLOSE_SESSION;
			}

			BGP_EVENT_ADD(peer, TCP_fatal_error);
			SET_FLAG(status, BGP_IO_FATAL_ERR);
			break;

		case 0: // TCP session closed
			if (bgp_debug_neighbor_events(peer))
				zlog_debug(
					"%s [Event] BGP connection closed fd %d",
					peer->host, peer->fd);

			if (peer->status == Established) {
				if (CHECK_FLAG(peer->sflags,
					       PEER_STATUS_NSF_MODE)) {
					peer->last_reset =
						PEER_DOWN_NSF_CLOSE_SESSION;
					SET_FLAG(peer->sflags,
						 PEER_STATUS_NSF_WAIT);
				} else
					peer->last_reset =
						PEER_DOWN_CLOSE_SESSION;
			}

			BGP_EVENT_ADD(peer, TCP_connection_closed);
			SET_FLAG(status, BGP_IO_FATAL_ERR);
			break;

		case -2: // temporary error; come back later
			SET_FLAG(status, BGP_IO_TRANS_ERR);
			break;
		default:
			break;
		}

		return status;
	}

	// If we didn't have the header before read(), and now we do, set the
	// appropriate flag. The caller must validate the header for us.
	if (!have_header
	    && stream_get_endp(peer->ibuf_work) >= BGP_HEADER_SIZE) {
		SET_FLAG(status, BGP_IO_READ_HEADER);
		have_header = true;
	}

	// If we read the # of bytes specified in the tlv, we have read a
	// full packet.
	//
	// Note that the header may not have been validated here. This flag
	// means ONLY that we read the # of bytes specified in the header; if
	// the header is not valid, the packet MUST NOT be processed further.
	if (have_header && (stream_getw_from(peer->ibuf_work, BGP_MARKER_SIZE)
			    == stream_get_endp(peer->ibuf_work)))
		SET_FLAG(status, BGP_IO_READ_FULLPACKET);

	return status;
}

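/*
 * For reference, the BGP message header layout per RFC 4271 (diagram is
 * illustrative; offsets in bytes):
 *
 *	0            16    18   19
 *	+------------+-----+----+
 *	|   marker   | len |type|   marker = 16 bytes of 0xff
 *	+------------+-----+----+   len    = total message length (2 bytes)
 *	                            type   = message type code (1 byte)
 */
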
/*
 * Called after we have read a BGP packet header. Validates marker, message
 * type and packet length. If any of these aren't correct, sends a notify.
 */
static bool validate_header(struct peer *peer)
{
	u_int16_t size, type;

	static uint8_t marker[BGP_MARKER_SIZE] = {
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff};

	if (memcmp(marker, peer->ibuf_work->data, BGP_MARKER_SIZE) != 0) {
		bgp_notify_send(peer, BGP_NOTIFY_HEADER_ERR,
				BGP_NOTIFY_HEADER_NOT_SYNC);
		return false;
	}

	/* Get size and type. */
	size = stream_getw_from(peer->ibuf_work, BGP_MARKER_SIZE);
	type = stream_getc_from(peer->ibuf_work, BGP_MARKER_SIZE + 2);

	/* BGP type check. */
	if (type != BGP_MSG_OPEN && type != BGP_MSG_UPDATE
	    && type != BGP_MSG_NOTIFY && type != BGP_MSG_KEEPALIVE
	    && type != BGP_MSG_ROUTE_REFRESH_NEW
	    && type != BGP_MSG_ROUTE_REFRESH_OLD
	    && type != BGP_MSG_CAPABILITY) {
		if (bgp_debug_neighbor_events(peer))
			zlog_debug("%s unknown message type 0x%02x",
				   peer->host, type);

		bgp_notify_send_with_data(peer, BGP_NOTIFY_HEADER_ERR,
					  BGP_NOTIFY_HEADER_BAD_MESTYPE,
					  (u_char *)&type, 1);
		return false;
	}

	/* Minimum packet length check. */
	if ((size < BGP_HEADER_SIZE) || (size > BGP_MAX_PACKET_SIZE)
	    || (type == BGP_MSG_OPEN && size < BGP_MSG_OPEN_MIN_SIZE)
	    || (type == BGP_MSG_UPDATE && size < BGP_MSG_UPDATE_MIN_SIZE)
	    || (type == BGP_MSG_NOTIFY && size < BGP_MSG_NOTIFY_MIN_SIZE)
	    || (type == BGP_MSG_KEEPALIVE && size != BGP_MSG_KEEPALIVE_MIN_SIZE)
	    || (type == BGP_MSG_ROUTE_REFRESH_NEW
		&& size < BGP_MSG_ROUTE_REFRESH_MIN_SIZE)
	    || (type == BGP_MSG_ROUTE_REFRESH_OLD
		&& size < BGP_MSG_ROUTE_REFRESH_MIN_SIZE)
	    || (type == BGP_MSG_CAPABILITY
		&& size < BGP_MSG_CAPABILITY_MIN_SIZE)) {
		if (bgp_debug_neighbor_events(peer))
			zlog_debug("%s bad message length - %d for %s",
				   peer->host, size,
				   type == 128 ? "ROUTE-REFRESH"
					       : bgp_type_str[(int)type]);

		bgp_notify_send_with_data(peer, BGP_NOTIFY_HEADER_ERR,
					  BGP_NOTIFY_HEADER_BAD_MESLEN,
					  (u_char *)&size, 2);
		return false;
	}

	return true;
}