]>
Commit | Line | Data |
---|---|---|
958b450c | 1 | /* BGP I/O. |
51abb4b4 | 2 | * Implements packet I/O in a pthread. |
958b450c | 3 | * Copyright (C) 2017 Cumulus Networks |
51abb4b4 | 4 | * Quentin Young |
958b450c QY |
5 | * |
6 | * This program is free software; you can redistribute it and/or modify | |
7 | * it under the terms of the GNU General Public License as published by | |
8 | * the Free Software Foundation; either version 2 of the License, or | |
9 | * (at your option) any later version. | |
10 | * | |
11 | * This program is distributed in the hope that it will be useful, but | |
12 | * WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 | * General Public License for more details. | |
15 | * | |
16 | * You should have received a copy of the GNU General Public License | |
17 | * along with this program; see the file COPYING; if not, write to the | |
18 | * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, | |
19 | * MA 02110-1301 USA | |
56257a44 QY |
20 | */ |
21 | ||
95158b0c | 22 | /* clang-format off */ |
42cf651e | 23 | #include <zebra.h> |
95158b0c | 24 | #include <pthread.h> // for pthread_mutex_unlock, pthread_mutex_lock |
093279cd | 25 | #include <sys/uio.h> // for writev |
56257a44 | 26 | |
1ac267a2 | 27 | #include "frr_pthread.h" |
95158b0c QY |
28 | #include "linklist.h" // for list_delete, list_delete_all_node, lis... |
29 | #include "log.h" // for zlog_debug, safe_strerror, zlog_err | |
30 | #include "memory.h" // for MTYPE_TMP, XCALLOC, XFREE | |
31 | #include "network.h" // for ERRNO_IO_RETRY | |
32 | #include "stream.h" // for stream_get_endp, stream_getw_from, str... | |
74ffbfe6 | 33 | #include "ringbuf.h" // for ringbuf_remain, ringbuf_peek, ringbuf_... |
95158b0c QY |
34 | #include "thread.h" // for THREAD_OFF, THREAD_ARG, thread, thread... |
35 | #include "zassert.h" // for assert | |
56257a44 | 36 | |
42cf651e | 37 | #include "bgpd/bgp_io.h" |
95158b0c | 38 | #include "bgpd/bgp_debug.h" // for bgp_debug_neighbor_events, bgp_type_str |
14454c9f | 39 | #include "bgpd/bgp_errors.h" // for expanded error reference information |
95158b0c QY |
40 | #include "bgpd/bgp_fsm.h" // for BGP_EVENT_ADD, bgp_event |
41 | #include "bgpd/bgp_packet.h" // for bgp_notify_send_with_data, bgp_notify... | |
42 | #include "bgpd/bgpd.h" // for peer, BGP_MARKER_SIZE, bgp_master, bm | |
43 | /* clang-format on */ | |
56257a44 | 44 | |
/* forward declarations */
static uint16_t bgp_write(struct peer *peer);
static uint16_t bgp_read(struct peer *peer);
static int bgp_process_writes(struct thread *thread);
static int bgp_process_reads(struct thread *thread);
static bool validate_header(struct peer *peer);

/*
 * Generic I/O status codes.
 *
 * These are bit flags OR'ed into the uint16_t status returned by bgp_read()
 * and bgp_write(); a status of zero means neither condition occurred.
 */
#define BGP_IO_TRANS_ERR (1 << 0) // EAGAIN or similar occurred
#define BGP_IO_FATAL_ERR (1 << 1) // some kind of fatal TCP error
56257a44 | 55 | |
a715eab3 | 56 | /* Thread external API ----------------------------------------------------- */ |
56257a44 | 57 | |
424ab01d | 58 | void bgp_writes_on(struct peer *peer) |
56257a44 | 59 | { |
1ac267a2 | 60 | struct frr_pthread *fpt = bgp_pth_io; |
a715eab3 | 61 | assert(fpt->running); |
f09a656d | 62 | |
424ab01d QY |
63 | assert(peer->status != Deleted); |
64 | assert(peer->obuf); | |
65 | assert(peer->ibuf); | |
66 | assert(peer->ibuf_work); | |
387f984e QY |
67 | assert(!peer->t_connect_check_r); |
68 | assert(!peer->t_connect_check_w); | |
424ab01d | 69 | assert(peer->fd); |
56257a44 | 70 | |
b750b0ba QY |
71 | thread_add_write(fpt->master, bgp_process_writes, peer, peer->fd, |
72 | &peer->t_write); | |
73 | SET_FLAG(peer->thread_flags, PEER_THREAD_WRITES_ON); | |
424ab01d | 74 | } |
56257a44 | 75 | |
424ab01d QY |
76 | void bgp_writes_off(struct peer *peer) |
77 | { | |
1ac267a2 | 78 | struct frr_pthread *fpt = bgp_pth_io; |
a715eab3 | 79 | assert(fpt->running); |
151044ce | 80 | |
b750b0ba QY |
81 | thread_cancel_async(fpt->master, &peer->t_write, NULL); |
82 | THREAD_OFF(peer->t_generate_updgrp_packets); | |
56257a44 | 83 | |
b750b0ba | 84 | UNSET_FLAG(peer->thread_flags, PEER_THREAD_WRITES_ON); |
56257a44 QY |
85 | } |
86 | ||
424ab01d | 87 | void bgp_reads_on(struct peer *peer) |
56257a44 | 88 | { |
1ac267a2 | 89 | struct frr_pthread *fpt = bgp_pth_io; |
a715eab3 | 90 | assert(fpt->running); |
f09a656d | 91 | |
424ab01d QY |
92 | assert(peer->status != Deleted); |
93 | assert(peer->ibuf); | |
94 | assert(peer->fd); | |
95 | assert(peer->ibuf_work); | |
424ab01d | 96 | assert(peer->obuf); |
387f984e QY |
97 | assert(!peer->t_connect_check_r); |
98 | assert(!peer->t_connect_check_w); | |
424ab01d QY |
99 | assert(peer->fd); |
100 | ||
b750b0ba QY |
101 | thread_add_read(fpt->master, bgp_process_reads, peer, peer->fd, |
102 | &peer->t_read); | |
103 | ||
104 | SET_FLAG(peer->thread_flags, PEER_THREAD_READS_ON); | |
56257a44 QY |
105 | } |
106 | ||
424ab01d | 107 | void bgp_reads_off(struct peer *peer) |
56257a44 | 108 | { |
1ac267a2 | 109 | struct frr_pthread *fpt = bgp_pth_io; |
a715eab3 | 110 | assert(fpt->running); |
151044ce | 111 | |
b750b0ba QY |
112 | thread_cancel_async(fpt->master, &peer->t_read, NULL); |
113 | THREAD_OFF(peer->t_process_packet); | |
56257a44 | 114 | |
b750b0ba | 115 | UNSET_FLAG(peer->thread_flags, PEER_THREAD_READS_ON); |
56257a44 QY |
116 | } |
117 | ||
a715eab3 | 118 | /* Thread internal functions ----------------------------------------------- */ |
51abb4b4 | 119 | |
a715eab3 | 120 | /* |
51abb4b4 | 121 | * Called from I/O pthread when a file descriptor has become ready for writing. |
424ab01d QY |
122 | */ |
123 | static int bgp_process_writes(struct thread *thread) | |
56257a44 | 124 | { |
424ab01d QY |
125 | static struct peer *peer; |
126 | peer = THREAD_ARG(thread); | |
127 | uint16_t status; | |
b750b0ba | 128 | bool reschedule; |
bbac44ac | 129 | bool fatal = false; |
424ab01d QY |
130 | |
131 | if (peer->fd < 0) | |
132 | return -1; | |
133 | ||
1ac267a2 | 134 | struct frr_pthread *fpt = bgp_pth_io; |
424ab01d | 135 | |
00dffa8c | 136 | frr_with_mutex(&peer->io_mtx) { |
424ab01d QY |
137 | status = bgp_write(peer); |
138 | reschedule = (stream_fifo_head(peer->obuf) != NULL); | |
139 | } | |
56257a44 | 140 | |
a715eab3 QY |
141 | /* no problem */ |
142 | if (CHECK_FLAG(status, BGP_IO_TRANS_ERR)) { | |
56257a44 | 143 | } |
56257a44 | 144 | |
a715eab3 | 145 | /* problem */ |
bbac44ac | 146 | if (CHECK_FLAG(status, BGP_IO_FATAL_ERR)) { |
a715eab3 | 147 | reschedule = false; |
bbac44ac QY |
148 | fatal = true; |
149 | } | |
424ab01d QY |
150 | |
151 | if (reschedule) { | |
152 | thread_add_write(fpt->master, bgp_process_writes, peer, | |
153 | peer->fd, &peer->t_write); | |
b785b7ad QY |
154 | } else if (!fatal) { |
155 | BGP_TIMER_ON(peer->t_generate_updgrp_packets, | |
156 | bgp_generate_updgrp_packets, 0); | |
424ab01d QY |
157 | } |
158 | ||
159 | return 0; | |
56257a44 QY |
160 | } |
161 | ||
a715eab3 | 162 | /* |
51abb4b4 QY |
163 | * Called from I/O pthread when a file descriptor has become ready for reading, |
164 | * or has hung up. | |
9eb217ff QY |
165 | * |
166 | * We read as much data as possible, process as many packets as we can and | |
167 | * place them on peer->ibuf for secondary processing by the main thread. | |
56257a44 | 168 | */ |
424ab01d | 169 | static int bgp_process_reads(struct thread *thread) |
56257a44 | 170 | { |
e11eeb8c QY |
171 | /* clang-format off */ |
172 | static struct peer *peer; // peer to read from | |
173 | uint16_t status; // bgp_read status code | |
174 | bool more = true; // whether we got more data | |
175 | bool fatal = false; // whether fatal error occurred | |
176 | bool added_pkt = false; // whether we pushed onto ->ibuf | |
e11eeb8c | 177 | /* clang-format on */ |
9eb217ff | 178 | |
424ab01d | 179 | peer = THREAD_ARG(thread); |
424ab01d | 180 | |
97b4a0ec | 181 | if (peer->fd < 0 || bm->terminating) |
424ab01d QY |
182 | return -1; |
183 | ||
1ac267a2 | 184 | struct frr_pthread *fpt = bgp_pth_io; |
424ab01d | 185 | |
00dffa8c | 186 | frr_with_mutex(&peer->io_mtx) { |
424ab01d QY |
187 | status = bgp_read(peer); |
188 | } | |
424ab01d | 189 | |
9eb217ff QY |
190 | /* error checking phase */ |
191 | if (CHECK_FLAG(status, BGP_IO_TRANS_ERR)) { | |
192 | /* no problem; just don't process packets */ | |
193 | more = false; | |
194 | } | |
424ab01d | 195 | |
9eb217ff QY |
196 | if (CHECK_FLAG(status, BGP_IO_FATAL_ERR)) { |
197 | /* problem; tear down session */ | |
198 | more = false; | |
199 | fatal = true; | |
56257a44 | 200 | } |
56257a44 | 201 | |
9eb217ff QY |
202 | while (more) { |
203 | /* static buffer for transferring packets */ | |
204 | static unsigned char pktbuf[BGP_MAX_PACKET_SIZE]; | |
205 | /* shorter alias to peer's input buffer */ | |
74ffbfe6 | 206 | struct ringbuf *ibw = peer->ibuf_work; |
9eb217ff | 207 | /* packet size as given by header */ |
74ffbfe6 | 208 | uint16_t pktsize = 0; |
9eb217ff QY |
209 | |
210 | /* check that we have enough data for a header */ | |
74ffbfe6 | 211 | if (ringbuf_remain(ibw) < BGP_HEADER_SIZE) |
9eb217ff | 212 | break; |
424ab01d | 213 | |
a2b6e694 | 214 | /* check that header is valid */ |
215 | if (!validate_header(peer)) { | |
9eb217ff QY |
216 | fatal = true; |
217 | break; | |
424ab01d | 218 | } |
424ab01d | 219 | |
9eb217ff | 220 | /* header is valid; retrieve packet size */ |
74ffbfe6 QY |
221 | ringbuf_peek(ibw, BGP_MARKER_SIZE, &pktsize, sizeof(pktsize)); |
222 | ||
223 | pktsize = ntohs(pktsize); | |
424ab01d | 224 | |
9eb217ff QY |
225 | /* if this fails we are seriously screwed */ |
226 | assert(pktsize <= BGP_MAX_PACKET_SIZE); | |
227 | ||
a715eab3 QY |
228 | /* |
229 | * If we have that much data, chuck it into its own | |
230 | * stream and append to input queue for processing. | |
231 | */ | |
74ffbfe6 | 232 | if (ringbuf_remain(ibw) >= pktsize) { |
9eb217ff | 233 | struct stream *pkt = stream_new(pktsize); |
74ffbfe6 | 234 | assert(ringbuf_get(ibw, pktbuf, pktsize) == pktsize); |
9eb217ff QY |
235 | stream_put(pkt, pktbuf, pktsize); |
236 | ||
00dffa8c | 237 | frr_with_mutex(&peer->io_mtx) { |
9eb217ff QY |
238 | stream_fifo_push(peer->ibuf, pkt); |
239 | } | |
9eb217ff QY |
240 | |
241 | added_pkt = true; | |
242 | } else | |
243 | break; | |
244 | } | |
245 | ||
9eb217ff QY |
246 | /* handle invalid header */ |
247 | if (fatal) { | |
9eb217ff | 248 | /* wipe buffer just in case someone screwed up */ |
74ffbfe6 | 249 | ringbuf_wipe(peer->ibuf_work); |
9eb217ff | 250 | } else { |
421a7dfc QY |
251 | assert(ringbuf_space(peer->ibuf_work) >= BGP_MAX_PACKET_SIZE); |
252 | ||
424ab01d QY |
253 | thread_add_read(fpt->master, bgp_process_reads, peer, peer->fd, |
254 | &peer->t_read); | |
9eb217ff | 255 | if (added_pkt) |
7a86aa5a QY |
256 | thread_add_timer_msec(bm->master, bgp_process_packet, |
257 | peer, 0, &peer->t_process_packet); | |
9eb217ff | 258 | } |
424ab01d QY |
259 | |
260 | return 0; | |
56257a44 QY |
261 | } |
262 | ||
a715eab3 | 263 | /* |
56257a44 QY |
264 | * Flush peer output buffer. |
265 | * | |
266 | * This function pops packets off of peer->obuf and writes them to peer->fd. | |
267 | * The amount of packets written is equal to the minimum of peer->wpkt_quanta | |
424ab01d | 268 | * and the number of packets on the output buffer, unless an error occurs. |
56257a44 QY |
269 | * |
270 | * If write() returns an error, the appropriate FSM event is generated. | |
271 | * | |
272 | * The return value is equal to the number of packets written | |
273 | * (which may be zero). | |
274 | */ | |
424ab01d | 275 | static uint16_t bgp_write(struct peer *peer) |
56257a44 | 276 | { |
d7c0a89a | 277 | uint8_t type; |
56257a44 | 278 | struct stream *s; |
56257a44 | 279 | int update_last_write = 0; |
093279cd | 280 | unsigned int count; |
eb2277cf | 281 | uint32_t uo = 0; |
424ab01d | 282 | uint16_t status = 0; |
555e09d4 | 283 | uint32_t wpkt_quanta_old; |
56257a44 | 284 | |
093279cd QY |
285 | int writenum = 0; |
286 | int num; | |
287 | unsigned int iovsz; | |
288 | unsigned int strmsz; | |
289 | unsigned int total_written; | |
290 | ||
996c9314 LB |
291 | wpkt_quanta_old = atomic_load_explicit(&peer->bgp->wpkt_quanta, |
292 | memory_order_relaxed); | |
093279cd QY |
293 | struct stream *ostreams[wpkt_quanta_old]; |
294 | struct stream **streams = ostreams; | |
295 | struct iovec iov[wpkt_quanta_old]; | |
296 | ||
297 | s = stream_fifo_head(peer->obuf); | |
298 | ||
299 | if (!s) | |
300 | goto done; | |
301 | ||
302 | count = iovsz = 0; | |
303 | while (count < wpkt_quanta_old && iovsz < array_size(iov) && s) { | |
304 | ostreams[iovsz] = s; | |
305 | iov[iovsz].iov_base = stream_pnt(s); | |
306 | iov[iovsz].iov_len = STREAM_READABLE(s); | |
307 | writenum += STREAM_READABLE(s); | |
308 | s = s->next; | |
309 | ++iovsz; | |
310 | ++count; | |
311 | } | |
312 | ||
313 | strmsz = iovsz; | |
314 | total_written = 0; | |
315 | ||
316 | do { | |
317 | num = writev(peer->fd, iov, iovsz); | |
318 | ||
319 | if (num < 0) { | |
320 | if (!ERRNO_IO_RETRY(errno)) { | |
321 | BGP_EVENT_ADD(peer, TCP_fatal_error); | |
322 | SET_FLAG(status, BGP_IO_FATAL_ERR); | |
323 | } else { | |
324 | SET_FLAG(status, BGP_IO_TRANS_ERR); | |
325 | } | |
326 | ||
327 | break; | |
328 | } else if (num != writenum) { | |
329 | unsigned int msg_written = 0; | |
330 | unsigned int ic = iovsz; | |
331 | ||
332 | for (unsigned int i = 0; i < ic; i++) { | |
333 | size_t ss = iov[i].iov_len; | |
555e09d4 | 334 | |
093279cd QY |
335 | if (ss > (unsigned int) num) |
336 | break; | |
56257a44 | 337 | |
093279cd QY |
338 | msg_written++; |
339 | iovsz--; | |
340 | writenum -= ss; | |
341 | num -= ss; | |
342 | } | |
56257a44 | 343 | |
093279cd | 344 | total_written += msg_written; |
56257a44 | 345 | |
18555366 QY |
346 | assert(total_written < count); |
347 | ||
093279cd QY |
348 | memmove(&iov, &iov[msg_written], |
349 | sizeof(iov[0]) * iovsz); | |
350 | streams = &streams[msg_written]; | |
351 | stream_forward_getp(streams[0], num); | |
352 | iov[0].iov_base = stream_pnt(streams[0]); | |
353 | iov[0].iov_len = STREAM_READABLE(streams[0]); | |
354 | ||
355 | writenum -= num; | |
356 | num = 0; | |
357 | assert(writenum > 0); | |
358 | } else { | |
359 | total_written = strmsz; | |
360 | } | |
361 | ||
362 | } while (num != writenum); | |
363 | ||
364 | /* Handle statistics */ | |
365 | for (unsigned int i = 0; i < total_written; i++) { | |
366 | s = stream_fifo_pop(peer->obuf); | |
367 | ||
368 | assert(s == ostreams[i]); | |
56257a44 QY |
369 | |
370 | /* Retrieve BGP packet type. */ | |
371 | stream_set_getp(s, BGP_MARKER_SIZE + 2); | |
372 | type = stream_getc(s); | |
373 | ||
374 | switch (type) { | |
375 | case BGP_MSG_OPEN: | |
1588f6f4 QY |
376 | atomic_fetch_add_explicit(&peer->open_out, 1, |
377 | memory_order_relaxed); | |
56257a44 QY |
378 | break; |
379 | case BGP_MSG_UPDATE: | |
1588f6f4 QY |
380 | atomic_fetch_add_explicit(&peer->update_out, 1, |
381 | memory_order_relaxed); | |
eb2277cf | 382 | uo++; |
56257a44 QY |
383 | break; |
384 | case BGP_MSG_NOTIFY: | |
1588f6f4 QY |
385 | atomic_fetch_add_explicit(&peer->notify_out, 1, |
386 | memory_order_relaxed); | |
56257a44 QY |
387 | /* Double start timer. */ |
388 | peer->v_start *= 2; | |
389 | ||
390 | /* Overflow check. */ | |
391 | if (peer->v_start >= (60 * 2)) | |
392 | peer->v_start = (60 * 2); | |
393 | ||
a715eab3 QY |
394 | /* |
395 | * Handle Graceful Restart case where the state changes | |
396 | * to Connect instead of Idle. | |
397 | */ | |
56257a44 QY |
398 | BGP_EVENT_ADD(peer, BGP_Stop); |
399 | goto done; | |
400 | ||
401 | case BGP_MSG_KEEPALIVE: | |
1588f6f4 QY |
402 | atomic_fetch_add_explicit(&peer->keepalive_out, 1, |
403 | memory_order_relaxed); | |
56257a44 QY |
404 | break; |
405 | case BGP_MSG_ROUTE_REFRESH_NEW: | |
406 | case BGP_MSG_ROUTE_REFRESH_OLD: | |
1588f6f4 QY |
407 | atomic_fetch_add_explicit(&peer->refresh_out, 1, |
408 | memory_order_relaxed); | |
56257a44 QY |
409 | break; |
410 | case BGP_MSG_CAPABILITY: | |
1588f6f4 QY |
411 | atomic_fetch_add_explicit(&peer->dynamic_cap_out, 1, |
412 | memory_order_relaxed); | |
56257a44 QY |
413 | break; |
414 | } | |
415 | ||
093279cd QY |
416 | stream_free(s); |
417 | ostreams[i] = NULL; | |
56257a44 QY |
418 | update_last_write = 1; |
419 | } | |
420 | ||
421 | done : { | |
eb2277cf LB |
422 | /* |
423 | * Update last_update if UPDATEs were written. | |
424 | * Note: that these are only updated at end, | |
425 | * not per message (i.e., per loop) | |
426 | */ | |
427 | if (uo) | |
1588f6f4 QY |
428 | atomic_store_explicit(&peer->last_update, bgp_clock(), |
429 | memory_order_relaxed); | |
56257a44 | 430 | |
5c075a90 | 431 | /* If we TXed any flavor of packet */ |
56257a44 | 432 | if (update_last_write) |
1588f6f4 QY |
433 | atomic_store_explicit(&peer->last_write, bgp_clock(), |
434 | memory_order_relaxed); | |
56257a44 QY |
435 | } |
436 | ||
424ab01d QY |
437 | return status; |
438 | } | |
439 | ||
a715eab3 | 440 | /* |
51abb4b4 | 441 | * Reads a chunk of data from peer->fd into peer->ibuf_work. |
424ab01d | 442 | * |
51abb4b4 | 443 | * @return status flag (see top-of-file) |
424ab01d QY |
444 | */ |
445 | static uint16_t bgp_read(struct peer *peer) | |
446 | { | |
b750b0ba QY |
447 | size_t readsize; // how many bytes we want to read |
448 | ssize_t nbytes; // how many bytes we actually read | |
424ab01d | 449 | uint16_t status = 0; |
74ffbfe6 | 450 | static uint8_t ibw[BGP_MAX_PACKET_SIZE * BGP_READ_PACKET_MAX]; |
424ab01d | 451 | |
74ffbfe6 QY |
452 | readsize = MIN(ringbuf_space(peer->ibuf_work), sizeof(ibw)); |
453 | nbytes = read(peer->fd, ibw, readsize); | |
424ab01d | 454 | |
74ffbfe6 QY |
455 | /* EAGAIN or EWOULDBLOCK; come back later */ |
456 | if (nbytes < 0 && ERRNO_IO_RETRY(errno)) { | |
457 | SET_FLAG(status, BGP_IO_TRANS_ERR); | |
996c9314 | 458 | /* Fatal error; tear down session */ |
74ffbfe6 | 459 | } else if (nbytes < 0) { |
e50f7cfd | 460 | flog_err(EC_BGP_UPDATE_RCV, |
1c50c1c0 QY |
461 | "%s [Error] bgp_read_packet error: %s", peer->host, |
462 | safe_strerror(errno)); | |
85145b62 QY |
463 | |
464 | if (peer->status == Established) { | |
36235319 QY |
465 | if ((CHECK_FLAG(peer->flags, PEER_FLAG_GRACEFUL_RESTART) |
466 | || CHECK_FLAG(peer->flags, | |
467 | PEER_FLAG_GRACEFUL_RESTART_HELPER)) | |
468 | && CHECK_FLAG(peer->sflags, PEER_STATUS_NSF_MODE)) { | |
85145b62 QY |
469 | peer->last_reset = PEER_DOWN_NSF_CLOSE_SESSION; |
470 | SET_FLAG(peer->sflags, PEER_STATUS_NSF_WAIT); | |
471 | } else | |
472 | peer->last_reset = PEER_DOWN_CLOSE_SESSION; | |
473 | } | |
424ab01d | 474 | |
85145b62 QY |
475 | BGP_EVENT_ADD(peer, TCP_fatal_error); |
476 | SET_FLAG(status, BGP_IO_FATAL_ERR); | |
996c9314 | 477 | /* Received EOF / TCP session closed */ |
74ffbfe6 | 478 | } else if (nbytes == 0) { |
85145b62 QY |
479 | if (bgp_debug_neighbor_events(peer)) |
480 | zlog_debug("%s [Event] BGP connection closed fd %d", | |
36235319 | 481 | peer->host, peer->fd); |
85145b62 QY |
482 | |
483 | if (peer->status == Established) { | |
36235319 QY |
484 | if ((CHECK_FLAG(peer->flags, PEER_FLAG_GRACEFUL_RESTART) |
485 | || CHECK_FLAG(peer->flags, | |
486 | PEER_FLAG_GRACEFUL_RESTART_HELPER)) | |
487 | && CHECK_FLAG(peer->sflags, PEER_STATUS_NSF_MODE)) { | |
85145b62 QY |
488 | peer->last_reset = PEER_DOWN_NSF_CLOSE_SESSION; |
489 | SET_FLAG(peer->sflags, PEER_STATUS_NSF_WAIT); | |
490 | } else | |
491 | peer->last_reset = PEER_DOWN_CLOSE_SESSION; | |
424ab01d QY |
492 | } |
493 | ||
85145b62 QY |
494 | BGP_EVENT_ADD(peer, TCP_connection_closed); |
495 | SET_FLAG(status, BGP_IO_FATAL_ERR); | |
74ffbfe6 QY |
496 | } else { |
497 | assert(ringbuf_put(peer->ibuf_work, ibw, nbytes) | |
498 | == (size_t)nbytes); | |
424ab01d QY |
499 | } |
500 | ||
424ab01d QY |
501 | return status; |
502 | } | |
503 | ||
504 | /* | |
505 | * Called after we have read a BGP packet header. Validates marker, message | |
506 | * type and packet length. If any of these aren't correct, sends a notify. | |
74ffbfe6 QY |
507 | * |
508 | * Assumes that there are at least BGP_HEADER_SIZE readable bytes in the input | |
509 | * buffer. | |
424ab01d QY |
510 | */ |
511 | static bool validate_header(struct peer *peer) | |
512 | { | |
3fe63c29 QY |
513 | uint16_t size; |
514 | uint8_t type; | |
74ffbfe6 | 515 | struct ringbuf *pkt = peer->ibuf_work; |
424ab01d | 516 | |
2b64873d | 517 | static const uint8_t m_correct[BGP_MARKER_SIZE] = { |
74ffbfe6 QY |
518 | 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
519 | 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; | |
520 | uint8_t m_rx[BGP_MARKER_SIZE] = {0x00}; | |
442c9afb | 521 | |
74ffbfe6 QY |
522 | if (ringbuf_peek(pkt, 0, m_rx, BGP_MARKER_SIZE) != BGP_MARKER_SIZE) |
523 | return false; | |
524 | ||
525 | if (memcmp(m_correct, m_rx, BGP_MARKER_SIZE) != 0) { | |
442c9afb QY |
526 | bgp_notify_send(peer, BGP_NOTIFY_HEADER_ERR, |
527 | BGP_NOTIFY_HEADER_NOT_SYNC); | |
528 | return false; | |
529 | } | |
424ab01d | 530 | |
74ffbfe6 QY |
531 | /* Get size and type in network byte order. */ |
532 | ringbuf_peek(pkt, BGP_MARKER_SIZE, &size, sizeof(size)); | |
533 | ringbuf_peek(pkt, BGP_MARKER_SIZE + 2, &type, sizeof(type)); | |
534 | ||
535 | size = ntohs(size); | |
424ab01d QY |
536 | |
537 | /* BGP type check. */ | |
538 | if (type != BGP_MSG_OPEN && type != BGP_MSG_UPDATE | |
539 | && type != BGP_MSG_NOTIFY && type != BGP_MSG_KEEPALIVE | |
540 | && type != BGP_MSG_ROUTE_REFRESH_NEW | |
541 | && type != BGP_MSG_ROUTE_REFRESH_OLD | |
542 | && type != BGP_MSG_CAPABILITY) { | |
3fe63c29 | 543 | if (bgp_debug_neighbor_events(peer)) |
424ab01d QY |
544 | zlog_debug("%s unknown message type 0x%02x", peer->host, |
545 | type); | |
546 | ||
547 | bgp_notify_send_with_data(peer, BGP_NOTIFY_HEADER_ERR, | |
996c9314 LB |
548 | BGP_NOTIFY_HEADER_BAD_MESTYPE, &type, |
549 | 1); | |
424ab01d QY |
550 | return false; |
551 | } | |
552 | ||
3fe63c29 | 553 | /* Minimum packet length check. */ |
424ab01d QY |
554 | if ((size < BGP_HEADER_SIZE) || (size > BGP_MAX_PACKET_SIZE) |
555 | || (type == BGP_MSG_OPEN && size < BGP_MSG_OPEN_MIN_SIZE) | |
556 | || (type == BGP_MSG_UPDATE && size < BGP_MSG_UPDATE_MIN_SIZE) | |
557 | || (type == BGP_MSG_NOTIFY && size < BGP_MSG_NOTIFY_MIN_SIZE) | |
558 | || (type == BGP_MSG_KEEPALIVE && size != BGP_MSG_KEEPALIVE_MIN_SIZE) | |
559 | || (type == BGP_MSG_ROUTE_REFRESH_NEW | |
560 | && size < BGP_MSG_ROUTE_REFRESH_MIN_SIZE) | |
561 | || (type == BGP_MSG_ROUTE_REFRESH_OLD | |
562 | && size < BGP_MSG_ROUTE_REFRESH_MIN_SIZE) | |
563 | || (type == BGP_MSG_CAPABILITY | |
564 | && size < BGP_MSG_CAPABILITY_MIN_SIZE)) { | |
1588f6f4 | 565 | if (bgp_debug_neighbor_events(peer)) { |
424ab01d QY |
566 | zlog_debug("%s bad message length - %d for %s", |
567 | peer->host, size, | |
568 | type == 128 ? "ROUTE-REFRESH" | |
996c9314 | 569 | : bgp_type_str[(int)type]); |
1588f6f4 | 570 | } |
424ab01d | 571 | |
3fe63c29 QY |
572 | uint16_t nsize = htons(size); |
573 | ||
424ab01d QY |
574 | bgp_notify_send_with_data(peer, BGP_NOTIFY_HEADER_ERR, |
575 | BGP_NOTIFY_HEADER_BAD_MESLEN, | |
996c9314 | 576 | (unsigned char *)&nsize, 2); |
424ab01d QY |
577 | return false; |
578 | } | |
579 | ||
580 | return true; | |
56257a44 | 581 | } |