]>
Commit | Line | Data |
---|---|---|
958b450c | 1 | /* BGP I/O. |
51abb4b4 | 2 | * Implements packet I/O in a pthread. |
958b450c | 3 | * Copyright (C) 2017 Cumulus Networks |
51abb4b4 | 4 | * Quentin Young |
958b450c QY |
5 | * |
6 | * This program is free software; you can redistribute it and/or modify | |
7 | * it under the terms of the GNU General Public License as published by | |
8 | * the Free Software Foundation; either version 2 of the License, or | |
9 | * (at your option) any later version. | |
10 | * | |
11 | * This program is distributed in the hope that it will be useful, but | |
12 | * WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 | * General Public License for more details. | |
15 | * | |
16 | * You should have received a copy of the GNU General Public License | |
17 | * along with this program; see the file COPYING; if not, write to the | |
18 | * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, | |
19 | * MA 02110-1301 USA | |
56257a44 QY |
20 | */ |
21 | ||
95158b0c | 22 | /* clang-format off */ |
42cf651e | 23 | #include <zebra.h> |
95158b0c | 24 | #include <pthread.h> // for pthread_mutex_unlock, pthread_mutex_lock |
093279cd | 25 | #include <sys/uio.h> // for writev |
56257a44 | 26 | |
1ac267a2 | 27 | #include "frr_pthread.h" |
95158b0c QY |
28 | #include "linklist.h" // for list_delete, list_delete_all_node, lis... |
29 | #include "log.h" // for zlog_debug, safe_strerror, zlog_err | |
30 | #include "memory.h" // for MTYPE_TMP, XCALLOC, XFREE | |
31 | #include "network.h" // for ERRNO_IO_RETRY | |
32 | #include "stream.h" // for stream_get_endp, stream_getw_from, str... | |
74ffbfe6 | 33 | #include "ringbuf.h" // for ringbuf_remain, ringbuf_peek, ringbuf_... |
50478845 | 34 | #include "thread.h" // for THREAD_OFF, THREAD_ARG, thread... |
95158b0c | 35 | #include "zassert.h" // for assert |
56257a44 | 36 | |
42cf651e | 37 | #include "bgpd/bgp_io.h" |
95158b0c | 38 | #include "bgpd/bgp_debug.h" // for bgp_debug_neighbor_events, bgp_type_str |
14454c9f | 39 | #include "bgpd/bgp_errors.h" // for expanded error reference information |
95158b0c QY |
40 | #include "bgpd/bgp_fsm.h" // for BGP_EVENT_ADD, bgp_event |
41 | #include "bgpd/bgp_packet.h" // for bgp_notify_send_with_data, bgp_notify... | |
c7bb4f00 | 42 | #include "bgpd/bgp_trace.h" // for frrtraces |
95158b0c QY |
43 | #include "bgpd/bgpd.h" // for peer, BGP_MARKER_SIZE, bgp_master, bm |
44 | /* clang-format on */ | |
56257a44 | 45 | |
/* Forward declarations of the file-local I/O routines. */
static uint16_t bgp_write(struct peer *peer);
static uint16_t bgp_read(struct peer *peer);
static int bgp_process_writes(struct thread *thread);
static int bgp_process_reads(struct thread *thread);
static bool validate_header(struct peer *peer);

/*
 * Generic I/O status codes. These are bit flags that bgp_read() and
 * bgp_write() set in the uint16_t status word they return.
 */
#define BGP_IO_TRANS_ERR (1 << 0) /* EAGAIN or similar occurred */
#define BGP_IO_FATAL_ERR (1 << 1) /* some kind of fatal TCP error */
56257a44 | 56 | |
a715eab3 | 57 | /* Thread external API ----------------------------------------------------- */ |
56257a44 | 58 | |
424ab01d | 59 | void bgp_writes_on(struct peer *peer) |
56257a44 | 60 | { |
1ac267a2 | 61 | struct frr_pthread *fpt = bgp_pth_io; |
a715eab3 | 62 | assert(fpt->running); |
f09a656d | 63 | |
424ab01d QY |
64 | assert(peer->status != Deleted); |
65 | assert(peer->obuf); | |
66 | assert(peer->ibuf); | |
67 | assert(peer->ibuf_work); | |
387f984e QY |
68 | assert(!peer->t_connect_check_r); |
69 | assert(!peer->t_connect_check_w); | |
424ab01d | 70 | assert(peer->fd); |
56257a44 | 71 | |
b750b0ba QY |
72 | thread_add_write(fpt->master, bgp_process_writes, peer, peer->fd, |
73 | &peer->t_write); | |
74 | SET_FLAG(peer->thread_flags, PEER_THREAD_WRITES_ON); | |
424ab01d | 75 | } |
56257a44 | 76 | |
424ab01d QY |
77 | void bgp_writes_off(struct peer *peer) |
78 | { | |
1ac267a2 | 79 | struct frr_pthread *fpt = bgp_pth_io; |
a715eab3 | 80 | assert(fpt->running); |
151044ce | 81 | |
b750b0ba QY |
82 | thread_cancel_async(fpt->master, &peer->t_write, NULL); |
83 | THREAD_OFF(peer->t_generate_updgrp_packets); | |
56257a44 | 84 | |
b750b0ba | 85 | UNSET_FLAG(peer->thread_flags, PEER_THREAD_WRITES_ON); |
56257a44 QY |
86 | } |
87 | ||
424ab01d | 88 | void bgp_reads_on(struct peer *peer) |
56257a44 | 89 | { |
1ac267a2 | 90 | struct frr_pthread *fpt = bgp_pth_io; |
a715eab3 | 91 | assert(fpt->running); |
f09a656d | 92 | |
424ab01d QY |
93 | assert(peer->status != Deleted); |
94 | assert(peer->ibuf); | |
95 | assert(peer->fd); | |
96 | assert(peer->ibuf_work); | |
424ab01d | 97 | assert(peer->obuf); |
387f984e QY |
98 | assert(!peer->t_connect_check_r); |
99 | assert(!peer->t_connect_check_w); | |
424ab01d QY |
100 | assert(peer->fd); |
101 | ||
b750b0ba QY |
102 | thread_add_read(fpt->master, bgp_process_reads, peer, peer->fd, |
103 | &peer->t_read); | |
104 | ||
105 | SET_FLAG(peer->thread_flags, PEER_THREAD_READS_ON); | |
56257a44 QY |
106 | } |
107 | ||
424ab01d | 108 | void bgp_reads_off(struct peer *peer) |
56257a44 | 109 | { |
1ac267a2 | 110 | struct frr_pthread *fpt = bgp_pth_io; |
a715eab3 | 111 | assert(fpt->running); |
151044ce | 112 | |
b750b0ba QY |
113 | thread_cancel_async(fpt->master, &peer->t_read, NULL); |
114 | THREAD_OFF(peer->t_process_packet); | |
56257a44 | 115 | |
b750b0ba | 116 | UNSET_FLAG(peer->thread_flags, PEER_THREAD_READS_ON); |
56257a44 QY |
117 | } |
118 | ||
a715eab3 | 119 | /* Thread internal functions ----------------------------------------------- */ |
51abb4b4 | 120 | |
a715eab3 | 121 | /* |
51abb4b4 | 122 | * Called from I/O pthread when a file descriptor has become ready for writing. |
424ab01d QY |
123 | */ |
124 | static int bgp_process_writes(struct thread *thread) | |
56257a44 | 125 | { |
424ab01d QY |
126 | static struct peer *peer; |
127 | peer = THREAD_ARG(thread); | |
128 | uint16_t status; | |
b750b0ba | 129 | bool reschedule; |
bbac44ac | 130 | bool fatal = false; |
424ab01d QY |
131 | |
132 | if (peer->fd < 0) | |
133 | return -1; | |
134 | ||
1ac267a2 | 135 | struct frr_pthread *fpt = bgp_pth_io; |
424ab01d | 136 | |
00dffa8c | 137 | frr_with_mutex(&peer->io_mtx) { |
424ab01d QY |
138 | status = bgp_write(peer); |
139 | reschedule = (stream_fifo_head(peer->obuf) != NULL); | |
140 | } | |
56257a44 | 141 | |
a715eab3 QY |
142 | /* no problem */ |
143 | if (CHECK_FLAG(status, BGP_IO_TRANS_ERR)) { | |
56257a44 | 144 | } |
56257a44 | 145 | |
a715eab3 | 146 | /* problem */ |
bbac44ac | 147 | if (CHECK_FLAG(status, BGP_IO_FATAL_ERR)) { |
a715eab3 | 148 | reschedule = false; |
bbac44ac QY |
149 | fatal = true; |
150 | } | |
424ab01d | 151 | |
a77e2f4b S |
152 | /* If suppress fib pending is enabled, route is advertised to peers when |
153 | * the status is received from the FIB. The delay is added | |
154 | * to update group packet generate which will allow more routes to be | |
155 | * sent in the update message | |
156 | */ | |
424ab01d QY |
157 | if (reschedule) { |
158 | thread_add_write(fpt->master, bgp_process_writes, peer, | |
159 | peer->fd, &peer->t_write); | |
b785b7ad | 160 | } else if (!fatal) { |
a77e2f4b S |
161 | BGP_UPDATE_GROUP_TIMER_ON(&peer->t_generate_updgrp_packets, |
162 | bgp_generate_updgrp_packets); | |
424ab01d QY |
163 | } |
164 | ||
165 | return 0; | |
56257a44 QY |
166 | } |
167 | ||
a715eab3 | 168 | /* |
51abb4b4 QY |
169 | * Called from I/O pthread when a file descriptor has become ready for reading, |
170 | * or has hung up. | |
9eb217ff QY |
171 | * |
172 | * We read as much data as possible, process as many packets as we can and | |
173 | * place them on peer->ibuf for secondary processing by the main thread. | |
56257a44 | 174 | */ |
424ab01d | 175 | static int bgp_process_reads(struct thread *thread) |
56257a44 | 176 | { |
e11eeb8c QY |
177 | /* clang-format off */ |
178 | static struct peer *peer; // peer to read from | |
179 | uint16_t status; // bgp_read status code | |
180 | bool more = true; // whether we got more data | |
181 | bool fatal = false; // whether fatal error occurred | |
182 | bool added_pkt = false; // whether we pushed onto ->ibuf | |
e11eeb8c | 183 | /* clang-format on */ |
9eb217ff | 184 | |
424ab01d | 185 | peer = THREAD_ARG(thread); |
424ab01d | 186 | |
97b4a0ec | 187 | if (peer->fd < 0 || bm->terminating) |
424ab01d QY |
188 | return -1; |
189 | ||
1ac267a2 | 190 | struct frr_pthread *fpt = bgp_pth_io; |
424ab01d | 191 | |
00dffa8c | 192 | frr_with_mutex(&peer->io_mtx) { |
424ab01d QY |
193 | status = bgp_read(peer); |
194 | } | |
424ab01d | 195 | |
9eb217ff QY |
196 | /* error checking phase */ |
197 | if (CHECK_FLAG(status, BGP_IO_TRANS_ERR)) { | |
198 | /* no problem; just don't process packets */ | |
199 | more = false; | |
200 | } | |
424ab01d | 201 | |
9eb217ff QY |
202 | if (CHECK_FLAG(status, BGP_IO_FATAL_ERR)) { |
203 | /* problem; tear down session */ | |
204 | more = false; | |
205 | fatal = true; | |
56257a44 | 206 | } |
56257a44 | 207 | |
9eb217ff QY |
208 | while (more) { |
209 | /* static buffer for transferring packets */ | |
9eb217ff | 210 | /* shorter alias to peer's input buffer */ |
74ffbfe6 | 211 | struct ringbuf *ibw = peer->ibuf_work; |
9eb217ff | 212 | /* packet size as given by header */ |
74ffbfe6 | 213 | uint16_t pktsize = 0; |
9eb217ff QY |
214 | |
215 | /* check that we have enough data for a header */ | |
74ffbfe6 | 216 | if (ringbuf_remain(ibw) < BGP_HEADER_SIZE) |
9eb217ff | 217 | break; |
424ab01d | 218 | |
a2b6e694 | 219 | /* check that header is valid */ |
220 | if (!validate_header(peer)) { | |
9eb217ff QY |
221 | fatal = true; |
222 | break; | |
424ab01d | 223 | } |
424ab01d | 224 | |
9eb217ff | 225 | /* header is valid; retrieve packet size */ |
74ffbfe6 QY |
226 | ringbuf_peek(ibw, BGP_MARKER_SIZE, &pktsize, sizeof(pktsize)); |
227 | ||
228 | pktsize = ntohs(pktsize); | |
424ab01d | 229 | |
9eb217ff QY |
230 | /* if this fails we are seriously screwed */ |
231 | assert(pktsize <= BGP_MAX_PACKET_SIZE); | |
232 | ||
a715eab3 QY |
233 | /* |
234 | * If we have that much data, chuck it into its own | |
235 | * stream and append to input queue for processing. | |
236 | */ | |
74ffbfe6 | 237 | if (ringbuf_remain(ibw) >= pktsize) { |
9eb217ff | 238 | struct stream *pkt = stream_new(pktsize); |
7c9d82cd S |
239 | assert(STREAM_WRITEABLE(pkt) == pktsize); |
240 | assert(ringbuf_get(ibw, pkt->data, pktsize) == pktsize); | |
241 | stream_set_endp(pkt, pktsize); | |
9eb217ff | 242 | |
0c3436aa | 243 | frrtrace(2, frr_bgp, packet_read, peer, pkt); |
00dffa8c | 244 | frr_with_mutex(&peer->io_mtx) { |
9eb217ff QY |
245 | stream_fifo_push(peer->ibuf, pkt); |
246 | } | |
9eb217ff QY |
247 | |
248 | added_pkt = true; | |
249 | } else | |
250 | break; | |
251 | } | |
252 | ||
9eb217ff QY |
253 | /* handle invalid header */ |
254 | if (fatal) { | |
9eb217ff | 255 | /* wipe buffer just in case someone screwed up */ |
74ffbfe6 | 256 | ringbuf_wipe(peer->ibuf_work); |
9eb217ff | 257 | } else { |
421a7dfc QY |
258 | assert(ringbuf_space(peer->ibuf_work) >= BGP_MAX_PACKET_SIZE); |
259 | ||
424ab01d QY |
260 | thread_add_read(fpt->master, bgp_process_reads, peer, peer->fd, |
261 | &peer->t_read); | |
9eb217ff | 262 | if (added_pkt) |
7a86aa5a QY |
263 | thread_add_timer_msec(bm->master, bgp_process_packet, |
264 | peer, 0, &peer->t_process_packet); | |
9eb217ff | 265 | } |
424ab01d QY |
266 | |
267 | return 0; | |
56257a44 QY |
268 | } |
269 | ||
a715eab3 | 270 | /* |
56257a44 QY |
271 | * Flush peer output buffer. |
272 | * | |
273 | * This function pops packets off of peer->obuf and writes them to peer->fd. | |
274 | * The amount of packets written is equal to the minimum of peer->wpkt_quanta | |
424ab01d | 275 | * and the number of packets on the output buffer, unless an error occurs. |
56257a44 QY |
276 | * |
277 | * If write() returns an error, the appropriate FSM event is generated. | |
278 | * | |
279 | * The return value is equal to the number of packets written | |
280 | * (which may be zero). | |
281 | */ | |
424ab01d | 282 | static uint16_t bgp_write(struct peer *peer) |
56257a44 | 283 | { |
d7c0a89a | 284 | uint8_t type; |
56257a44 | 285 | struct stream *s; |
56257a44 | 286 | int update_last_write = 0; |
093279cd | 287 | unsigned int count; |
eb2277cf | 288 | uint32_t uo = 0; |
424ab01d | 289 | uint16_t status = 0; |
555e09d4 | 290 | uint32_t wpkt_quanta_old; |
56257a44 | 291 | |
093279cd QY |
292 | int writenum = 0; |
293 | int num; | |
294 | unsigned int iovsz; | |
295 | unsigned int strmsz; | |
296 | unsigned int total_written; | |
297 | ||
996c9314 LB |
298 | wpkt_quanta_old = atomic_load_explicit(&peer->bgp->wpkt_quanta, |
299 | memory_order_relaxed); | |
093279cd QY |
300 | struct stream *ostreams[wpkt_quanta_old]; |
301 | struct stream **streams = ostreams; | |
302 | struct iovec iov[wpkt_quanta_old]; | |
303 | ||
304 | s = stream_fifo_head(peer->obuf); | |
305 | ||
306 | if (!s) | |
307 | goto done; | |
308 | ||
309 | count = iovsz = 0; | |
310 | while (count < wpkt_quanta_old && iovsz < array_size(iov) && s) { | |
311 | ostreams[iovsz] = s; | |
312 | iov[iovsz].iov_base = stream_pnt(s); | |
313 | iov[iovsz].iov_len = STREAM_READABLE(s); | |
314 | writenum += STREAM_READABLE(s); | |
315 | s = s->next; | |
316 | ++iovsz; | |
317 | ++count; | |
318 | } | |
319 | ||
320 | strmsz = iovsz; | |
321 | total_written = 0; | |
322 | ||
323 | do { | |
324 | num = writev(peer->fd, iov, iovsz); | |
325 | ||
326 | if (num < 0) { | |
327 | if (!ERRNO_IO_RETRY(errno)) { | |
328 | BGP_EVENT_ADD(peer, TCP_fatal_error); | |
329 | SET_FLAG(status, BGP_IO_FATAL_ERR); | |
330 | } else { | |
331 | SET_FLAG(status, BGP_IO_TRANS_ERR); | |
332 | } | |
333 | ||
334 | break; | |
335 | } else if (num != writenum) { | |
336 | unsigned int msg_written = 0; | |
337 | unsigned int ic = iovsz; | |
338 | ||
339 | for (unsigned int i = 0; i < ic; i++) { | |
340 | size_t ss = iov[i].iov_len; | |
555e09d4 | 341 | |
093279cd QY |
342 | if (ss > (unsigned int) num) |
343 | break; | |
56257a44 | 344 | |
093279cd QY |
345 | msg_written++; |
346 | iovsz--; | |
347 | writenum -= ss; | |
348 | num -= ss; | |
349 | } | |
56257a44 | 350 | |
093279cd | 351 | total_written += msg_written; |
56257a44 | 352 | |
18555366 QY |
353 | assert(total_written < count); |
354 | ||
093279cd QY |
355 | memmove(&iov, &iov[msg_written], |
356 | sizeof(iov[0]) * iovsz); | |
357 | streams = &streams[msg_written]; | |
358 | stream_forward_getp(streams[0], num); | |
359 | iov[0].iov_base = stream_pnt(streams[0]); | |
360 | iov[0].iov_len = STREAM_READABLE(streams[0]); | |
361 | ||
362 | writenum -= num; | |
363 | num = 0; | |
364 | assert(writenum > 0); | |
365 | } else { | |
366 | total_written = strmsz; | |
367 | } | |
368 | ||
369 | } while (num != writenum); | |
370 | ||
371 | /* Handle statistics */ | |
372 | for (unsigned int i = 0; i < total_written; i++) { | |
373 | s = stream_fifo_pop(peer->obuf); | |
374 | ||
375 | assert(s == ostreams[i]); | |
56257a44 QY |
376 | |
377 | /* Retrieve BGP packet type. */ | |
378 | stream_set_getp(s, BGP_MARKER_SIZE + 2); | |
379 | type = stream_getc(s); | |
380 | ||
381 | switch (type) { | |
382 | case BGP_MSG_OPEN: | |
1588f6f4 QY |
383 | atomic_fetch_add_explicit(&peer->open_out, 1, |
384 | memory_order_relaxed); | |
56257a44 QY |
385 | break; |
386 | case BGP_MSG_UPDATE: | |
1588f6f4 QY |
387 | atomic_fetch_add_explicit(&peer->update_out, 1, |
388 | memory_order_relaxed); | |
eb2277cf | 389 | uo++; |
56257a44 QY |
390 | break; |
391 | case BGP_MSG_NOTIFY: | |
1588f6f4 QY |
392 | atomic_fetch_add_explicit(&peer->notify_out, 1, |
393 | memory_order_relaxed); | |
56257a44 QY |
394 | /* Double start timer. */ |
395 | peer->v_start *= 2; | |
396 | ||
397 | /* Overflow check. */ | |
398 | if (peer->v_start >= (60 * 2)) | |
399 | peer->v_start = (60 * 2); | |
400 | ||
a715eab3 QY |
401 | /* |
402 | * Handle Graceful Restart case where the state changes | |
403 | * to Connect instead of Idle. | |
404 | */ | |
56257a44 QY |
405 | BGP_EVENT_ADD(peer, BGP_Stop); |
406 | goto done; | |
407 | ||
408 | case BGP_MSG_KEEPALIVE: | |
1588f6f4 QY |
409 | atomic_fetch_add_explicit(&peer->keepalive_out, 1, |
410 | memory_order_relaxed); | |
56257a44 QY |
411 | break; |
412 | case BGP_MSG_ROUTE_REFRESH_NEW: | |
413 | case BGP_MSG_ROUTE_REFRESH_OLD: | |
1588f6f4 QY |
414 | atomic_fetch_add_explicit(&peer->refresh_out, 1, |
415 | memory_order_relaxed); | |
56257a44 QY |
416 | break; |
417 | case BGP_MSG_CAPABILITY: | |
1588f6f4 QY |
418 | atomic_fetch_add_explicit(&peer->dynamic_cap_out, 1, |
419 | memory_order_relaxed); | |
56257a44 QY |
420 | break; |
421 | } | |
422 | ||
093279cd QY |
423 | stream_free(s); |
424 | ostreams[i] = NULL; | |
56257a44 QY |
425 | update_last_write = 1; |
426 | } | |
427 | ||
428 | done : { | |
eb2277cf LB |
429 | /* |
430 | * Update last_update if UPDATEs were written. | |
431 | * Note: that these are only updated at end, | |
432 | * not per message (i.e., per loop) | |
433 | */ | |
434 | if (uo) | |
1588f6f4 QY |
435 | atomic_store_explicit(&peer->last_update, bgp_clock(), |
436 | memory_order_relaxed); | |
56257a44 | 437 | |
5c075a90 | 438 | /* If we TXed any flavor of packet */ |
56257a44 | 439 | if (update_last_write) |
1588f6f4 QY |
440 | atomic_store_explicit(&peer->last_write, bgp_clock(), |
441 | memory_order_relaxed); | |
56257a44 QY |
442 | } |
443 | ||
424ab01d QY |
444 | return status; |
445 | } | |
446 | ||
a715eab3 | 447 | /* |
51abb4b4 | 448 | * Reads a chunk of data from peer->fd into peer->ibuf_work. |
424ab01d | 449 | * |
51abb4b4 | 450 | * @return status flag (see top-of-file) |
424ab01d QY |
451 | */ |
452 | static uint16_t bgp_read(struct peer *peer) | |
453 | { | |
b750b0ba QY |
454 | size_t readsize; // how many bytes we want to read |
455 | ssize_t nbytes; // how many bytes we actually read | |
424ab01d | 456 | uint16_t status = 0; |
74ffbfe6 | 457 | static uint8_t ibw[BGP_MAX_PACKET_SIZE * BGP_READ_PACKET_MAX]; |
424ab01d | 458 | |
74ffbfe6 QY |
459 | readsize = MIN(ringbuf_space(peer->ibuf_work), sizeof(ibw)); |
460 | nbytes = read(peer->fd, ibw, readsize); | |
424ab01d | 461 | |
74ffbfe6 QY |
462 | /* EAGAIN or EWOULDBLOCK; come back later */ |
463 | if (nbytes < 0 && ERRNO_IO_RETRY(errno)) { | |
464 | SET_FLAG(status, BGP_IO_TRANS_ERR); | |
996c9314 | 465 | /* Fatal error; tear down session */ |
74ffbfe6 | 466 | } else if (nbytes < 0) { |
e50f7cfd | 467 | flog_err(EC_BGP_UPDATE_RCV, |
1c50c1c0 QY |
468 | "%s [Error] bgp_read_packet error: %s", peer->host, |
469 | safe_strerror(errno)); | |
85145b62 QY |
470 | |
471 | if (peer->status == Established) { | |
36235319 QY |
472 | if ((CHECK_FLAG(peer->flags, PEER_FLAG_GRACEFUL_RESTART) |
473 | || CHECK_FLAG(peer->flags, | |
474 | PEER_FLAG_GRACEFUL_RESTART_HELPER)) | |
475 | && CHECK_FLAG(peer->sflags, PEER_STATUS_NSF_MODE)) { | |
85145b62 QY |
476 | peer->last_reset = PEER_DOWN_NSF_CLOSE_SESSION; |
477 | SET_FLAG(peer->sflags, PEER_STATUS_NSF_WAIT); | |
478 | } else | |
479 | peer->last_reset = PEER_DOWN_CLOSE_SESSION; | |
480 | } | |
424ab01d | 481 | |
85145b62 QY |
482 | BGP_EVENT_ADD(peer, TCP_fatal_error); |
483 | SET_FLAG(status, BGP_IO_FATAL_ERR); | |
996c9314 | 484 | /* Received EOF / TCP session closed */ |
74ffbfe6 | 485 | } else if (nbytes == 0) { |
85145b62 QY |
486 | if (bgp_debug_neighbor_events(peer)) |
487 | zlog_debug("%s [Event] BGP connection closed fd %d", | |
36235319 | 488 | peer->host, peer->fd); |
85145b62 QY |
489 | |
490 | if (peer->status == Established) { | |
36235319 QY |
491 | if ((CHECK_FLAG(peer->flags, PEER_FLAG_GRACEFUL_RESTART) |
492 | || CHECK_FLAG(peer->flags, | |
493 | PEER_FLAG_GRACEFUL_RESTART_HELPER)) | |
494 | && CHECK_FLAG(peer->sflags, PEER_STATUS_NSF_MODE)) { | |
85145b62 QY |
495 | peer->last_reset = PEER_DOWN_NSF_CLOSE_SESSION; |
496 | SET_FLAG(peer->sflags, PEER_STATUS_NSF_WAIT); | |
497 | } else | |
498 | peer->last_reset = PEER_DOWN_CLOSE_SESSION; | |
424ab01d QY |
499 | } |
500 | ||
85145b62 QY |
501 | BGP_EVENT_ADD(peer, TCP_connection_closed); |
502 | SET_FLAG(status, BGP_IO_FATAL_ERR); | |
74ffbfe6 QY |
503 | } else { |
504 | assert(ringbuf_put(peer->ibuf_work, ibw, nbytes) | |
505 | == (size_t)nbytes); | |
424ab01d QY |
506 | } |
507 | ||
424ab01d QY |
508 | return status; |
509 | } | |
510 | ||
511 | /* | |
512 | * Called after we have read a BGP packet header. Validates marker, message | |
513 | * type and packet length. If any of these aren't correct, sends a notify. | |
74ffbfe6 QY |
514 | * |
515 | * Assumes that there are at least BGP_HEADER_SIZE readable bytes in the input | |
516 | * buffer. | |
424ab01d QY |
517 | */ |
518 | static bool validate_header(struct peer *peer) | |
519 | { | |
3fe63c29 QY |
520 | uint16_t size; |
521 | uint8_t type; | |
74ffbfe6 | 522 | struct ringbuf *pkt = peer->ibuf_work; |
424ab01d | 523 | |
2b64873d | 524 | static const uint8_t m_correct[BGP_MARKER_SIZE] = { |
74ffbfe6 QY |
525 | 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
526 | 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; | |
527 | uint8_t m_rx[BGP_MARKER_SIZE] = {0x00}; | |
442c9afb | 528 | |
74ffbfe6 QY |
529 | if (ringbuf_peek(pkt, 0, m_rx, BGP_MARKER_SIZE) != BGP_MARKER_SIZE) |
530 | return false; | |
531 | ||
532 | if (memcmp(m_correct, m_rx, BGP_MARKER_SIZE) != 0) { | |
442c9afb QY |
533 | bgp_notify_send(peer, BGP_NOTIFY_HEADER_ERR, |
534 | BGP_NOTIFY_HEADER_NOT_SYNC); | |
535 | return false; | |
536 | } | |
424ab01d | 537 | |
74ffbfe6 QY |
538 | /* Get size and type in network byte order. */ |
539 | ringbuf_peek(pkt, BGP_MARKER_SIZE, &size, sizeof(size)); | |
540 | ringbuf_peek(pkt, BGP_MARKER_SIZE + 2, &type, sizeof(type)); | |
541 | ||
542 | size = ntohs(size); | |
424ab01d QY |
543 | |
544 | /* BGP type check. */ | |
545 | if (type != BGP_MSG_OPEN && type != BGP_MSG_UPDATE | |
546 | && type != BGP_MSG_NOTIFY && type != BGP_MSG_KEEPALIVE | |
547 | && type != BGP_MSG_ROUTE_REFRESH_NEW | |
548 | && type != BGP_MSG_ROUTE_REFRESH_OLD | |
549 | && type != BGP_MSG_CAPABILITY) { | |
3fe63c29 | 550 | if (bgp_debug_neighbor_events(peer)) |
424ab01d QY |
551 | zlog_debug("%s unknown message type 0x%02x", peer->host, |
552 | type); | |
553 | ||
554 | bgp_notify_send_with_data(peer, BGP_NOTIFY_HEADER_ERR, | |
996c9314 LB |
555 | BGP_NOTIFY_HEADER_BAD_MESTYPE, &type, |
556 | 1); | |
424ab01d QY |
557 | return false; |
558 | } | |
559 | ||
3fe63c29 | 560 | /* Minimum packet length check. */ |
424ab01d QY |
561 | if ((size < BGP_HEADER_SIZE) || (size > BGP_MAX_PACKET_SIZE) |
562 | || (type == BGP_MSG_OPEN && size < BGP_MSG_OPEN_MIN_SIZE) | |
563 | || (type == BGP_MSG_UPDATE && size < BGP_MSG_UPDATE_MIN_SIZE) | |
564 | || (type == BGP_MSG_NOTIFY && size < BGP_MSG_NOTIFY_MIN_SIZE) | |
565 | || (type == BGP_MSG_KEEPALIVE && size != BGP_MSG_KEEPALIVE_MIN_SIZE) | |
566 | || (type == BGP_MSG_ROUTE_REFRESH_NEW | |
567 | && size < BGP_MSG_ROUTE_REFRESH_MIN_SIZE) | |
568 | || (type == BGP_MSG_ROUTE_REFRESH_OLD | |
569 | && size < BGP_MSG_ROUTE_REFRESH_MIN_SIZE) | |
570 | || (type == BGP_MSG_CAPABILITY | |
571 | && size < BGP_MSG_CAPABILITY_MIN_SIZE)) { | |
1588f6f4 | 572 | if (bgp_debug_neighbor_events(peer)) { |
424ab01d QY |
573 | zlog_debug("%s bad message length - %d for %s", |
574 | peer->host, size, | |
575 | type == 128 ? "ROUTE-REFRESH" | |
996c9314 | 576 | : bgp_type_str[(int)type]); |
1588f6f4 | 577 | } |
424ab01d | 578 | |
3fe63c29 QY |
579 | uint16_t nsize = htons(size); |
580 | ||
424ab01d QY |
581 | bgp_notify_send_with_data(peer, BGP_NOTIFY_HEADER_ERR, |
582 | BGP_NOTIFY_HEADER_BAD_MESLEN, | |
996c9314 | 583 | (unsigned char *)&nsize, 2); |
424ab01d QY |
584 | return false; |
585 | } | |
586 | ||
587 | return true; | |
56257a44 | 588 | } |