]>
Commit | Line | Data |
---|---|---|
958b450c | 1 | /* BGP I/O. |
51abb4b4 | 2 | * Implements packet I/O in a pthread. |
958b450c | 3 | * Copyright (C) 2017 Cumulus Networks |
51abb4b4 | 4 | * Quentin Young |
958b450c QY |
5 | * |
6 | * This program is free software; you can redistribute it and/or modify | |
7 | * it under the terms of the GNU General Public License as published by | |
8 | * the Free Software Foundation; either version 2 of the License, or | |
9 | * (at your option) any later version. | |
10 | * | |
11 | * This program is distributed in the hope that it will be useful, but | |
12 | * WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 | * General Public License for more details. | |
15 | * | |
16 | * You should have received a copy of the GNU General Public License | |
17 | * along with this program; see the file COPYING; if not, write to the | |
18 | * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, | |
19 | * MA 02110-1301 USA | |
56257a44 QY |
20 | */ |
21 | ||
95158b0c | 22 | /* clang-format off */ |
42cf651e | 23 | #include <zebra.h> |
95158b0c | 24 | #include <pthread.h> // for pthread_mutex_unlock, pthread_mutex_lock |
093279cd | 25 | #include <sys/uio.h> // for writev |
56257a44 | 26 | |
1ac267a2 | 27 | #include "frr_pthread.h" |
95158b0c QY |
28 | #include "linklist.h" // for list_delete, list_delete_all_node, lis... |
29 | #include "log.h" // for zlog_debug, safe_strerror, zlog_err | |
30 | #include "memory.h" // for MTYPE_TMP, XCALLOC, XFREE | |
31 | #include "network.h" // for ERRNO_IO_RETRY | |
32 | #include "stream.h" // for stream_get_endp, stream_getw_from, str... | |
74ffbfe6 | 33 | #include "ringbuf.h" // for ringbuf_remain, ringbuf_peek, ringbuf_... |
50478845 | 34 | #include "thread.h" // for THREAD_OFF, THREAD_ARG, thread... |
56257a44 | 35 | |
42cf651e | 36 | #include "bgpd/bgp_io.h" |
95158b0c | 37 | #include "bgpd/bgp_debug.h" // for bgp_debug_neighbor_events, bgp_type_str |
14454c9f | 38 | #include "bgpd/bgp_errors.h" // for expanded error reference information |
95158b0c QY |
39 | #include "bgpd/bgp_fsm.h" // for BGP_EVENT_ADD, bgp_event |
40 | #include "bgpd/bgp_packet.h" // for bgp_notify_send_with_data, bgp_notify... | |
c7bb4f00 | 41 | #include "bgpd/bgp_trace.h" // for frrtraces |
95158b0c QY |
42 | #include "bgpd/bgpd.h" // for peer, BGP_MARKER_SIZE, bgp_master, bm |
43 | /* clang-format on */ | |
56257a44 | 44 | |
/*
 * Prototypes for the file-local helpers defined further down.
 */
static uint16_t bgp_write(struct peer *peer);
static uint16_t bgp_read(struct peer *peer, int *code_p);
static int bgp_process_writes(struct thread *thread);
static int bgp_process_reads(struct thread *thread);
static bool validate_header(struct peer *peer);

/*
 * Status bits returned by bgp_read() and bgp_write().
 */
#define BGP_IO_TRANS_ERR (1 << 0) // transient error: EAGAIN or similar
#define BGP_IO_FATAL_ERR (1 << 1) // fatal TCP error of some kind
56257a44 | 55 | |
a715eab3 | 56 | /* Thread external API ----------------------------------------------------- */ |
56257a44 | 57 | |
424ab01d | 58 | void bgp_writes_on(struct peer *peer) |
56257a44 | 59 | { |
1ac267a2 | 60 | struct frr_pthread *fpt = bgp_pth_io; |
a715eab3 | 61 | assert(fpt->running); |
f09a656d | 62 | |
424ab01d QY |
63 | assert(peer->status != Deleted); |
64 | assert(peer->obuf); | |
65 | assert(peer->ibuf); | |
66 | assert(peer->ibuf_work); | |
387f984e QY |
67 | assert(!peer->t_connect_check_r); |
68 | assert(!peer->t_connect_check_w); | |
424ab01d | 69 | assert(peer->fd); |
56257a44 | 70 | |
b750b0ba QY |
71 | thread_add_write(fpt->master, bgp_process_writes, peer, peer->fd, |
72 | &peer->t_write); | |
73 | SET_FLAG(peer->thread_flags, PEER_THREAD_WRITES_ON); | |
424ab01d | 74 | } |
56257a44 | 75 | |
424ab01d QY |
76 | void bgp_writes_off(struct peer *peer) |
77 | { | |
1ac267a2 | 78 | struct frr_pthread *fpt = bgp_pth_io; |
a715eab3 | 79 | assert(fpt->running); |
151044ce | 80 | |
b750b0ba QY |
81 | thread_cancel_async(fpt->master, &peer->t_write, NULL); |
82 | THREAD_OFF(peer->t_generate_updgrp_packets); | |
56257a44 | 83 | |
b750b0ba | 84 | UNSET_FLAG(peer->thread_flags, PEER_THREAD_WRITES_ON); |
56257a44 QY |
85 | } |
86 | ||
424ab01d | 87 | void bgp_reads_on(struct peer *peer) |
56257a44 | 88 | { |
1ac267a2 | 89 | struct frr_pthread *fpt = bgp_pth_io; |
a715eab3 | 90 | assert(fpt->running); |
f09a656d | 91 | |
424ab01d QY |
92 | assert(peer->status != Deleted); |
93 | assert(peer->ibuf); | |
94 | assert(peer->fd); | |
95 | assert(peer->ibuf_work); | |
424ab01d | 96 | assert(peer->obuf); |
387f984e QY |
97 | assert(!peer->t_connect_check_r); |
98 | assert(!peer->t_connect_check_w); | |
424ab01d QY |
99 | assert(peer->fd); |
100 | ||
b750b0ba QY |
101 | thread_add_read(fpt->master, bgp_process_reads, peer, peer->fd, |
102 | &peer->t_read); | |
103 | ||
104 | SET_FLAG(peer->thread_flags, PEER_THREAD_READS_ON); | |
56257a44 QY |
105 | } |
106 | ||
424ab01d | 107 | void bgp_reads_off(struct peer *peer) |
56257a44 | 108 | { |
1ac267a2 | 109 | struct frr_pthread *fpt = bgp_pth_io; |
a715eab3 | 110 | assert(fpt->running); |
151044ce | 111 | |
b750b0ba QY |
112 | thread_cancel_async(fpt->master, &peer->t_read, NULL); |
113 | THREAD_OFF(peer->t_process_packet); | |
56257a44 | 114 | |
b750b0ba | 115 | UNSET_FLAG(peer->thread_flags, PEER_THREAD_READS_ON); |
56257a44 QY |
116 | } |
117 | ||
a715eab3 | 118 | /* Thread internal functions ----------------------------------------------- */ |
51abb4b4 | 119 | |
a715eab3 | 120 | /* |
51abb4b4 | 121 | * Called from I/O pthread when a file descriptor has become ready for writing. |
424ab01d QY |
122 | */ |
123 | static int bgp_process_writes(struct thread *thread) | |
56257a44 | 124 | { |
424ab01d QY |
125 | static struct peer *peer; |
126 | peer = THREAD_ARG(thread); | |
127 | uint16_t status; | |
b750b0ba | 128 | bool reschedule; |
bbac44ac | 129 | bool fatal = false; |
424ab01d QY |
130 | |
131 | if (peer->fd < 0) | |
132 | return -1; | |
133 | ||
1ac267a2 | 134 | struct frr_pthread *fpt = bgp_pth_io; |
424ab01d | 135 | |
00dffa8c | 136 | frr_with_mutex(&peer->io_mtx) { |
424ab01d QY |
137 | status = bgp_write(peer); |
138 | reschedule = (stream_fifo_head(peer->obuf) != NULL); | |
139 | } | |
56257a44 | 140 | |
a715eab3 QY |
141 | /* no problem */ |
142 | if (CHECK_FLAG(status, BGP_IO_TRANS_ERR)) { | |
56257a44 | 143 | } |
56257a44 | 144 | |
a715eab3 | 145 | /* problem */ |
bbac44ac | 146 | if (CHECK_FLAG(status, BGP_IO_FATAL_ERR)) { |
a715eab3 | 147 | reschedule = false; |
bbac44ac QY |
148 | fatal = true; |
149 | } | |
424ab01d | 150 | |
a77e2f4b S |
151 | /* If suppress fib pending is enabled, route is advertised to peers when |
152 | * the status is received from the FIB. The delay is added | |
153 | * to update group packet generate which will allow more routes to be | |
154 | * sent in the update message | |
155 | */ | |
424ab01d QY |
156 | if (reschedule) { |
157 | thread_add_write(fpt->master, bgp_process_writes, peer, | |
158 | peer->fd, &peer->t_write); | |
b785b7ad | 159 | } else if (!fatal) { |
a77e2f4b S |
160 | BGP_UPDATE_GROUP_TIMER_ON(&peer->t_generate_updgrp_packets, |
161 | bgp_generate_updgrp_packets); | |
424ab01d QY |
162 | } |
163 | ||
164 | return 0; | |
56257a44 QY |
165 | } |
166 | ||
a715eab3 | 167 | /* |
51abb4b4 QY |
168 | * Called from I/O pthread when a file descriptor has become ready for reading, |
169 | * or has hung up. | |
9eb217ff QY |
170 | * |
171 | * We read as much data as possible, process as many packets as we can and | |
172 | * place them on peer->ibuf for secondary processing by the main thread. | |
56257a44 | 173 | */ |
424ab01d | 174 | static int bgp_process_reads(struct thread *thread) |
56257a44 | 175 | { |
e11eeb8c QY |
176 | /* clang-format off */ |
177 | static struct peer *peer; // peer to read from | |
178 | uint16_t status; // bgp_read status code | |
179 | bool more = true; // whether we got more data | |
180 | bool fatal = false; // whether fatal error occurred | |
181 | bool added_pkt = false; // whether we pushed onto ->ibuf | |
b8cfb2cd | 182 | int code = 0; // FSM code if error occurred |
e11eeb8c | 183 | /* clang-format on */ |
9eb217ff | 184 | |
424ab01d | 185 | peer = THREAD_ARG(thread); |
424ab01d | 186 | |
97b4a0ec | 187 | if (peer->fd < 0 || bm->terminating) |
424ab01d QY |
188 | return -1; |
189 | ||
1ac267a2 | 190 | struct frr_pthread *fpt = bgp_pth_io; |
424ab01d | 191 | |
00dffa8c | 192 | frr_with_mutex(&peer->io_mtx) { |
6af96fa3 | 193 | status = bgp_read(peer, &code); |
424ab01d | 194 | } |
424ab01d | 195 | |
9eb217ff QY |
196 | /* error checking phase */ |
197 | if (CHECK_FLAG(status, BGP_IO_TRANS_ERR)) { | |
198 | /* no problem; just don't process packets */ | |
199 | more = false; | |
200 | } | |
424ab01d | 201 | |
9eb217ff QY |
202 | if (CHECK_FLAG(status, BGP_IO_FATAL_ERR)) { |
203 | /* problem; tear down session */ | |
204 | more = false; | |
205 | fatal = true; | |
6af96fa3 MS |
206 | |
207 | /* Handle the error in the main pthread, include the | |
208 | * specific state change from 'bgp_read'. | |
209 | */ | |
210 | thread_add_event(bm->master, bgp_packet_process_error, | |
211 | peer, code, NULL); | |
56257a44 | 212 | } |
56257a44 | 213 | |
9eb217ff QY |
214 | while (more) { |
215 | /* static buffer for transferring packets */ | |
9eb217ff | 216 | /* shorter alias to peer's input buffer */ |
74ffbfe6 | 217 | struct ringbuf *ibw = peer->ibuf_work; |
9eb217ff | 218 | /* packet size as given by header */ |
74ffbfe6 | 219 | uint16_t pktsize = 0; |
9eb217ff QY |
220 | |
221 | /* check that we have enough data for a header */ | |
74ffbfe6 | 222 | if (ringbuf_remain(ibw) < BGP_HEADER_SIZE) |
9eb217ff | 223 | break; |
424ab01d | 224 | |
a2b6e694 | 225 | /* check that header is valid */ |
226 | if (!validate_header(peer)) { | |
9eb217ff QY |
227 | fatal = true; |
228 | break; | |
424ab01d | 229 | } |
424ab01d | 230 | |
9eb217ff | 231 | /* header is valid; retrieve packet size */ |
74ffbfe6 QY |
232 | ringbuf_peek(ibw, BGP_MARKER_SIZE, &pktsize, sizeof(pktsize)); |
233 | ||
234 | pktsize = ntohs(pktsize); | |
424ab01d | 235 | |
9eb217ff | 236 | /* if this fails we are seriously screwed */ |
ef56aee4 | 237 | assert(pktsize <= peer->max_packet_size); |
9eb217ff | 238 | |
a715eab3 QY |
239 | /* |
240 | * If we have that much data, chuck it into its own | |
241 | * stream and append to input queue for processing. | |
242 | */ | |
74ffbfe6 | 243 | if (ringbuf_remain(ibw) >= pktsize) { |
9eb217ff | 244 | struct stream *pkt = stream_new(pktsize); |
6af96fa3 | 245 | |
7c9d82cd S |
246 | assert(STREAM_WRITEABLE(pkt) == pktsize); |
247 | assert(ringbuf_get(ibw, pkt->data, pktsize) == pktsize); | |
248 | stream_set_endp(pkt, pktsize); | |
9eb217ff | 249 | |
0c3436aa | 250 | frrtrace(2, frr_bgp, packet_read, peer, pkt); |
00dffa8c | 251 | frr_with_mutex(&peer->io_mtx) { |
9eb217ff QY |
252 | stream_fifo_push(peer->ibuf, pkt); |
253 | } | |
9eb217ff QY |
254 | |
255 | added_pkt = true; | |
256 | } else | |
257 | break; | |
258 | } | |
259 | ||
9eb217ff QY |
260 | /* handle invalid header */ |
261 | if (fatal) { | |
9eb217ff | 262 | /* wipe buffer just in case someone screwed up */ |
74ffbfe6 | 263 | ringbuf_wipe(peer->ibuf_work); |
9eb217ff | 264 | } else { |
ef56aee4 | 265 | assert(ringbuf_space(peer->ibuf_work) >= peer->max_packet_size); |
421a7dfc | 266 | |
424ab01d QY |
267 | thread_add_read(fpt->master, bgp_process_reads, peer, peer->fd, |
268 | &peer->t_read); | |
9eb217ff | 269 | if (added_pkt) |
e0d550df MS |
270 | thread_add_event(bm->master, bgp_process_packet, |
271 | peer, 0, &peer->t_process_packet); | |
9eb217ff | 272 | } |
424ab01d QY |
273 | |
274 | return 0; | |
56257a44 QY |
275 | } |
276 | ||
a715eab3 | 277 | /* |
56257a44 QY |
278 | * Flush peer output buffer. |
279 | * | |
280 | * This function pops packets off of peer->obuf and writes them to peer->fd. | |
281 | * The amount of packets written is equal to the minimum of peer->wpkt_quanta | |
424ab01d | 282 | * and the number of packets on the output buffer, unless an error occurs. |
56257a44 QY |
283 | * |
284 | * If write() returns an error, the appropriate FSM event is generated. | |
285 | * | |
286 | * The return value is equal to the number of packets written | |
287 | * (which may be zero). | |
288 | */ | |
424ab01d | 289 | static uint16_t bgp_write(struct peer *peer) |
56257a44 | 290 | { |
d7c0a89a | 291 | uint8_t type; |
56257a44 | 292 | struct stream *s; |
56257a44 | 293 | int update_last_write = 0; |
093279cd | 294 | unsigned int count; |
eb2277cf | 295 | uint32_t uo = 0; |
424ab01d | 296 | uint16_t status = 0; |
555e09d4 | 297 | uint32_t wpkt_quanta_old; |
56257a44 | 298 | |
093279cd QY |
299 | int writenum = 0; |
300 | int num; | |
301 | unsigned int iovsz; | |
302 | unsigned int strmsz; | |
303 | unsigned int total_written; | |
304 | ||
996c9314 LB |
305 | wpkt_quanta_old = atomic_load_explicit(&peer->bgp->wpkt_quanta, |
306 | memory_order_relaxed); | |
093279cd QY |
307 | struct stream *ostreams[wpkt_quanta_old]; |
308 | struct stream **streams = ostreams; | |
309 | struct iovec iov[wpkt_quanta_old]; | |
310 | ||
311 | s = stream_fifo_head(peer->obuf); | |
312 | ||
313 | if (!s) | |
314 | goto done; | |
315 | ||
316 | count = iovsz = 0; | |
317 | while (count < wpkt_quanta_old && iovsz < array_size(iov) && s) { | |
318 | ostreams[iovsz] = s; | |
319 | iov[iovsz].iov_base = stream_pnt(s); | |
320 | iov[iovsz].iov_len = STREAM_READABLE(s); | |
321 | writenum += STREAM_READABLE(s); | |
322 | s = s->next; | |
323 | ++iovsz; | |
324 | ++count; | |
325 | } | |
326 | ||
327 | strmsz = iovsz; | |
328 | total_written = 0; | |
329 | ||
330 | do { | |
331 | num = writev(peer->fd, iov, iovsz); | |
332 | ||
333 | if (num < 0) { | |
334 | if (!ERRNO_IO_RETRY(errno)) { | |
335 | BGP_EVENT_ADD(peer, TCP_fatal_error); | |
336 | SET_FLAG(status, BGP_IO_FATAL_ERR); | |
337 | } else { | |
338 | SET_FLAG(status, BGP_IO_TRANS_ERR); | |
339 | } | |
340 | ||
341 | break; | |
342 | } else if (num != writenum) { | |
343 | unsigned int msg_written = 0; | |
344 | unsigned int ic = iovsz; | |
345 | ||
346 | for (unsigned int i = 0; i < ic; i++) { | |
347 | size_t ss = iov[i].iov_len; | |
555e09d4 | 348 | |
093279cd QY |
349 | if (ss > (unsigned int) num) |
350 | break; | |
56257a44 | 351 | |
093279cd QY |
352 | msg_written++; |
353 | iovsz--; | |
354 | writenum -= ss; | |
355 | num -= ss; | |
356 | } | |
56257a44 | 357 | |
093279cd | 358 | total_written += msg_written; |
56257a44 | 359 | |
18555366 QY |
360 | assert(total_written < count); |
361 | ||
093279cd QY |
362 | memmove(&iov, &iov[msg_written], |
363 | sizeof(iov[0]) * iovsz); | |
364 | streams = &streams[msg_written]; | |
365 | stream_forward_getp(streams[0], num); | |
366 | iov[0].iov_base = stream_pnt(streams[0]); | |
367 | iov[0].iov_len = STREAM_READABLE(streams[0]); | |
368 | ||
369 | writenum -= num; | |
370 | num = 0; | |
371 | assert(writenum > 0); | |
372 | } else { | |
373 | total_written = strmsz; | |
374 | } | |
375 | ||
376 | } while (num != writenum); | |
377 | ||
378 | /* Handle statistics */ | |
379 | for (unsigned int i = 0; i < total_written; i++) { | |
380 | s = stream_fifo_pop(peer->obuf); | |
381 | ||
382 | assert(s == ostreams[i]); | |
56257a44 QY |
383 | |
384 | /* Retrieve BGP packet type. */ | |
385 | stream_set_getp(s, BGP_MARKER_SIZE + 2); | |
386 | type = stream_getc(s); | |
387 | ||
388 | switch (type) { | |
389 | case BGP_MSG_OPEN: | |
1588f6f4 QY |
390 | atomic_fetch_add_explicit(&peer->open_out, 1, |
391 | memory_order_relaxed); | |
56257a44 QY |
392 | break; |
393 | case BGP_MSG_UPDATE: | |
1588f6f4 QY |
394 | atomic_fetch_add_explicit(&peer->update_out, 1, |
395 | memory_order_relaxed); | |
eb2277cf | 396 | uo++; |
56257a44 QY |
397 | break; |
398 | case BGP_MSG_NOTIFY: | |
1588f6f4 QY |
399 | atomic_fetch_add_explicit(&peer->notify_out, 1, |
400 | memory_order_relaxed); | |
56257a44 QY |
401 | /* Double start timer. */ |
402 | peer->v_start *= 2; | |
403 | ||
404 | /* Overflow check. */ | |
405 | if (peer->v_start >= (60 * 2)) | |
406 | peer->v_start = (60 * 2); | |
407 | ||
a715eab3 QY |
408 | /* |
409 | * Handle Graceful Restart case where the state changes | |
410 | * to Connect instead of Idle. | |
411 | */ | |
56257a44 QY |
412 | BGP_EVENT_ADD(peer, BGP_Stop); |
413 | goto done; | |
414 | ||
415 | case BGP_MSG_KEEPALIVE: | |
1588f6f4 QY |
416 | atomic_fetch_add_explicit(&peer->keepalive_out, 1, |
417 | memory_order_relaxed); | |
56257a44 QY |
418 | break; |
419 | case BGP_MSG_ROUTE_REFRESH_NEW: | |
420 | case BGP_MSG_ROUTE_REFRESH_OLD: | |
1588f6f4 QY |
421 | atomic_fetch_add_explicit(&peer->refresh_out, 1, |
422 | memory_order_relaxed); | |
56257a44 QY |
423 | break; |
424 | case BGP_MSG_CAPABILITY: | |
1588f6f4 QY |
425 | atomic_fetch_add_explicit(&peer->dynamic_cap_out, 1, |
426 | memory_order_relaxed); | |
56257a44 QY |
427 | break; |
428 | } | |
429 | ||
093279cd QY |
430 | stream_free(s); |
431 | ostreams[i] = NULL; | |
56257a44 QY |
432 | update_last_write = 1; |
433 | } | |
434 | ||
435 | done : { | |
eb2277cf LB |
436 | /* |
437 | * Update last_update if UPDATEs were written. | |
438 | * Note: that these are only updated at end, | |
439 | * not per message (i.e., per loop) | |
440 | */ | |
441 | if (uo) | |
1588f6f4 QY |
442 | atomic_store_explicit(&peer->last_update, bgp_clock(), |
443 | memory_order_relaxed); | |
56257a44 | 444 | |
5c075a90 | 445 | /* If we TXed any flavor of packet */ |
56257a44 | 446 | if (update_last_write) |
1588f6f4 QY |
447 | atomic_store_explicit(&peer->last_write, bgp_clock(), |
448 | memory_order_relaxed); | |
56257a44 QY |
449 | } |
450 | ||
424ab01d QY |
451 | return status; |
452 | } | |
453 | ||
a715eab3 | 454 | /* |
51abb4b4 | 455 | * Reads a chunk of data from peer->fd into peer->ibuf_work. |
424ab01d | 456 | * |
b8cfb2cd QY |
457 | * code_p |
458 | * Pointer to location to store FSM event code in case of fatal error. | |
459 | * | |
51abb4b4 | 460 | * @return status flag (see top-of-file) |
424ab01d | 461 | */ |
6af96fa3 | 462 | static uint16_t bgp_read(struct peer *peer, int *code_p) |
424ab01d | 463 | { |
fe2e3bae | 464 | size_t readsize; // how many bytes we want to read |
b750b0ba | 465 | ssize_t nbytes; // how many bytes we actually read |
424ab01d QY |
466 | uint16_t status = 0; |
467 | ||
338f4a78 QY |
468 | readsize = |
469 | MIN(ringbuf_space(peer->ibuf_work), sizeof(peer->ibuf_scratch)); | |
470 | nbytes = read(peer->fd, peer->ibuf_scratch, readsize); | |
424ab01d | 471 | |
74ffbfe6 QY |
472 | /* EAGAIN or EWOULDBLOCK; come back later */ |
473 | if (nbytes < 0 && ERRNO_IO_RETRY(errno)) { | |
474 | SET_FLAG(status, BGP_IO_TRANS_ERR); | |
74ffbfe6 | 475 | } else if (nbytes < 0) { |
6af96fa3 | 476 | /* Fatal error; tear down session */ |
e50f7cfd | 477 | flog_err(EC_BGP_UPDATE_RCV, |
1c50c1c0 QY |
478 | "%s [Error] bgp_read_packet error: %s", peer->host, |
479 | safe_strerror(errno)); | |
85145b62 | 480 | |
6af96fa3 MS |
481 | /* Handle the error in the main pthread. */ |
482 | if (code_p) | |
483 | *code_p = TCP_fatal_error; | |
424ab01d | 484 | |
85145b62 | 485 | SET_FLAG(status, BGP_IO_FATAL_ERR); |
6af96fa3 | 486 | |
74ffbfe6 | 487 | } else if (nbytes == 0) { |
6af96fa3 | 488 | /* Received EOF / TCP session closed */ |
85145b62 QY |
489 | if (bgp_debug_neighbor_events(peer)) |
490 | zlog_debug("%s [Event] BGP connection closed fd %d", | |
36235319 | 491 | peer->host, peer->fd); |
85145b62 | 492 | |
6af96fa3 MS |
493 | /* Handle the error in the main pthread. */ |
494 | if (code_p) | |
495 | *code_p = TCP_connection_closed; | |
424ab01d | 496 | |
85145b62 | 497 | SET_FLAG(status, BGP_IO_FATAL_ERR); |
fe2e3bae | 498 | } else { |
338f4a78 | 499 | assert(ringbuf_put(peer->ibuf_work, peer->ibuf_scratch, nbytes) |
fe2e3bae | 500 | == (size_t)nbytes); |
424ab01d QY |
501 | } |
502 | ||
424ab01d QY |
503 | return status; |
504 | } | |
505 | ||
506 | /* | |
507 | * Called after we have read a BGP packet header. Validates marker, message | |
508 | * type and packet length. If any of these aren't correct, sends a notify. | |
74ffbfe6 QY |
509 | * |
510 | * Assumes that there are at least BGP_HEADER_SIZE readable bytes in the input | |
511 | * buffer. | |
424ab01d QY |
512 | */ |
513 | static bool validate_header(struct peer *peer) | |
514 | { | |
3fe63c29 QY |
515 | uint16_t size; |
516 | uint8_t type; | |
74ffbfe6 | 517 | struct ringbuf *pkt = peer->ibuf_work; |
424ab01d | 518 | |
2b64873d | 519 | static const uint8_t m_correct[BGP_MARKER_SIZE] = { |
74ffbfe6 QY |
520 | 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, |
521 | 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; | |
522 | uint8_t m_rx[BGP_MARKER_SIZE] = {0x00}; | |
442c9afb | 523 | |
74ffbfe6 QY |
524 | if (ringbuf_peek(pkt, 0, m_rx, BGP_MARKER_SIZE) != BGP_MARKER_SIZE) |
525 | return false; | |
526 | ||
527 | if (memcmp(m_correct, m_rx, BGP_MARKER_SIZE) != 0) { | |
442c9afb QY |
528 | bgp_notify_send(peer, BGP_NOTIFY_HEADER_ERR, |
529 | BGP_NOTIFY_HEADER_NOT_SYNC); | |
530 | return false; | |
531 | } | |
424ab01d | 532 | |
74ffbfe6 QY |
533 | /* Get size and type in network byte order. */ |
534 | ringbuf_peek(pkt, BGP_MARKER_SIZE, &size, sizeof(size)); | |
535 | ringbuf_peek(pkt, BGP_MARKER_SIZE + 2, &type, sizeof(type)); | |
536 | ||
537 | size = ntohs(size); | |
424ab01d QY |
538 | |
539 | /* BGP type check. */ | |
540 | if (type != BGP_MSG_OPEN && type != BGP_MSG_UPDATE | |
541 | && type != BGP_MSG_NOTIFY && type != BGP_MSG_KEEPALIVE | |
542 | && type != BGP_MSG_ROUTE_REFRESH_NEW | |
543 | && type != BGP_MSG_ROUTE_REFRESH_OLD | |
544 | && type != BGP_MSG_CAPABILITY) { | |
3fe63c29 | 545 | if (bgp_debug_neighbor_events(peer)) |
424ab01d QY |
546 | zlog_debug("%s unknown message type 0x%02x", peer->host, |
547 | type); | |
548 | ||
549 | bgp_notify_send_with_data(peer, BGP_NOTIFY_HEADER_ERR, | |
996c9314 LB |
550 | BGP_NOTIFY_HEADER_BAD_MESTYPE, &type, |
551 | 1); | |
424ab01d QY |
552 | return false; |
553 | } | |
554 | ||
3fe63c29 | 555 | /* Minimum packet length check. */ |
ef56aee4 | 556 | if ((size < BGP_HEADER_SIZE) || (size > peer->max_packet_size) |
424ab01d QY |
557 | || (type == BGP_MSG_OPEN && size < BGP_MSG_OPEN_MIN_SIZE) |
558 | || (type == BGP_MSG_UPDATE && size < BGP_MSG_UPDATE_MIN_SIZE) | |
559 | || (type == BGP_MSG_NOTIFY && size < BGP_MSG_NOTIFY_MIN_SIZE) | |
560 | || (type == BGP_MSG_KEEPALIVE && size != BGP_MSG_KEEPALIVE_MIN_SIZE) | |
561 | || (type == BGP_MSG_ROUTE_REFRESH_NEW | |
562 | && size < BGP_MSG_ROUTE_REFRESH_MIN_SIZE) | |
563 | || (type == BGP_MSG_ROUTE_REFRESH_OLD | |
564 | && size < BGP_MSG_ROUTE_REFRESH_MIN_SIZE) | |
565 | || (type == BGP_MSG_CAPABILITY | |
566 | && size < BGP_MSG_CAPABILITY_MIN_SIZE)) { | |
1588f6f4 | 567 | if (bgp_debug_neighbor_events(peer)) { |
424ab01d QY |
568 | zlog_debug("%s bad message length - %d for %s", |
569 | peer->host, size, | |
570 | type == 128 ? "ROUTE-REFRESH" | |
996c9314 | 571 | : bgp_type_str[(int)type]); |
1588f6f4 | 572 | } |
424ab01d | 573 | |
3fe63c29 QY |
574 | uint16_t nsize = htons(size); |
575 | ||
424ab01d QY |
576 | bgp_notify_send_with_data(peer, BGP_NOTIFY_HEADER_ERR, |
577 | BGP_NOTIFY_HEADER_BAD_MESLEN, | |
996c9314 | 578 | (unsigned char *)&nsize, 2); |
424ab01d QY |
579 | return false; |
580 | } | |
581 | ||
582 | return true; | |
56257a44 | 583 | } |