]> git.proxmox.com Git - mirror_frr.git/commitdiff
bgpd: vector I/O
authorQuentin Young <qlyoung@cumulusnetworks.com>
Fri, 4 Oct 2019 18:52:24 +0000 (18:52 +0000)
committerQuentin Young <qlyoung@cumulusnetworks.com>
Mon, 14 Oct 2019 18:41:53 +0000 (18:41 +0000)
Signed-off-by: Quentin Young <qlyoung@cumulusnetworks.com>
bgpd/bgp_io.c

index c8b7e5155ff0b5ce54bdf29116b242fc68d0dd37..c4555afc5296cd89a5455a103cffa288e77cab39 100644 (file)
@@ -22,6 +22,7 @@
 /* clang-format off */
 #include <zebra.h>
 #include <pthread.h>           // for pthread_mutex_unlock, pthread_mutex_lock
+#include <sys/uio.h>           // for writev
 
 #include "frr_pthread.h"
 #include "linklist.h"          // for list_delete, list_delete_all_node, lis...
@@ -275,35 +276,94 @@ static uint16_t bgp_write(struct peer *peer)
 {
        uint8_t type;
        struct stream *s;
-       int num;
        int update_last_write = 0;
-       unsigned int count = 0;
+       unsigned int count;
        uint32_t uo = 0;
        uint16_t status = 0;
        uint32_t wpkt_quanta_old;
 
+       int writenum = 0;
+       int num;
+       unsigned int iovsz;
+       unsigned int strmsz;
+       unsigned int total_written;
+
        wpkt_quanta_old = atomic_load_explicit(&peer->bgp->wpkt_quanta,
                                               memory_order_relaxed);
+       struct stream *ostreams[wpkt_quanta_old];
+       struct stream **streams = ostreams;
+       struct iovec iov[wpkt_quanta_old];
+
+       s = stream_fifo_head(peer->obuf);
+
+       if (!s)
+               goto done;
+
+       count = iovsz = 0;
+       while (count < wpkt_quanta_old && iovsz < array_size(iov) && s) {
+               ostreams[iovsz] = s;
+               iov[iovsz].iov_base = stream_pnt(s);
+               iov[iovsz].iov_len = STREAM_READABLE(s);
+               writenum += STREAM_READABLE(s);
+               s = s->next;
+               ++iovsz;
+               ++count;
+       }
+
+       strmsz = iovsz;
+       total_written = 0;
+
+       do {
+               num = writev(peer->fd, iov, iovsz);
+
+               if (num < 0) {
+                       if (!ERRNO_IO_RETRY(errno)) {
+                               BGP_EVENT_ADD(peer, TCP_fatal_error);
+                               SET_FLAG(status, BGP_IO_FATAL_ERR);
+                       } else {
+                               SET_FLAG(status, BGP_IO_TRANS_ERR);
+                       }
+
+                       break;
+               } else if (num != writenum) {
+                       unsigned int msg_written = 0;
+                       unsigned int ic = iovsz;
+
+                       for (unsigned int i = 0; i < ic; i++) {
+                               size_t ss = iov[i].iov_len;
 
-       while (count < wpkt_quanta_old && (s = stream_fifo_head(peer->obuf))) {
-               int writenum;
-               do {
-                       writenum = stream_get_endp(s) - stream_get_getp(s);
-                       num = write(peer->fd, stream_pnt(s), writenum);
+                               if (ss > (unsigned int) num)
+                                       break;
 
-                       if (num < 0) {
-                               if (!ERRNO_IO_RETRY(errno)) {
-                                       BGP_EVENT_ADD(peer, TCP_fatal_error);
-                                       SET_FLAG(status, BGP_IO_FATAL_ERR);
-                               } else {
-                                       SET_FLAG(status, BGP_IO_TRANS_ERR);
-                               }
+                               msg_written++;
+                               iovsz--;
+                               writenum -= ss;
+                               num -= ss;
+                       }
 
-                               goto done;
-                       } else if (num != writenum)
-                               stream_forward_getp(s, num);
+                       total_written += msg_written;
 
-               } while (num != writenum);
+                       memmove(&iov, &iov[msg_written],
+                               sizeof(iov[0]) * iovsz);
+                       streams = &streams[msg_written];
+                       stream_forward_getp(streams[0], num);
+                       iov[0].iov_base = stream_pnt(streams[0]);
+                       iov[0].iov_len = STREAM_READABLE(streams[0]);
+
+                       writenum -= num;
+                       num = 0;
+                       assert(writenum > 0);
+               } else {
+                       total_written = strmsz;
+               }
+
+       } while (num != writenum);
+
+       /* Handle statistics */
+       for (unsigned int i = 0; i < total_written; i++) {
+               s = stream_fifo_pop(peer->obuf);
+
+               assert(s == ostreams[i]);
 
                /* Retrieve BGP packet type. */
                stream_set_getp(s, BGP_MARKER_SIZE + 2);
@@ -351,9 +411,8 @@ static uint16_t bgp_write(struct peer *peer)
                        break;
                }
 
-               count++;
-
-               stream_free(stream_fifo_pop(peer->obuf));
+               stream_free(s);
+               ostreams[i] = NULL;
                update_last_write = 1;
        }