git.proxmox.com Git - mirror_frr.git/commitdiff
bgpd: improve labelpool performance at scale
author: G. Paul Ziemba <paulz@labn.net>
Fri, 26 Aug 2022 21:47:07 +0000 (14:47 -0700)
committer: G. Paul Ziemba <paulz@labn.net>
Wed, 31 Aug 2022 15:21:27 +0000 (08:21 -0700)
    - double the size of each new chunk request from zebra
    - use bitfields to track label allocations in a chunk
    - When allocating:
        - skip chunks with no free labels
        - search biggest chunks first
        - start search in chunk where last search ended
    - Improve API documentation in comments (bgp_lp_get() and callback)
    - Tweak formatting of "show bgp labelpool chunks"
    - Add test features (compiled conditionally on BGP_LABELPOOL_ENABLE_TESTS)

Signed-off-by: G. Paul Ziemba <paulz@labn.net>
bgpd/bgp_labelpool.c
bgpd/bgp_labelpool.h
bgpd/subdir.am
lib/bitfield.h
tests/topotests/bgp_lu_topo1/R1/labelpool.summ.json
tests/topotests/bgp_lu_topo2/R1/labelpool.summ.json

index fa1dcf33e02d24c14b7ccfb0315b47f5e30a4f2f..c227a5e41c403b2bd206d2f445546eeb65a6e734 100644 (file)
 #include "bgpd/bgp_errors.h"
 #include "bgpd/bgp_route.h"
 
+#define BGP_LABELPOOL_ENABLE_TESTS 0
+
+#ifndef VTYSH_EXTRACT_PL
+#include "bgpd/bgp_labelpool_clippy.c"
+#endif
+
+
 /*
  * Definitions and external declarations.
  */
 extern struct zclient *zclient;
 
+#if BGP_LABELPOOL_ENABLE_TESTS
+static void lptest_init(void);
+static void lptest_finish(void);
+#endif
+
 /*
  * Remember where pool data are kept
  */
 static struct labelpool *lp;
 
-/* request this many labels at a time from zebra */
-#define LP_CHUNK_SIZE  50
+/*
+ * Number of labels requested at a time from the zebra label manager.
+ * We start small but double the request size each time up to a
+ * maximum size.
+ *
+ * The label space is 20 bits which is shared with other FRR processes
+ * on this host, so to avoid greedily requesting a mostly wasted chunk,
+ * we limit the chunk size to 1/16 of the label space (that's the -4 bits
+ * in the definition below). This limit slightly increases our cost of
+ * finding free labels in our allocated chunks.
+ */
+#define LP_CHUNK_SIZE_MIN 128
+#define LP_CHUNK_SIZE_MAX (1 << (20 - 4))
 
 DEFINE_MTYPE_STATIC(BGPD, BGP_LABEL_CHUNK, "BGP Label Chunk");
 DEFINE_MTYPE_STATIC(BGPD, BGP_LABEL_FIFO, "BGP Label FIFO item");
@@ -58,6 +81,9 @@ DEFINE_MTYPE_STATIC(BGPD, BGP_LABEL_CBQ, "BGP Dynamic Label Callback");
 struct lp_chunk {
        uint32_t        first;
        uint32_t        last;
+       uint32_t nfree;              /* un-allocated count */
+       uint32_t idx_last_allocated; /* start looking here */
+       bitfield_t allocated_map;
 };
 
 /*
@@ -160,6 +186,9 @@ static void lp_lcb_free(void *goner)
 
 static void lp_chunk_free(void *goner)
 {
+       struct lp_chunk *chunk = (struct lp_chunk *)goner;
+
+       bf_free(chunk->allocated_map);
        XFREE(MTYPE_BGP_LABEL_CHUNK, goner);
 }
 
@@ -180,6 +209,12 @@ void bgp_lp_init(struct thread_master *master, struct labelpool *pool)
        lp->callback_q->spec.workfunc = lp_cbq_docallback;
        lp->callback_q->spec.del_item_data = lp_cbq_item_free;
        lp->callback_q->spec.max_retries = 0;
+
+       lp->next_chunksize = LP_CHUNK_SIZE_MIN;
+
+#if BGP_LABELPOOL_ENABLE_TESTS
+       lptest_init();
+#endif
 }
 
 /* check if a label callback was for a BGP LU node, and if so, unlock it */
@@ -201,6 +236,9 @@ void bgp_lp_finish(void)
        struct lp_fifo *lf;
        struct work_queue_item *item, *titem;
 
+#if BGP_LABELPOOL_ENABLE_TESTS
+       lptest_finish();
+#endif
        if (!lp)
                return;
 
@@ -240,25 +278,53 @@ static mpls_label_t get_label_from_pool(void *labelid)
 
        /*
         * Find a free label
-        * Linear search is not efficient but should be executed infrequently.
         */
        for (ALL_LIST_ELEMENTS_RO(lp->chunks, node, chunk)) {
                uintptr_t lbl;
+               unsigned int index;
 
                if (debug)
                        zlog_debug("%s: chunk first=%u last=%u",
                                __func__, chunk->first, chunk->last);
 
-               for (lbl = chunk->first; lbl <= chunk->last; ++lbl) {
-                       /* labelid is key to all-request "ledger" list */
-                       if (!skiplist_insert(lp->inuse, (void *)lbl, labelid)) {
-                               /*
-                                * Success
-                                */
-                               return lbl;
-                       }
+               /*
+                * don't look in chunks with no available labels
+                */
+               if (!chunk->nfree)
+                       continue;
+
+               /*
+                * roll through bitfield starting where we stopped
+                * last time
+                */
+               index = bf_find_next_clear_bit_wrap(
+                       &chunk->allocated_map, chunk->idx_last_allocated + 1,
+                       0);
+
+               /*
+                * since chunk->nfree is non-zero, we should always get
+                * a valid index
+                */
+               assert(index != WORD_MAX);
+
+               lbl = chunk->first + index;
+               if (skiplist_insert(lp->inuse, (void *)lbl, labelid)) {
+                       /* something is very wrong */
+                       zlog_err("%s: unable to insert inuse label %u (id %p)",
+                                __func__, (uint32_t)lbl, labelid);
+                       return MPLS_LABEL_NONE;
                }
+
+               /*
+                * Success
+                */
+               bf_set_bit(chunk->allocated_map, index);
+               chunk->idx_last_allocated = index;
+               chunk->nfree -= 1;
+
+               return lbl;
        }
+
        return MPLS_LABEL_NONE;
 }
 
@@ -299,10 +365,14 @@ static struct lp_lcb *lcb_alloc(
  * When connection to zebra is reestablished, previous label assignments
  * will be invalidated (via callbacks having the "allocated" parameter unset)
  * and new labels will be automatically reassigned by this labelpool module
- * (that is, a requestor does not need to call lp_get() again if it is
+ * (that is, a requestor does not need to call bgp_lp_get() again if it is
  * notified via callback that its label has been lost: it will eventually
  * get another callback with a new label assignment).
  *
+ * The callback function should return 0 to accept the allocation
+ * and non-zero to refuse it. The callback function return value is
+ * ignored for invalidations (i.e., when the "allocated" parameter is false).
+ *
  * Prior requests for a given labelid are detected so that requests and
  * assignments are not duplicated.
  */
@@ -392,10 +462,13 @@ void bgp_lp_get(
        if (lp_fifo_count(&lp->requests) > lp->pending_count) {
                if (!zclient || zclient->sock < 0)
                        return;
-               if (zclient_send_get_label_chunk(zclient, 0, LP_CHUNK_SIZE,
-                                                MPLS_LABEL_BASE_ANY)
-                   != ZCLIENT_SEND_FAILURE)
-                       lp->pending_count += LP_CHUNK_SIZE;
+               if (zclient_send_get_label_chunk(zclient, 0, lp->next_chunksize,
+                                                MPLS_LABEL_BASE_ANY) !=
+                   ZCLIENT_SEND_FAILURE) {
+                       lp->pending_count += lp->next_chunksize;
+                       if ((lp->next_chunksize << 1) <= LP_CHUNK_SIZE_MAX)
+                               lp->next_chunksize <<= 1;
+               }
        }
 }
 
@@ -408,13 +481,36 @@ void bgp_lp_release(
 
        if (!skiplist_search(lp->ledger, labelid, (void **)&lcb)) {
                if (label == lcb->label && type == lcb->type) {
+                       struct listnode *node;
+                       struct lp_chunk *chunk;
                        uintptr_t lbl = label;
+                       bool deallocated = false;
 
                        /* no longer in use */
                        skiplist_delete(lp->inuse, (void *)lbl, NULL);
 
                        /* no longer requested */
                        skiplist_delete(lp->ledger, labelid, NULL);
+
+                       /*
+                        * Find the chunk this label belongs to and
+                        * deallocate the label
+                        */
+                       for (ALL_LIST_ELEMENTS_RO(lp->chunks, node, chunk)) {
+                               uint32_t index;
+
+                               if ((label < chunk->first) ||
+                                   (label > chunk->last))
+                                       continue;
+
+                               index = label - chunk->first;
+                               assert(bf_test_index(chunk->allocated_map,
+                                                    index));
+                               bf_release_index(chunk->allocated_map, index);
+                               chunk->nfree += 1;
+                               deallocated = true;
+                       }
+                       assert(deallocated);
                }
        }
 }
@@ -427,6 +523,7 @@ void bgp_lp_event_chunk(uint8_t keep, uint32_t first, uint32_t last)
        struct lp_chunk *chunk;
        int debug = BGP_DEBUG(labelpool, LABELPOOL);
        struct lp_fifo *lf;
+       uint32_t labelcount;
 
        if (last < first) {
                flog_err(EC_BGP_LABEL,
@@ -437,19 +534,27 @@ void bgp_lp_event_chunk(uint8_t keep, uint32_t first, uint32_t last)
 
        chunk = XCALLOC(MTYPE_BGP_LABEL_CHUNK, sizeof(struct lp_chunk));
 
+       labelcount = last - first + 1;
+
        chunk->first = first;
        chunk->last = last;
+       chunk->nfree = labelcount;
+       bf_init(chunk->allocated_map, labelcount);
 
-       listnode_add(lp->chunks, chunk);
+       /*
+        * Optimize for allocation by adding the new (presumably larger)
+        * chunk at the head of the list so it is examined first.
+        */
+       listnode_add_head(lp->chunks, chunk);
 
-       lp->pending_count -= (last - first + 1);
+       lp->pending_count -= labelcount;
 
        if (debug) {
                zlog_debug("%s: %zu pending requests", __func__,
                        lp_fifo_count(&lp->requests));
        }
 
-       while ((lf = lp_fifo_first(&lp->requests))) {
+       while (labelcount && (lf = lp_fifo_first(&lp->requests))) {
 
                struct lp_lcb *lcb;
                void *labelid = lf->lcb.labelid;
@@ -495,6 +600,8 @@ void bgp_lp_event_chunk(uint8_t keep, uint32_t first, uint32_t last)
                        break;
                }
 
+               labelcount -= 1;
+
                /*
                 * we filled the request from local pool.
                 * Enqueue response work item with new label.
@@ -535,8 +642,8 @@ void bgp_lp_event_zebra_down(void)
  */
 void bgp_lp_event_zebra_up(void)
 {
-       int labels_needed;
-       int chunks_needed;
+       unsigned int labels_needed;
+       unsigned int chunks_needed;
        void *labelid;
        struct lp_lcb *lcb;
        int lm_init_ok;
@@ -548,9 +655,16 @@ void bgp_lp_event_zebra_up(void)
        labels_needed = lp_fifo_count(&lp->requests) +
                skiplist_count(lp->inuse);
 
+       if (labels_needed > lp->next_chunksize) {
+               while ((lp->next_chunksize < labels_needed) &&
+                      (lp->next_chunksize << 1 <= LP_CHUNK_SIZE_MAX))
+
+                       lp->next_chunksize <<= 1;
+       }
+
        /* round up */
-       chunks_needed = (labels_needed / LP_CHUNK_SIZE) + 1;
-       labels_needed = chunks_needed * LP_CHUNK_SIZE;
+       chunks_needed = (labels_needed / lp->next_chunksize) + 1;
+       labels_needed = chunks_needed * lp->next_chunksize;
 
        lm_init_ok = lm_label_manager_connect(zclient, 1) == 0;
 
@@ -937,25 +1051,496 @@ DEFUN(show_bgp_labelpool_chunks, show_bgp_labelpool_chunks_cmd,
                }
                json = json_object_new_array();
        } else {
-               vty_out(vty, "First    Last\n");
-               vty_out(vty, "--------------\n");
+               vty_out(vty, "%10s %10s %10s %10s\n", "First", "Last", "Size",
+                       "nfree");
+               vty_out(vty, "-------------------------------------------\n");
        }
 
        for (ALL_LIST_ELEMENTS_RO(lp->chunks, node, chunk)) {
+               uint32_t size;
+
+               size = chunk->last - chunk->first + 1;
+
                if (uj) {
                        json_elem = json_object_new_object();
                        json_object_array_add(json, json_elem);
                        json_object_int_add(json_elem, "first", chunk->first);
                        json_object_int_add(json_elem, "last", chunk->last);
+                       json_object_int_add(json_elem, "size", size);
+                       json_object_int_add(json_elem, "numberFree",
+                                           chunk->nfree);
                } else
-                       vty_out(vty, "%-10u %-10u\n", chunk->first,
-                               chunk->last);
+                       vty_out(vty, "%10u %10u %10u %10u\n", chunk->first,
+                               chunk->last, size, chunk->nfree);
        }
        if (uj)
                vty_json(vty, json);
        return CMD_SUCCESS;
 }
 
+#if BGP_LABELPOOL_ENABLE_TESTS
+/*------------------------------------------------------------------------
+ *                     Testing code start
+ *------------------------------------------------------------------------*/
+
+DEFINE_MTYPE_STATIC(BGPD, LABELPOOL_TEST, "Label pool test");
+
+#define LPT_STAT_INSERT_FAIL 0
+#define LPT_STAT_DELETE_FAIL 1
+#define LPT_STAT_ALLOCATED 2
+#define LPT_STAT_DEALLOCATED 3
+#define LPT_STAT_MAX 4
+
+const char *lpt_counter_names[] = {
+       "sl insert failures",
+       "sl delete failures",
+       "labels allocated",
+       "labels deallocated",
+};
+
+static uint8_t lpt_generation;
+static bool lpt_inprogress;
+static struct skiplist *lp_tests;
+static unsigned int lpt_test_cb_tcb_lookup_fails;
+static unsigned int lpt_release_tcb_lookup_fails;
+static unsigned int lpt_test_event_tcb_lookup_fails;
+static unsigned int lpt_stop_tcb_lookup_fails;
+
+struct lp_test {
+       uint8_t generation;
+       unsigned int request_maximum;
+       unsigned int request_blocksize;
+       uintptr_t request_count; /* match type of labelid */
+       int label_type;
+       struct skiplist *labels;
+       struct timeval starttime;
+       struct skiplist *timestamps_alloc;
+       struct skiplist *timestamps_dealloc;
+       struct thread *event_thread;
+       unsigned int counter[LPT_STAT_MAX];
+};
+
+/* test parameters */
+#define LPT_MAX_COUNT 500000  /* get this many labels in all */
+#define LPT_BLKSIZE 10000     /* this many at a time, then yield */
+#define LPT_TS_INTERVAL 10000 /* timestamp every this many labels */
+
+
+static int test_cb(mpls_label_t label, void *labelid, bool allocated)
+{
+       uintptr_t generation;
+       struct lp_test *tcb;
+
+       generation = ((uintptr_t)labelid >> 24) & 0xff;
+
+       if (skiplist_search(lp_tests, (void *)generation, (void **)&tcb)) {
+
+               /* couldn't find current test in progress */
+               ++lpt_test_cb_tcb_lookup_fails;
+               return -1; /* reject allocation */
+       }
+
+       if (allocated) {
+               ++tcb->counter[LPT_STAT_ALLOCATED];
+               if (!(tcb->counter[LPT_STAT_ALLOCATED] % LPT_TS_INTERVAL)) {
+                       uintptr_t time_ms;
+
+                       time_ms = monotime_since(&tcb->starttime, NULL) / 1000;
+                       skiplist_insert(tcb->timestamps_alloc,
+                                       (void *)(uintptr_t)tcb
+                                               ->counter[LPT_STAT_ALLOCATED],
+                                       (void *)time_ms);
+               }
+               if (skiplist_insert(tcb->labels, labelid,
+                                   (void *)(uintptr_t)label)) {
+                       ++tcb->counter[LPT_STAT_INSERT_FAIL];
+                       return -1;
+               }
+       } else {
+               ++tcb->counter[LPT_STAT_DEALLOCATED];
+               if (!(tcb->counter[LPT_STAT_DEALLOCATED] % LPT_TS_INTERVAL)) {
+                       uintptr_t time_ms;
+
+                       time_ms = monotime_since(&tcb->starttime, NULL) / 1000;
+                       skiplist_insert(tcb->timestamps_dealloc,
+                                       (void *)(uintptr_t)tcb
+                                               ->counter[LPT_STAT_ALLOCATED],
+                                       (void *)time_ms);
+               }
+               if (skiplist_delete(tcb->labels, labelid, 0)) {
+                       ++tcb->counter[LPT_STAT_DELETE_FAIL];
+                       return -1;
+               }
+       }
+       return 0;
+}
+
+static void labelpool_test_event_handler(struct thread *thread)
+{
+       struct lp_test *tcb;
+
+       if (skiplist_search(lp_tests, (void *)(uintptr_t)(lpt_generation),
+                           (void **)&tcb)) {
+
+               /* couldn't find current test in progress */
+               ++lpt_test_event_tcb_lookup_fails;
+               return;
+       }
+
+       /*
+        * request a bunch of labels
+        */
+       for (unsigned int i = 0; (i < tcb->request_blocksize) &&
+                                (tcb->request_count < tcb->request_maximum);
+            ++i) {
+
+               uintptr_t id;
+
+               ++tcb->request_count;
+
+               /*
+                * construct 32-bit id from request_count and generation
+                */
+               id = ((uintptr_t)tcb->generation << 24) |
+                    (tcb->request_count & 0x00ffffff);
+               bgp_lp_get(LP_TYPE_VRF, (void *)id, test_cb);
+       }
+
+       if (tcb->request_count < tcb->request_maximum)
+               thread_add_event(bm->master, labelpool_test_event_handler, NULL,
+                                0, &tcb->event_thread);
+}
+
+static void lptest_stop(void)
+{
+       struct lp_test *tcb;
+
+       if (!lpt_inprogress)
+               return;
+
+       if (skiplist_search(lp_tests, (void *)(uintptr_t)(lpt_generation),
+                           (void **)&tcb)) {
+
+               /* couldn't find current test in progress */
+               ++lpt_stop_tcb_lookup_fails;
+               return;
+       }
+
+       if (tcb->event_thread)
+               thread_cancel(&tcb->event_thread);
+
+       lpt_inprogress = false;
+}
+
+static int lptest_start(struct vty *vty)
+{
+       struct lp_test *tcb;
+
+       if (lpt_inprogress) {
+               vty_out(vty, "test already in progress\n");
+               return -1;
+       }
+
+       if (skiplist_count(lp_tests) >=
+           (1 << (8 * sizeof(lpt_generation))) - 1) {
+               /*
+                * Too many test runs
+                */
+               vty_out(vty, "too many tests: clear first\n");
+               return -1;
+       }
+
+       /*
+        * We pack the generation and request number into the labelid;
+        * make sure they fit.
+        */
+       unsigned int n1 = LPT_MAX_COUNT;
+       unsigned int sh = 0;
+       unsigned int label_bits;
+
+       label_bits = 8 * (sizeof(tcb->request_count) - sizeof(lpt_generation));
+
+       /* n1 should be same type as tcb->request_maximum */
+       assert(sizeof(n1) == sizeof(tcb->request_maximum));
+
+       while (n1 >>= 1)
+               ++sh;
+       sh += 1; /* number of bits needed to hold LPT_MAX_COUNT */
+
+       if (sh > label_bits) {
+               vty_out(vty,
+                       "Sorry, test iteration count too big on this platform (LPT_MAX_COUNT %u, need %u bits, but label_bits is only %u)\n",
+                       LPT_MAX_COUNT, sh, label_bits);
+               return -1;
+       }
+
+       lpt_inprogress = true;
+       ++lpt_generation;
+
+       tcb = XCALLOC(MTYPE_LABELPOOL_TEST, sizeof(*tcb));
+
+       tcb->generation = lpt_generation;
+       tcb->label_type = LP_TYPE_VRF;
+       tcb->request_maximum = LPT_MAX_COUNT;
+       tcb->request_blocksize = LPT_BLKSIZE;
+       tcb->labels = skiplist_new(0, NULL, NULL);
+       tcb->timestamps_alloc = skiplist_new(0, NULL, NULL);
+       tcb->timestamps_dealloc = skiplist_new(0, NULL, NULL);
+       thread_add_event(bm->master, labelpool_test_event_handler, NULL, 0,
+                        &tcb->event_thread);
+       monotime(&tcb->starttime);
+
+       skiplist_insert(lp_tests, (void *)(uintptr_t)tcb->generation, tcb);
+       return 0;
+}
+
+DEFPY(start_labelpool_perf_test, start_labelpool_perf_test_cmd,
+      "debug bgp lptest start",
+      DEBUG_STR BGP_STR
+      "label pool test\n"
+      "start\n")
+{
+       lptest_start(vty);
+       return CMD_SUCCESS;
+}
+
+static void lptest_print_stats(struct vty *vty, struct lp_test *tcb)
+{
+       unsigned int i;
+
+       vty_out(vty, "Global Lookup Failures in test_cb: %5u\n",
+               lpt_test_cb_tcb_lookup_fails);
+       vty_out(vty, "Global Lookup Failures in release: %5u\n",
+               lpt_release_tcb_lookup_fails);
+       vty_out(vty, "Global Lookup Failures in event:   %5u\n",
+               lpt_test_event_tcb_lookup_fails);
+       vty_out(vty, "Global Lookup Failures in stop:    %5u\n",
+               lpt_stop_tcb_lookup_fails);
+       vty_out(vty, "\n");
+
+       if (!tcb) {
+               if (skiplist_search(lp_tests, (void *)(uintptr_t)lpt_generation,
+                                   (void **)&tcb)) {
+                       vty_out(vty, "Error: can't find test %u\n",
+                               lpt_generation);
+                       return;
+               }
+       }
+
+       vty_out(vty, "Test Generation %u:\n", tcb->generation);
+
+       vty_out(vty, "Counter   Value\n");
+       for (i = 0; i < LPT_STAT_MAX; ++i) {
+               vty_out(vty, "%20s: %10u\n", lpt_counter_names[i],
+                       tcb->counter[i]);
+       }
+       vty_out(vty, "\n");
+
+       if (tcb->timestamps_alloc) {
+               void *Key;
+               void *Value;
+               void *cursor;
+
+               float elapsed;
+
+               vty_out(vty, "%10s %10s\n", "Count", "Seconds");
+
+               cursor = NULL;
+               while (!skiplist_next(tcb->timestamps_alloc, &Key, &Value,
+                                     &cursor)) {
+
+                       elapsed = ((float)(uintptr_t)Value) / 1000;
+
+                       vty_out(vty, "%10llu %10.3f\n",
+                               (unsigned long long)(uintptr_t)Key, elapsed);
+               }
+               vty_out(vty, "\n");
+       }
+}
+
+DEFPY(show_labelpool_perf_test, show_labelpool_perf_test_cmd,
+      "debug bgp lptest show",
+      DEBUG_STR BGP_STR
+      "label pool test\n"
+      "show\n")
+{
+
+       if (lp_tests) {
+               void *Key;
+               void *Value;
+               void *cursor;
+
+               cursor = NULL;
+               while (!skiplist_next(lp_tests, &Key, &Value, &cursor)) {
+                       lptest_print_stats(vty, (struct lp_test *)Value);
+               }
+       } else {
+               vty_out(vty, "no test results\n");
+       }
+       return CMD_SUCCESS;
+}
+
+DEFPY(stop_labelpool_perf_test, stop_labelpool_perf_test_cmd,
+      "debug bgp lptest stop",
+      DEBUG_STR BGP_STR
+      "label pool test\n"
+      "stop\n")
+{
+
+       if (lpt_inprogress) {
+               lptest_stop();
+               lptest_print_stats(vty, NULL);
+       } else {
+               vty_out(vty, "no test in progress\n");
+       }
+       return CMD_SUCCESS;
+}
+
+DEFPY(clear_labelpool_perf_test, clear_labelpool_perf_test_cmd,
+      "debug bgp lptest clear",
+      DEBUG_STR BGP_STR
+      "label pool test\n"
+      "clear\n")
+{
+
+       if (lpt_inprogress) {
+               lptest_stop();
+       }
+       if (lp_tests) {
+               while (!skiplist_first(lp_tests, NULL, NULL))
+                       /* del function of skiplist cleans up tcbs */
+                       skiplist_delete_first(lp_tests);
+       }
+       return CMD_SUCCESS;
+}
+
+/*
+ * With the "release" command, we can release labels at intervals through
+ * the ID space. Thus we can exercise the bitfield-wrapping behavior
+ * of the allocator in a subsequent test.
+ */
+/* clang-format off */
+DEFPY(release_labelpool_perf_test, release_labelpool_perf_test_cmd,
+      "debug bgp lptest release test GENERATION$generation every (1-5)$every_nth",
+      DEBUG_STR
+      BGP_STR
+      "label pool test\n"
+      "release labels\n"
+      "\"test\"\n"
+      "test number\n"
+      "\"every\"\n"
+      "label fraction denominator\n")
+{
+       /* clang-format on */
+
+       unsigned long testnum;
+       char *end;
+       struct lp_test *tcb;
+
+       testnum = strtoul(generation, &end, 0);
+       if (*end) {
+               vty_out(vty, "Invalid test number: \"%s\"\n", generation);
+               return CMD_SUCCESS;
+       }
+       if (lpt_inprogress && (testnum == lpt_generation)) {
+               vty_out(vty,
+                       "Error: Test %lu is still in progress (stop first)\n",
+                       testnum);
+               return CMD_SUCCESS;
+       }
+
+       if (skiplist_search(lp_tests, (void *)(uintptr_t)testnum,
+                           (void **)&tcb)) {
+
+               /* couldn't find current test in progress */
+               vty_out(vty, "Error: Can't look up test number: \"%lu\"\n",
+                       testnum);
+               ++lpt_release_tcb_lookup_fails;
+               return CMD_SUCCESS;
+       }
+
+       void *Key, *cKey;
+       void *Value, *cValue;
+       void *cursor;
+       unsigned int iteration;
+       int rc;
+
+       cursor = NULL;
+       iteration = 0;
+       rc = skiplist_next(tcb->labels, &Key, &Value, &cursor);
+
+       while (!rc) {
+               cKey = Key;
+               cValue = Value;
+
+               /* find next item before we delete this one */
+               rc = skiplist_next(tcb->labels, &Key, &Value, &cursor);
+
+               if (!(iteration % every_nth)) {
+                       bgp_lp_release(tcb->label_type, cKey,
+                                      (mpls_label_t)(uintptr_t)cValue);
+                       skiplist_delete(tcb->labels, cKey, NULL);
+                       ++tcb->counter[LPT_STAT_DEALLOCATED];
+               }
+               ++iteration;
+       }
+
+       return CMD_SUCCESS;
+}
+
+static void lptest_delete(void *val)
+{
+       struct lp_test *tcb = (struct lp_test *)val;
+       void *Key;
+       void *Value;
+       void *cursor;
+
+       if (tcb->labels) {
+               cursor = NULL;
+               while (!skiplist_next(tcb->labels, &Key, &Value, &cursor))
+                       bgp_lp_release(tcb->label_type, Key,
+                                      (mpls_label_t)(uintptr_t)Value);
+               skiplist_free(tcb->labels);
+               tcb->labels = NULL;
+       }
+       if (tcb->timestamps_alloc) {
+               cursor = NULL;
+               skiplist_free(tcb->timestamps_alloc);
+               tcb->timestamps_alloc = NULL;
+       }
+
+       if (tcb->timestamps_dealloc) {
+               cursor = NULL;
+               skiplist_free(tcb->timestamps_dealloc);
+               tcb->timestamps_dealloc = NULL;
+       }
+
+       if (tcb->event_thread)
+               thread_cancel(&tcb->event_thread);
+
+       memset(tcb, 0, sizeof(*tcb));
+
+       XFREE(MTYPE_LABELPOOL_TEST, tcb);
+}
+
+static void lptest_init(void)
+{
+       lp_tests = skiplist_new(0, NULL, lptest_delete);
+}
+
+static void lptest_finish(void)
+{
+       if (lp_tests) {
+               skiplist_free(lp_tests);
+               lp_tests = NULL;
+       }
+}
+
+/*------------------------------------------------------------------------
+ *                     Testing code end
+ *------------------------------------------------------------------------*/
+#endif /* BGP_LABELPOOL_ENABLE_TESTS */
+
 void bgp_lp_vty_init(void)
 {
        install_element(VIEW_NODE, &show_bgp_labelpool_summary_cmd);
@@ -963,4 +1548,12 @@ void bgp_lp_vty_init(void)
        install_element(VIEW_NODE, &show_bgp_labelpool_inuse_cmd);
        install_element(VIEW_NODE, &show_bgp_labelpool_requests_cmd);
        install_element(VIEW_NODE, &show_bgp_labelpool_chunks_cmd);
+
+#if BGP_LABELPOOL_ENABLE_TESTS
+       install_element(ENABLE_NODE, &start_labelpool_perf_test_cmd);
+       install_element(ENABLE_NODE, &show_labelpool_perf_test_cmd);
+       install_element(ENABLE_NODE, &stop_labelpool_perf_test_cmd);
+       install_element(ENABLE_NODE, &release_labelpool_perf_test_cmd);
+       install_element(ENABLE_NODE, &clear_labelpool_perf_test_cmd);
+#endif /* BGP_LABELPOOL_ENABLE_TESTS */
 }
index d6a8eec84d8d364b548c1552d38edf1600da5a4f..2f3ffe437fa6ca4e2ee298fa42e969945bafd372 100644 (file)
@@ -41,6 +41,7 @@ struct labelpool {
        struct work_queue       *callback_q;
        uint32_t                pending_count;  /* requested from zebra */
        uint32_t reconnect_count;               /* zebra reconnections */
+       uint32_t next_chunksize;                /* request this many labels */
 };
 
 extern void bgp_lp_init(struct thread_master *master, struct labelpool *pool);
index 9e3a095529e6f99eae6706a501b72c61345b5662..b1eeb937e180b0cc5ec607392763489c332fa893 100644 (file)
@@ -232,6 +232,7 @@ clippy_scan += \
        bgpd/bgp_bmp.c \
        bgpd/bgp_debug.c \
        bgpd/bgp_evpn_vty.c \
+       bgpd/bgp_labelpool.c \
        bgpd/bgp_route.c \
        bgpd/bgp_routemap.c \
        bgpd/bgp_rpki.c \
index a3f361ed9de66babb90724cdb2de8f761ce70b7f..9af4053dafb987c7e0f7fb0e8859df46b7b6a201 100644 (file)
@@ -158,6 +158,75 @@ DECLARE_MTYPE(BITFIELD);
                (b) += (w * WORD_SIZE);                                        \
        } while (0)
 
+/*
+ * Find a clear bit in v and return it
+ * Start looking in the word containing bit position start_index.
+ * If necessary, wrap around after bit position max_index.
+ */
+static inline unsigned int
+bf_find_next_clear_bit_wrap(bitfield_t *v, word_t start_index, word_t max_index)
+{
+       int start_bit;
+       unsigned long i, offset, scanbits, wordcount_max, index_max;
+
+       if (start_index > max_index)
+               start_index = 0;
+
+       start_bit = start_index & (WORD_SIZE - 1);
+       wordcount_max = bf_index(max_index) + 1;
+
+       scanbits = WORD_SIZE;
+       for (i = bf_index(start_index); i < v->m; ++i) {
+               if (v->data[i] == WORD_MAX) {
+                       /* if the whole word is full move to the next */
+                       start_bit = 0;
+                       continue;
+               }
+               /* scan one word for clear bits */
+               if ((i == v->m - 1) && (v->m >= wordcount_max))
+                       /* max index could be only part of word */
+                       scanbits = (max_index % WORD_SIZE) + 1;
+               for (offset = start_bit; offset < scanbits; ++offset) {
+                       if (!((v->data[i] >> offset) & 1))
+                               return ((i * WORD_SIZE) + offset);
+               }
+               /* move to the next word */
+               start_bit = 0;
+       }
+
+       if (v->m < wordcount_max) {
+               /*
+                * We can expand bitfield, so no need to wrap.
+                * Return the index of the first bit of the next word.
+                * Assumption is that caller will call bf_set_bit which
+                * will allocate additional space.
+                */
+               v->m += 1;
+               v->data = (word_t *)realloc(v->data, v->m * sizeof(word_t));
+               v->data[v->m - 1] = 0;
+               return v->m * WORD_SIZE;
+       }
+
+       /*
+        * start looking for a clear bit at the start of the bitfield and
+        * stop when we reach start_index
+        */
+       scanbits = WORD_SIZE;
+       index_max = bf_index(start_index - 1);
+       for (i = 0; i <= index_max; ++i) {
+               if (i == index_max)
+                       scanbits = ((start_index - 1) % WORD_SIZE) + 1;
+               for (offset = start_bit; offset < scanbits; ++offset) {
+                       if (!((v->data[i] >> offset) & 1))
+                               return ((i * WORD_SIZE) + offset);
+               }
+               /* move to the next word */
+               start_bit = 0;
+       }
+
+       return WORD_MAX;
+}
+
 static inline unsigned int bf_find_next_set_bit(bitfield_t v,
                word_t start_index)
 {
index 29e6c2cbf702063e7e69eb6de19e0326cf4082c5..e2eee513e6d7f9c4e5db38ecbbdbbf0d66b4a6a2 100644 (file)
@@ -2,7 +2,7 @@
   "Ledger":506,
   "InUse":506,
   "Requests":0,
-  "LabelChunks":11,
+  "LabelChunks":3,
   "Pending":0,
   "Reconnects":0
 }
index 77705e8d35cc8dae2ba93b057ae602845b1323c4..0dc59b58cfb3f4e7cd61a8e1062bbe2eb076dbf1 100644 (file)
@@ -2,7 +2,7 @@
   "Ledger":51,
   "InUse":51,
   "Requests":0,
-  "LabelChunks":2,
+  "LabelChunks":1,
   "Pending":0,
   "Reconnects":0
 }