ceph/src/spdk/dpdk/lib/librte_distributor/rte_distributor_private.h

   1 /* SPDX-License-Identifier: BSD-3-Clause
   2  * Copyright(c) 2017 Intel Corporation
   3  */
   4
   5 #ifndef _RTE_DIST_PRIV_H_
   6 #define _RTE_DIST_PRIV_H_
   7
   8 /**
   9  * @file
  10  * RTE distributor
  11  *
  12  * The distributor is a component which is designed to pass packets
  13  * one-at-a-time to workers, with dynamic load balancing.
  14  */
  15
  16 #ifdef __cplusplus
  17 extern "C" {
  18 #endif
  19
  20 #define NO_FLAGS 0 /**< zero flags value; by its name, "no flags set" */
  21 #define RTE_DISTRIB_PREFIX "DT_" /**< string prefix; presumably used when naming distributor objects -- confirm at the use site */
  22
  23 /*
  24  * We use the bottom four bits of the pointer word for flags, shifting out
  25  * the top four bits to make room (since a 64-bit pointer actually only uses
  26  * 48 bits). An arithmetic right shift by RTE_DISTRIB_FLAG_BITS then restores
  27  * the original pointer value, with proper sign extension into the top bits.
  28  */
  29 #define RTE_DISTRIB_FLAG_BITS 4    /**< number of low bits reserved for flags */
  30 #define RTE_DISTRIB_FLAGS_MASK (0x0F) /**< mask covering those four flag bits */
  31 #define RTE_DISTRIB_NO_BUF 0       /**< empty flags: no buffer requested */
  32 #define RTE_DISTRIB_GET_BUF (1)    /**< worker requests a buffer, returns old */
  33 #define RTE_DISTRIB_RETURN_BUF (2) /**< worker returns a buffer, no request */
  34 #define RTE_DISTRIB_VALID_BUF (4)  /**< set if bufptr contains ptr */
  35
  36 #define RTE_DISTRIB_BACKLOG_SIZE 8 /**< backlog size; a power of two, so SIZE - 1 is a valid bit mask */
  37 #define RTE_DISTRIB_BACKLOG_MASK (RTE_DISTRIB_BACKLOG_SIZE - 1) /**< 0x07; presumably wraps backlog indices -- confirm */
  38
  39 #define RTE_DISTRIB_MAX_RETURNS 128 /**< entries in rte_distributor_returned_pkts.mbufs[]; power of two */
  40 #define RTE_DISTRIB_RETURNS_MASK (RTE_DISTRIB_MAX_RETURNS - 1) /**< 0x7F; presumably wraps returns indices -- confirm */
  41
  42 /**
  43  * Maximum number of workers allowed.
  44  * Take care when increasing this limit: it is bounded by how in-flight tags
  45  * are tracked. See the 64-bit in_flight_bitmask and rte_distributor_process.
  46  */
  47 #define RTE_DISTRIB_MAX_WORKERS 64
  48
  49 #define RTE_DISTRIBUTOR_NAMESIZE 32 /**< Size of the name[] buffer of an instance */
  50
  51 /**
  52  * Buffer structure used to pass the pointer data between cores. This is cache
  53  * line aligned, but to improve performance and prevent adjacent cache-line
  54  * prefetches of buffers for other workers, e.g. when worker 1's buffer is on
  55  * the next cache line to worker 0, we pad this out to three cache lines.
  56  * Only 64 bits of the memory are actually used though.
  57  */
  58 union rte_distributor_buffer_v20 {
  59         volatile int64_t bufptr64; /**< the single 64-bit word that is actually used */
  60         char pad[RTE_CACHE_LINE_SIZE*3]; /**< sizes the union to three cache lines */
  61 } __rte_cache_aligned;
  62
  63 /*
  64  * Up to 8 mbufs are transferred to/from a worker at a time, and the
  65  * flow matching algorithm is optimized for 8 flow IDs at a time.
  66  */
  67 #define RTE_DIST_BURST_SIZE 8
  68
     /*
      * Backlog of packet words plus a pointer to their tags. Both distributor
      * structs in this header keep one of these per worker slot
      * (backlog[RTE_DISTRIB_MAX_WORKERS]).
      */
  69 struct rte_distributor_backlog {
  70         unsigned int start; /* NOTE(review): presumably index of the first pending entry in pkts[] -- confirm */
  71         unsigned int count; /* NOTE(review): presumably number of pending entries in pkts[] -- confirm */
             /*
              * Cache-aligned array of 64-bit packet words.
              * NOTE(review): sized by RTE_DIST_BURST_SIZE rather than
              * RTE_DISTRIB_BACKLOG_SIZE; both are currently 8.
              */
  72         int64_t pkts[RTE_DIST_BURST_SIZE] __rte_cache_aligned;
  73         uint16_t *tags; /* will point to second cacheline of inflights */
  74 } __rte_cache_aligned;
  75
  76
     /*
      * Fixed-size store of mbuf pointers with start/count bookkeeping.
      * Judging by the name, it holds packets handed back by workers.
      */
  77 struct rte_distributor_returned_pkts {
  78         unsigned int start; /* NOTE(review): presumably index of the first valid entry in mbufs[] -- confirm */
  79         unsigned int count; /* NOTE(review): presumably number of valid entries in mbufs[] -- confirm */
  80         struct rte_mbuf *mbufs[RTE_DISTRIB_MAX_RETURNS]; /* RTE_DISTRIB_MAX_RETURNS (128) slots */
  81 };
  82
     /**
      * Distributor instance for the "v20" variant, in which each worker slot
      * has a single 64-bit pointer word (union rte_distributor_buffer_v20)
      * and a single 32-bit in-flight tag. All per-worker arrays are sized by
      * RTE_DISTRIB_MAX_WORKERS.
      */
  83 struct rte_distributor_v20 {
  84         TAILQ_ENTRY(rte_distributor_v20) next;    /**< Next in list. */
  85
  86         char name[RTE_DISTRIBUTOR_NAMESIZE];  /**< Name of this instance. */
  87         unsigned int num_workers;             /**< Number of workers polling */
  88
  89         uint32_t in_flight_tags[RTE_DISTRIB_MAX_WORKERS];
  90                 /**< Tracks the tag being processed per core */
  91         uint64_t in_flight_bitmask;
  92                 /**< on/off bits for in-flight tags.
  93                  * Note that if RTE_DISTRIB_MAX_WORKERS is larger than 64 then
  94                  * the bitmask has to expand.
  95                  */
  96
  97         struct rte_distributor_backlog backlog[RTE_DISTRIB_MAX_WORKERS];
                     /**< One backlog per worker slot */
  98
  99         union rte_distributor_buffer_v20 bufs[RTE_DISTRIB_MAX_WORKERS];
                     /**< One padded 64-bit exchange word per worker slot */
 100
 101         struct rte_distributor_returned_pkts returns;
                     /**< Single returned-packets store for the whole instance */
 102 };
 103
 104 /* Selector for the signature compare function; stored in rte_distributor.dist_match_fn */
 105 enum rte_distributor_match_function {
 106         RTE_DIST_MATCH_SCALAR = 0, /* presumably selects find_match_scalar() -- confirm */
 107         RTE_DIST_MATCH_VECTOR,     /* presumably selects find_match_vec() -- confirm */
 108         RTE_DIST_NUM_MATCH_FNS     /* equals the number of selectors above (2) */
 109 };
 110
 111 /**
 112  * Buffer structure used to pass the pointer data between cores. This is cache
 113  * line aligned, and to improve performance and prevent adjacent cache-line
 114  * prefetches of buffers for other workers, e.g. when worker 1's buffer is on
 115  * the next cache line to worker 0, each array is followed by a padding member.
 116  * We can pass up to 8 mbufs at a time in one cacheline.
 117  * There is a separate cacheline for returns in the burst API.
 118  */
 119 struct rte_distributor_buffer {
 120         volatile int64_t bufptr64[RTE_DIST_BURST_SIZE]
 121                 __rte_cache_aligned; /* <= outgoing to worker */
 122
 123         int64_t pad1 __rte_cache_aligned;    /* <= padding, one cache line */
 124
 125         volatile int64_t retptr64[RTE_DIST_BURST_SIZE]
 126                 __rte_cache_aligned; /* <= incoming from worker */
 127
 128         int64_t pad2 __rte_cache_aligned;    /* <= padding, one cache line */
 129
 130         int count __rte_cache_aligned;       /* <= number of current mbufs */
 131 };
 132
     /**
      * Distributor instance for the burst variant: each worker slot has a
      * struct rte_distributor_buffer carrying up to RTE_DIST_BURST_SIZE
      * pointer words in each direction. All per-worker arrays are sized by
      * RTE_DISTRIB_MAX_WORKERS.
      */
 133 struct rte_distributor {
 134         TAILQ_ENTRY(rte_distributor) next;    /**< Next in list. */
 135
 136         char name[RTE_DISTRIBUTOR_NAMESIZE];  /**< Name of this instance. */
 137         unsigned int num_workers;             /**< Number of workers polling */
 138         unsigned int alg_type;                /**< NOTE(review): by its name an algorithm-type selector, not a count -- confirm */
 139
 140         /**
 141          * In each row of this array, the first cache line holds the tags
 142          * in flight on the worker core; the second cache line holds the tags
 143          * of the backlog that is going to go to that worker core.
             * NOTE(review): a row is RTE_DIST_BURST_SIZE*2 = 16 uint16_t
             * (32 bytes), so "cache line" here presumably means each
             * 8-tag half of the row -- confirm.
 144          */
 145         uint16_t in_flight_tags[RTE_DISTRIB_MAX_WORKERS][RTE_DIST_BURST_SIZE*2]
 146                         __rte_cache_aligned;
 147
 148         struct rte_distributor_backlog backlog[RTE_DISTRIB_MAX_WORKERS]
 149                         __rte_cache_aligned; /**< One backlog per worker slot */
 150
 151         struct rte_distributor_buffer bufs[RTE_DISTRIB_MAX_WORKERS]; /**< One exchange buffer per worker slot */
 152
 153         struct rte_distributor_returned_pkts returns; /**< Single returned-packets store for the instance */
 154
 155         enum rte_distributor_match_function dist_match_fn; /**< Which signature compare function to use */
 156
 157         struct rte_distributor_v20 *d_v20; /**< Pointer to a v20-layout instance; NOTE(review): ownership and when it is set are not visible here */
 158 };
 159
     /**
      * Declaration only; the definition is not part of this header.
      * By its name, the scalar signature compare function (compare
      * RTE_DIST_MATCH_SCALAR in enum rte_distributor_match_function).
      *
      * @param d
      *   Distributor instance.
      * @param data_ptr
      *   NOTE(review): presumably the uint16_t tags to match -- confirm
      *   against the definition.
      * @param output_ptr
      *   NOTE(review): presumably receives one uint16_t result per input
      *   tag -- confirm against the definition.
      */
 160 void
 161 find_match_scalar(struct rte_distributor *d,
 162                         uint16_t *data_ptr,
 163                         uint16_t *output_ptr);
 164
     /**
      * Declaration only; the definition is not part of this header.
      * By its name, the vector signature compare function (compare
      * RTE_DIST_MATCH_VECTOR in enum rte_distributor_match_function).
      * Its signature is identical to find_match_scalar().
      *
      * @param d
      *   Distributor instance.
      * @param data_ptr
      *   NOTE(review): presumably the uint16_t tags to match -- confirm
      *   against the definition.
      * @param output_ptr
      *   NOTE(review): presumably receives one uint16_t result per input
      *   tag -- confirm against the definition.
      */
 165 void
 166 find_match_vec(struct rte_distributor *d,
 167                         uint16_t *data_ptr,
 168                         uint16_t *output_ptr);
 169
 170 #ifdef __cplusplus
 171 }
 172 #endif
 173
 174 #endif