/*
- * Copyright (c) 2009, 2010, 2011, 2012, 2013, 2014, 2016 Nicira, Inc.
+ * Copyright (c) 2009, 2010, 2011, 2012, 2013, 2014, 2016, 2017 Nicira, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
#include "fat-rwlock.h"
#include "flow.h"
#include "hmapx.h"
+#include "id-pool.h"
#include "latch.h"
#include "netdev.h"
#include "netdev-vport.h"
static struct vlog_rate_limit upcall_rl = VLOG_RATE_LIMIT_INIT(600, 600);
#define DP_NETDEV_CS_SUPPORTED_MASK (CS_NEW | CS_ESTABLISHED | CS_RELATED \
- | CS_INVALID | CS_REPLY_DIR | CS_TRACKED)
+ | CS_INVALID | CS_REPLY_DIR | CS_TRACKED \
+ | CS_SRC_NAT | CS_DST_NAT)
#define DP_NETDEV_CS_UNSUPPORTED_MASK (~(uint32_t)DP_NETDEV_CS_SUPPORTED_MASK)
static struct odp_support dp_netdev_support = {
.ct_zone = true,
.ct_mark = true,
.ct_label = true,
+ .ct_state_nat = true,
+ .ct_orig_tuple = true,
+ .ct_orig_tuple6 = true,
};
/* Stores a miniflow with inline values */
struct ovs_mutex meter_locks[N_METER_LOCKS];
struct dp_meter *meters[MAX_METERS]; /* Meter bands. */
+ /* Probability of EMC insertions is proportional to 'emc_insert_min'. */
+ OVS_ALIGNED_VAR(CACHE_LINE_SIZE) atomic_uint32_t emc_insert_min;
+
/* Protects access to ofproto-dpif-upcall interface during revalidator
* thread synchronization. */
struct fat_rwlock upcall_rwlock;
/* Stores all 'struct dp_netdev_pmd_thread's. */
struct cmap poll_threads;
+ /* Id pool for per-thread static_tx_qid. */
+ struct id_pool *tx_qid_pool;
+ struct ovs_mutex tx_qid_pool_mutex;
/* Protects the access of the 'struct dp_netdev_pmd_thread'
* instance for non-pmd thread. */
uint64_t last_tnl_conf_seq;
struct conntrack conntrack;
-
- /* Probability of EMC insertions is a factor of 'emc_insert_min'.*/
- OVS_ALIGNED_VAR(CACHE_LINE_SIZE) atomic_uint32_t emc_insert_min;
};
static void meter_lock(const struct dp_netdev *dp, uint32_t meter_id)
};
enum pmd_cycles_counter_type {
- PMD_CYCLES_POLLING, /* Cycles spent polling NICs. */
- PMD_CYCLES_PROCESSING, /* Cycles spent processing packets */
+ PMD_CYCLES_IDLE, /* Cycles spent idle or in unsuccessful polling. */
+ PMD_CYCLES_PROCESSING, /* Cycles spent successfully polling and
+ * processing polled packets. */
PMD_N_CYCLES
};
struct hmap_node node; /* Node in dp_netdev's 'ports'. */
struct netdev_saved_flags *sf;
struct dp_netdev_rxq *rxqs;
- unsigned n_rxq; /* Number of elements in 'rxq' */
+ unsigned n_rxq; /* Number of elements in 'rxqs' */
bool dynamic_txqs; /* If true XPS will be used. */
unsigned *txq_used; /* Number of threads that use each tx queue. */
struct ovs_mutex txq_used_mutex;
* I/O of all non-pmd threads. There will be no actual thread created
* for the instance.
*
- * Each struct has its own flow table and classifier. Packets received
- * from managed ports are looked up in the corresponding pmd thread's
- * flow table, and are executed with the found actions.
+ * Each struct has its own flow cache and classifier per managed ingress port.
+ * For packets received on an ingress port, a lookup is first done in the
+ * corresponding PMD thread's flow cache and, in case of a miss, in that
+ * port's classifier. In either case the packets are then executed with the
+ * actions found.
* */
struct dp_netdev_pmd_thread {
struct dp_netdev *dp;
/* Queue id used by this pmd thread to send packets on all netdevs if
* XPS disabled for this netdev. All static_tx_qid's are unique and less
* than 'cmap_count(dp->poll_threads)'. */
- const int static_tx_qid;
+ uint32_t static_tx_qid;
struct ovs_mutex port_mutex; /* Mutex for 'poll_list' and 'tx_ports'. */
/* List of rx queues to poll. */
unsigned core_id);
static struct dp_netdev_pmd_thread *
dp_netdev_pmd_get_next(struct dp_netdev *dp, struct cmap_position *pos);
+static void dp_netdev_del_pmd(struct dp_netdev *dp,
+ struct dp_netdev_pmd_thread *pmd);
static void dp_netdev_destroy_all_pmds(struct dp_netdev *dp, bool non_pmd);
static void dp_netdev_pmd_clear_ports(struct dp_netdev_pmd_thread *pmd);
static void dp_netdev_add_port_tx_to_pmd(struct dp_netdev_pmd_thread *pmd,
}
ds_put_format(reply,
- "\tpolling cycles:%"PRIu64" (%.02f%%)\n"
+ "\tidle cycles:%"PRIu64" (%.02f%%)\n"
"\tprocessing cycles:%"PRIu64" (%.02f%%)\n",
- cycles[PMD_CYCLES_POLLING],
- cycles[PMD_CYCLES_POLLING] / (double)total_cycles * 100,
+ cycles[PMD_CYCLES_IDLE],
+ cycles[PMD_CYCLES_IDLE] / (double)total_cycles * 100,
cycles[PMD_CYCLES_PROCESSING],
cycles[PMD_CYCLES_PROCESSING] / (double)total_cycles * 100);
i++;
}
ovs_assert(i == *n);
+ qsort(ret, *n, sizeof *ret, compare_poll_list);
}
- qsort(ret, *n, sizeof *ret, compare_poll_list);
-
*list = ret;
}
}
}
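+/* qsort() comparator that orders pmd threads by core id. */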
+static int
+compare_poll_thread_list(const void *a_, const void *b_)
+{
+ const struct dp_netdev_pmd_thread *a, *b;
+
+ a = *(struct dp_netdev_pmd_thread **)a_;
+ b = *(struct dp_netdev_pmd_thread **)b_;
+
+ if (a->core_id < b->core_id) {
+ return -1;
+ }
+ if (a->core_id > b->core_id) {
+ return 1;
+ }
+ return 0;
+}
+
+/* Creates a sorted list of pmds from the dp->poll_threads cmap. The list is
+ * safe to use as long as we do not enter a quiescent state. */
+static void
+sorted_poll_thread_list(struct dp_netdev *dp,
+ struct dp_netdev_pmd_thread ***list,
+ size_t *n)
+{
+ struct dp_netdev_pmd_thread *pmd;
+ struct dp_netdev_pmd_thread **pmd_list;
+ size_t k = 0, n_pmds;
+
+ n_pmds = cmap_count(&dp->poll_threads);
+ pmd_list = xcalloc(n_pmds, sizeof *pmd_list);
+
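+ /* 'poll_threads' may change while we iterate; don't overrun the
+ * preallocated array. */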
+ CMAP_FOR_EACH (pmd, node, &dp->poll_threads) {
+ if (k >= n_pmds) {
+ break;
+ }
+ pmd_list[k++] = pmd;
+ }
+
+ qsort(pmd_list, k, sizeof *pmd_list, compare_poll_thread_list);
+
+ *list = pmd_list;
+ *n = k;
+}
+
static void
dpif_netdev_pmd_info(struct unixctl_conn *conn, int argc, const char *argv[],
void *aux)
{
struct ds reply = DS_EMPTY_INITIALIZER;
- struct dp_netdev_pmd_thread *pmd;
+ struct dp_netdev_pmd_thread **pmd_list;
struct dp_netdev *dp = NULL;
+ size_t n;
enum pmd_info_type type = *(enum pmd_info_type *) aux;
ovs_mutex_lock(&dp_netdev_mutex);
return;
}
- CMAP_FOR_EACH (pmd, node, &dp->poll_threads) {
+ sorted_poll_thread_list(dp, &pmd_list, &n);
+ for (size_t i = 0; i < n; i++) {
+ struct dp_netdev_pmd_thread *pmd = pmd_list[i];
+ if (!pmd) {
+ break;
+ }
+
if (type == PMD_INFO_SHOW_RXQ) {
pmd_info_show_rxq(&reply, pmd);
} else {
}
}
}
+ free(pmd_list);
ovs_mutex_unlock(&dp_netdev_mutex);
atomic_init(&dp->emc_insert_min, DEFAULT_EM_FLOW_INSERT_MIN);
cmap_init(&dp->poll_threads);
+
+ ovs_mutex_init(&dp->tx_qid_pool_mutex);
+ /* We need 1 Tx queue for each possible core + 1 for non-PMD threads. */
+ dp->tx_qid_pool = id_pool_create(0, ovs_numa_get_n_cores() + 1);
+
ovs_mutex_init_recursive(&dp->non_pmd_mutex);
ovsthread_key_create(&dp->per_pmd_key, NULL);
ovs_mutex_lock(&dp->port_mutex);
+ /* The non-pmd thread is created before all other threads and will
+ * allocate static_tx_qid = 0. */
dp_netdev_set_nonpmd(dp);
error = do_add_port(dp, name, dpif_netdev_port_open_type(dp->class,
dp_netdev_destroy_all_pmds(dp, true);
cmap_destroy(&dp->poll_threads);
+ ovs_mutex_destroy(&dp->tx_qid_pool_mutex);
+ id_pool_destroy(dp->tx_qid_pool);
+
ovs_mutex_destroy(&dp->non_pmd_mutex);
ovsthread_key_delete(dp->per_pmd_key);
offsetof(struct netdev_flow_key, mf) + src->len);
}
-/* Slow. */
-static void
-netdev_flow_key_from_flow(struct netdev_flow_key *dst,
- const struct flow *src)
-{
- struct dp_packet packet;
- uint64_t buf_stub[512 / 8];
-
- dp_packet_use_stub(&packet, buf_stub, sizeof buf_stub);
- pkt_metadata_from_flow(&packet.md, src);
- flow_compose(&packet, src);
- miniflow_extract(&packet, &dst->mf);
- dp_packet_uninit(&packet);
-
- dst->len = netdev_flow_key_size(miniflow_n_values(&dst->mf));
- dst->hash = 0; /* Not computed yet. */
-}
-
/* Initialize a netdev_flow_key 'mask' from 'match'. */
static inline void
netdev_flow_mask_init(struct netdev_flow_key *mask,
uint32_t min;
atomic_read_relaxed(&pmd->dp->emc_insert_min, &min);
-#ifdef DPDK_NETDEV
- if (min && (key->hash ^ (uint32_t) pmd->last_cycles) <= min) {
-#else
- if (min && (key->hash ^ random_uint32()) <= min) {
-#endif
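+ /* random_uint32() is uniform over the full 32-bit range, so a flow is
+ * inserted with a probability of roughly 'min' / UINT32_MAX. */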
+ if (min && random_uint32() <= min) {
emc_insert(&pmd->flow_cache, key, flow);
}
}
dpif_netdev_flow_from_nlattrs(const struct nlattr *key, uint32_t key_len,
struct flow *flow, bool probe)
{
- odp_port_t in_port;
-
if (odp_flow_key_to_flow(key, key_len, flow)) {
if (!probe) {
/* This should not happen: it indicates that
return EINVAL;
}
- in_port = flow->in_port.odp_port;
- if (!is_valid_port_number(in_port) && in_port != ODPP_NONE) {
- return EINVAL;
- }
-
if (flow->ct_state & DP_NETDEV_CS_UNSUPPORTED_MASK) {
return EINVAL;
}
cmap_insert(&pmd->flow_table, CONST_CAST(struct cmap_node *, &flow->node),
dp_netdev_flow_hash(&flow->ufid));
- if (OVS_UNLIKELY(VLOG_IS_DBG_ENABLED())) {
+ if (OVS_UNLIKELY(!VLOG_DROP_DBG((&upcall_rl)))) {
struct ds ds = DS_EMPTY_INITIALIZER;
struct ofpbuf key_buf, mask_buf;
struct odp_flow_key_parms odp_parms = {
mask_buf.data, mask_buf.size,
NULL, &ds, false);
ds_put_cstr(&ds, ", actions:");
- format_odp_actions(&ds, actions, actions_len);
+ format_odp_actions(&ds, actions, actions_len, NULL);
- VLOG_DBG_RL(&upcall_rl, "%s", ds_cstr(&ds));
+ VLOG_DBG("%s", ds_cstr(&ds));
ofpbuf_uninit(&key_buf);
ofpbuf_uninit(&mask_buf);
+
+ /* Add a printout of the actual match installed. */
+ struct match m;
+ ds_clear(&ds);
+ ds_put_cstr(&ds, "flow match: ");
+ miniflow_expand(&flow->cr.flow.mf, &m.flow);
+ miniflow_expand(&flow->cr.mask->mf, &m.wc.masks);
+ memset(&m.tun_md, 0, sizeof m.tun_md);
+ match_format(&m, NULL, &ds, OFP_DEFAULT_PRIORITY);
+
+ VLOG_DBG("%s", ds_cstr(&ds));
+
ds_destroy(&ds);
}
error = ENOENT;
}
} else {
- if (put->flags & DPIF_FP_MODIFY
- && flow_equal(&match->flow, &netdev_flow->flow)) {
+ if (put->flags & DPIF_FP_MODIFY) {
struct dp_netdev_actions *new_actions;
struct dp_netdev_actions *old_actions;
dpif_netdev_flow_put(struct dpif *dpif, const struct dpif_flow_put *put)
{
struct dp_netdev *dp = get_dp_netdev(dpif);
- struct netdev_flow_key key;
+ struct netdev_flow_key key, mask;
struct dp_netdev_pmd_thread *pmd;
struct match match;
ovs_u128 ufid;
}
/* Must produce a netdev_flow_key for lookup.
- * This interface is no longer performance critical, since it is not used
- * for upcall processing any more. */
- netdev_flow_key_from_flow(&key, &match.flow);
+ * Use the same method as employed to create the key when adding
+ * the flow to the dpcls to make sure they match. */
+ netdev_flow_mask_init(&mask, &match);
+ netdev_flow_key_init_masked(&key, &match.flow, &mask);
if (put->pmd_id == PMD_ID_NULL) {
if (cmap_count(&dp->poll_threads) == 0) {
}
static struct dpif_flow_dump *
-dpif_netdev_flow_dump_create(const struct dpif *dpif_, bool terse)
+dpif_netdev_flow_dump_create(const struct dpif *dpif_, bool terse,
+ char *type OVS_UNUSED)
{
struct dpif_netdev_flow_dump *dump;
\f
/* Creates and returns a new 'struct dp_netdev_actions', whose actions are
- * a copy of the 'ofpacts_len' bytes of 'ofpacts'. */
+ * a copy of the 'size' bytes of the 'actions' input parameter. */
struct dp_netdev_actions *
dp_netdev_actions_create(const struct nlattr *actions, size_t size)
{
non_atomic_ullong_add(&pmd->cycles.n[type], interval);
}
-static void
+/* Calculate the intermediate cycle result and add to the counter 'type'. */
+static inline void
+cycles_count_intermediate(struct dp_netdev_pmd_thread *pmd,
+ enum pmd_cycles_counter_type type)
+ OVS_NO_THREAD_SAFETY_ANALYSIS
+{
+ unsigned long long new_cycles = cycles_counter();
+ unsigned long long interval = new_cycles - pmd->last_cycles;
+ pmd->last_cycles = new_cycles;
+
+ non_atomic_ullong_add(&pmd->cycles.n[type], interval);
+}
+
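+/* Polls 'rx' once and, on success, passes the received batch to the
+ * datapath. Returns the number of packets in the received batch, or 0 if
+ * the queue was empty or an error occurred. */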
+static int
dp_netdev_process_rxq_port(struct dp_netdev_pmd_thread *pmd,
struct netdev_rxq *rx,
odp_port_t port_no)
{
struct dp_packet_batch batch;
int error;
+ int batch_cnt = 0;
dp_packet_batch_init(&batch);
- cycles_count_start(pmd);
error = netdev_rxq_recv(rx, &batch);
- cycles_count_end(pmd, PMD_CYCLES_POLLING);
if (!error) {
*recirc_depth_get() = 0;
- cycles_count_start(pmd);
+ batch_cnt = batch.count;
dp_netdev_input(pmd, &batch, port_no);
- cycles_count_end(pmd, PMD_CYCLES_PROCESSING);
} else if (error != EAGAIN && error != EOPNOTSUPP) {
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
VLOG_ERR_RL(&rl, "error receiving data from %s: %s",
netdev_rxq_get_name(rx), ovs_strerror(error));
}
+
+ return batch_cnt;
}
static struct tx_port *
return NULL;
}
+/* Returns the next node in the numa list following 'numa' in round-robin
+ * fashion. Returns the first node if 'numa' is a null pointer or the last
+ * node in 'rr'. Returns NULL if the 'rr' numa list is empty. */
+static struct rr_numa *
+rr_numa_list_next(struct rr_numa_list *rr, const struct rr_numa *numa)
+{
+ struct hmap_node *node = NULL;
+
+ if (numa) {
+ node = hmap_next(&rr->numas, &numa->node);
+ }
+ if (!node) {
+ node = hmap_first(&rr->numas);
+ }
+
+ return (node) ? CONTAINER_OF(node, struct rr_numa, node) : NULL;
+}
+
static void
rr_numa_list_populate(struct dp_netdev *dp, struct rr_numa_list *rr)
{
{
struct dp_netdev_port *port;
struct rr_numa_list rr;
+ struct rr_numa *non_local_numa = NULL;
rr_numa_list_populate(dp, &rr);
}
} else if (!pinned && q->core_id == OVS_CORE_UNSPEC) {
if (!numa) {
- VLOG_WARN("There's no available (non isolated) pmd thread "
+ /* There are no pmds on the queue's local NUMA node.
+ * Round-robin on the NUMA nodes that do have pmds. */
+ non_local_numa = rr_numa_list_next(&rr, non_local_numa);
+ if (!non_local_numa) {
+ VLOG_ERR("There is no available (non-isolated) pmd "
+ "thread for port \'%s\' queue %d. This queue "
+ "will not be polled. Is pmd-cpu-mask set to "
+ "zero? Or are all PMDs isolated to other "
+ "queues?", netdev_get_name(port->netdev),
+ qid);
+ continue;
+ }
+ q->pmd = rr_numa_get_pmd(non_local_numa);
+ VLOG_WARN("There's no available (non-isolated) pmd thread "
"on numa node %d. Queue %d on port \'%s\' will "
- "not be polled.",
- numa_id, qid, netdev_get_name(port->netdev));
+ "be assigned to the pmd on core %d "
+ "(numa node %d). Expect reduced performance.",
+ numa_id, qid, netdev_get_name(port->netdev),
+ q->pmd->core_id, q->pmd->numa_id);
} else {
+ /* Assign the queue to the next (round-robin) pmd on its local
+ * NUMA node. */
q->pmd = rr_numa_get_pmd(numa);
}
}
rr_numa_list_destroy(&rr);
}
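+/* Reloads only the pmd threads that were flagged as needing a reload and
+ * clears the flag afterwards. */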
+static void
+reload_affected_pmds(struct dp_netdev *dp)
+{
+ struct dp_netdev_pmd_thread *pmd;
+
+ CMAP_FOR_EACH (pmd, node, &dp->poll_threads) {
+ if (pmd->need_reload) {
+ dp_netdev_reload_pmd__(pmd);
+ pmd->need_reload = false;
+ }
+ }
+}
+
static void
reconfigure_pmd_threads(struct dp_netdev *dp)
OVS_REQUIRES(dp->port_mutex)
{
struct dp_netdev_pmd_thread *pmd;
struct ovs_numa_dump *pmd_cores;
+ struct ovs_numa_info_core *core;
+ struct hmapx to_delete = HMAPX_INITIALIZER(&to_delete);
+ struct hmapx_node *node;
bool changed = false;
+ bool need_to_adjust_static_tx_qids = false;
/* The pmd threads should be started only if there's a pmd port in the
* datapath. If the user didn't provide any "pmd-cpu-mask", we start
pmd_cores = ovs_numa_dump_n_cores_per_numa(NR_PMD_THREADS);
}
- /* Check for changed configuration */
- if (ovs_numa_dump_count(pmd_cores) != cmap_count(&dp->poll_threads) - 1) {
- changed = true;
- } else {
- CMAP_FOR_EACH (pmd, node, &dp->poll_threads) {
- if (pmd->core_id != NON_PMD_CORE_ID
- && !ovs_numa_dump_contains_core(pmd_cores,
- pmd->numa_id,
- pmd->core_id)) {
- changed = true;
- break;
- }
+ /* We need to adjust 'static_tx_qid's only if we're reducing the number of
+ * PMD threads. Otherwise, new threads will allocate all the freed ids. */
+ if (ovs_numa_dump_count(pmd_cores) < cmap_count(&dp->poll_threads) - 1) {
+ /* Adjustment is required to keep 'static_tx_qid's sequential and
+ * avoid possible issues, for example, imbalanced tx queue usage
+ * and unnecessary locking caused by remapping on netdev level. */
+ need_to_adjust_static_tx_qids = true;
+ }
+
+ /* Check for unwanted pmd threads. */
+ CMAP_FOR_EACH (pmd, node, &dp->poll_threads) {
+ if (pmd->core_id == NON_PMD_CORE_ID) {
+ continue;
+ }
+ if (!ovs_numa_dump_contains_core(pmd_cores, pmd->numa_id,
+ pmd->core_id)) {
+ hmapx_add(&to_delete, pmd);
+ } else if (need_to_adjust_static_tx_qids) {
+ pmd->need_reload = true;
}
}
- /* Destroy the old and recreate the new pmd threads. We don't perform an
- * incremental update because we would have to adjust 'static_tx_qid'. */
- if (changed) {
- struct ovs_numa_info_core *core;
- struct ovs_numa_info_numa *numa;
+ HMAPX_FOR_EACH (node, &to_delete) {
+ pmd = (struct dp_netdev_pmd_thread *) node->data;
+ VLOG_INFO("PMD thread on numa_id: %d, core id: %2d destroyed.",
+ pmd->numa_id, pmd->core_id);
+ dp_netdev_del_pmd(dp, pmd);
+ }
+ changed = !hmapx_is_empty(&to_delete);
+ hmapx_destroy(&to_delete);
- /* Do not destroy the non pmd thread. */
- dp_netdev_destroy_all_pmds(dp, false);
- FOR_EACH_CORE_ON_DUMP (core, pmd_cores) {
- struct dp_netdev_pmd_thread *pmd = xzalloc(sizeof *pmd);
+ if (need_to_adjust_static_tx_qids) {
+ /* 'static_tx_qid's are not sequential now.
+ * Reload remaining threads to fix this. */
+ reload_affected_pmds(dp);
+ }
+ /* Check for required new pmd threads. */
+ FOR_EACH_CORE_ON_DUMP (core, pmd_cores) {
+ pmd = dp_netdev_get_pmd(dp, core->core_id);
+ if (!pmd) {
+ pmd = xzalloc(sizeof *pmd);
dp_netdev_configure_pmd(pmd, dp, core->core_id, core->numa_id);
-
pmd->thread = ovs_thread_create("pmd", pmd_thread_main, pmd);
+ VLOG_INFO("PMD thread on numa_id: %d, core id: %2d created.",
+ pmd->numa_id, pmd->core_id);
+ changed = true;
+ } else {
+ dp_netdev_pmd_unref(pmd);
}
+ }
+
+ if (changed) {
+ struct ovs_numa_info_numa *numa;
/* Log the number of pmd threads per numa node. */
FOR_EACH_NUMA_ON_DUMP (numa, pmd_cores) {
- VLOG_INFO("Created %"PRIuSIZE" pmd threads on numa node %d",
+ VLOG_INFO("There are %"PRIuSIZE" pmd threads on numa node %d",
numa->n_cores, numa->numa_id);
}
}
ovs_numa_dump_destroy(pmd_cores);
}
-static void
-reload_affected_pmds(struct dp_netdev *dp)
-{
- struct dp_netdev_pmd_thread *pmd;
-
- CMAP_FOR_EACH (pmd, node, &dp->poll_threads) {
- if (pmd->need_reload) {
- dp_netdev_reload_pmd__(pmd);
- pmd->need_reload = false;
- }
- }
-}
-
static void
pmd_remove_stale_ports(struct dp_netdev *dp,
struct dp_netdev_pmd_thread *pmd)
* need reconfiguration. */
/* Check for all the ports that need reconfiguration. We cache this in
- * 'port->reconfigure', because netdev_is_reconf_required() can change at
- * any time. */
+ * 'port->need_reconfigure', because netdev_is_reconf_required() can
+ * change at any time. */
HMAP_FOR_EACH (port, node, &dp->ports) {
if (netdev_is_reconf_required(port->netdev)) {
port->need_reconfigure = true;
/* We only reconfigure the ports that we determined above, because they're
* not being used by any pmd thread at the moment. If a port fails to
* reconfigure we remove it from the datapath. */
- HMAP_FOR_EACH (port, node, &dp->ports) {
+ struct dp_netdev_port *next_port;
+ HMAP_FOR_EACH_SAFE (port, next_port, node, &dp->ports) {
int err;
if (!port->need_reconfigure) {
struct dp_netdev *dp = get_dp_netdev(dpif);
struct dp_netdev_pmd_thread *non_pmd;
uint64_t new_tnl_seq;
+ int process_packets = 0;
ovs_mutex_lock(&dp->port_mutex);
non_pmd = dp_netdev_get_pmd(dp, NON_PMD_CORE_ID);
if (non_pmd) {
ovs_mutex_lock(&dp->non_pmd_mutex);
+ cycles_count_start(non_pmd);
HMAP_FOR_EACH (port, node, &dp->ports) {
if (!netdev_is_pmd(port->netdev)) {
int i;
for (i = 0; i < port->n_rxq; i++) {
- dp_netdev_process_rxq_port(non_pmd, port->rxqs[i].rx,
- port->port_no);
+ process_packets =
+ dp_netdev_process_rxq_port(non_pmd,
+ port->rxqs[i].rx,
+ port->port_no);
+ cycles_count_intermediate(non_pmd, process_packets ?
+ PMD_CYCLES_PROCESSING
+ : PMD_CYCLES_IDLE);
}
}
}
+ cycles_count_end(non_pmd, PMD_CYCLES_IDLE);
dpif_netdev_xps_revalidate_pmd(non_pmd, time_msec(), false);
ovs_mutex_unlock(&dp->non_pmd_mutex);
}
}
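+/* Allocates a unique static Tx queue id for 'pmd' from the datapath-wide
+ * id pool; aborts if the pool is exhausted. */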
+static void
+pmd_alloc_static_tx_qid(struct dp_netdev_pmd_thread *pmd)
+{
+ ovs_mutex_lock(&pmd->dp->tx_qid_pool_mutex);
+ if (!id_pool_alloc_id(pmd->dp->tx_qid_pool, &pmd->static_tx_qid)) {
+ VLOG_ABORT("static_tx_qid allocation failed for PMD on core %2d"
+ ", numa_id %d.", pmd->core_id, pmd->numa_id);
+ }
+ ovs_mutex_unlock(&pmd->dp->tx_qid_pool_mutex);
+
+ VLOG_DBG("static_tx_qid = %d allocated for PMD thread on core %2d"
+ ", numa_id %d.", pmd->static_tx_qid, pmd->core_id, pmd->numa_id);
+}
+
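+/* Returns the static Tx queue id of 'pmd' back to the datapath-wide
+ * id pool. */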
+static void
+pmd_free_static_tx_qid(struct dp_netdev_pmd_thread *pmd)
+{
+ ovs_mutex_lock(&pmd->dp->tx_qid_pool_mutex);
+ id_pool_free_id(pmd->dp->tx_qid_pool, pmd->static_tx_qid);
+ ovs_mutex_unlock(&pmd->dp->tx_qid_pool_mutex);
+}
+
static int
pmd_load_queues_and_ports(struct dp_netdev_pmd_thread *pmd,
struct polled_queue **ppoll_list)
bool exiting;
int poll_cnt;
int i;
+ int process_packets = 0;
poll_list = NULL;
ovs_numa_thread_setaffinity_core(pmd->core_id);
dpdk_set_lcore_id(pmd->core_id);
poll_cnt = pmd_load_queues_and_ports(pmd, &poll_list);
-reload:
emc_cache_init(&pmd->flow_cache);
+reload:
+ pmd_alloc_static_tx_qid(pmd);
/* List port/core affinity */
for (i = 0; i < poll_cnt; i++) {
lc = UINT_MAX;
}
+ cycles_count_start(pmd);
for (;;) {
for (i = 0; i < poll_cnt; i++) {
- dp_netdev_process_rxq_port(pmd, poll_list[i].rx,
- poll_list[i].port_no);
+ process_packets =
+ dp_netdev_process_rxq_port(pmd, poll_list[i].rx,
+ poll_list[i].port_no);
+ cycles_count_intermediate(pmd,
+ process_packets ? PMD_CYCLES_PROCESSING
+ : PMD_CYCLES_IDLE);
}
if (lc++ > 1024) {
}
}
+ cycles_count_end(pmd, PMD_CYCLES_IDLE);
+
poll_cnt = pmd_load_queues_and_ports(pmd, &poll_list);
exiting = latch_is_set(&pmd->exit_latch);
/* Signal here to make sure the pmd finishes
* reloading the updated configuration. */
dp_netdev_pmd_reload_done(pmd);
- emc_cache_uninit(&pmd->flow_cache);
+ pmd_free_static_tx_qid(pmd);
if (!exiting) {
goto reload;
}
+ emc_cache_uninit(&pmd->flow_cache);
free(poll_list);
pmd_free_cached_ports(pmd);
return NULL;
pmd->numa_id = numa_id;
pmd->need_reload = false;
- *CONST_CAST(int *, &pmd->static_tx_qid) = cmap_count(&dp->poll_threads);
-
ovs_refcount_init(&pmd->ref_cnt);
latch_init(&pmd->exit_latch);
pmd->reload_seq = seq_create();
* actual thread created for NON_PMD_CORE_ID. */
if (core_id == NON_PMD_CORE_ID) {
emc_cache_init(&pmd->flow_cache);
+ pmd_alloc_static_tx_qid(pmd);
}
cmap_insert(&dp->poll_threads, CONST_CAST(struct cmap_node *, &pmd->node),
hash_int(core_id, 0));
ovs_mutex_lock(&dp->non_pmd_mutex);
emc_cache_uninit(&pmd->flow_cache);
pmd_free_cached_ports(pmd);
+ pmd_free_static_tx_qid(pmd);
ovs_mutex_unlock(&dp->non_pmd_mutex);
} else {
latch_set(&pmd->exit_latch);
size_t n_missed = 0, n_dropped = 0;
struct dp_packet *packet;
const size_t size = dp_packet_batch_size(packets_);
+ uint32_t cur_min;
int i;
+ atomic_read_relaxed(&pmd->dp->emc_insert_min, &cur_min);
+
DP_PACKET_BATCH_REFILL_FOR_EACH (i, size, packet, packets_) {
struct dp_netdev_flow *flow;
key->len = 0; /* Not computed yet. */
key->hash = dpif_netdev_packet_get_rss_hash(packet, &key->mf);
- flow = emc_lookup(flow_cache, key);
+ /* If EMC is disabled, skip emc_lookup. */
+ flow = (cur_min == 0) ? NULL : emc_lookup(flow_cache, key);
if (OVS_LIKELY(flow)) {
dp_netdev_queue_batches(packet, flow, &key->mf, batches,
n_batches);
data = nl_attr_get(attr);
- tun_port = pmd_tnl_port_cache_lookup(pmd, u32_to_odp(data->tnl_port));
+ tun_port = pmd_tnl_port_cache_lookup(pmd, data->tnl_port);
if (!tun_port) {
err = -EINVAL;
goto error;
case OVS_ACTION_ATTR_TUNNEL_PUSH:
if (*depth < MAX_RECIRC_DEPTH) {
- struct dp_packet_batch tnl_pkt;
- struct dp_packet_batch *orig_packets_ = packets_;
- int err;
-
- if (!may_steal) {
- dp_packet_batch_clone(&tnl_pkt, packets_);
- packets_ = &tnl_pkt;
- dp_packet_batch_reset_cutlen(orig_packets_);
- }
-
dp_packet_batch_apply_cutlen(packets_);
-
- err = push_tnl_action(pmd, a, packets_);
- if (!err) {
- (*depth)++;
- dp_netdev_recirculate(pmd, packets_);
- (*depth)--;
- }
+ push_tnl_action(pmd, a, packets_);
return;
}
break;
const char *helper = NULL;
const uint32_t *setmark = NULL;
const struct ovs_key_ct_labels *setlabel = NULL;
+ struct nat_action_info_t nat_action_info;
+ struct nat_action_info_t *nat_action_info_ref = NULL;
+ bool nat_config = false;
NL_ATTR_FOR_EACH_UNSAFE (b, left, nl_attr_get(a),
nl_attr_get_size(a)) {
/* Silently ignored, as userspace datapath does not generate
* netlink events. */
break;
- case OVS_CT_ATTR_NAT:
+ case OVS_CT_ATTR_NAT: {
+ const struct nlattr *b_nest;
+ unsigned int left_nest;
+ bool ip_min_specified = false;
+ bool proto_num_min_specified = false;
+ bool ip_max_specified = false;
+ bool proto_num_max_specified = false;
+ memset(&nat_action_info, 0, sizeof nat_action_info);
+ nat_action_info_ref = &nat_action_info;
+
+ NL_NESTED_FOR_EACH_UNSAFE (b_nest, left_nest, b) {
+ enum ovs_nat_attr sub_type_nest = nl_attr_type(b_nest);
+
+ switch (sub_type_nest) {
+ case OVS_NAT_ATTR_SRC:
+ case OVS_NAT_ATTR_DST:
+ nat_config = true;
+ nat_action_info.nat_action |=
+ ((sub_type_nest == OVS_NAT_ATTR_SRC)
+ ? NAT_ACTION_SRC : NAT_ACTION_DST);
+ break;
+ case OVS_NAT_ATTR_IP_MIN:
+ memcpy(&nat_action_info.min_addr,
+ nl_attr_get(b_nest),
+ nl_attr_get_size(b_nest));
+ ip_min_specified = true;
+ break;
+ case OVS_NAT_ATTR_IP_MAX:
+ memcpy(&nat_action_info.max_addr,
+ nl_attr_get(b_nest),
+ nl_attr_get_size(b_nest));
+ ip_max_specified = true;
+ break;
+ case OVS_NAT_ATTR_PROTO_MIN:
+ nat_action_info.min_port =
+ nl_attr_get_u16(b_nest);
+ proto_num_min_specified = true;
+ break;
+ case OVS_NAT_ATTR_PROTO_MAX:
+ nat_action_info.max_port =
+ nl_attr_get_u16(b_nest);
+ proto_num_max_specified = true;
+ break;
+ case OVS_NAT_ATTR_PERSISTENT:
+ case OVS_NAT_ATTR_PROTO_HASH:
+ case OVS_NAT_ATTR_PROTO_RANDOM:
+ break;
+ case OVS_NAT_ATTR_UNSPEC:
+ case __OVS_NAT_ATTR_MAX:
+ OVS_NOT_REACHED();
+ }
+ }
+
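+ /* If only a minimum address/port was specified, treat it as a
+ * range containing that single value. */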
+ if (ip_min_specified && !ip_max_specified) {
+ nat_action_info.max_addr = nat_action_info.min_addr;
+ }
+ if (proto_num_min_specified && !proto_num_max_specified) {
+ nat_action_info.max_port = nat_action_info.min_port;
+ }
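+ /* A port range implies L4 port translation for whichever NAT
+ * direction (SRC or DST) was configured above. */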
+ if (proto_num_min_specified || proto_num_max_specified) {
+ if (nat_action_info.nat_action & NAT_ACTION_SRC) {
+ nat_action_info.nat_action |= NAT_ACTION_SRC_PORT;
+ } else if (nat_action_info.nat_action & NAT_ACTION_DST) {
+ nat_action_info.nat_action |= NAT_ACTION_DST_PORT;
+ }
+ }
+ break;
+ }
case OVS_CT_ATTR_UNSPEC:
case __OVS_CT_ATTR_MAX:
OVS_NOT_REACHED();
}
}
+ /* We won't be able to function properly in this case, hence
+ * complain loudly. */
+ if (nat_config && !commit) {
+ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 5);
+ VLOG_WARN_RL(&rl, "NAT specified without commit.");
+ }
+
conntrack_execute(&dp->conntrack, packets_, aux->flow->dl_type, force,
- commit, zone, setmark, setlabel, helper);
+ commit, zone, setmark, setlabel, helper,
+ nat_action_info_ref);
break;
}