#include <fcntl.h>
#include <inttypes.h>
#include <net/if.h>
+#include <sys/types.h>
#include <netinet/in.h>
#include <stdint.h>
#include <stdlib.h>
* actions in either case.
* */
struct dp_netdev_pmd_thread {
- PADDED_MEMBERS_CACHELINE_MARKER(CACHE_LINE_SIZE, cacheline0,
- struct dp_netdev *dp;
- struct cmap_node node; /* In 'dp->poll_threads'. */
- pthread_cond_t cond; /* For synchronizing pmd thread
- reload. */
- );
-
- PADDED_MEMBERS_CACHELINE_MARKER(CACHE_LINE_SIZE, cacheline1,
- struct ovs_mutex cond_mutex; /* Mutex for condition variable. */
- pthread_t thread;
- unsigned core_id; /* CPU core id of this pmd thread. */
- int numa_id; /* numa node id of this pmd thread. */
- );
+ struct dp_netdev *dp;
+ struct ovs_refcount ref_cnt; /* Every reference must be refcount'ed. */
+ struct cmap_node node; /* In 'dp->poll_threads'. */
+
+ pthread_cond_t cond; /* For synchronizing pmd thread reload. */
+ struct ovs_mutex cond_mutex; /* Mutex for condition variable. */
/* Per thread exact-match cache. Note, the instance for cpu core
* NON_PMD_CORE_ID can be accessed by multiple threads, and thusly
* need to be protected by 'non_pmd_mutex'. Every other instance
* will only be accessed by its own pmd thread. */
- OVS_ALIGNED_VAR(CACHE_LINE_SIZE) struct emc_cache flow_cache;
- struct ovs_refcount ref_cnt; /* Every reference must be refcount'ed. */
-
- /* Queue id used by this pmd thread to send packets on all netdevs if
- * XPS disabled for this netdev. All static_tx_qid's are unique and less
- * than 'cmap_count(dp->poll_threads)'. */
- uint32_t static_tx_qid;
+ struct emc_cache flow_cache;
/* Flow-Table and classifiers
 *
 * Writers of 'flow_table' must take the 'flow_mutex'. Corresponding
 * changes to 'classifiers' must be made while still holding the
 * 'flow_mutex'.
 */
struct ovs_mutex flow_mutex;
- PADDED_MEMBERS(CACHE_LINE_SIZE,
- struct cmap flow_table OVS_GUARDED; /* Flow table. */
-
- /* One classifier per in_port polled by the pmd */
- struct cmap classifiers;
- /* Periodically sort subtable vectors according to hit frequencies */
- long long int next_optimization;
- /* End of the next time interval for which processing cycles
- are stored for each polled rxq. */
- long long int rxq_next_cycle_store;
-
- /* Cycles counters */
- struct dp_netdev_pmd_cycles cycles;
-
- /* Used to count cycles. See 'cycles_counter_end()'. */
- unsigned long long last_cycles;
- struct latch exit_latch; /* For terminating the pmd thread. */
- );
-
- PADDED_MEMBERS(CACHE_LINE_SIZE,
- /* Statistics. */
- struct dp_netdev_pmd_stats stats;
-
- struct seq *reload_seq;
- uint64_t last_reload_seq;
- atomic_bool reload; /* Do we need to reload ports? */
- bool isolated;
-
- /* Set to true if the pmd thread needs to be reloaded. */
- bool need_reload;
- /* 5 pad bytes. */
- );
-
- PADDED_MEMBERS(CACHE_LINE_SIZE,
- struct ovs_mutex port_mutex; /* Mutex for 'poll_list'
- and 'tx_ports'. */
- /* 16 pad bytes. */
- );
- PADDED_MEMBERS(CACHE_LINE_SIZE,
- /* List of rx queues to poll. */
- struct hmap poll_list OVS_GUARDED;
- /* Map of 'tx_port's used for transmission. Written by the main
- * thread, read by the pmd thread. */
- struct hmap tx_ports OVS_GUARDED;
- );
- PADDED_MEMBERS(CACHE_LINE_SIZE,
- /* These are thread-local copies of 'tx_ports'. One contains only
- * tunnel ports (that support push_tunnel/pop_tunnel), the other
- * contains ports with at least one txq (that support send).
- * A port can be in both.
- *
- * There are two separate maps to make sure that we don't try to
- * execute OUTPUT on a device which has 0 txqs or PUSH/POP on a
- * non-tunnel device.
- *
- * The instances for cpu core NON_PMD_CORE_ID can be accessed by
- * multiple threads and thusly need to be protected by 'non_pmd_mutex'.
- * Every other instance will only be accessed by its own pmd thread. */
- struct hmap tnl_port_cache;
- struct hmap send_port_cache;
- );
-
- PADDED_MEMBERS(CACHE_LINE_SIZE,
- /* Only a pmd thread can write on its own 'cycles' and 'stats'.
- * The main thread keeps 'stats_zero' and 'cycles_zero' as base
- * values and subtracts them from 'stats' and 'cycles' before
- * reporting to the user */
- unsigned long long stats_zero[DP_N_STATS];
- uint64_t cycles_zero[PMD_N_CYCLES];
- /* 8 pad bytes. */
- );
+ struct cmap flow_table OVS_GUARDED; /* Flow table. */
+
+ /* One classifier per in_port polled by the pmd */
+ struct cmap classifiers;
+ /* Periodically sort subtable vectors according to hit frequencies */
+ long long int next_optimization;
+ /* End of the next time interval for which processing cycles
+ are stored for each polled rxq. */
+ long long int rxq_next_cycle_store;
+
+ /* Statistics. */
+ struct dp_netdev_pmd_stats stats;
+
+ /* Cycles counters */
+ struct dp_netdev_pmd_cycles cycles;
+
+ /* Used to count cycles. See 'cycles_counter_end()'. */
+ unsigned long long last_cycles;
+
+ struct latch exit_latch; /* For terminating the pmd thread. */
+ struct seq *reload_seq;
+ uint64_t last_reload_seq;
+ atomic_bool reload; /* Do we need to reload ports? */
+ pthread_t thread;
+ unsigned core_id; /* CPU core id of this pmd thread. */
+ int numa_id; /* numa node id of this pmd thread. */
+ bool isolated;
+
+ /* Queue id used by this pmd thread to send packets on all netdevs if
+ * XPS disabled for this netdev. All static_tx_qid's are unique and less
+ * than 'cmap_count(dp->poll_threads)'. */
+ uint32_t static_tx_qid;
+
+ struct ovs_mutex port_mutex; /* Mutex for 'poll_list' and 'tx_ports'. */
+ /* List of rx queues to poll. */
+ struct hmap poll_list OVS_GUARDED;
+ /* Map of 'tx_port's used for transmission. Written by the main thread,
+ * read by the pmd thread. */
+ struct hmap tx_ports OVS_GUARDED;
+
+ /* These are thread-local copies of 'tx_ports'. One contains only tunnel
+ * ports (that support push_tunnel/pop_tunnel), the other contains ports
+ * with at least one txq (that support send). A port can be in both.
+ *
+ * There are two separate maps to make sure that we don't try to execute
+ * OUTPUT on a device which has 0 txqs or PUSH/POP on a non-tunnel device.
+ *
+ * The instances for cpu core NON_PMD_CORE_ID can be accessed by multiple
+ * threads, and thusly need to be protected by 'non_pmd_mutex'. Every
+ * other instance will only be accessed by its own pmd thread. */
+ struct hmap tnl_port_cache;
+ struct hmap send_port_cache;
+
+ /* Only a pmd thread can write on its own 'cycles' and 'stats'.
+ * The main thread keeps 'stats_zero' and 'cycles_zero' as base
+ * values and subtracts them from 'stats' and 'cycles' before
+ * reporting to the user (see the sketch after this struct). */
+ unsigned long long stats_zero[DP_N_STATS];
+ uint64_t cycles_zero[PMD_N_CYCLES];
+
+ /* Set to true if the pmd thread needs to be reloaded. */
+ bool need_reload;
};
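
The 'stats_zero'/'cycles_zero' comment above describes reset-by-subtraction: the pmd thread keeps incrementing its own counters, and the main thread only records a baseline and subtracts it when reporting. A minimal standalone sketch of that idea, with simplified counters and hypothetical names (not the dpif-netdev code itself):

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define N_COUNTERS 3

/* Hypothetical stand-ins for 'stats' and 'stats_zero'. */
struct pmd_counters {
    uint64_t stats[N_COUNTERS];      /* Written only by the pmd thread. */
    uint64_t stats_zero[N_COUNTERS]; /* Baseline kept by the main thread. */
};

/* "Clearing" the statistics only snapshots the current values. */
static void
counters_clear(struct pmd_counters *c)
{
    for (int i = 0; i < N_COUNTERS; i++) {
        c->stats_zero[i] = c->stats[i];
    }
}

/* Reporting subtracts the baseline, so the pmd thread never has to stop
 * or reset its own counters. */
static void
counters_report(const struct pmd_counters *c)
{
    for (int i = 0; i < N_COUNTERS; i++) {
        printf("counter %d: %" PRIu64 "\n", i, c->stats[i] - c->stats_zero[i]);
    }
}

int
main(void)
{
    struct pmd_counters c = { .stats = { 5, 7, 9 } };

    counters_clear(&c);   /* Baseline becomes {5, 7, 9}. */
    c.stats[0] += 3;      /* The pmd thread keeps counting. */
    counters_report(&c);  /* Prints 3, 0, 0. */
    return 0;
}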
/* Interface to netdev-based datapath. */
/* If this is part of a probe, drop the packet, since executing
 * the action may actually cause spurious packets to be sent into
 * the network. */
+ if (pmd->core_id == NON_PMD_CORE_ID) {
+ dp_netdev_pmd_unref(pmd);
+ }
return 0;
}
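
The early return added above also has to release the reference held on the non-pmd 'struct dp_netdev_pmd_thread' ('ref_cnt' above: every reference must be refcounted), so the probe path does not leak it. A toy sketch of that acquire/early-return/release pairing, using a plain counter and made-up names rather than the OVS refcount API:

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical refcounted object standing in for the pmd thread struct. */
struct pmd_stub {
    unsigned ref_cnt;
};

static void
pmd_stub_ref(struct pmd_stub *pmd)
{
    pmd->ref_cnt++;
}

static void
pmd_stub_unref(struct pmd_stub *pmd)
{
    pmd->ref_cnt--;   /* The real code frees the struct when this hits 0. */
}

/* Mirrors the shape of the fixed path: take a reference, and make sure
 * every early return drops it again. */
static int
execute_sketch(struct pmd_stub *pmd, bool probe)
{
    pmd_stub_ref(pmd);

    if (probe) {
        /* Early return: release the reference before bailing out. */
        pmd_stub_unref(pmd);
        return 0;
    }

    /* ... real packet processing would go here ... */
    pmd_stub_unref(pmd);
    return 0;
}

int
main(void)
{
    struct pmd_stub pmd = { .ref_cnt = 1 };

    execute_sketch(&pmd, true);
    printf("ref_cnt after probe: %u\n", pmd.ref_cnt);   /* Still 1. */
    return 0;
}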
/* Sort Rx Queues by the processing cycles they are consuming. */
static int
-rxq_cycle_sort(const void *a, const void *b)
+compare_rxq_cycles(const void *a, const void *b)
{
struct dp_netdev_rxq *qa;
struct dp_netdev_rxq *qb;
- uint64_t total_qa, total_qb;
- unsigned i;
+ uint64_t cycles_qa, cycles_qb;
qa = *(struct dp_netdev_rxq **) a;
qb = *(struct dp_netdev_rxq **) b;
- total_qa = total_qb = 0;
- for (i = 0; i < PMD_RXQ_INTERVAL_MAX; i++) {
- total_qa += dp_netdev_rxq_get_intrvl_cycles(qa, i);
- total_qb += dp_netdev_rxq_get_intrvl_cycles(qb, i);
- }
- dp_netdev_rxq_set_cycles(qa, RXQ_CYCLES_PROC_HIST, total_qa);
- dp_netdev_rxq_set_cycles(qb, RXQ_CYCLES_PROC_HIST, total_qb);
+ cycles_qa = dp_netdev_rxq_get_cycles(qa, RXQ_CYCLES_PROC_HIST);
+ cycles_qb = dp_netdev_rxq_get_cycles(qb, RXQ_CYCLES_PROC_HIST);
- if (total_qa >= total_qb) {
- return -1;
+ if (cycles_qa != cycles_qb) {
+ return (cycles_qa < cycles_qb) ? 1 : -1;
+ } else {
+ /* Cycles are the same so tiebreak on port/queue id.
+ * Tiebreaking (as opposed to return 0) ensures consistent
+ * sort results across multiple OS's. */
+ uint32_t port_qa = odp_to_u32(qa->port->port_no);
+ uint32_t port_qb = odp_to_u32(qb->port->port_no);
+ if (port_qa != port_qb) {
+ return port_qa > port_qb ? 1 : -1;
+ } else {
+ return netdev_rxq_get_queue_id(qa->rx)
+ - netdev_rxq_get_queue_id(qb->rx);
+ }
}
- return 1;
}
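
Note that the comparator above inverts the usual convention so that qsort() yields a descending order: the queue with the larger cycle count sorts first, and ties fall back to port/queue id for a consistent result across platforms. A tiny standalone illustration of the same return-value pattern on plain integers (hypothetical names, nothing from dpif-netdev):

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* Same convention as compare_rxq_cycles(): return -1 when 'a' holds the
 * larger value, so qsort() ends up sorting in descending order. */
static int
compare_desc(const void *a, const void *b)
{
    uint64_t va = *(const uint64_t *) a;
    uint64_t vb = *(const uint64_t *) b;

    if (va != vb) {
        return va < vb ? 1 : -1;
    }
    return 0;   /* The real comparator tiebreaks on port/queue id here. */
}

int
main(void)
{
    uint64_t cycles[] = { 10, 50, 30 };

    qsort(cycles, 3, sizeof cycles[0], compare_desc);
    for (int i = 0; i < 3; i++) {
        printf("%llu ", (unsigned long long) cycles[i]);   /* 50 30 10 */
    }
    putchar('\n');
    return 0;
}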
/* Assign pmds to queues. If 'pinned' is true, assign pmds to pinned
dp_netdev_pmd_unref(pmd);
}
} else if (!pinned && q->core_id == OVS_CORE_UNSPEC) {
+ uint64_t cycle_hist = 0;
+
if (n_rxqs == 0) {
rxqs = xmalloc(sizeof *rxqs);
} else {
rxqs = xrealloc(rxqs, sizeof *rxqs * (n_rxqs + 1));
}
+ /* Sum the queue intervals and store the cycle history. */
+ for (unsigned i = 0; i < PMD_RXQ_INTERVAL_MAX; i++) {
+ cycle_hist += dp_netdev_rxq_get_intrvl_cycles(q, i);
+ }
+ dp_netdev_rxq_set_cycles(q, RXQ_CYCLES_PROC_HIST, cycle_hist);
+
/* Store the queue. */
rxqs[n_rxqs++] = q;
}
if (n_rxqs > 1) {
/* Sort the queues in order of the processing cycles
* they consumed during their last pmd interval. */
- qsort(rxqs, n_rxqs, sizeof *rxqs, rxq_cycle_sort);
+ qsort(rxqs, n_rxqs, sizeof *rxqs, compare_rxq_cycles);
}
rr_numa_list_populate(dp, &rr);
}
conntrack_execute(&dp->conntrack, packets_, aux->flow->dl_type, force,
- commit, zone, setmark, setlabel, helper,
- nat_action_info_ref, now);
+ commit, zone, setmark, setlabel, aux->flow->tp_src,
+ aux->flow->tp_dst, helper, nat_action_info_ref, now);
break;
}
}
static int
-dpif_netdev_ct_flush(struct dpif *dpif, const uint16_t *zone)
+dpif_netdev_ct_flush(struct dpif *dpif, const uint16_t *zone,
+ const struct ct_dpif_tuple *tuple)
{
struct dp_netdev *dp = get_dp_netdev(dpif);
+ if (tuple) {
+ return EOPNOTSUPP;
+ }
return conntrack_flush(&dp->conntrack, zone);
}
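
A short note on the new 'tuple' argument: this datapath can still flush everything (NULL zone) or a single zone, but per-tuple flushing is not implemented, so such requests are rejected with EOPNOTSUPP before touching the connection tracker. A simplified, self-contained sketch of that gate, using stand-in types rather than struct dpif / struct ct_dpif_tuple:

#include <errno.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Stand-in for struct ct_dpif_tuple. */
struct fake_tuple {
    uint16_t src_port;
    uint16_t dst_port;
};

/* Mirrors the shape of dpif_netdev_ct_flush(): reject per-tuple requests,
 * otherwise flush the given zone (or all zones when 'zone' is NULL). */
static int
ct_flush_sketch(const uint16_t *zone, const struct fake_tuple *tuple)
{
    if (tuple) {
        return EOPNOTSUPP;   /* Per-tuple flush is not supported here. */
    }
    if (zone) {
        printf("flush zone %u\n", (unsigned) *zone);
    } else {
        printf("flush all zones\n");
    }
    return 0;
}

int
main(void)
{
    uint16_t zone = 5;
    struct fake_tuple t = { 1234, 80 };

    ct_flush_sketch(&zone, NULL);    /* Flushes zone 5. */
    ct_flush_sketch(NULL, NULL);     /* Flushes every zone. */
    if (ct_flush_sketch(&zone, &t) == EOPNOTSUPP) {
        printf("per-tuple flush not supported\n");
    }
    return 0;
}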