git.proxmox.com Git - ovs.git/blame - lib/dpif-netdev.c
dpif-netdev: Fix memory leak in tunnel header push action.
72865317 1/*
d262ac2c 2 * Copyright (c) 2009, 2010, 2011, 2012, 2013, 2014, 2016 Nicira, Inc.
72865317
BP
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include <config.h>
db73f716 18#include "dpif-netdev.h"
72865317 19
72865317
BP
20#include <ctype.h>
21#include <errno.h>
22#include <fcntl.h>
23#include <inttypes.h>
7f3adc00 24#include <net/if.h>
7daedce4 25#include <netinet/in.h>
cdee00fd 26#include <stdint.h>
72865317
BP
27#include <stdlib.h>
28#include <string.h>
29#include <sys/ioctl.h>
7daedce4 30#include <sys/socket.h>
72865317 31#include <sys/stat.h>
72865317
BP
32#include <unistd.h>
33
9f861c91 34#include "bitmap.h"
59e6d833 35#include "cmap.h"
7daedce4 36#include "coverage.h"
72865317 37#include "csum.h"
e14deea0 38#include "dp-packet.h"
614c4892 39#include "dpif.h"
72865317 40#include "dpif-provider.h"
614c4892 41#include "dummy.h"
afae68b1 42#include "fat-rwlock.h"
72865317 43#include "flow.h"
762d146a 44#include "hmapx.h"
6c3eee82 45#include "latch.h"
72865317 46#include "netdev.h"
8617afff 47#include "netdev-dpdk.h"
de281153 48#include "netdev-vport.h"
cdee00fd 49#include "netlink.h"
f094af7b 50#include "odp-execute.h"
72865317 51#include "odp-util.h"
25d436fb
BW
52#include "openvswitch/dynamic-string.h"
53#include "openvswitch/list.h"
54#include "openvswitch/match.h"
55#include "openvswitch/ofp-print.h"
64c96779 56#include "openvswitch/ofpbuf.h"
25d436fb 57#include "openvswitch/vlog.h"
5a034064 58#include "ovs-numa.h"
61e7deb1 59#include "ovs-rcu.h"
72865317
BP
60#include "packets.h"
61#include "poll-loop.h"
0de8783a 62#include "pvector.h"
26c6b6cd 63#include "random.h"
d33ed218 64#include "seq.h"
462278db 65#include "shash.h"
0cbfe35d 66#include "sset.h"
72865317 67#include "timeval.h"
53902038 68#include "tnl-neigh-cache.h"
7f9b8504 69#include "tnl-ports.h"
74cc3969 70#include "unixctl.h"
72865317 71#include "util.h"
7daedce4 72
d98e6007 73VLOG_DEFINE_THIS_MODULE(dpif_netdev);
72865317 74
8bb113da 75#define FLOW_DUMP_MAX_BATCH 50
adcf00ba
AZ
 76/* Use per-thread recirc_depth to prevent recirculation loops. */
77#define MAX_RECIRC_DEPTH 5
78DEFINE_STATIC_PER_THREAD_DATA(uint32_t, recirc_depth, 0)
e4cfed38 79
72865317 80/* Configuration parameters. */
72865317
BP
81enum { MAX_FLOWS = 65536 }; /* Maximum number of flows in flow table. */
82
8a4e3a85
BP
83/* Protects against changes to 'dp_netdevs'. */
84static struct ovs_mutex dp_netdev_mutex = OVS_MUTEX_INITIALIZER;
85
86/* Contains all 'struct dp_netdev's. */
87static struct shash dp_netdevs OVS_GUARDED_BY(dp_netdev_mutex)
88 = SHASH_INITIALIZER(&dp_netdevs);
89
623540e4 90static struct vlog_rate_limit upcall_rl = VLOG_RATE_LIMIT_INIT(600, 600);
6b31e073 91
2494ccd7
JS
92static struct odp_support dp_netdev_support = {
93 .max_mpls_depth = SIZE_MAX,
94 .recirc = true,
95};
96
79df317f 97/* Stores a miniflow with inline values. */
9bbf1c3d 98
9bbf1c3d 99struct netdev_flow_key {
caeb4906
JR
100 uint32_t hash; /* Hash function differs for different users. */
101 uint32_t len; /* Length of the following miniflow (incl. map). */
0de8783a 102 struct miniflow mf;
8fd47924 103 uint64_t buf[FLOW_MAX_PACKET_U64S];
9bbf1c3d
DDP
104};
105
106/* Exact match cache for frequently used flows
107 *
108 * The cache uses a 32-bit hash of the packet (which can be the RSS hash) to
109 * search its entries for a miniflow that matches exactly the miniflow of the
0de8783a 110 * packet. It stores the 'dpcls_rule' (rule) that matches the miniflow.
9bbf1c3d
DDP
111 *
112 * A cache entry holds a reference to its 'dp_netdev_flow'.
113 *
114 * A miniflow with a given hash can be in one of EM_FLOW_HASH_SEGS different
115 * entries. The 32-bit hash is split into EM_FLOW_HASH_SEGS values (each of
116 * them is EM_FLOW_HASH_SHIFT bits wide and the remainder is thrown away). Each
117 * value is the index of a cache entry where the miniflow could be.
118 *
119 *
120 * Thread-safety
121 * =============
122 *
123 * Each pmd_thread has its own private exact match cache.
124 * If dp_netdev_input is not called from a pmd thread, a mutex is used.
125 */
126
fc82e877 127#define EM_FLOW_HASH_SHIFT 13
9bbf1c3d
DDP
128#define EM_FLOW_HASH_ENTRIES (1u << EM_FLOW_HASH_SHIFT)
129#define EM_FLOW_HASH_MASK (EM_FLOW_HASH_ENTRIES - 1)
130#define EM_FLOW_HASH_SEGS 2
131
132struct emc_entry {
9bbf1c3d 133 struct dp_netdev_flow *flow;
0de8783a 134 struct netdev_flow_key key; /* key.hash used for emc hash value. */
9bbf1c3d
DDP
135};
136
137struct emc_cache {
138 struct emc_entry entries[EM_FLOW_HASH_ENTRIES];
67ad54cb 139 int sweep_idx; /* For emc_cache_slow_sweep(). */
9bbf1c3d
DDP
140};
141
142/* Iterate in the exact match cache through every entry that might contain a
143 * miniflow with hash 'HASH'. */
144#define EMC_FOR_EACH_POS_WITH_HASH(EMC, CURRENT_ENTRY, HASH) \
145 for (uint32_t i__ = 0, srch_hash__ = (HASH); \
146 (CURRENT_ENTRY) = &(EMC)->entries[srch_hash__ & EM_FLOW_HASH_MASK], \
147 i__ < EM_FLOW_HASH_SEGS; \
148 i__++, srch_hash__ >>= EM_FLOW_HASH_SHIFT)
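/* Editor's note: an illustrative sketch (not part of the original file) of
 * how a lookup drives the probing macro above.  The real emc_lookup()
 * later in this file follows the same pattern; emc_entry_alive() and
 * netdev_flow_key_equal_mf() are defined further down. */
static inline struct dp_netdev_flow *
emc_lookup_sketch(struct emc_cache *cache, const struct netdev_flow_key *key)
{
    struct emc_entry *current_entry;

    EMC_FOR_EACH_POS_WITH_HASH(cache, current_entry, key->hash) {
        if (current_entry->key.hash == key->hash
            && emc_entry_alive(current_entry)
            && netdev_flow_key_equal_mf(&current_entry->key, &key->mf)) {
            return current_entry->flow;     /* Live exact match. */
        }
    }
    return NULL;                            /* Both candidate slots missed. */
}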
0de8783a
JR
149\f
150/* Simple non-wildcarding single-priority classifier. */
151
152struct dpcls {
153 struct cmap subtables_map;
154 struct pvector subtables;
155};
9bbf1c3d 156
0de8783a
JR
157/* A rule to be inserted to the classifier. */
158struct dpcls_rule {
159 struct cmap_node cmap_node; /* Within struct dpcls_subtable 'rules'. */
160 struct netdev_flow_key *mask; /* Subtable's mask. */
161 struct netdev_flow_key flow; /* Matching key. */
162 /* 'flow' must be the last field, additional space is allocated here. */
163};
164
165static void dpcls_init(struct dpcls *);
166static void dpcls_destroy(struct dpcls *);
167static void dpcls_insert(struct dpcls *, struct dpcls_rule *,
168 const struct netdev_flow_key *mask);
169static void dpcls_remove(struct dpcls *, struct dpcls_rule *);
170static bool dpcls_lookup(const struct dpcls *cls,
171 const struct netdev_flow_key keys[],
172 struct dpcls_rule **rules, size_t cnt);
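/* Editor's note: an illustrative sketch (not part of the original file) of
 * how the classifier API above is typically driven.  dpcls_lookup() fills
 * 'rules[i]' for every key that matches (leaving misses NULL) and returns
 * true only if all keys matched; dp_netdev_flow_cast() is defined later in
 * this file.  The array bound is an assumption. */
static inline void
dpcls_lookup_batch_sketch(const struct dpcls *cls,
                          const struct netdev_flow_key keys[], size_t cnt)
{
    struct dpcls_rule *rules[32];       /* Assumed to be >= 'cnt'. */
    bool all_found = dpcls_lookup(cls, keys, rules, cnt);

    for (size_t i = 0; i < cnt; i++) {
        struct dp_netdev_flow *flow = dp_netdev_flow_cast(rules[i]);

        /* 'flow' is NULL for keys that missed (only when !all_found). */
        (void) flow;
    }
    (void) all_found;
}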
173\f
8a4e3a85
BP
174/* Datapath based on the network device interface from netdev.h.
175 *
176 *
177 * Thread-safety
178 * =============
179 *
180 * Some members, marked 'const', are immutable. Accessing other members
181 * requires synchronization, as noted in more detail below.
182 *
183 * Acquisition order is, from outermost to innermost:
184 *
185 * dp_netdev_mutex (global)
59e6d833 186 * port_mutex
8a4e3a85 187 */
72865317 188struct dp_netdev {
8a4e3a85
BP
189 const struct dpif_class *const class;
190 const char *const name;
6b31e073 191 struct dpif *dpif;
6a8267c5
BP
192 struct ovs_refcount ref_cnt;
193 atomic_flag destroyed;
72865317 194
8a4e3a85
BP
195 /* Ports.
196 *
59e6d833
BP
197 * Protected by RCU. Take the mutex to add or remove ports. */
198 struct ovs_mutex port_mutex;
199 struct cmap ports;
d33ed218 200 struct seq *port_seq; /* Incremented whenever a port changes. */
6c3eee82 201
6b31e073
RW
202 /* Protects access to ofproto-dpif-upcall interface during revalidator
203 * thread synchronization. */
204 struct fat_rwlock upcall_rwlock;
623540e4
EJ
205 upcall_callback *upcall_cb; /* Callback function for executing upcalls. */
206 void *upcall_aux;
6b31e073 207
e4e74c3a
AW
208 /* Callback function for notifying the purging of dp flows (during
 209 * pmd thread reset or deletion). */
210 dp_purge_callback *dp_purge_cb;
211 void *dp_purge_aux;
212
65f13b50
AW
213 /* Stores all 'struct dp_netdev_pmd_thread's. */
214 struct cmap poll_threads;
215
216 /* Protects the access of the 'struct dp_netdev_pmd_thread'
217 * instance for non-pmd thread. */
218 struct ovs_mutex non_pmd_mutex;
219
220 /* Each pmd thread will store its pointer to
221 * 'struct dp_netdev_pmd_thread' in 'per_pmd_key'. */
222 ovsthread_key_t per_pmd_key;
f2eee189 223
a14b8947 224 /* CPU mask for pinning pmd threads. */
f2eee189 225 char *pmd_cmask;
a36de779 226 uint64_t last_tnl_conf_seq;
72865317
BP
227};
228
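/* Editor's note: an illustrative sketch (not part of the original file) of
 * the acquisition order documented above: the global 'dp_netdev_mutex' is
 * always taken before a datapath's 'port_mutex', never the reverse. */
static inline void
lock_order_sketch(struct dp_netdev *dp)
    OVS_NO_THREAD_SAFETY_ANALYSIS
{
    ovs_mutex_lock(&dp_netdev_mutex);   /* Outermost. */
    ovs_mutex_lock(&dp->port_mutex);    /* Innermost. */
    /* ... add or remove ports here ... */
    ovs_mutex_unlock(&dp->port_mutex);
    ovs_mutex_unlock(&dp_netdev_mutex);
}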
8a4e3a85 229static struct dp_netdev_port *dp_netdev_lookup_port(const struct dp_netdev *dp,
59e6d833 230 odp_port_t);
ff073a71 231
51852a57 232enum dp_stat_type {
abcf3ef4
DDP
233 DP_STAT_EXACT_HIT, /* Packets that had an exact match (emc). */
234 DP_STAT_MASKED_HIT, /* Packets that matched in the flow table. */
51852a57
BP
235 DP_STAT_MISS, /* Packets that did not match. */
236 DP_STAT_LOST, /* Packets not passed up to the client. */
237 DP_N_STATS
238};
239
55e3ca97
DDP
240enum pmd_cycles_counter_type {
241 PMD_CYCLES_POLLING, /* Cycles spent polling NICs. */
 242 PMD_CYCLES_PROCESSING, /* Cycles spent processing packets. */
243 PMD_N_CYCLES
244};
245
72865317
BP
246/* A port in a netdev-based datapath. */
247struct dp_netdev_port {
35303d71 248 odp_port_t port_no;
72865317 249 struct netdev *netdev;
efa2bcbb 250 struct cmap_node node; /* Node in dp_netdev's 'ports'. */
4b609110 251 struct netdev_saved_flags *sf;
490e82af 252 unsigned n_rxq; /* Number of elements in 'rxq' */
55c955bd 253 struct netdev_rxq **rxq;
0cbfe35d 254 char *type; /* Port type as requested by user. */
a14b8947
IM
 255 int latest_requested_n_rxq; /* Latest number of rx queues
 256 requested from the netdev. */
72865317
BP
257};
258
1c1e46ed
AW
259/* Contained by struct dp_netdev_flow's 'stats' member. */
260struct dp_netdev_flow_stats {
eb94da30
DDP
261 atomic_llong used; /* Last used time, in monotonic msecs. */
262 atomic_ullong packet_count; /* Number of packets matched. */
263 atomic_ullong byte_count; /* Number of bytes matched. */
264 atomic_uint16_t tcp_flags; /* Bitwise-OR of seen tcp_flags values. */
1c1e46ed
AW
265};
266
267/* A flow in 'dp_netdev_pmd_thread's 'flow_table'.
8a4e3a85
BP
268 *
269 *
270 * Thread-safety
271 * =============
272 *
273 * Except near the beginning or ending of its lifespan, rule 'rule' belongs to
1c1e46ed 274 * its pmd thread's classifier. The text below calls this classifier 'cls'.
8a4e3a85
BP
275 *
276 * Motivation
277 * ----------
278 *
279 * The thread safety rules described here for "struct dp_netdev_flow" are
280 * motivated by two goals:
281 *
282 * - Prevent threads that read members of "struct dp_netdev_flow" from
283 * reading bad data due to changes by some thread concurrently modifying
284 * those members.
285 *
286 * - Prevent two threads making changes to members of a given "struct
287 * dp_netdev_flow" from interfering with each other.
288 *
289 *
290 * Rules
291 * -----
292 *
ed79f89a
DDP
293 * A flow 'flow' may be accessed without a risk of being freed during an RCU
294 * grace period. Code that needs to hold onto a flow for a while
295 * should try incrementing 'flow->ref_cnt' with dp_netdev_flow_ref().
8a4e3a85
BP
296 *
297 * 'flow->ref_cnt' protects 'flow' from being freed. It doesn't protect the
ed79f89a
DDP
298 * flow from being deleted from 'cls' and it doesn't protect members of 'flow'
299 * from modification.
8a4e3a85
BP
300 *
301 * Some members, marked 'const', are immutable. Accessing other members
302 * requires synchronization, as noted in more detail below.
303 */
72865317 304struct dp_netdev_flow {
11e5cf1f 305 const struct flow flow; /* Unmasked flow that created this entry. */
8a4e3a85 306 /* Hash table index by unmasked flow. */
1c1e46ed
AW
307 const struct cmap_node node; /* In owning dp_netdev_pmd_thread's */
308 /* 'flow_table'. */
70e5ed6f 309 const ovs_u128 ufid; /* Unique flow identifier. */
bd5131ba 310 const unsigned pmd_id; /* The 'core_id' of pmd thread owning this */
1c1e46ed 311 /* flow. */
72865317 312
ed79f89a
DDP
313 /* Number of references.
314 * The classifier owns one reference.
315 * Any thread trying to keep a rule from being freed should hold its own
316 * reference. */
317 struct ovs_refcount ref_cnt;
318
11e5cf1f
DDP
319 bool dead;
320
1c1e46ed
AW
321 /* Statistics. */
322 struct dp_netdev_flow_stats stats;
8a4e3a85 323
45c626a3 324 /* Actions. */
61e7deb1 325 OVSRCU_TYPE(struct dp_netdev_actions *) actions;
0de8783a 326
11e5cf1f
DDP
327 /* While processing a group of input packets, the datapath uses the next
328 * member to store a pointer to the output batch for the flow. It is
329 * reset after the batch has been sent out (See dp_netdev_queue_batches(),
f7ce4811
PS
330 * packet_batch_per_flow_init() and packet_batch_per_flow_execute()). */
331 struct packet_batch_per_flow *batch;
11e5cf1f 332
0de8783a
JR
333 /* Packet classification. */
334 struct dpcls_rule cr; /* In owning dp_netdev's 'cls'. */
335 /* 'cr' must be the last member. */
72865317
BP
336};
337
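/* Editor's note: an illustrative sketch (not part of the original file) of
 * the reference rules above.  RCU keeps a flow from being freed during the
 * current grace period; code that wants to hold it longer must take its
 * own reference with dp_netdev_flow_ref() (declared just below). */
static inline struct dp_netdev_flow *
flow_hold_sketch(struct dp_netdev_flow *flow)
{
    if (flow && dp_netdev_flow_ref(flow)) {
        return flow;    /* Caller pairs this with dp_netdev_flow_unref(). */
    }
    return NULL;        /* The flow was concurrently being freed. */
}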
ed79f89a 338static void dp_netdev_flow_unref(struct dp_netdev_flow *);
9bbf1c3d 339static bool dp_netdev_flow_ref(struct dp_netdev_flow *);
70e5ed6f
JS
340static int dpif_netdev_flow_from_nlattrs(const struct nlattr *, uint32_t,
341 struct flow *);
8a4e3a85 342
a84cb64a
BP
343/* A set of datapath actions within a "struct dp_netdev_flow".
344 *
345 *
346 * Thread-safety
347 * =============
348 *
45c626a3 349 * A struct dp_netdev_actions 'actions' is protected with RCU. */
a84cb64a 350struct dp_netdev_actions {
a84cb64a
BP
351 /* These members are immutable: they do not change during the struct's
352 * lifetime. */
a84cb64a 353 unsigned int size; /* Size of 'actions', in bytes. */
9ff55ae2 354 struct nlattr actions[]; /* Sequence of OVS_ACTION_ATTR_* attributes. */
a84cb64a
BP
355};
356
357struct dp_netdev_actions *dp_netdev_actions_create(const struct nlattr *,
358 size_t);
61e7deb1
BP
359struct dp_netdev_actions *dp_netdev_flow_get_actions(
360 const struct dp_netdev_flow *);
361static void dp_netdev_actions_free(struct dp_netdev_actions *);
a84cb64a 362
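/* Editor's note: an illustrative sketch (not part of the original file).
 * Because a flow's actions are RCU-protected, a reader just snapshots the
 * pointer with dp_netdev_flow_get_actions(); it stays valid until the
 * thread quiesces, while an updater swaps in a new set and postpones
 * freeing the old one via dp_netdev_actions_free(). */
static inline unsigned int
flow_actions_size_sketch(const struct dp_netdev_flow *flow)
{
    const struct dp_netdev_actions *actions
        = dp_netdev_flow_get_actions(flow);

    /* Do not cache 'actions' across a quiescent point. */
    return actions->size;
}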
1c1e46ed
AW
363/* Contained by struct dp_netdev_pmd_thread's 'stats' member. */
364struct dp_netdev_pmd_stats {
365 /* Indexed by DP_STAT_*. */
eb94da30 366 atomic_ullong n[DP_N_STATS];
1c1e46ed
AW
367};
368
55e3ca97
DDP
369/* Contained by struct dp_netdev_pmd_thread's 'cycle' member. */
370struct dp_netdev_pmd_cycles {
371 /* Indexed by PMD_CYCLES_*. */
372 atomic_ullong n[PMD_N_CYCLES];
373};
374
ae7ad0a1
IM
375/* Contained by struct dp_netdev_pmd_thread's 'poll_list' member. */
376struct rxq_poll {
377 struct dp_netdev_port *port;
378 struct netdev_rxq *rx;
379 struct ovs_list node;
380};
381
e4cfed38
PS
382/* PMD: Poll mode drivers. A PMD accesses devices via polling to eliminate
 383 * the performance overhead of interrupt processing. Therefore netdev
 384 * cannot implement rx-wait for these devices. dpif-netdev needs to poll
 385 * these devices to check the receive buffers. A pmd thread does the
 386 * polling for devices assigned to itself.
e4cfed38
PS
387 *
 388 * DPDK uses PMDs for accessing NICs.
389 *
65f13b50
AW
390 * Note, instance with cpu core id NON_PMD_CORE_ID will be reserved for
391 * I/O of all non-pmd threads. There will be no actual thread created
392 * for the instance.
1c1e46ed
AW
393 *
394 * Each struct has its own flow table and classifier. Packets received
395 * from managed ports are looked up in the corresponding pmd thread's
396 * flow table, and are executed with the found actions.
397 * */
65f13b50 398struct dp_netdev_pmd_thread {
6c3eee82 399 struct dp_netdev *dp;
1c1e46ed 400 struct ovs_refcount ref_cnt; /* Every reference must be refcount'ed. */
65f13b50 401 struct cmap_node node; /* In 'dp->poll_threads'. */
accf8626
AW
402
403 pthread_cond_t cond; /* For synchronizing pmd thread reload. */
404 struct ovs_mutex cond_mutex; /* Mutex for condition variable. */
405
65f13b50
AW
 406 /* Per-thread exact-match cache. Note, the instance for cpu core
 407 * NON_PMD_CORE_ID can be accessed by multiple threads, and thus
 408 * needs to be protected (e.g. by 'dp_netdev_mutex'). All other
 409 * instances will only be accessed by their own pmd thread. */
9bbf1c3d 410 struct emc_cache flow_cache;
1c1e46ed
AW
411
412 /* Classifier and Flow-Table.
413 *
414 * Writers of 'flow_table' must take the 'flow_mutex'. Corresponding
415 * changes to 'cls' must be made while still holding the 'flow_mutex'.
416 */
417 struct ovs_mutex flow_mutex;
418 struct dpcls cls;
419 struct cmap flow_table OVS_GUARDED; /* Flow table. */
420
421 /* Statistics. */
422 struct dp_netdev_pmd_stats stats;
423
55e3ca97
DDP
424 /* Cycles counters */
425 struct dp_netdev_pmd_cycles cycles;
426
 427 /* Used to count cycles. See 'cycles_counter_end()'. */
428 unsigned long long last_cycles;
429
65f13b50
AW
430 struct latch exit_latch; /* For terminating the pmd thread. */
431 atomic_uint change_seq; /* For reloading pmd ports. */
6c3eee82 432 pthread_t thread;
65f13b50
AW
 433 int index; /* Index of this pmd thread among the pmd
 434 * threads on the same numa node. */
bd5131ba 435 unsigned core_id; /* CPU core id of this pmd thread. */
65f13b50 436 int numa_id; /* numa node id of this pmd thread. */
347ba9bb 437 atomic_int tx_qid; /* Queue id used by this pmd thread to
3bcc10c0 438 * send packets on all netdevs */
6553d06b 439
ae7ad0a1
IM
440 struct ovs_mutex poll_mutex; /* Mutex for poll_list. */
441 /* List of rx queues to poll. */
442 struct ovs_list poll_list OVS_GUARDED;
 443 int poll_cnt; /* Number of elements in poll_list. */
444
6553d06b
DDP
445 /* Only a pmd thread can write on its own 'cycles' and 'stats'.
446 * The main thread keeps 'stats_zero' and 'cycles_zero' as base
447 * values and subtracts them from 'stats' and 'cycles' before
 448 * reporting to the user. */
449 unsigned long long stats_zero[DP_N_STATS];
450 uint64_t cycles_zero[PMD_N_CYCLES];
6c3eee82
BP
451};
452
84067a4c
JR
453#define PMD_INITIAL_SEQ 1
454
72865317
BP
455/* Interface to netdev-based datapath. */
456struct dpif_netdev {
457 struct dpif dpif;
458 struct dp_netdev *dp;
d33ed218 459 uint64_t last_port_seq;
72865317
BP
460};
461
8a4e3a85 462static int get_port_by_number(struct dp_netdev *dp, odp_port_t port_no,
59e6d833 463 struct dp_netdev_port **portp);
8a4e3a85 464static int get_port_by_name(struct dp_netdev *dp, const char *devname,
59e6d833 465 struct dp_netdev_port **portp);
8a4e3a85
BP
466static void dp_netdev_free(struct dp_netdev *)
467 OVS_REQUIRES(dp_netdev_mutex);
8a4e3a85
BP
468static int do_add_port(struct dp_netdev *dp, const char *devname,
469 const char *type, odp_port_t port_no)
59e6d833 470 OVS_REQUIRES(dp->port_mutex);
c40b890f 471static void do_del_port(struct dp_netdev *dp, struct dp_netdev_port *)
59e6d833 472 OVS_REQUIRES(dp->port_mutex);
614c4892
BP
473static int dpif_netdev_open(const struct dpif_class *, const char *name,
474 bool create, struct dpif **);
65f13b50 475static void dp_netdev_execute_actions(struct dp_netdev_pmd_thread *pmd,
1895cc8d 476 struct dp_packet_batch *,
41ccaa24 477 bool may_steal,
4edb9ae9 478 const struct nlattr *actions,
e4cfed38 479 size_t actions_len);
65f13b50 480static void dp_netdev_input(struct dp_netdev_pmd_thread *,
1895cc8d 481 struct dp_packet_batch *, odp_port_t port_no);
a90ed026 482static void dp_netdev_recirculate(struct dp_netdev_pmd_thread *,
1895cc8d 483 struct dp_packet_batch *);
41ccaa24 484
6b31e073 485static void dp_netdev_disable_upcall(struct dp_netdev *);
ae7ad0a1 486static void dp_netdev_pmd_reload_done(struct dp_netdev_pmd_thread *pmd);
65f13b50
AW
487static void dp_netdev_configure_pmd(struct dp_netdev_pmd_thread *pmd,
488 struct dp_netdev *dp, int index,
bd5131ba 489 unsigned core_id, int numa_id);
1c1e46ed 490static void dp_netdev_destroy_pmd(struct dp_netdev_pmd_thread *pmd);
f2eee189 491static void dp_netdev_set_nonpmd(struct dp_netdev *dp);
b19befae 492static struct dp_netdev_pmd_thread *dp_netdev_get_pmd(struct dp_netdev *dp,
bd5131ba 493 unsigned core_id);
1c1e46ed
AW
494static struct dp_netdev_pmd_thread *
495dp_netdev_pmd_get_next(struct dp_netdev *dp, struct cmap_position *pos);
65f13b50
AW
496static void dp_netdev_destroy_all_pmds(struct dp_netdev *dp);
497static void dp_netdev_del_pmds_on_numa(struct dp_netdev *dp, int numa_id);
498static void dp_netdev_set_pmds_on_numa(struct dp_netdev *dp, int numa_id);
cc245ce8
IM
499static void dp_netdev_pmd_clear_poll_list(struct dp_netdev_pmd_thread *pmd);
500static void dp_netdev_del_port_from_pmd(struct dp_netdev_port *port,
501 struct dp_netdev_pmd_thread *pmd);
502static void dp_netdev_del_port_from_all_pmds(struct dp_netdev *dp,
503 struct dp_netdev_port *port);
504static void
505dp_netdev_add_port_to_pmds(struct dp_netdev *dp, struct dp_netdev_port *port);
ae7ad0a1
IM
506static void
507dp_netdev_add_rxq_to_pmd(struct dp_netdev_pmd_thread *pmd,
508 struct dp_netdev_port *port, struct netdev_rxq *rx);
509static struct dp_netdev_pmd_thread *
510dp_netdev_less_loaded_pmd_on_numa(struct dp_netdev *dp, int numa_id);
f2eee189 511static void dp_netdev_reset_pmd_threads(struct dp_netdev *dp);
1c1e46ed
AW
512static bool dp_netdev_pmd_try_ref(struct dp_netdev_pmd_thread *pmd);
513static void dp_netdev_pmd_unref(struct dp_netdev_pmd_thread *pmd);
514static void dp_netdev_pmd_flow_flush(struct dp_netdev_pmd_thread *pmd);
72865317 515
67ad54cb 516static inline bool emc_entry_alive(struct emc_entry *ce);
9bbf1c3d
DDP
517static void emc_clear_entry(struct emc_entry *ce);
518
519static void
520emc_cache_init(struct emc_cache *flow_cache)
521{
522 int i;
523
67ad54cb 524 flow_cache->sweep_idx = 0;
9bbf1c3d
DDP
525 for (i = 0; i < ARRAY_SIZE(flow_cache->entries); i++) {
526 flow_cache->entries[i].flow = NULL;
0de8783a 527 flow_cache->entries[i].key.hash = 0;
09b0fa9c 528 flow_cache->entries[i].key.len = sizeof(struct miniflow);
5fcff47b 529 flowmap_init(&flow_cache->entries[i].key.mf.map);
9bbf1c3d
DDP
530 }
531}
532
533static void
534emc_cache_uninit(struct emc_cache *flow_cache)
535{
536 int i;
537
538 for (i = 0; i < ARRAY_SIZE(flow_cache->entries); i++) {
539 emc_clear_entry(&flow_cache->entries[i]);
540 }
541}
542
67ad54cb
AW
543/* Check and clear dead flow references slowly (one entry at each
544 * invocation). */
545static void
546emc_cache_slow_sweep(struct emc_cache *flow_cache)
547{
548 struct emc_entry *entry = &flow_cache->entries[flow_cache->sweep_idx];
549
550 if (!emc_entry_alive(entry)) {
551 emc_clear_entry(entry);
552 }
553 flow_cache->sweep_idx = (flow_cache->sweep_idx + 1) & EM_FLOW_HASH_MASK;
554}
555
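/* Editor's note: an illustrative sketch (not part of the original file) of
 * how the sweep above is amortized: the pmd main loop calls it every so
 * many iterations, so the full 8192-entry cache is scanned gradually.  The
 * interval below is an assumption, not the value pmd_thread_main() uses. */
static inline void
pmd_maybe_sweep_sketch(struct emc_cache *flow_cache, unsigned int *iter)
{
    if (++*iter % 1024 == 0) {          /* Assumed sweep interval. */
        emc_cache_slow_sweep(flow_cache);
    }
}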
c4ea7529
BP
556/* Returns true if 'dpif' is a netdev or dummy dpif, false otherwise. */
557bool
558dpif_is_netdev(const struct dpif *dpif)
559{
560 return dpif->dpif_class->open == dpif_netdev_open;
561}
562
72865317
BP
563static struct dpif_netdev *
564dpif_netdev_cast(const struct dpif *dpif)
565{
c4ea7529 566 ovs_assert(dpif_is_netdev(dpif));
72865317
BP
567 return CONTAINER_OF(dpif, struct dpif_netdev, dpif);
568}
569
570static struct dp_netdev *
571get_dp_netdev(const struct dpif *dpif)
572{
573 return dpif_netdev_cast(dpif)->dp;
574}
6553d06b
DDP
575\f
576enum pmd_info_type {
ce179f11
IM
577 PMD_INFO_SHOW_STATS, /* Show how cpu cycles are spent. */
578 PMD_INFO_CLEAR_STATS, /* Set the cycles count to 0. */
579 PMD_INFO_SHOW_RXQ /* Show poll-lists of pmd threads. */
6553d06b
DDP
580};
581
582static void
583pmd_info_show_stats(struct ds *reply,
584 struct dp_netdev_pmd_thread *pmd,
585 unsigned long long stats[DP_N_STATS],
586 uint64_t cycles[PMD_N_CYCLES])
587{
588 unsigned long long total_packets = 0;
589 uint64_t total_cycles = 0;
590 int i;
591
 592 /* These loops subtract reference values ('*_zero') from the counters.
593 * Since loads and stores are relaxed, it might be possible for a '*_zero'
594 * value to be more recent than the current value we're reading from the
595 * counter. This is not a big problem, since these numbers are not
596 * supposed to be too accurate, but we should at least make sure that
597 * the result is not negative. */
598 for (i = 0; i < DP_N_STATS; i++) {
599 if (stats[i] > pmd->stats_zero[i]) {
600 stats[i] -= pmd->stats_zero[i];
601 } else {
602 stats[i] = 0;
603 }
604
605 if (i != DP_STAT_LOST) {
606 /* Lost packets are already included in DP_STAT_MISS */
607 total_packets += stats[i];
608 }
609 }
610
611 for (i = 0; i < PMD_N_CYCLES; i++) {
612 if (cycles[i] > pmd->cycles_zero[i]) {
613 cycles[i] -= pmd->cycles_zero[i];
614 } else {
615 cycles[i] = 0;
616 }
617
618 total_cycles += cycles[i];
619 }
620
621 ds_put_cstr(reply, (pmd->core_id == NON_PMD_CORE_ID)
622 ? "main thread" : "pmd thread");
623
624 if (pmd->numa_id != OVS_NUMA_UNSPEC) {
625 ds_put_format(reply, " numa_id %d", pmd->numa_id);
626 }
d5c199ea 627 if (pmd->core_id != OVS_CORE_UNSPEC && pmd->core_id != NON_PMD_CORE_ID) {
bd5131ba 628 ds_put_format(reply, " core_id %u", pmd->core_id);
6553d06b
DDP
629 }
630 ds_put_cstr(reply, ":\n");
631
632 ds_put_format(reply,
633 "\temc hits:%llu\n\tmegaflow hits:%llu\n"
634 "\tmiss:%llu\n\tlost:%llu\n",
635 stats[DP_STAT_EXACT_HIT], stats[DP_STAT_MASKED_HIT],
636 stats[DP_STAT_MISS], stats[DP_STAT_LOST]);
637
638 if (total_cycles == 0) {
639 return;
640 }
641
642 ds_put_format(reply,
643 "\tpolling cycles:%"PRIu64" (%.02f%%)\n"
644 "\tprocessing cycles:%"PRIu64" (%.02f%%)\n",
645 cycles[PMD_CYCLES_POLLING],
646 cycles[PMD_CYCLES_POLLING] / (double)total_cycles * 100,
647 cycles[PMD_CYCLES_PROCESSING],
648 cycles[PMD_CYCLES_PROCESSING] / (double)total_cycles * 100);
649
650 if (total_packets == 0) {
651 return;
652 }
653
654 ds_put_format(reply,
655 "\tavg cycles per packet: %.02f (%"PRIu64"/%llu)\n",
656 total_cycles / (double)total_packets,
657 total_cycles, total_packets);
658
659 ds_put_format(reply,
660 "\tavg processing cycles per packet: "
661 "%.02f (%"PRIu64"/%llu)\n",
662 cycles[PMD_CYCLES_PROCESSING] / (double)total_packets,
663 cycles[PMD_CYCLES_PROCESSING], total_packets);
664}
665
666static void
667pmd_info_clear_stats(struct ds *reply OVS_UNUSED,
668 struct dp_netdev_pmd_thread *pmd,
669 unsigned long long stats[DP_N_STATS],
670 uint64_t cycles[PMD_N_CYCLES])
671{
672 int i;
673
674 /* We cannot write 'stats' and 'cycles' (because they're written by other
675 * threads) and we shouldn't change 'stats' (because they're used to count
676 * datapath stats, which must not be cleared here). Instead, we save the
677 * current values and subtract them from the values to be displayed in the
 678 * future. */
679 for (i = 0; i < DP_N_STATS; i++) {
680 pmd->stats_zero[i] = stats[i];
681 }
682 for (i = 0; i < PMD_N_CYCLES; i++) {
683 pmd->cycles_zero[i] = cycles[i];
684 }
685}
686
ce179f11
IM
687static void
688pmd_info_show_rxq(struct ds *reply, struct dp_netdev_pmd_thread *pmd)
689{
690 if (pmd->core_id != NON_PMD_CORE_ID) {
691 struct rxq_poll *poll;
692 const char *prev_name = NULL;
693
694 ds_put_format(reply, "pmd thread numa_id %d core_id %u:\n",
695 pmd->numa_id, pmd->core_id);
696
697 ovs_mutex_lock(&pmd->poll_mutex);
698 LIST_FOR_EACH (poll, node, &pmd->poll_list) {
699 const char *name = netdev_get_name(poll->port->netdev);
700
701 if (!prev_name || strcmp(name, prev_name)) {
702 if (prev_name) {
703 ds_put_cstr(reply, "\n");
704 }
705 ds_put_format(reply, "\tport: %s\tqueue-id:",
706 netdev_get_name(poll->port->netdev));
707 }
708 ds_put_format(reply, " %d", netdev_rxq_get_queue_id(poll->rx));
709 prev_name = name;
710 }
711 ovs_mutex_unlock(&pmd->poll_mutex);
712 ds_put_cstr(reply, "\n");
713 }
714}
715
6553d06b
DDP
716static void
717dpif_netdev_pmd_info(struct unixctl_conn *conn, int argc, const char *argv[],
718 void *aux)
719{
720 struct ds reply = DS_EMPTY_INITIALIZER;
721 struct dp_netdev_pmd_thread *pmd;
722 struct dp_netdev *dp = NULL;
723 enum pmd_info_type type = *(enum pmd_info_type *) aux;
724
725 ovs_mutex_lock(&dp_netdev_mutex);
726
727 if (argc == 2) {
728 dp = shash_find_data(&dp_netdevs, argv[1]);
729 } else if (shash_count(&dp_netdevs) == 1) {
730 /* There's only one datapath */
731 dp = shash_first(&dp_netdevs)->data;
732 }
733
734 if (!dp) {
735 ovs_mutex_unlock(&dp_netdev_mutex);
736 unixctl_command_reply_error(conn,
737 "please specify an existing datapath");
738 return;
739 }
740
741 CMAP_FOR_EACH (pmd, node, &dp->poll_threads) {
ce179f11
IM
742 if (type == PMD_INFO_SHOW_RXQ) {
743 pmd_info_show_rxq(&reply, pmd);
744 } else {
745 unsigned long long stats[DP_N_STATS];
746 uint64_t cycles[PMD_N_CYCLES];
747 int i;
6553d06b 748
ce179f11
IM
749 /* Read current stats and cycle counters */
750 for (i = 0; i < ARRAY_SIZE(stats); i++) {
751 atomic_read_relaxed(&pmd->stats.n[i], &stats[i]);
752 }
753 for (i = 0; i < ARRAY_SIZE(cycles); i++) {
754 atomic_read_relaxed(&pmd->cycles.n[i], &cycles[i]);
755 }
6553d06b 756
ce179f11
IM
757 if (type == PMD_INFO_CLEAR_STATS) {
758 pmd_info_clear_stats(&reply, pmd, stats, cycles);
759 } else if (type == PMD_INFO_SHOW_STATS) {
760 pmd_info_show_stats(&reply, pmd, stats, cycles);
761 }
6553d06b
DDP
762 }
763 }
764
765 ovs_mutex_unlock(&dp_netdev_mutex);
766
767 unixctl_command_reply(conn, ds_cstr(&reply));
768 ds_destroy(&reply);
769}
770\f
771static int
772dpif_netdev_init(void)
773{
774 static enum pmd_info_type show_aux = PMD_INFO_SHOW_STATS,
ce179f11
IM
775 clear_aux = PMD_INFO_CLEAR_STATS,
776 poll_aux = PMD_INFO_SHOW_RXQ;
6553d06b
DDP
777
778 unixctl_command_register("dpif-netdev/pmd-stats-show", "[dp]",
779 0, 1, dpif_netdev_pmd_info,
780 (void *)&show_aux);
781 unixctl_command_register("dpif-netdev/pmd-stats-clear", "[dp]",
782 0, 1, dpif_netdev_pmd_info,
783 (void *)&clear_aux);
ce179f11
IM
784 unixctl_command_register("dpif-netdev/pmd-rxq-show", "[dp]",
785 0, 1, dpif_netdev_pmd_info,
786 (void *)&poll_aux);
6553d06b
DDP
787 return 0;
788}
72865317 789
2197d7ab 790static int
2240af25
DDP
791dpif_netdev_enumerate(struct sset *all_dps,
792 const struct dpif_class *dpif_class)
2197d7ab
GL
793{
794 struct shash_node *node;
795
97be1538 796 ovs_mutex_lock(&dp_netdev_mutex);
2197d7ab 797 SHASH_FOR_EACH(node, &dp_netdevs) {
2240af25
DDP
798 struct dp_netdev *dp = node->data;
799 if (dpif_class != dp->class) {
800 /* 'dp_netdevs' contains both "netdev" and "dummy" dpifs.
801 * If the class doesn't match, skip this dpif. */
802 continue;
803 }
2197d7ab
GL
804 sset_add(all_dps, node->name);
805 }
97be1538 806 ovs_mutex_unlock(&dp_netdev_mutex);
5279f8fd 807
2197d7ab
GL
808 return 0;
809}
810
add90f6f
EJ
811static bool
812dpif_netdev_class_is_dummy(const struct dpif_class *class)
813{
814 return class != &dpif_netdev_class;
815}
816
0aeaabc8
JP
817static const char *
818dpif_netdev_port_open_type(const struct dpif_class *class, const char *type)
819{
820 return strcmp(type, "internal") ? type
add90f6f 821 : dpif_netdev_class_is_dummy(class) ? "dummy"
0aeaabc8
JP
822 : "tap";
823}
824
72865317
BP
825static struct dpif *
826create_dpif_netdev(struct dp_netdev *dp)
827{
462278db 828 uint16_t netflow_id = hash_string(dp->name, 0);
72865317 829 struct dpif_netdev *dpif;
72865317 830
6a8267c5 831 ovs_refcount_ref(&dp->ref_cnt);
72865317 832
72865317 833 dpif = xmalloc(sizeof *dpif);
614c4892 834 dpif_init(&dpif->dpif, dp->class, dp->name, netflow_id >> 8, netflow_id);
72865317 835 dpif->dp = dp;
d33ed218 836 dpif->last_port_seq = seq_read(dp->port_seq);
72865317
BP
837
838 return &dpif->dpif;
839}
840
4e022ec0
AW
841/* Choose an unused, non-zero port number and return it on success.
842 * Return ODPP_NONE on failure. */
843static odp_port_t
e44768b7 844choose_port(struct dp_netdev *dp, const char *name)
59e6d833 845 OVS_REQUIRES(dp->port_mutex)
e44768b7 846{
4e022ec0 847 uint32_t port_no;
e44768b7
JP
848
849 if (dp->class != &dpif_netdev_class) {
850 const char *p;
851 int start_no = 0;
852
853 /* If the port name begins with "br", start the number search at
854 * 100 to make writing tests easier. */
855 if (!strncmp(name, "br", 2)) {
856 start_no = 100;
857 }
858
859 /* If the port name contains a number, try to assign that port number.
860 * This can make writing unit tests easier because port numbers are
861 * predictable. */
862 for (p = name; *p != '\0'; p++) {
863 if (isdigit((unsigned char) *p)) {
864 port_no = start_no + strtol(p, NULL, 10);
ff073a71
BP
865 if (port_no > 0 && port_no != odp_to_u32(ODPP_NONE)
866 && !dp_netdev_lookup_port(dp, u32_to_odp(port_no))) {
4e022ec0 867 return u32_to_odp(port_no);
e44768b7
JP
868 }
869 break;
870 }
871 }
872 }
873
ff073a71
BP
874 for (port_no = 1; port_no <= UINT16_MAX; port_no++) {
875 if (!dp_netdev_lookup_port(dp, u32_to_odp(port_no))) {
4e022ec0 876 return u32_to_odp(port_no);
e44768b7
JP
877 }
878 }
879
4e022ec0 880 return ODPP_NONE;
e44768b7
JP
881}
882
72865317 883static int
614c4892
BP
884create_dp_netdev(const char *name, const struct dpif_class *class,
885 struct dp_netdev **dpp)
8a4e3a85 886 OVS_REQUIRES(dp_netdev_mutex)
72865317
BP
887{
888 struct dp_netdev *dp;
889 int error;
72865317 890
462278db 891 dp = xzalloc(sizeof *dp);
8a4e3a85
BP
892 shash_add(&dp_netdevs, name, dp);
893
894 *CONST_CAST(const struct dpif_class **, &dp->class) = class;
895 *CONST_CAST(const char **, &dp->name) = xstrdup(name);
6a8267c5 896 ovs_refcount_init(&dp->ref_cnt);
1a65ba85 897 atomic_flag_clear(&dp->destroyed);
8a4e3a85 898
59e6d833
BP
899 ovs_mutex_init(&dp->port_mutex);
900 cmap_init(&dp->ports);
d33ed218 901 dp->port_seq = seq_create();
6b31e073
RW
902 fat_rwlock_init(&dp->upcall_rwlock);
903
904 /* Disable upcalls by default. */
905 dp_netdev_disable_upcall(dp);
623540e4 906 dp->upcall_aux = NULL;
6b31e073 907 dp->upcall_cb = NULL;
e44768b7 908
65f13b50
AW
909 cmap_init(&dp->poll_threads);
910 ovs_mutex_init_recursive(&dp->non_pmd_mutex);
911 ovsthread_key_create(&dp->per_pmd_key, NULL);
912
f2eee189 913 dp_netdev_set_nonpmd(dp);
65f13b50 914
59e6d833 915 ovs_mutex_lock(&dp->port_mutex);
4e022ec0 916 error = do_add_port(dp, name, "internal", ODPP_LOCAL);
59e6d833 917 ovs_mutex_unlock(&dp->port_mutex);
72865317
BP
918 if (error) {
919 dp_netdev_free(dp);
462278db 920 return error;
72865317
BP
921 }
922
a36de779 923 dp->last_tnl_conf_seq = seq_read(tnl_conf_seq);
462278db 924 *dpp = dp;
72865317
BP
925 return 0;
926}
927
928static int
614c4892 929dpif_netdev_open(const struct dpif_class *class, const char *name,
4a387741 930 bool create, struct dpif **dpifp)
72865317 931{
462278db 932 struct dp_netdev *dp;
5279f8fd 933 int error;
462278db 934
97be1538 935 ovs_mutex_lock(&dp_netdev_mutex);
462278db
BP
936 dp = shash_find_data(&dp_netdevs, name);
937 if (!dp) {
5279f8fd 938 error = create ? create_dp_netdev(name, class, &dp) : ENODEV;
72865317 939 } else {
5279f8fd
BP
940 error = (dp->class != class ? EINVAL
941 : create ? EEXIST
942 : 0);
943 }
944 if (!error) {
945 *dpifp = create_dpif_netdev(dp);
6b31e073 946 dp->dpif = *dpifp;
72865317 947 }
97be1538 948 ovs_mutex_unlock(&dp_netdev_mutex);
462278db 949
5279f8fd 950 return error;
72865317
BP
951}
952
88ace79b
DDP
953static void
954dp_netdev_destroy_upcall_lock(struct dp_netdev *dp)
955 OVS_NO_THREAD_SAFETY_ANALYSIS
956{
957 /* Check that upcalls are disabled, i.e. that the rwlock is taken */
958 ovs_assert(fat_rwlock_tryrdlock(&dp->upcall_rwlock));
959
960 /* Before freeing a lock we should release it */
961 fat_rwlock_unlock(&dp->upcall_rwlock);
962 fat_rwlock_destroy(&dp->upcall_rwlock);
963}
964
8a4e3a85
BP
965/* Requires dp_netdev_mutex so that we can't get a new reference to 'dp'
966 * through the 'dp_netdevs' shash while freeing 'dp'. */
1ba530f4
BP
967static void
968dp_netdev_free(struct dp_netdev *dp)
8a4e3a85 969 OVS_REQUIRES(dp_netdev_mutex)
1ba530f4 970{
59e6d833 971 struct dp_netdev_port *port;
4ad28026 972
8a4e3a85
BP
973 shash_find_and_delete(&dp_netdevs, dp->name);
974
65f13b50
AW
975 dp_netdev_destroy_all_pmds(dp);
976 ovs_mutex_destroy(&dp->non_pmd_mutex);
977 ovsthread_key_delete(dp->per_pmd_key);
6c3eee82 978
59e6d833 979 ovs_mutex_lock(&dp->port_mutex);
a532e683 980 CMAP_FOR_EACH (port, node, &dp->ports) {
d916785c 981 /* PMD threads are destroyed here. do_del_port() cannot quiesce */
c40b890f 982 do_del_port(dp, port);
1ba530f4 983 }
59e6d833 984 ovs_mutex_unlock(&dp->port_mutex);
d916785c 985 cmap_destroy(&dp->poll_threads);
51852a57 986
d33ed218 987 seq_destroy(dp->port_seq);
59e6d833 988 cmap_destroy(&dp->ports);
88ace79b
DDP
989
990 /* Upcalls must be disabled at this point */
991 dp_netdev_destroy_upcall_lock(dp);
9bbf1c3d 992
f2eee189 993 free(dp->pmd_cmask);
8a4e3a85 994 free(CONST_CAST(char *, dp->name));
72865317
BP
995 free(dp);
996}
997
8a4e3a85
BP
998static void
999dp_netdev_unref(struct dp_netdev *dp)
1000{
1001 if (dp) {
1002 /* Take dp_netdev_mutex so that, if dp->ref_cnt falls to zero, we can't
1003 * get a new reference to 'dp' through the 'dp_netdevs' shash. */
1004 ovs_mutex_lock(&dp_netdev_mutex);
24f83812 1005 if (ovs_refcount_unref_relaxed(&dp->ref_cnt) == 1) {
8a4e3a85
BP
1006 dp_netdev_free(dp);
1007 }
1008 ovs_mutex_unlock(&dp_netdev_mutex);
1009 }
1010}
1011
72865317
BP
1012static void
1013dpif_netdev_close(struct dpif *dpif)
1014{
1015 struct dp_netdev *dp = get_dp_netdev(dpif);
5279f8fd 1016
8a4e3a85 1017 dp_netdev_unref(dp);
72865317
BP
1018 free(dpif);
1019}
1020
1021static int
7dab847a 1022dpif_netdev_destroy(struct dpif *dpif)
72865317
BP
1023{
1024 struct dp_netdev *dp = get_dp_netdev(dpif);
5279f8fd 1025
6a8267c5 1026 if (!atomic_flag_test_and_set(&dp->destroyed)) {
24f83812 1027 if (ovs_refcount_unref_relaxed(&dp->ref_cnt) == 1) {
6a8267c5
BP
1028 /* Can't happen: 'dpif' still owns a reference to 'dp'. */
1029 OVS_NOT_REACHED();
1030 }
1031 }
5279f8fd 1032
72865317
BP
1033 return 0;
1034}
1035
eb94da30
DDP
1036/* Add 'n' to the atomic variable 'var' non-atomically and using relaxed
1037 * load/store semantics. While the increment is not atomic, the load and
1038 * store operations are, making it impossible to read inconsistent values.
1039 *
1040 * This is used to update thread local stats counters. */
1041static void
1042non_atomic_ullong_add(atomic_ullong *var, unsigned long long n)
1043{
1044 unsigned long long tmp;
1045
1046 atomic_read_relaxed(var, &tmp);
1047 tmp += n;
1048 atomic_store_relaxed(var, tmp);
1049}
1050
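/* Editor's note: an illustrative sketch (not part of the original file) of
 * the helper above in use: a pmd thread is the only writer of its own
 * 'stats', so the non-atomic read-modify-write is safe there. */
static inline void
count_emc_hits_sketch(struct dp_netdev_pmd_thread *pmd, int cnt)
{
    non_atomic_ullong_add(&pmd->stats.n[DP_STAT_EXACT_HIT], cnt);
}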
72865317 1051static int
a8d9304d 1052dpif_netdev_get_stats(const struct dpif *dpif, struct dpif_dp_stats *stats)
72865317
BP
1053{
1054 struct dp_netdev *dp = get_dp_netdev(dpif);
1c1e46ed 1055 struct dp_netdev_pmd_thread *pmd;
8a4e3a85 1056
1c1e46ed
AW
1057 stats->n_flows = stats->n_hit = stats->n_missed = stats->n_lost = 0;
1058 CMAP_FOR_EACH (pmd, node, &dp->poll_threads) {
eb94da30 1059 unsigned long long n;
1c1e46ed 1060 stats->n_flows += cmap_count(&pmd->flow_table);
eb94da30 1061
abcf3ef4
DDP
1062 atomic_read_relaxed(&pmd->stats.n[DP_STAT_MASKED_HIT], &n);
1063 stats->n_hit += n;
1064 atomic_read_relaxed(&pmd->stats.n[DP_STAT_EXACT_HIT], &n);
eb94da30
DDP
1065 stats->n_hit += n;
1066 atomic_read_relaxed(&pmd->stats.n[DP_STAT_MISS], &n);
1067 stats->n_missed += n;
1068 atomic_read_relaxed(&pmd->stats.n[DP_STAT_LOST], &n);
1069 stats->n_lost += n;
51852a57 1070 }
1ce3fa06 1071 stats->n_masks = UINT32_MAX;
847108dc 1072 stats->n_mask_hit = UINT64_MAX;
5279f8fd 1073
72865317
BP
1074 return 0;
1075}
1076
e4cfed38 1077static void
65f13b50 1078dp_netdev_reload_pmd__(struct dp_netdev_pmd_thread *pmd)
e4cfed38 1079{
65f13b50
AW
1080 int old_seq;
1081
accf8626
AW
1082 if (pmd->core_id == NON_PMD_CORE_ID) {
1083 return;
1084 }
1085
1086 ovs_mutex_lock(&pmd->cond_mutex);
65f13b50 1087 atomic_add_relaxed(&pmd->change_seq, 1, &old_seq);
accf8626
AW
1088 ovs_mutex_cond_wait(&pmd->cond, &pmd->cond_mutex);
1089 ovs_mutex_unlock(&pmd->cond_mutex);
65f13b50 1090}
e4cfed38 1091
59e6d833
BP
1092static uint32_t
1093hash_port_no(odp_port_t port_no)
1094{
1095 return hash_int(odp_to_u32(port_no), 0);
1096}
1097
72865317 1098static int
c3827f61 1099do_add_port(struct dp_netdev *dp, const char *devname, const char *type,
4e022ec0 1100 odp_port_t port_no)
59e6d833 1101 OVS_REQUIRES(dp->port_mutex)
72865317 1102{
4b609110 1103 struct netdev_saved_flags *sf;
72865317
BP
1104 struct dp_netdev_port *port;
1105 struct netdev *netdev;
2499a8ce 1106 enum netdev_flags flags;
0cbfe35d 1107 const char *open_type;
d17f4f08
DDP
1108 int error = 0;
1109 int i, n_open_rxqs = 0;
72865317 1110
17050610
BP
1111 /* Reject devices already in 'dp'. */
1112 if (!get_port_by_name(dp, devname, &port)) {
d17f4f08
DDP
1113 error = EEXIST;
1114 goto out;
17050610 1115 }
72865317
BP
1116
1117 /* Open and validate network device. */
0aeaabc8 1118 open_type = dpif_netdev_port_open_type(dp->class, type);
0cbfe35d 1119 error = netdev_open(devname, open_type, &netdev);
72865317 1120 if (error) {
d17f4f08 1121 goto out;
72865317 1122 }
72865317
BP
1123 /* XXX reject non-Ethernet devices */
1124
2499a8ce
AC
1125 netdev_get_flags(netdev, &flags);
1126 if (flags & NETDEV_LOOPBACK) {
1127 VLOG_ERR("%s: cannot add a loopback device", devname);
d17f4f08
DDP
1128 error = EINVAL;
1129 goto out_close;
2499a8ce
AC
1130 }
1131
5a034064
AW
1132 if (netdev_is_pmd(netdev)) {
1133 int n_cores = ovs_numa_get_n_cores();
1134
1135 if (n_cores == OVS_CORE_UNSPEC) {
1136 VLOG_ERR("%s, cannot get cpu core info", devname);
d17f4f08
DDP
1137 error = ENOENT;
1138 goto out_close;
5a034064
AW
1139 }
1140 /* There can only be ovs_numa_get_n_cores() pmd threads,
3bcc10c0
DDP
 1141 * so create a txq for each, and one extra for the non-pmd
 1142 * threads. */
a14b8947
IM
1143 error = netdev_set_multiq(netdev, n_cores + 1,
1144 netdev_requested_n_rxq(netdev));
7251515e 1145 if (error && (error != EOPNOTSUPP)) {
5a034064 1146 VLOG_ERR("%s, cannot set multiq", devname);
d17f4f08 1147 goto out_close;
5a034064
AW
1148 }
1149 }
e4cfed38 1150 port = xzalloc(sizeof *port);
35303d71 1151 port->port_no = port_no;
e4cfed38 1152 port->netdev = netdev;
490e82af
DDP
1153 port->n_rxq = netdev_n_rxq(netdev);
1154 port->rxq = xmalloc(sizeof *port->rxq * port->n_rxq);
e4cfed38 1155 port->type = xstrdup(type);
a14b8947 1156 port->latest_requested_n_rxq = netdev_requested_n_rxq(netdev);
d17f4f08 1157
490e82af 1158 for (i = 0; i < port->n_rxq; i++) {
55c955bd 1159 error = netdev_rxq_open(netdev, &port->rxq[i], i);
d17f4f08 1160 if (error) {
55c955bd
PS
1161 VLOG_ERR("%s: cannot receive packets on this network device (%s)",
 1162 devname, ovs_strerror(error));
d17f4f08 1163 goto out_rxq_close;
55c955bd 1164 }
d17f4f08 1165 n_open_rxqs++;
7b6b0ef4
BP
1166 }
1167
4b609110 1168 error = netdev_turn_flags_on(netdev, NETDEV_PROMISC, &sf);
72865317 1169 if (error) {
d17f4f08 1170 goto out_rxq_close;
72865317 1171 }
4b609110 1172 port->sf = sf;
e4cfed38 1173
f7d63652
AW
1174 cmap_insert(&dp->ports, &port->node, hash_port_no(port_no));
1175
e4cfed38 1176 if (netdev_is_pmd(netdev)) {
cc245ce8 1177 dp_netdev_add_port_to_pmds(dp, port);
e4cfed38 1178 }
d33ed218 1179 seq_change(dp->port_seq);
72865317
BP
1180
1181 return 0;
d17f4f08
DDP
1182
1183out_rxq_close:
1184 for (i = 0; i < n_open_rxqs; i++) {
1185 netdev_rxq_close(port->rxq[i]);
1186 }
1187 free(port->type);
1188 free(port->rxq);
1189 free(port);
1190out_close:
1191 netdev_close(netdev);
1192out:
1193 return error;
72865317
BP
1194}
1195
247527db
BP
1196static int
1197dpif_netdev_port_add(struct dpif *dpif, struct netdev *netdev,
4e022ec0 1198 odp_port_t *port_nop)
247527db
BP
1199{
1200 struct dp_netdev *dp = get_dp_netdev(dpif);
3aa30359
BP
1201 char namebuf[NETDEV_VPORT_NAME_BUFSIZE];
1202 const char *dpif_port;
4e022ec0 1203 odp_port_t port_no;
5279f8fd 1204 int error;
247527db 1205
59e6d833 1206 ovs_mutex_lock(&dp->port_mutex);
3aa30359 1207 dpif_port = netdev_vport_get_dpif_port(netdev, namebuf, sizeof namebuf);
4e022ec0 1208 if (*port_nop != ODPP_NONE) {
ff073a71
BP
1209 port_no = *port_nop;
1210 error = dp_netdev_lookup_port(dp, *port_nop) ? EBUSY : 0;
232dfa4a 1211 } else {
3aa30359 1212 port_no = choose_port(dp, dpif_port);
5279f8fd 1213 error = port_no == ODPP_NONE ? EFBIG : 0;
232dfa4a 1214 }
5279f8fd 1215 if (!error) {
247527db 1216 *port_nop = port_no;
5279f8fd 1217 error = do_add_port(dp, dpif_port, netdev_get_type(netdev), port_no);
247527db 1218 }
59e6d833 1219 ovs_mutex_unlock(&dp->port_mutex);
5279f8fd
BP
1220
1221 return error;
72865317
BP
1222}
1223
1224static int
4e022ec0 1225dpif_netdev_port_del(struct dpif *dpif, odp_port_t port_no)
72865317
BP
1226{
1227 struct dp_netdev *dp = get_dp_netdev(dpif);
5279f8fd
BP
1228 int error;
1229
59e6d833 1230 ovs_mutex_lock(&dp->port_mutex);
c40b890f
BP
1231 if (port_no == ODPP_LOCAL) {
1232 error = EINVAL;
1233 } else {
1234 struct dp_netdev_port *port;
1235
1236 error = get_port_by_number(dp, port_no, &port);
1237 if (!error) {
1238 do_del_port(dp, port);
1239 }
1240 }
59e6d833 1241 ovs_mutex_unlock(&dp->port_mutex);
5279f8fd
BP
1242
1243 return error;
72865317
BP
1244}
1245
1246static bool
4e022ec0 1247is_valid_port_number(odp_port_t port_no)
72865317 1248{
ff073a71
BP
1249 return port_no != ODPP_NONE;
1250}
1251
1252static struct dp_netdev_port *
1253dp_netdev_lookup_port(const struct dp_netdev *dp, odp_port_t port_no)
1254{
1255 struct dp_netdev_port *port;
1256
59e6d833 1257 CMAP_FOR_EACH_WITH_HASH (port, node, hash_port_no(port_no), &dp->ports) {
35303d71 1258 if (port->port_no == port_no) {
ff073a71
BP
1259 return port;
1260 }
1261 }
1262 return NULL;
72865317
BP
1263}
1264
1265static int
1266get_port_by_number(struct dp_netdev *dp,
4e022ec0 1267 odp_port_t port_no, struct dp_netdev_port **portp)
72865317
BP
1268{
1269 if (!is_valid_port_number(port_no)) {
1270 *portp = NULL;
1271 return EINVAL;
1272 } else {
ff073a71 1273 *portp = dp_netdev_lookup_port(dp, port_no);
72865317
BP
1274 return *portp ? 0 : ENOENT;
1275 }
1276}
1277
b284085e 1278static void
62453dad 1279port_destroy(struct dp_netdev_port *port)
b284085e 1280{
62453dad
DDP
1281 if (!port) {
1282 return;
b284085e 1283 }
b284085e 1284
62453dad
DDP
1285 netdev_close(port->netdev);
1286 netdev_restore_flags(port->sf);
accf8626 1287
62453dad
DDP
1288 for (unsigned i = 0; i < port->n_rxq; i++) {
1289 netdev_rxq_close(port->rxq[i]);
b284085e 1290 }
62453dad
DDP
1291
1292 free(port->rxq);
1293 free(port->type);
1294 free(port);
b284085e
PS
1295}
1296
72865317
BP
1297static int
1298get_port_by_name(struct dp_netdev *dp,
1299 const char *devname, struct dp_netdev_port **portp)
59e6d833 1300 OVS_REQUIRES(dp->port_mutex)
72865317
BP
1301{
1302 struct dp_netdev_port *port;
1303
a532e683 1304 CMAP_FOR_EACH (port, node, &dp->ports) {
3efb6063 1305 if (!strcmp(netdev_get_name(port->netdev), devname)) {
72865317
BP
1306 *portp = port;
1307 return 0;
1308 }
1309 }
1310 return ENOENT;
1311}
1312
347ba9bb
IM
1313static int
1314get_n_pmd_threads(struct dp_netdev *dp)
1315{
 1316 /* There is one non-pmd thread in dp->poll_threads. */
1317 return cmap_count(&dp->poll_threads) - 1;
1318}
1319
65f13b50
AW
1320static int
1321get_n_pmd_threads_on_numa(struct dp_netdev *dp, int numa_id)
1322{
1323 struct dp_netdev_pmd_thread *pmd;
1324 int n_pmds = 0;
1325
1326 CMAP_FOR_EACH (pmd, node, &dp->poll_threads) {
1327 if (pmd->numa_id == numa_id) {
1328 n_pmds++;
1329 }
1330 }
1331
1332 return n_pmds;
1333}
1334
1335/* Returns 'true' if there is a port with pmd netdev and the netdev
1336 * is on numa node 'numa_id'. */
1337static bool
1338has_pmd_port_for_numa(struct dp_netdev *dp, int numa_id)
1339{
1340 struct dp_netdev_port *port;
1341
1342 CMAP_FOR_EACH (port, node, &dp->ports) {
1343 if (netdev_is_pmd(port->netdev)
1344 && netdev_get_numa_id(port->netdev) == numa_id) {
1345 return true;
1346 }
1347 }
1348
1349 return false;
1350}
1351
1352
c40b890f
BP
1353static void
1354do_del_port(struct dp_netdev *dp, struct dp_netdev_port *port)
59e6d833 1355 OVS_REQUIRES(dp->port_mutex)
72865317 1356{
35303d71 1357 cmap_remove(&dp->ports, &port->node, hash_odp_port(port->port_no));
d33ed218 1358 seq_change(dp->port_seq);
e4cfed38 1359 if (netdev_is_pmd(port->netdev)) {
65f13b50
AW
1360 int numa_id = netdev_get_numa_id(port->netdev);
1361
ae7ad0a1
IM
 1362 /* PMD threads cannot be on an invalid numa node. */
1363 ovs_assert(ovs_numa_numa_id_is_valid(numa_id));
65f13b50 1364 /* If there is no netdev on the numa node, delete the pmd threads
ae7ad0a1 1365 * for that numa node. Otherwise, delete the port's queues from the polling lists. */
65f13b50
AW
1366 if (!has_pmd_port_for_numa(dp, numa_id)) {
1367 dp_netdev_del_pmds_on_numa(dp, numa_id);
ae7ad0a1 1368 } else {
cc245ce8 1369 dp_netdev_del_port_from_all_pmds(dp, port);
65f13b50 1370 }
e4cfed38 1371 }
72865317 1372
62453dad 1373 port_destroy(port);
72865317
BP
1374}
1375
1376static void
4c738a8d
BP
1377answer_port_query(const struct dp_netdev_port *port,
1378 struct dpif_port *dpif_port)
72865317 1379{
3efb6063 1380 dpif_port->name = xstrdup(netdev_get_name(port->netdev));
0cbfe35d 1381 dpif_port->type = xstrdup(port->type);
35303d71 1382 dpif_port->port_no = port->port_no;
72865317
BP
1383}
1384
1385static int
4e022ec0 1386dpif_netdev_port_query_by_number(const struct dpif *dpif, odp_port_t port_no,
4c738a8d 1387 struct dpif_port *dpif_port)
72865317
BP
1388{
1389 struct dp_netdev *dp = get_dp_netdev(dpif);
1390 struct dp_netdev_port *port;
1391 int error;
1392
1393 error = get_port_by_number(dp, port_no, &port);
4afba28d 1394 if (!error && dpif_port) {
4c738a8d 1395 answer_port_query(port, dpif_port);
72865317 1396 }
5279f8fd 1397
72865317
BP
1398 return error;
1399}
1400
1401static int
1402dpif_netdev_port_query_by_name(const struct dpif *dpif, const char *devname,
4c738a8d 1403 struct dpif_port *dpif_port)
72865317
BP
1404{
1405 struct dp_netdev *dp = get_dp_netdev(dpif);
1406 struct dp_netdev_port *port;
1407 int error;
1408
59e6d833 1409 ovs_mutex_lock(&dp->port_mutex);
72865317 1410 error = get_port_by_name(dp, devname, &port);
4afba28d 1411 if (!error && dpif_port) {
4c738a8d 1412 answer_port_query(port, dpif_port);
72865317 1413 }
59e6d833 1414 ovs_mutex_unlock(&dp->port_mutex);
5279f8fd 1415
72865317
BP
1416 return error;
1417}
1418
61e7deb1
BP
1419static void
1420dp_netdev_flow_free(struct dp_netdev_flow *flow)
1421{
61e7deb1 1422 dp_netdev_actions_free(dp_netdev_flow_get_actions(flow));
61e7deb1
BP
1423 free(flow);
1424}
1425
ed79f89a
DDP
1426static void dp_netdev_flow_unref(struct dp_netdev_flow *flow)
1427{
1428 if (ovs_refcount_unref_relaxed(&flow->ref_cnt) == 1) {
1429 ovsrcu_postpone(dp_netdev_flow_free, flow);
1430 }
1431}
1432
70e5ed6f
JS
1433static uint32_t
1434dp_netdev_flow_hash(const ovs_u128 *ufid)
1435{
1436 return ufid->u32[0];
1437}
1438
72865317 1439static void
1c1e46ed
AW
1440dp_netdev_pmd_remove_flow(struct dp_netdev_pmd_thread *pmd,
1441 struct dp_netdev_flow *flow)
1442 OVS_REQUIRES(pmd->flow_mutex)
72865317 1443{
9f361d6b 1444 struct cmap_node *node = CONST_CAST(struct cmap_node *, &flow->node);
2c0ea78f 1445
1c1e46ed
AW
1446 dpcls_remove(&pmd->cls, &flow->cr);
1447 cmap_remove(&pmd->flow_table, node, dp_netdev_flow_hash(&flow->ufid));
9bbf1c3d 1448 flow->dead = true;
ed79f89a
DDP
1449
1450 dp_netdev_flow_unref(flow);
72865317
BP
1451}
1452
1453static void
1c1e46ed 1454dp_netdev_pmd_flow_flush(struct dp_netdev_pmd_thread *pmd)
72865317 1455{
78c8df12 1456 struct dp_netdev_flow *netdev_flow;
72865317 1457
1c1e46ed
AW
1458 ovs_mutex_lock(&pmd->flow_mutex);
1459 CMAP_FOR_EACH (netdev_flow, node, &pmd->flow_table) {
1460 dp_netdev_pmd_remove_flow(pmd, netdev_flow);
72865317 1461 }
1c1e46ed 1462 ovs_mutex_unlock(&pmd->flow_mutex);
72865317
BP
1463}
1464
1465static int
1466dpif_netdev_flow_flush(struct dpif *dpif)
1467{
1468 struct dp_netdev *dp = get_dp_netdev(dpif);
1c1e46ed
AW
1469 struct dp_netdev_pmd_thread *pmd;
1470
1471 CMAP_FOR_EACH (pmd, node, &dp->poll_threads) {
1472 dp_netdev_pmd_flow_flush(pmd);
1473 }
5279f8fd 1474
72865317
BP
1475 return 0;
1476}
1477
b0ec0f27 1478struct dp_netdev_port_state {
59e6d833 1479 struct cmap_position position;
4c738a8d 1480 char *name;
b0ec0f27
BP
1481};
1482
1483static int
1484dpif_netdev_port_dump_start(const struct dpif *dpif OVS_UNUSED, void **statep)
1485{
1486 *statep = xzalloc(sizeof(struct dp_netdev_port_state));
1487 return 0;
1488}
1489
72865317 1490static int
b0ec0f27 1491dpif_netdev_port_dump_next(const struct dpif *dpif, void *state_,
4c738a8d 1492 struct dpif_port *dpif_port)
72865317 1493{
b0ec0f27 1494 struct dp_netdev_port_state *state = state_;
72865317 1495 struct dp_netdev *dp = get_dp_netdev(dpif);
59e6d833 1496 struct cmap_node *node;
ff073a71 1497 int retval;
72865317 1498
59e6d833 1499 node = cmap_next_position(&dp->ports, &state->position);
ff073a71
BP
1500 if (node) {
1501 struct dp_netdev_port *port;
5279f8fd 1502
ff073a71
BP
1503 port = CONTAINER_OF(node, struct dp_netdev_port, node);
1504
1505 free(state->name);
1506 state->name = xstrdup(netdev_get_name(port->netdev));
1507 dpif_port->name = state->name;
1508 dpif_port->type = port->type;
35303d71 1509 dpif_port->port_no = port->port_no;
ff073a71
BP
1510
1511 retval = 0;
1512 } else {
1513 retval = EOF;
72865317 1514 }
5279f8fd 1515
ff073a71 1516 return retval;
b0ec0f27
BP
1517}
1518
1519static int
4c738a8d 1520dpif_netdev_port_dump_done(const struct dpif *dpif OVS_UNUSED, void *state_)
b0ec0f27 1521{
4c738a8d
BP
1522 struct dp_netdev_port_state *state = state_;
1523 free(state->name);
b0ec0f27
BP
1524 free(state);
1525 return 0;
72865317
BP
1526}
1527
1528static int
67a4917b 1529dpif_netdev_port_poll(const struct dpif *dpif_, char **devnamep OVS_UNUSED)
72865317
BP
1530{
1531 struct dpif_netdev *dpif = dpif_netdev_cast(dpif_);
d33ed218 1532 uint64_t new_port_seq;
5279f8fd
BP
1533 int error;
1534
d33ed218
BP
1535 new_port_seq = seq_read(dpif->dp->port_seq);
1536 if (dpif->last_port_seq != new_port_seq) {
1537 dpif->last_port_seq = new_port_seq;
5279f8fd 1538 error = ENOBUFS;
72865317 1539 } else {
5279f8fd 1540 error = EAGAIN;
72865317 1541 }
5279f8fd
BP
1542
1543 return error;
72865317
BP
1544}
1545
1546static void
1547dpif_netdev_port_poll_wait(const struct dpif *dpif_)
1548{
1549 struct dpif_netdev *dpif = dpif_netdev_cast(dpif_);
5279f8fd 1550
d33ed218 1551 seq_wait(dpif->dp->port_seq, dpif->last_port_seq);
8a4e3a85
BP
1552}
1553
1554static struct dp_netdev_flow *
0de8783a 1555dp_netdev_flow_cast(const struct dpcls_rule *cr)
8a4e3a85
BP
1556{
1557 return cr ? CONTAINER_OF(cr, struct dp_netdev_flow, cr) : NULL;
72865317
BP
1558}
1559
9bbf1c3d
DDP
1560static bool dp_netdev_flow_ref(struct dp_netdev_flow *flow)
1561{
1562 return ovs_refcount_try_ref_rcu(&flow->ref_cnt);
1563}
1564
79df317f
DDP
1565/* netdev_flow_key utilities.
1566 *
1567 * netdev_flow_key is basically a miniflow. We use these functions
1568 * (netdev_flow_key_clone, netdev_flow_key_equal, ...) instead of the miniflow
1569 * functions (miniflow_clone_inline, miniflow_equal, ...), because:
1570 *
1571 * - Since we are dealing exclusively with miniflows created by
1572 * miniflow_extract(), if the map is different the miniflow is different.
1573 * Therefore we can be faster by comparing the map and the miniflow in a
1574 * single memcmp().
5fcff47b 1575 * - These functions can be inlined by the compiler. */
79df317f 1576
361d808d 1577/* Given the number of bits set in miniflow's maps, returns the size of the
caeb4906 1578 * 'netdev_flow_key.mf'. */
361d808d
JR
1579static inline size_t
1580netdev_flow_key_size(size_t flow_u64s)
79df317f 1581{
361d808d 1582 return sizeof(struct miniflow) + MINIFLOW_VALUES_SIZE(flow_u64s);
79df317f
DDP
1583}
1584
79df317f
DDP
1585static inline bool
1586netdev_flow_key_equal(const struct netdev_flow_key *a,
0de8783a
JR
1587 const struct netdev_flow_key *b)
1588{
caeb4906
JR
 1589 /* 'b->len' may not be set yet. */
1590 return a->hash == b->hash && !memcmp(&a->mf, &b->mf, a->len);
0de8783a
JR
1591}
1592
1593/* Used to compare 'netdev_flow_key' in the exact match cache to a miniflow.
d79a39fe 1594 * The maps are compared bitwise, so both 'key->mf' and 'mf' must have been
0de8783a
JR
1595 * generated by miniflow_extract. */
1596static inline bool
1597netdev_flow_key_equal_mf(const struct netdev_flow_key *key,
1598 const struct miniflow *mf)
79df317f 1599{
caeb4906 1600 return !memcmp(&key->mf, mf, key->len);
79df317f
DDP
1601}
1602
1603static inline void
1604netdev_flow_key_clone(struct netdev_flow_key *dst,
0de8783a
JR
1605 const struct netdev_flow_key *src)
1606{
caeb4906
JR
1607 memcpy(dst, src,
1608 offsetof(struct netdev_flow_key, mf) + src->len);
0de8783a
JR
1609}
1610
1611/* Slow. */
1612static void
1613netdev_flow_key_from_flow(struct netdev_flow_key *dst,
1614 const struct flow *src)
1615{
cf62fa4c 1616 struct dp_packet packet;
0de8783a 1617 uint64_t buf_stub[512 / 8];
0de8783a 1618
cf62fa4c
PS
1619 dp_packet_use_stub(&packet, buf_stub, sizeof buf_stub);
1620 pkt_metadata_from_flow(&packet.md, src);
0de8783a 1621 flow_compose(&packet, src);
cf62fa4c
PS
1622 miniflow_extract(&packet, &dst->mf);
1623 dp_packet_uninit(&packet);
0de8783a 1624
361d808d 1625 dst->len = netdev_flow_key_size(miniflow_n_values(&dst->mf));
0de8783a
JR
1626 dst->hash = 0; /* Not computed yet. */
1627}
1628
1629/* Initialize a netdev_flow_key 'mask' from 'match'. */
1630static inline void
1631netdev_flow_mask_init(struct netdev_flow_key *mask,
1632 const struct match *match)
1633{
09b0fa9c 1634 uint64_t *dst = miniflow_values(&mask->mf);
5fcff47b 1635 struct flowmap fmap;
0de8783a 1636 uint32_t hash = 0;
5fcff47b 1637 size_t idx;
0de8783a
JR
1638
1639 /* Only check masks that make sense for the flow. */
5fcff47b
JR
1640 flow_wc_map(&match->flow, &fmap);
1641 flowmap_init(&mask->mf.map);
0de8783a 1642
5fcff47b
JR
1643 FLOWMAP_FOR_EACH_INDEX(idx, fmap) {
1644 uint64_t mask_u64 = flow_u64_value(&match->wc.masks, idx);
0de8783a 1645
5fcff47b
JR
1646 if (mask_u64) {
1647 flowmap_set(&mask->mf.map, idx, 1);
1648 *dst++ = mask_u64;
1649 hash = hash_add64(hash, mask_u64);
0de8783a 1650 }
0de8783a
JR
1651 }
1652
5fcff47b 1653 map_t map;
0de8783a 1654
5fcff47b
JR
1655 FLOWMAP_FOR_EACH_MAP (map, mask->mf.map) {
1656 hash = hash_add64(hash, map);
1657 }
0de8783a 1658
5fcff47b 1659 size_t n = dst - miniflow_get_values(&mask->mf);
0de8783a 1660
d70e8c28 1661 mask->hash = hash_finish(hash, n * 8);
0de8783a
JR
1662 mask->len = netdev_flow_key_size(n);
1663}
1664
361d808d 1665/* Initializes 'dst' as a copy of 'flow' masked with 'mask'. */
0de8783a
JR
1666static inline void
1667netdev_flow_key_init_masked(struct netdev_flow_key *dst,
1668 const struct flow *flow,
1669 const struct netdev_flow_key *mask)
79df317f 1670{
09b0fa9c
JR
1671 uint64_t *dst_u64 = miniflow_values(&dst->mf);
1672 const uint64_t *mask_u64 = miniflow_get_values(&mask->mf);
0de8783a 1673 uint32_t hash = 0;
d70e8c28 1674 uint64_t value;
0de8783a
JR
1675
1676 dst->len = mask->len;
361d808d 1677 dst->mf = mask->mf; /* Copy maps. */
0de8783a 1678
5fcff47b 1679 FLOW_FOR_EACH_IN_MAPS(value, flow, mask->mf.map) {
d70e8c28
JR
1680 *dst_u64 = value & *mask_u64++;
1681 hash = hash_add64(hash, *dst_u64++);
0de8783a 1682 }
09b0fa9c
JR
1683 dst->hash = hash_finish(hash,
1684 (dst_u64 - miniflow_get_values(&dst->mf)) * 8);
0de8783a
JR
1685}
1686
5fcff47b
JR
1687/* Iterate through netdev_flow_key TNL u64 values specified by 'FLOWMAP'. */
1688#define NETDEV_FLOW_KEY_FOR_EACH_IN_FLOWMAP(VALUE, KEY, FLOWMAP) \
1689 MINIFLOW_FOR_EACH_IN_FLOWMAP(VALUE, &(KEY)->mf, FLOWMAP)
0de8783a
JR
1690
1691/* Returns a hash value for the bits of 'key' where there are 1-bits in
1692 * 'mask'. */
1693static inline uint32_t
1694netdev_flow_key_hash_in_mask(const struct netdev_flow_key *key,
1695 const struct netdev_flow_key *mask)
1696{
09b0fa9c 1697 const uint64_t *p = miniflow_get_values(&mask->mf);
0de8783a 1698 uint32_t hash = 0;
5fcff47b 1699 uint64_t value;
0de8783a 1700
5fcff47b
JR
1701 NETDEV_FLOW_KEY_FOR_EACH_IN_FLOWMAP(value, key, mask->mf.map) {
1702 hash = hash_add64(hash, value & *p++);
0de8783a
JR
1703 }
1704
09b0fa9c 1705 return hash_finish(hash, (p - miniflow_get_values(&mask->mf)) * 8);
79df317f
DDP
1706}
1707
9bbf1c3d
DDP
1708static inline bool
1709emc_entry_alive(struct emc_entry *ce)
1710{
1711 return ce->flow && !ce->flow->dead;
1712}
1713
1714static void
1715emc_clear_entry(struct emc_entry *ce)
1716{
1717 if (ce->flow) {
1718 dp_netdev_flow_unref(ce->flow);
1719 ce->flow = NULL;
1720 }
1721}
1722
1723static inline void
1724emc_change_entry(struct emc_entry *ce, struct dp_netdev_flow *flow,
0de8783a 1725 const struct netdev_flow_key *key)
9bbf1c3d
DDP
1726{
1727 if (ce->flow != flow) {
1728 if (ce->flow) {
1729 dp_netdev_flow_unref(ce->flow);
1730 }
1731
1732 if (dp_netdev_flow_ref(flow)) {
1733 ce->flow = flow;
1734 } else {
1735 ce->flow = NULL;
1736 }
1737 }
0de8783a
JR
1738 if (key) {
1739 netdev_flow_key_clone(&ce->key, key);
9bbf1c3d
DDP
1740 }
1741}
1742
1743static inline void
0de8783a 1744emc_insert(struct emc_cache *cache, const struct netdev_flow_key *key,
9bbf1c3d
DDP
1745 struct dp_netdev_flow *flow)
1746{
1747 struct emc_entry *to_be_replaced = NULL;
1748 struct emc_entry *current_entry;
1749
0de8783a
JR
1750 EMC_FOR_EACH_POS_WITH_HASH(cache, current_entry, key->hash) {
1751 if (netdev_flow_key_equal(&current_entry->key, key)) {
9bbf1c3d 1752 /* We found the entry with the 'mf' miniflow */
0de8783a 1753 emc_change_entry(current_entry, flow, NULL);
9bbf1c3d
DDP
1754 return;
1755 }
1756
1757 /* Replacement policy: put the flow in an empty (not alive) entry, or
1758 * failing that, in the entry with the smallest key hash. */
1759 if (!to_be_replaced
1760 || (emc_entry_alive(to_be_replaced)
1761 && !emc_entry_alive(current_entry))
0de8783a 1762 || current_entry->key.hash < to_be_replaced->key.hash) {
9bbf1c3d
DDP
1763 to_be_replaced = current_entry;
1764 }
1765 }
1766 /* We didn't find the miniflow in the cache.
1767 * The 'to_be_replaced' entry is where the new flow will be stored. */
1768
0de8783a 1769 emc_change_entry(to_be_replaced, flow, key);
9bbf1c3d
DDP
1770}
1771
1772static inline struct dp_netdev_flow *
0de8783a 1773emc_lookup(struct emc_cache *cache, const struct netdev_flow_key *key)
9bbf1c3d
DDP
1774{
1775 struct emc_entry *current_entry;
1776
0de8783a
JR
1777 EMC_FOR_EACH_POS_WITH_HASH(cache, current_entry, key->hash) {
1778 if (current_entry->key.hash == key->hash
1779 && emc_entry_alive(current_entry)
1780 && netdev_flow_key_equal_mf(&current_entry->key, &key->mf)) {
9bbf1c3d 1781
0de8783a 1782 /* We found the entry with the 'key->mf' miniflow */
9bbf1c3d
DDP
1783 return current_entry->flow;
1784 }
1785 }
1786
1787 return NULL;
1788}
1789
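
/* Illustrative sketch (not part of the upstream file): how emc_lookup() and
 * emc_insert() are expected to bracket the slower classifier on the fast
 * path, assuming 'key' was built by miniflow_extract() and 'key->hash' is
 * already set. The helper name is hypothetical. */
static inline struct dp_netdev_flow *
emc_lookup_or_classify(struct dp_netdev_pmd_thread *pmd,
                       struct emc_cache *cache,
                       const struct netdev_flow_key *key)
{
    struct dp_netdev_flow *flow = emc_lookup(cache, key);

    if (OVS_UNLIKELY(!flow)) {
        /* EMC miss: consult the megaflow classifier, then cache the result
         * so the next packet with the same miniflow hits the EMC. */
        flow = dp_netdev_pmd_lookup_flow(pmd, key);
        if (flow) {
            emc_insert(cache, key, flow);
        }
    }
    return flow;
}
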
72865317 1790static struct dp_netdev_flow *
1c1e46ed
AW
1791dp_netdev_pmd_lookup_flow(const struct dp_netdev_pmd_thread *pmd,
1792 const struct netdev_flow_key *key)
2c0ea78f 1793{
8a4e3a85 1794 struct dp_netdev_flow *netdev_flow;
0de8783a 1795 struct dpcls_rule *rule;
2c0ea78f 1796
1c1e46ed 1797 dpcls_lookup(&pmd->cls, key, &rule, 1);
4f150744 1798 netdev_flow = dp_netdev_flow_cast(rule);
2c0ea78f 1799
8a4e3a85 1800 return netdev_flow;
2c0ea78f
GS
1801}
1802
1803static struct dp_netdev_flow *
1c1e46ed
AW
1804dp_netdev_pmd_find_flow(const struct dp_netdev_pmd_thread *pmd,
1805 const ovs_u128 *ufidp, const struct nlattr *key,
1806 size_t key_len)
72865317 1807{
1763b4b8 1808 struct dp_netdev_flow *netdev_flow;
70e5ed6f
JS
1809 struct flow flow;
1810 ovs_u128 ufid;
1811
1812 /* If a UFID is not provided, determine one based on the key. */
1813 if (!ufidp && key && key_len
1814 && !dpif_netdev_flow_from_nlattrs(key, key_len, &flow)) {
1c1e46ed 1815 dpif_flow_hash(pmd->dp->dpif, &flow, sizeof flow, &ufid);
70e5ed6f
JS
1816 ufidp = &ufid;
1817 }
72865317 1818
70e5ed6f
JS
1819 if (ufidp) {
1820 CMAP_FOR_EACH_WITH_HASH (netdev_flow, node, dp_netdev_flow_hash(ufidp),
1c1e46ed 1821 &pmd->flow_table) {
2ff8484b 1822 if (ovs_u128_equals(netdev_flow->ufid, *ufidp)) {
70e5ed6f
JS
1823 return netdev_flow;
1824 }
72865317
BP
1825 }
1826 }
8a4e3a85 1827
72865317
BP
1828 return NULL;
1829}
1830
1831static void
eb94da30 1832get_dpif_flow_stats(const struct dp_netdev_flow *netdev_flow_,
1763b4b8 1833 struct dpif_flow_stats *stats)
feebdea2 1834{
eb94da30
DDP
1835 struct dp_netdev_flow *netdev_flow;
1836 unsigned long long n;
1837 long long used;
1838 uint16_t flags;
1839
1840 netdev_flow = CONST_CAST(struct dp_netdev_flow *, netdev_flow_);
1841
1842 atomic_read_relaxed(&netdev_flow->stats.packet_count, &n);
1843 stats->n_packets = n;
1844 atomic_read_relaxed(&netdev_flow->stats.byte_count, &n);
1845 stats->n_bytes = n;
1846 atomic_read_relaxed(&netdev_flow->stats.used, &used);
1847 stats->used = used;
1848 atomic_read_relaxed(&netdev_flow->stats.tcp_flags, &flags);
1849 stats->tcp_flags = flags;
72865317
BP
1850}
1851
7af12bd7
JS
1852/* Converts to the dpif_flow format, using 'key_buf' and 'mask_buf' for
1853 * storing the netlink-formatted key/mask. 'key_buf' may be the same as
1854 * 'mask_buf'. Actions will be returned without copying, by relying on RCU to
1855 * protect them. */
6fe09f8c 1856static void
70e5ed6f 1857dp_netdev_flow_to_dpif_flow(const struct dp_netdev_flow *netdev_flow,
7af12bd7 1858 struct ofpbuf *key_buf, struct ofpbuf *mask_buf,
64bb477f 1859 struct dpif_flow *flow, bool terse)
6fe09f8c 1860{
64bb477f
JS
1861 if (terse) {
1862 memset(flow, 0, sizeof *flow);
1863 } else {
1864 struct flow_wildcards wc;
1865 struct dp_netdev_actions *actions;
1866 size_t offset;
5262eea1
JG
1867 struct odp_flow_key_parms odp_parms = {
1868 .flow = &netdev_flow->flow,
1869 .mask = &wc.masks,
2494ccd7 1870 .support = dp_netdev_support,
5262eea1 1871 };
64bb477f
JS
1872
1873 miniflow_expand(&netdev_flow->cr.mask->mf, &wc.masks);
1874
1875 /* Key */
6fd6ed71 1876 offset = key_buf->size;
64bb477f 1877 flow->key = ofpbuf_tail(key_buf);
5262eea1
JG
1878 odp_parms.odp_in_port = netdev_flow->flow.in_port.odp_port;
1879 odp_flow_key_from_flow(&odp_parms, key_buf);
6fd6ed71 1880 flow->key_len = key_buf->size - offset;
64bb477f
JS
1881
1882 /* Mask */
6fd6ed71 1883 offset = mask_buf->size;
64bb477f 1884 flow->mask = ofpbuf_tail(mask_buf);
5262eea1 1885 odp_parms.odp_in_port = wc.masks.in_port.odp_port;
ec1f6f32 1886 odp_parms.key_buf = key_buf;
5262eea1 1887 odp_flow_key_from_mask(&odp_parms, mask_buf);
6fd6ed71 1888 flow->mask_len = mask_buf->size - offset;
64bb477f
JS
1889
1890 /* Actions */
1891 actions = dp_netdev_flow_get_actions(netdev_flow);
1892 flow->actions = actions->actions;
1893 flow->actions_len = actions->size;
1894 }
6fe09f8c 1895
70e5ed6f
JS
1896 flow->ufid = netdev_flow->ufid;
1897 flow->ufid_present = true;
1c1e46ed 1898 flow->pmd_id = netdev_flow->pmd_id;
6fe09f8c
JS
1899 get_dpif_flow_stats(netdev_flow, &flow->stats);
1900}
1901
36956a7d 1902static int
8c301900
JR
1903dpif_netdev_mask_from_nlattrs(const struct nlattr *key, uint32_t key_len,
1904 const struct nlattr *mask_key,
1905 uint32_t mask_key_len, const struct flow *flow,
9f861c91 1906 struct flow_wildcards *wc)
8c301900 1907{
ca8d3442
DDP
1908 enum odp_key_fitness fitness;
1909
1910 fitness = odp_flow_key_to_mask_udpif(mask_key, mask_key_len, key,
1911 key_len, wc, flow);
1912 if (fitness) {
1913 /* This should not happen: it indicates that
1914 * odp_flow_key_from_mask() and odp_flow_key_to_mask()
1915 * disagree on the acceptable form of a mask. Log the problem
1916 * as an error, with enough details to enable debugging. */
1917 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
1918
1919 if (!VLOG_DROP_ERR(&rl)) {
1920 struct ds s;
8c301900 1921
ca8d3442
DDP
1922 ds_init(&s);
1923 odp_flow_format(key, key_len, mask_key, mask_key_len, NULL, &s,
1924 true);
1925 VLOG_ERR("internal error parsing flow mask %s (%s)",
1926 ds_cstr(&s), odp_key_fitness_to_string(fitness));
1927 ds_destroy(&s);
8c301900 1928 }
ca8d3442
DDP
1929
1930 return EINVAL;
8c301900
JR
1931 }
1932
1933 return 0;
1934}
1935
1936static int
1937dpif_netdev_flow_from_nlattrs(const struct nlattr *key, uint32_t key_len,
1938 struct flow *flow)
36956a7d 1939{
586ddea5
BP
1940 odp_port_t in_port;
1941
6728d578 1942 if (odp_flow_key_to_flow_udpif(key, key_len, flow)) {
36956a7d 1943 /* This should not happen: it indicates that odp_flow_key_from_flow()
8c301900
JR
1944 * and odp_flow_key_to_flow() disagree on the acceptable form of a
1945 * flow. Log the problem as an error, with enough details to enable
1946 * debugging. */
36956a7d
BP
1947 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
1948
1949 if (!VLOG_DROP_ERR(&rl)) {
1950 struct ds s;
1951
1952 ds_init(&s);
8c301900 1953 odp_flow_format(key, key_len, NULL, 0, NULL, &s, true);
36956a7d
BP
1954 VLOG_ERR("internal error parsing flow key %s", ds_cstr(&s));
1955 ds_destroy(&s);
1956 }
1957
1958 return EINVAL;
1959 }
1960
586ddea5
BP
1961 in_port = flow->in_port.odp_port;
1962 if (!is_valid_port_number(in_port) && in_port != ODPP_NONE) {
18886b60
BP
1963 return EINVAL;
1964 }
1965
07659514 1966 /* Userspace datapath doesn't support conntrack. */
9daf2348 1967 if (flow->ct_state || flow->ct_zone || flow->ct_mark
2ff8484b 1968 || !ovs_u128_is_zero(flow->ct_label)) {
07659514
JS
1969 return EINVAL;
1970 }
1971
36956a7d
BP
1972 return 0;
1973}
1974
72865317 1975static int
6fe09f8c 1976dpif_netdev_flow_get(const struct dpif *dpif, const struct dpif_flow_get *get)
72865317
BP
1977{
1978 struct dp_netdev *dp = get_dp_netdev(dpif);
1763b4b8 1979 struct dp_netdev_flow *netdev_flow;
1c1e46ed 1980 struct dp_netdev_pmd_thread *pmd;
bd5131ba
DDP
1981 unsigned pmd_id = get->pmd_id == PMD_ID_NULL
1982 ? NON_PMD_CORE_ID : get->pmd_id;
70e5ed6f 1983 int error = 0;
8a4e3a85 1984
1c1e46ed
AW
1985 pmd = dp_netdev_get_pmd(dp, pmd_id);
1986 if (!pmd) {
1987 return EINVAL;
1988 }
1989
1990 netdev_flow = dp_netdev_pmd_find_flow(pmd, get->ufid, get->key,
1991 get->key_len);
1763b4b8 1992 if (netdev_flow) {
70e5ed6f 1993 dp_netdev_flow_to_dpif_flow(netdev_flow, get->buffer, get->buffer,
64bb477f 1994 get->flow, false);
70e5ed6f 1995 } else {
5279f8fd 1996 error = ENOENT;
72865317 1997 }
1c1e46ed
AW
1998 dp_netdev_pmd_unref(pmd);
1999
bc4a05c6 2000
5279f8fd 2001 return error;
72865317
BP
2002}
2003
0de8783a 2004static struct dp_netdev_flow *
1c1e46ed
AW
2005dp_netdev_flow_add(struct dp_netdev_pmd_thread *pmd,
2006 struct match *match, const ovs_u128 *ufid,
ae2ceebd 2007 const struct nlattr *actions, size_t actions_len)
1c1e46ed 2008 OVS_REQUIRES(pmd->flow_mutex)
72865317 2009{
0de8783a
JR
2010 struct dp_netdev_flow *flow;
2011 struct netdev_flow_key mask;
ed79f89a 2012
0de8783a
JR
2013 netdev_flow_mask_init(&mask, match);
2014 /* Make sure wc does not have metadata. */
5fcff47b
JR
2015 ovs_assert(!FLOWMAP_HAS_FIELD(&mask.mf.map, metadata)
2016 && !FLOWMAP_HAS_FIELD(&mask.mf.map, regs));
679ba04c 2017
0de8783a 2018 /* Do not allocate extra space. */
caeb4906 2019 flow = xmalloc(sizeof *flow - sizeof flow->cr.flow.mf + mask.len);
1c1e46ed 2020 memset(&flow->stats, 0, sizeof flow->stats);
0de8783a 2021 flow->dead = false;
11e5cf1f 2022 flow->batch = NULL;
bd5131ba 2023 *CONST_CAST(unsigned *, &flow->pmd_id) = pmd->core_id;
0de8783a 2024 *CONST_CAST(struct flow *, &flow->flow) = match->flow;
70e5ed6f 2025 *CONST_CAST(ovs_u128 *, &flow->ufid) = *ufid;
0de8783a 2026 ovs_refcount_init(&flow->ref_cnt);
0de8783a 2027 ovsrcu_set(&flow->actions, dp_netdev_actions_create(actions, actions_len));
2c0ea78f 2028
0de8783a 2029 netdev_flow_key_init_masked(&flow->cr.flow, &match->flow, &mask);
1c1e46ed 2030 dpcls_insert(&pmd->cls, &flow->cr, &mask);
72865317 2031
4c75aaab
EJ
2032 cmap_insert(&pmd->flow_table, CONST_CAST(struct cmap_node *, &flow->node),
2033 dp_netdev_flow_hash(&flow->ufid));
2034
623540e4 2035 if (OVS_UNLIKELY(VLOG_IS_DBG_ENABLED())) {
0de8783a 2036 struct match match;
623540e4
EJ
2037 struct ds ds = DS_EMPTY_INITIALIZER;
2038
4d8f90b1 2039 match.tun_md.valid = false;
0de8783a
JR
2040 match.flow = flow->flow;
2041 miniflow_expand(&flow->cr.mask->mf, &match.wc.masks);
2042
623540e4 2043 ds_put_cstr(&ds, "flow_add: ");
70e5ed6f
JS
2044 odp_format_ufid(ufid, &ds);
2045 ds_put_cstr(&ds, " ");
0de8783a 2046 match_format(&match, &ds, OFP_DEFAULT_PRIORITY);
623540e4
EJ
2047 ds_put_cstr(&ds, ", actions:");
2048 format_odp_actions(&ds, actions, actions_len);
2049
2050 VLOG_DBG_RL(&upcall_rl, "%s", ds_cstr(&ds));
2051
2052 ds_destroy(&ds);
2053 }
2054
0de8783a 2055 return flow;
72865317
BP
2056}
2057
72865317 2058static int
89625d1e 2059dpif_netdev_flow_put(struct dpif *dpif, const struct dpif_flow_put *put)
72865317
BP
2060{
2061 struct dp_netdev *dp = get_dp_netdev(dpif);
1763b4b8 2062 struct dp_netdev_flow *netdev_flow;
0de8783a 2063 struct netdev_flow_key key;
1c1e46ed 2064 struct dp_netdev_pmd_thread *pmd;
ae2ceebd 2065 struct match match;
70e5ed6f 2066 ovs_u128 ufid;
bd5131ba
DDP
2067 unsigned pmd_id = put->pmd_id == PMD_ID_NULL
2068 ? NON_PMD_CORE_ID : put->pmd_id;
36956a7d
BP
2069 int error;
2070
ae2ceebd 2071 error = dpif_netdev_flow_from_nlattrs(put->key, put->key_len, &match.flow);
8c301900
JR
2072 if (error) {
2073 return error;
2074 }
2075 error = dpif_netdev_mask_from_nlattrs(put->key, put->key_len,
2076 put->mask, put->mask_len,
9f861c91 2077 &match.flow, &match.wc);
36956a7d
BP
2078 if (error) {
2079 return error;
2080 }
0de8783a 2081
1c1e46ed
AW
2082 pmd = dp_netdev_get_pmd(dp, pmd_id);
2083 if (!pmd) {
2084 return EINVAL;
2085 }
2086
0de8783a
JR
2087 /* Must produce a netdev_flow_key for lookup.
2088 * This interface is no longer performance critical, since it is not used
2089 * for upcall processing any more. */
2090 netdev_flow_key_from_flow(&key, &match.flow);
72865317 2091
70e5ed6f
JS
2092 if (put->ufid) {
2093 ufid = *put->ufid;
2094 } else {
2095 dpif_flow_hash(dpif, &match.flow, sizeof match.flow, &ufid);
2096 }
2097
1c1e46ed
AW
2098 ovs_mutex_lock(&pmd->flow_mutex);
2099 netdev_flow = dp_netdev_pmd_lookup_flow(pmd, &key);
1763b4b8 2100 if (!netdev_flow) {
89625d1e 2101 if (put->flags & DPIF_FP_CREATE) {
1c1e46ed 2102 if (cmap_count(&pmd->flow_table) < MAX_FLOWS) {
89625d1e
BP
2103 if (put->stats) {
2104 memset(put->stats, 0, sizeof *put->stats);
feebdea2 2105 }
1c1e46ed 2106 dp_netdev_flow_add(pmd, &match, &ufid, put->actions,
70e5ed6f 2107 put->actions_len);
0de8783a 2108 error = 0;
72865317 2109 } else {
5279f8fd 2110 error = EFBIG;
72865317
BP
2111 }
2112 } else {
5279f8fd 2113 error = ENOENT;
72865317
BP
2114 }
2115 } else {
2c0ea78f 2116 if (put->flags & DPIF_FP_MODIFY
ae2ceebd 2117 && flow_equal(&match.flow, &netdev_flow->flow)) {
8a4e3a85
BP
2118 struct dp_netdev_actions *new_actions;
2119 struct dp_netdev_actions *old_actions;
2120
2121 new_actions = dp_netdev_actions_create(put->actions,
2122 put->actions_len);
2123
61e7deb1
BP
2124 old_actions = dp_netdev_flow_get_actions(netdev_flow);
2125 ovsrcu_set(&netdev_flow->actions, new_actions);
679ba04c 2126
a84cb64a
BP
2127 if (put->stats) {
2128 get_dpif_flow_stats(netdev_flow, put->stats);
2129 }
2130 if (put->flags & DPIF_FP_ZERO_STATS) {
97447f55
DDP
2131 /* XXX: The userspace datapath uses thread local statistics
2132 * (for flows), which should be updated only by the owning
2133 * thread. Since we cannot write to the stats memory here,
2134 * we choose not to support this flag. Please note:
2135 * - This feature is currently used only by dpctl commands with
2136 * option --clear.
2137 * - Should the need arise, this operation can be implemented
2138 * by keeping a base value (to be updated here) for each
2139 * counter, and subtracting it before outputting the stats. */
2140 error = EOPNOTSUPP;
72865317 2141 }
8a4e3a85 2142
61e7deb1 2143 ovsrcu_postpone(dp_netdev_actions_free, old_actions);
2c0ea78f 2144 } else if (put->flags & DPIF_FP_CREATE) {
5279f8fd 2145 error = EEXIST;
2c0ea78f
GS
2146 } else {
2147 /* Overlapping flow. */
2148 error = EINVAL;
72865317
BP
2149 }
2150 }
1c1e46ed
AW
2151 ovs_mutex_unlock(&pmd->flow_mutex);
2152 dp_netdev_pmd_unref(pmd);
5279f8fd
BP
2153
2154 return error;
72865317
BP
2155}
2156
72865317 2157static int
b99d3cee 2158dpif_netdev_flow_del(struct dpif *dpif, const struct dpif_flow_del *del)
72865317
BP
2159{
2160 struct dp_netdev *dp = get_dp_netdev(dpif);
1763b4b8 2161 struct dp_netdev_flow *netdev_flow;
1c1e46ed 2162 struct dp_netdev_pmd_thread *pmd;
bd5131ba
DDP
2163 unsigned pmd_id = del->pmd_id == PMD_ID_NULL
2164 ? NON_PMD_CORE_ID : del->pmd_id;
70e5ed6f 2165 int error = 0;
72865317 2166
1c1e46ed
AW
2167 pmd = dp_netdev_get_pmd(dp, pmd_id);
2168 if (!pmd) {
2169 return EINVAL;
2170 }
2171
2172 ovs_mutex_lock(&pmd->flow_mutex);
2173 netdev_flow = dp_netdev_pmd_find_flow(pmd, del->ufid, del->key,
2174 del->key_len);
1763b4b8 2175 if (netdev_flow) {
b99d3cee 2176 if (del->stats) {
1763b4b8 2177 get_dpif_flow_stats(netdev_flow, del->stats);
feebdea2 2178 }
1c1e46ed 2179 dp_netdev_pmd_remove_flow(pmd, netdev_flow);
72865317 2180 } else {
5279f8fd 2181 error = ENOENT;
72865317 2182 }
1c1e46ed
AW
2183 ovs_mutex_unlock(&pmd->flow_mutex);
2184 dp_netdev_pmd_unref(pmd);
5279f8fd
BP
2185
2186 return error;
72865317
BP
2187}
2188
ac64794a
BP
2189struct dpif_netdev_flow_dump {
2190 struct dpif_flow_dump up;
1c1e46ed
AW
2191 struct cmap_position poll_thread_pos;
2192 struct cmap_position flow_pos;
2193 struct dp_netdev_pmd_thread *cur_pmd;
d2ad7ef1
JS
2194 int status;
2195 struct ovs_mutex mutex;
e723fd32
JS
2196};
2197
ac64794a
BP
2198static struct dpif_netdev_flow_dump *
2199dpif_netdev_flow_dump_cast(struct dpif_flow_dump *dump)
72865317 2200{
ac64794a 2201 return CONTAINER_OF(dump, struct dpif_netdev_flow_dump, up);
e723fd32
JS
2202}
2203
ac64794a 2204static struct dpif_flow_dump *
64bb477f 2205dpif_netdev_flow_dump_create(const struct dpif *dpif_, bool terse)
e723fd32 2206{
ac64794a 2207 struct dpif_netdev_flow_dump *dump;
e723fd32 2208
1c1e46ed 2209 dump = xzalloc(sizeof *dump);
ac64794a 2210 dpif_flow_dump_init(&dump->up, dpif_);
64bb477f 2211 dump->up.terse = terse;
ac64794a
BP
2212 ovs_mutex_init(&dump->mutex);
2213
2214 return &dump->up;
e723fd32
JS
2215}
2216
2217static int
ac64794a 2218dpif_netdev_flow_dump_destroy(struct dpif_flow_dump *dump_)
e723fd32 2219{
ac64794a 2220 struct dpif_netdev_flow_dump *dump = dpif_netdev_flow_dump_cast(dump_);
e723fd32 2221
ac64794a
BP
2222 ovs_mutex_destroy(&dump->mutex);
2223 free(dump);
704a1e09
BP
2224 return 0;
2225}
2226
ac64794a
BP
2227struct dpif_netdev_flow_dump_thread {
2228 struct dpif_flow_dump_thread up;
2229 struct dpif_netdev_flow_dump *dump;
8bb113da
RW
2230 struct odputil_keybuf keybuf[FLOW_DUMP_MAX_BATCH];
2231 struct odputil_keybuf maskbuf[FLOW_DUMP_MAX_BATCH];
ac64794a
BP
2232};
2233
2234static struct dpif_netdev_flow_dump_thread *
2235dpif_netdev_flow_dump_thread_cast(struct dpif_flow_dump_thread *thread)
2236{
2237 return CONTAINER_OF(thread, struct dpif_netdev_flow_dump_thread, up);
2238}
2239
2240static struct dpif_flow_dump_thread *
2241dpif_netdev_flow_dump_thread_create(struct dpif_flow_dump *dump_)
2242{
2243 struct dpif_netdev_flow_dump *dump = dpif_netdev_flow_dump_cast(dump_);
2244 struct dpif_netdev_flow_dump_thread *thread;
2245
2246 thread = xmalloc(sizeof *thread);
2247 dpif_flow_dump_thread_init(&thread->up, &dump->up);
2248 thread->dump = dump;
2249 return &thread->up;
2250}
2251
2252static void
2253dpif_netdev_flow_dump_thread_destroy(struct dpif_flow_dump_thread *thread_)
2254{
2255 struct dpif_netdev_flow_dump_thread *thread
2256 = dpif_netdev_flow_dump_thread_cast(thread_);
2257
2258 free(thread);
2259}
2260
704a1e09 2261static int
ac64794a 2262dpif_netdev_flow_dump_next(struct dpif_flow_dump_thread *thread_,
8bb113da 2263 struct dpif_flow *flows, int max_flows)
ac64794a
BP
2264{
2265 struct dpif_netdev_flow_dump_thread *thread
2266 = dpif_netdev_flow_dump_thread_cast(thread_);
2267 struct dpif_netdev_flow_dump *dump = thread->dump;
8bb113da 2268 struct dp_netdev_flow *netdev_flows[FLOW_DUMP_MAX_BATCH];
8bb113da
RW
2269 int n_flows = 0;
2270 int i;
14608a15 2271
ac64794a 2272 ovs_mutex_lock(&dump->mutex);
8bb113da 2273 if (!dump->status) {
1c1e46ed
AW
2274 struct dpif_netdev *dpif = dpif_netdev_cast(thread->up.dpif);
2275 struct dp_netdev *dp = get_dp_netdev(&dpif->dpif);
2276 struct dp_netdev_pmd_thread *pmd = dump->cur_pmd;
2277 int flow_limit = MIN(max_flows, FLOW_DUMP_MAX_BATCH);
2278
2279 /* On the first call to dump_next(), extracts the first pmd thread.
2280 * If there is no pmd thread, returns immediately. */
2281 if (!pmd) {
2282 pmd = dp_netdev_pmd_get_next(dp, &dump->poll_thread_pos);
2283 if (!pmd) {
2284 ovs_mutex_unlock(&dump->mutex);
2285 return n_flows;
8bb113da 2286
8bb113da 2287 }
d2ad7ef1 2288 }
1c1e46ed
AW
2289
2290 do {
2291 for (n_flows = 0; n_flows < flow_limit; n_flows++) {
2292 struct cmap_node *node;
2293
2294 node = cmap_next_position(&pmd->flow_table, &dump->flow_pos);
2295 if (!node) {
2296 break;
2297 }
2298 netdev_flows[n_flows] = CONTAINER_OF(node,
2299 struct dp_netdev_flow,
2300 node);
2301 }
2302 /* When done dumping the current pmd thread, moves on to
2303 * the next one. */
2304 if (n_flows < flow_limit) {
2305 memset(&dump->flow_pos, 0, sizeof dump->flow_pos);
2306 dp_netdev_pmd_unref(pmd);
2307 pmd = dp_netdev_pmd_get_next(dp, &dump->poll_thread_pos);
2308 if (!pmd) {
2309 dump->status = EOF;
2310 break;
2311 }
2312 }
2313 /* Keeps the reference for the next call. */
2314 dump->cur_pmd = pmd;
2315
2316 /* If the current dump is empty, do not exit the loop, since the
2317 * remaining pmds could have flows to be dumped. Just dump again
2318 * on the new 'pmd'. */
2319 } while (!n_flows);
8a4e3a85 2320 }
ac64794a 2321 ovs_mutex_unlock(&dump->mutex);
ac64794a 2322
8bb113da
RW
2323 for (i = 0; i < n_flows; i++) {
2324 struct odputil_keybuf *maskbuf = &thread->maskbuf[i];
2325 struct odputil_keybuf *keybuf = &thread->keybuf[i];
2326 struct dp_netdev_flow *netdev_flow = netdev_flows[i];
2327 struct dpif_flow *f = &flows[i];
7af12bd7 2328 struct ofpbuf key, mask;
8bb113da 2329
7af12bd7
JS
2330 ofpbuf_use_stack(&key, keybuf, sizeof *keybuf);
2331 ofpbuf_use_stack(&mask, maskbuf, sizeof *maskbuf);
64bb477f
JS
2332 dp_netdev_flow_to_dpif_flow(netdev_flow, &key, &mask, f,
2333 dump->up.terse);
8bb113da 2334 }
feebdea2 2335
8bb113da 2336 return n_flows;
72865317
BP
2337}
2338
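/* Illustrative sketch (not part of the upstream file): draining every flow
 * through the dump machinery above, calling the provider entry points
 * directly; real callers go through the generic dpif layer instead. The
 * function name is hypothetical. */
static void
dump_all_flows_example(struct dpif *dpif)
{
    struct dpif_flow_dump *dump = dpif_netdev_flow_dump_create(dpif, false);
    struct dpif_flow_dump_thread *thread
        = dpif_netdev_flow_dump_thread_create(dump);
    struct dpif_flow flows[FLOW_DUMP_MAX_BATCH];
    int n;

    while ((n = dpif_netdev_flow_dump_next(thread, flows,
                                           FLOW_DUMP_MAX_BATCH)) > 0) {
        /* 'flows[0..n-1]' point into the per-thread key/mask buffers and
         * into RCU-protected actions; consume them before the next call. */
    }
    dpif_netdev_flow_dump_thread_destroy(thread);
    dpif_netdev_flow_dump_destroy(dump);
}
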
2339static int
758c456d 2340dpif_netdev_execute(struct dpif *dpif, struct dpif_execute *execute)
65f13b50 2341 OVS_NO_THREAD_SAFETY_ANALYSIS
72865317
BP
2342{
2343 struct dp_netdev *dp = get_dp_netdev(dpif);
65f13b50 2344 struct dp_netdev_pmd_thread *pmd;
1895cc8d 2345 struct dp_packet_batch pp;
72865317 2346
cf62fa4c
PS
2347 if (dp_packet_size(execute->packet) < ETH_HEADER_LEN ||
2348 dp_packet_size(execute->packet) > UINT16_MAX) {
72865317
BP
2349 return EINVAL;
2350 }
2351
65f13b50
AW
2352 /* Tries finding the 'pmd'. If NULL is returned, that means
2353 * the current thread is a non-pmd thread and should use
b19befae 2354 * dp_netdev_get_pmd(dp, NON_PMD_CORE_ID). */
65f13b50
AW
2355 pmd = ovsthread_getspecific(dp->per_pmd_key);
2356 if (!pmd) {
b19befae 2357 pmd = dp_netdev_get_pmd(dp, NON_PMD_CORE_ID);
65f13b50
AW
2358 }
2359
2360 /* If the current thread is a non-pmd thread, acquires
2361 * the 'non_pmd_mutex'. */
2362 if (pmd->core_id == NON_PMD_CORE_ID) {
2363 ovs_mutex_lock(&dp->non_pmd_mutex);
433330a8 2364 ovs_mutex_lock(&dp->port_mutex);
65f13b50 2365 }
1c1e46ed 2366
1895cc8d
PS
2367 packet_batch_init_packet(&pp, execute->packet);
2368 dp_netdev_execute_actions(pmd, &pp, false, execute->actions,
9bbf1c3d 2369 execute->actions_len);
65f13b50 2370 if (pmd->core_id == NON_PMD_CORE_ID) {
1c1e46ed 2371 dp_netdev_pmd_unref(pmd);
433330a8 2372 ovs_mutex_unlock(&dp->port_mutex);
65f13b50
AW
2373 ovs_mutex_unlock(&dp->non_pmd_mutex);
2374 }
8a4e3a85 2375
758c456d 2376 return 0;
72865317
BP
2377}
2378
1a0c894a
BP
2379static void
2380dpif_netdev_operate(struct dpif *dpif, struct dpif_op **ops, size_t n_ops)
2381{
2382 size_t i;
2383
2384 for (i = 0; i < n_ops; i++) {
2385 struct dpif_op *op = ops[i];
2386
2387 switch (op->type) {
2388 case DPIF_OP_FLOW_PUT:
2389 op->error = dpif_netdev_flow_put(dpif, &op->u.flow_put);
2390 break;
2391
2392 case DPIF_OP_FLOW_DEL:
2393 op->error = dpif_netdev_flow_del(dpif, &op->u.flow_del);
2394 break;
2395
2396 case DPIF_OP_EXECUTE:
2397 op->error = dpif_netdev_execute(dpif, &op->u.execute);
2398 break;
6fe09f8c
JS
2399
2400 case DPIF_OP_FLOW_GET:
2401 op->error = dpif_netdev_flow_get(dpif, &op->u.flow_get);
2402 break;
1a0c894a
BP
2403 }
2404 }
2405}
2406
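/* Illustrative sketch (not part of the upstream file): running a single
 * DPIF_OP_EXECUTE through the batched interface above; assumes the
 * 'struct dpif_op' layout from dpif.h with 'type', 'error' and 'u'. The
 * wrapper name is hypothetical. */
static int
dpif_netdev_execute_one(struct dpif *dpif, struct dpif_execute *execute)
{
    struct dpif_op op;
    struct dpif_op *opp = &op;

    op.type = DPIF_OP_EXECUTE;
    op.u.execute = *execute;
    dpif_netdev_operate(dpif, &opp, 1);

    return op.error;
}
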
f2eee189
AW
2407/* Returns true if the configuration for rx queues or cpu mask
2408 * has changed. */
2409static bool
a14b8947 2410pmd_config_changed(const struct dp_netdev *dp, const char *cmask)
f2eee189 2411{
a14b8947
IM
2412 struct dp_netdev_port *port;
2413
2414 CMAP_FOR_EACH (port, node, &dp->ports) {
2415 struct netdev *netdev = port->netdev;
2416 int requested_n_rxq = netdev_requested_n_rxq(netdev);
2417 if (netdev_is_pmd(netdev)
2418 && port->latest_requested_n_rxq != requested_n_rxq) {
2419 return true;
f2eee189
AW
2420 }
2421 }
a14b8947
IM
2422
2423 if (dp->pmd_cmask != NULL && cmask != NULL) {
2424 return strcmp(dp->pmd_cmask, cmask);
2425 } else {
2426 return (dp->pmd_cmask != NULL || cmask != NULL);
2427 }
f2eee189
AW
2428}
2429
2430/* Resets pmd threads if the configuration for 'rxq's or cpu mask changes. */
2431static int
a14b8947 2432dpif_netdev_pmd_set(struct dpif *dpif, const char *cmask)
f2eee189
AW
2433{
2434 struct dp_netdev *dp = get_dp_netdev(dpif);
2435
a14b8947 2436 if (pmd_config_changed(dp, cmask)) {
f2eee189
AW
2437 struct dp_netdev_port *port;
2438
2439 dp_netdev_destroy_all_pmds(dp);
2440
2441 CMAP_FOR_EACH (port, node, &dp->ports) {
a14b8947
IM
2442 struct netdev *netdev = port->netdev;
2443 int requested_n_rxq = netdev_requested_n_rxq(netdev);
2444 if (netdev_is_pmd(port->netdev)
2445 && port->latest_requested_n_rxq != requested_n_rxq) {
f2eee189
AW
2446 int i, err;
2447
2448 /* Closes the existing 'rxq's. */
2449 for (i = 0; i < netdev_n_rxq(port->netdev); i++) {
2450 netdev_rxq_close(port->rxq[i]);
2451 port->rxq[i] = NULL;
2452 }
490e82af 2453 port->n_rxq = 0;
f2eee189
AW
2454
2455 /* Sets the new rx queue config. */
3bcc10c0
DDP
2456 err = netdev_set_multiq(port->netdev,
2457 ovs_numa_get_n_cores() + 1,
a14b8947 2458 requested_n_rxq);
7251515e 2459 if (err && (err != EOPNOTSUPP)) {
f2eee189
AW
2460 VLOG_ERR("Failed to set dpdk interface %s rx_queue to:"
2461 " %u", netdev_get_name(port->netdev),
a14b8947 2462 requested_n_rxq);
f2eee189
AW
2463 return err;
2464 }
a14b8947 2465 port->latest_requested_n_rxq = requested_n_rxq;
f2eee189 2466 /* If the set_multiq() above succeeds, reopens the 'rxq's. */
490e82af
DDP
2467 port->n_rxq = netdev_n_rxq(port->netdev);
2468 port->rxq = xrealloc(port->rxq, sizeof *port->rxq * port->n_rxq);
2469 for (i = 0; i < port->n_rxq; i++) {
f2eee189
AW
2470 netdev_rxq_open(port->netdev, &port->rxq[i], i);
2471 }
2472 }
2473 }
f2eee189
AW
2474 /* Reconfigures the cpu mask. */
2475 ovs_numa_set_cpu_mask(cmask);
2476 free(dp->pmd_cmask);
2477 dp->pmd_cmask = cmask ? xstrdup(cmask) : NULL;
2478
2479 /* Restores the non-pmd. */
2480 dp_netdev_set_nonpmd(dp);
2481 /* Restores all pmd threads. */
2482 dp_netdev_reset_pmd_threads(dp);
2483 }
2484
2485 return 0;
2486}
2487
5bf93d67
EJ
2488static int
2489dpif_netdev_queue_to_priority(const struct dpif *dpif OVS_UNUSED,
2490 uint32_t queue_id, uint32_t *priority)
2491{
2492 *priority = queue_id;
2493 return 0;
2494}
2495
72865317 2496\f
9ff55ae2
DDP
2497/* Creates and returns a new 'struct dp_netdev_actions', whose actions are
2498 * a copy of the 'ofpacts_len' bytes of 'ofpacts'. */
a84cb64a
BP
2499struct dp_netdev_actions *
2500dp_netdev_actions_create(const struct nlattr *actions, size_t size)
2501{
2502 struct dp_netdev_actions *netdev_actions;
2503
9ff55ae2
DDP
2504 netdev_actions = xmalloc(sizeof *netdev_actions + size);
2505 memcpy(netdev_actions->actions, actions, size);
a84cb64a
BP
2506 netdev_actions->size = size;
2507
2508 return netdev_actions;
2509}
2510
a84cb64a 2511struct dp_netdev_actions *
61e7deb1 2512dp_netdev_flow_get_actions(const struct dp_netdev_flow *flow)
a84cb64a 2513{
61e7deb1 2514 return ovsrcu_get(struct dp_netdev_actions *, &flow->actions);
a84cb64a
BP
2515}
2516
61e7deb1
BP
2517static void
2518dp_netdev_actions_free(struct dp_netdev_actions *actions)
a84cb64a 2519{
61e7deb1 2520 free(actions);
a84cb64a
BP
2521}
2522\f
55e3ca97
DDP
2523static inline unsigned long long
2524cycles_counter(void)
2525{
2526#ifdef DPDK_NETDEV
2527 return rte_get_tsc_cycles();
2528#else
2529 return 0;
2530#endif
2531}
2532
2533/* Fake mutex to make sure that the calls to cycles_count_* are balanced */
2534extern struct ovs_mutex cycles_counter_fake_mutex;
2535
2536/* Start counting cycles. Must be followed by 'cycles_count_end()' */
2537static inline void
2538cycles_count_start(struct dp_netdev_pmd_thread *pmd)
2539 OVS_ACQUIRES(&cycles_counter_fake_mutex)
2540 OVS_NO_THREAD_SAFETY_ANALYSIS
2541{
2542 pmd->last_cycles = cycles_counter();
2543}
2544
2545/* Stop counting cycles and add them to the counter 'type' */
2546static inline void
2547cycles_count_end(struct dp_netdev_pmd_thread *pmd,
2548 enum pmd_cycles_counter_type type)
2549 OVS_RELEASES(&cycles_counter_fake_mutex)
2550 OVS_NO_THREAD_SAFETY_ANALYSIS
2551{
2552 unsigned long long interval = cycles_counter() - pmd->last_cycles;
2553
2554 non_atomic_ullong_add(&pmd->cycles.n[type], interval);
2555}
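
/* Illustrative sketch (not part of the upstream file): the annotations above
 * let clang's thread-safety analysis reject unbalanced pairs, e.g.:
 *
 *     cycles_count_start(pmd);
 *     if (error) {
 *         return;   // warning: 'cycles_counter_fake_mutex' still "held"
 *     }
 *     cycles_count_end(pmd, PMD_CYCLES_POLLING);
 */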
e4cfed38 2556
5794e276 2557static void
65f13b50 2558dp_netdev_process_rxq_port(struct dp_netdev_pmd_thread *pmd,
9bbf1c3d
DDP
2559 struct dp_netdev_port *port,
2560 struct netdev_rxq *rxq)
e4cfed38 2561{
1895cc8d
PS
2562 struct dp_packet_batch batch;
2563 int error;
e4cfed38 2564
1895cc8d 2565 dp_packet_batch_init(&batch);
55e3ca97 2566 cycles_count_start(pmd);
1895cc8d 2567 error = netdev_rxq_recv(rxq, &batch);
55e3ca97 2568 cycles_count_end(pmd, PMD_CYCLES_POLLING);
e4cfed38 2569 if (!error) {
3c33f0ff 2570 *recirc_depth_get() = 0;
41ccaa24 2571
55e3ca97 2572 cycles_count_start(pmd);
1895cc8d 2573 dp_netdev_input(pmd, &batch, port->port_no);
55e3ca97 2574 cycles_count_end(pmd, PMD_CYCLES_PROCESSING);
e4cfed38 2575 } else if (error != EAGAIN && error != EOPNOTSUPP) {
3c33f0ff 2576 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
e4cfed38
PS
2577
2578 VLOG_ERR_RL(&rl, "error receiving data from %s: %s",
3c33f0ff 2579 netdev_get_name(port->netdev), ovs_strerror(error));
e4cfed38
PS
2580 }
2581}
2582
a36de779
PS
2583/* Return true if needs to revalidate datapath flows. */
2584static bool
e4cfed38
PS
2585dpif_netdev_run(struct dpif *dpif)
2586{
2587 struct dp_netdev_port *port;
2588 struct dp_netdev *dp = get_dp_netdev(dpif);
b19befae
AW
2589 struct dp_netdev_pmd_thread *non_pmd = dp_netdev_get_pmd(dp,
2590 NON_PMD_CORE_ID);
a36de779 2591 uint64_t new_tnl_seq;
e4cfed38 2592
65f13b50 2593 ovs_mutex_lock(&dp->non_pmd_mutex);
a532e683 2594 CMAP_FOR_EACH (port, node, &dp->ports) {
55c955bd
PS
2595 if (!netdev_is_pmd(port->netdev)) {
2596 int i;
2597
490e82af 2598 for (i = 0; i < port->n_rxq; i++) {
65f13b50 2599 dp_netdev_process_rxq_port(non_pmd, port, port->rxq[i]);
55c955bd 2600 }
e4cfed38
PS
2601 }
2602 }
65f13b50 2603 ovs_mutex_unlock(&dp->non_pmd_mutex);
1c1e46ed
AW
2604 dp_netdev_pmd_unref(non_pmd);
2605
53902038 2606 tnl_neigh_cache_run();
7f9b8504 2607 tnl_port_map_run();
a36de779
PS
2608 new_tnl_seq = seq_read(tnl_conf_seq);
2609
2610 if (dp->last_tnl_conf_seq != new_tnl_seq) {
2611 dp->last_tnl_conf_seq = new_tnl_seq;
2612 return true;
2613 }
2614 return false;
e4cfed38
PS
2615}
2616
2617static void
2618dpif_netdev_wait(struct dpif *dpif)
2619{
2620 struct dp_netdev_port *port;
2621 struct dp_netdev *dp = get_dp_netdev(dpif);
2622
59e6d833 2623 ovs_mutex_lock(&dp_netdev_mutex);
a532e683 2624 CMAP_FOR_EACH (port, node, &dp->ports) {
55c955bd
PS
2625 if (!netdev_is_pmd(port->netdev)) {
2626 int i;
2627
490e82af 2628 for (i = 0; i < port->n_rxq; i++) {
55c955bd
PS
2629 netdev_rxq_wait(port->rxq[i]);
2630 }
e4cfed38
PS
2631 }
2632 }
59e6d833 2633 ovs_mutex_unlock(&dp_netdev_mutex);
a36de779 2634 seq_wait(tnl_conf_seq, dp->last_tnl_conf_seq);
e4cfed38
PS
2635}
2636
e4cfed38 2637static int
62453dad 2638pmd_load_queues(struct dp_netdev_pmd_thread *pmd, struct rxq_poll **ppoll_list)
ae7ad0a1 2639 OVS_REQUIRES(pmd->poll_mutex)
e4cfed38 2640{
f7791740 2641 struct rxq_poll *poll_list = *ppoll_list;
ae7ad0a1
IM
2642 struct rxq_poll *poll;
2643 int i;
e4cfed38 2644
ae7ad0a1 2645 poll_list = xrealloc(poll_list, pmd->poll_cnt * sizeof *poll_list);
a1fdee13 2646
ae7ad0a1
IM
2647 i = 0;
2648 LIST_FOR_EACH (poll, node, &pmd->poll_list) {
ae7ad0a1 2649 poll_list[i++] = *poll;
e4cfed38
PS
2650 }
2651
e4cfed38 2652 *ppoll_list = poll_list;
ae7ad0a1 2653 return pmd->poll_cnt;
e4cfed38
PS
2654}
2655
6c3eee82 2656static void *
e4cfed38 2657pmd_thread_main(void *f_)
6c3eee82 2658{
65f13b50 2659 struct dp_netdev_pmd_thread *pmd = f_;
e4cfed38 2660 unsigned int lc = 0;
f7791740 2661 struct rxq_poll *poll_list;
84067a4c 2662 unsigned int port_seq = PMD_INITIAL_SEQ;
e4cfed38
PS
2663 int poll_cnt;
2664 int i;
6c3eee82 2665
e4cfed38
PS
2666 poll_cnt = 0;
2667 poll_list = NULL;
2668
65f13b50
AW
2669 /* Stores the pmd thread's 'pmd' to 'per_pmd_key'. */
2670 ovsthread_setspecific(pmd->dp->per_pmd_key, pmd);
2671 pmd_thread_setaffinity_cpu(pmd->core_id);
e4cfed38 2672reload:
65f13b50 2673 emc_cache_init(&pmd->flow_cache);
ae7ad0a1
IM
2674
2675 ovs_mutex_lock(&pmd->poll_mutex);
62453dad 2676 poll_cnt = pmd_load_queues(pmd, &poll_list);
ae7ad0a1 2677 ovs_mutex_unlock(&pmd->poll_mutex);
6c3eee82 2678
7dd671f0
MK
2679 /* List port/core affinity */
2680 for (i = 0; i < poll_cnt; i++) {
ce179f11
IM
2681 VLOG_DBG("Core %d processing port \'%s\' with queue-id %d\n",
2682 pmd->core_id, netdev_get_name(poll_list[i].port->netdev),
2683 netdev_rxq_get_queue_id(poll_list[i].rx));
7dd671f0
MK
2684 }
2685
accf8626
AW
2686 /* Signal here to make sure the pmd finishes
2687 * reloading the updated configuration. */
2688 dp_netdev_pmd_reload_done(pmd);
2689
e4cfed38 2690 for (;;) {
e4cfed38 2691 for (i = 0; i < poll_cnt; i++) {
65f13b50 2692 dp_netdev_process_rxq_port(pmd, poll_list[i].port, poll_list[i].rx);
e4cfed38
PS
2693 }
2694
2695 if (lc++ > 1024) {
84067a4c 2696 unsigned int seq;
6c3eee82 2697
e4cfed38 2698 lc = 0;
84067a4c 2699
67ad54cb 2700 emc_cache_slow_sweep(&pmd->flow_cache);
fbe0962b 2701 coverage_try_clear();
84067a4c
JR
2702 ovsrcu_quiesce();
2703
65f13b50 2704 atomic_read_relaxed(&pmd->change_seq, &seq);
84067a4c
JR
2705 if (seq != port_seq) {
2706 port_seq = seq;
6c3eee82
BP
2707 break;
2708 }
2709 }
e4cfed38 2710 }
6c3eee82 2711
65f13b50 2712 emc_cache_uninit(&pmd->flow_cache);
9bbf1c3d 2713
65f13b50 2714 if (!latch_is_set(&pmd->exit_latch)) {
e4cfed38
PS
2715 goto reload;
2716 }
6c3eee82 2717
accf8626
AW
2718 dp_netdev_pmd_reload_done(pmd);
2719
e4cfed38 2720 free(poll_list);
6c3eee82
BP
2721 return NULL;
2722}
2723
6b31e073
RW
2724static void
2725dp_netdev_disable_upcall(struct dp_netdev *dp)
2726 OVS_ACQUIRES(dp->upcall_rwlock)
2727{
2728 fat_rwlock_wrlock(&dp->upcall_rwlock);
2729}
2730
2731static void
2732dpif_netdev_disable_upcall(struct dpif *dpif)
2733 OVS_NO_THREAD_SAFETY_ANALYSIS
2734{
2735 struct dp_netdev *dp = get_dp_netdev(dpif);
2736 dp_netdev_disable_upcall(dp);
2737}
2738
2739static void
2740dp_netdev_enable_upcall(struct dp_netdev *dp)
2741 OVS_RELEASES(dp->upcall_rwlock)
2742{
2743 fat_rwlock_unlock(&dp->upcall_rwlock);
2744}
2745
2746static void
2747dpif_netdev_enable_upcall(struct dpif *dpif)
2748 OVS_NO_THREAD_SAFETY_ANALYSIS
2749{
2750 struct dp_netdev *dp = get_dp_netdev(dpif);
2751 dp_netdev_enable_upcall(dp);
2752}
2753
ae7ad0a1 2754static void
accf8626
AW
2755dp_netdev_pmd_reload_done(struct dp_netdev_pmd_thread *pmd)
2756{
2757 ovs_mutex_lock(&pmd->cond_mutex);
2758 xpthread_cond_signal(&pmd->cond);
2759 ovs_mutex_unlock(&pmd->cond_mutex);
2760}
2761
1c1e46ed
AW
2762/* Finds and refs the dp_netdev_pmd_thread on core 'core_id'. Returns
2763 * the pointer on success, otherwise NULL.
2764 *
2765 * The caller must unref the returned reference. */
65f13b50 2766static struct dp_netdev_pmd_thread *
bd5131ba 2767dp_netdev_get_pmd(struct dp_netdev *dp, unsigned core_id)
65f13b50
AW
2768{
2769 struct dp_netdev_pmd_thread *pmd;
55847abe 2770 const struct cmap_node *pnode;
65f13b50 2771
b19befae 2772 pnode = cmap_find(&dp->poll_threads, hash_int(core_id, 0));
1c1e46ed
AW
2773 if (!pnode) {
2774 return NULL;
2775 }
65f13b50
AW
2776 pmd = CONTAINER_OF(pnode, struct dp_netdev_pmd_thread, node);
2777
1c1e46ed 2778 return dp_netdev_pmd_try_ref(pmd) ? pmd : NULL;
65f13b50
AW
2779}
2780
f2eee189
AW
2781/* Sets the 'struct dp_netdev_pmd_thread' for non-pmd threads. */
2782static void
2783dp_netdev_set_nonpmd(struct dp_netdev *dp)
2784{
2785 struct dp_netdev_pmd_thread *non_pmd;
2786
2787 non_pmd = xzalloc(sizeof *non_pmd);
2788 dp_netdev_configure_pmd(non_pmd, dp, 0, NON_PMD_CORE_ID,
2789 OVS_NUMA_UNSPEC);
2790}
2791
1c1e46ed
AW
2792/* Caller must have valid pointer to 'pmd'. */
2793static bool
2794dp_netdev_pmd_try_ref(struct dp_netdev_pmd_thread *pmd)
2795{
2796 return ovs_refcount_try_ref_rcu(&pmd->ref_cnt);
2797}
2798
2799static void
2800dp_netdev_pmd_unref(struct dp_netdev_pmd_thread *pmd)
2801{
2802 if (pmd && ovs_refcount_unref(&pmd->ref_cnt) == 1) {
2803 ovsrcu_postpone(dp_netdev_destroy_pmd, pmd);
2804 }
2805}
2806
2807/* Given cmap position 'pos', tries to ref the next node. If try_ref()
2808 * fails, keeps checking the next nodes until reaching the end of the cmap.
2809 *
2810 * The caller must unref the returned reference. */
2811static struct dp_netdev_pmd_thread *
2812dp_netdev_pmd_get_next(struct dp_netdev *dp, struct cmap_position *pos)
2813{
2814 struct dp_netdev_pmd_thread *next;
2815
2816 do {
2817 struct cmap_node *node;
2818
2819 node = cmap_next_position(&dp->poll_threads, pos);
2820 next = node ? CONTAINER_OF(node, struct dp_netdev_pmd_thread, node)
2821 : NULL;
2822 } while (next && !dp_netdev_pmd_try_ref(next));
2823
2824 return next;
2825}
2826
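/* Illustrative sketch (not part of the upstream file): walking every pmd
 * thread with the helper above. dp_netdev_pmd_get_next() already refs each
 * returned thread, so the caller only needs the per-iteration unref. The
 * function name is hypothetical. */
static void
visit_all_pmds_example(struct dp_netdev *dp)
{
    struct cmap_position pos;
    struct dp_netdev_pmd_thread *pmd;

    memset(&pos, 0, sizeof pos);
    while ((pmd = dp_netdev_pmd_get_next(dp, &pos))) {
        /* ... use 'pmd'; it cannot be destroyed while referenced ... */
        dp_netdev_pmd_unref(pmd);
    }
}
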
65f13b50 2827/* Configures the 'pmd' based on the input argument. */
6c3eee82 2828static void
65f13b50 2829dp_netdev_configure_pmd(struct dp_netdev_pmd_thread *pmd, struct dp_netdev *dp,
bd5131ba 2830 int index, unsigned core_id, int numa_id)
65f13b50
AW
2831{
2832 pmd->dp = dp;
2833 pmd->index = index;
2834 pmd->core_id = core_id;
2835 pmd->numa_id = numa_id;
ae7ad0a1 2836 pmd->poll_cnt = 0;
1c1e46ed 2837
347ba9bb
IM
2838 atomic_init(&pmd->tx_qid,
2839 (core_id == NON_PMD_CORE_ID)
2840 ? ovs_numa_get_n_cores()
2841 : get_n_pmd_threads(dp));
2842
1c1e46ed 2843 ovs_refcount_init(&pmd->ref_cnt);
65f13b50
AW
2844 latch_init(&pmd->exit_latch);
2845 atomic_init(&pmd->change_seq, PMD_INITIAL_SEQ);
accf8626
AW
2846 xpthread_cond_init(&pmd->cond, NULL);
2847 ovs_mutex_init(&pmd->cond_mutex);
1c1e46ed 2848 ovs_mutex_init(&pmd->flow_mutex);
ae7ad0a1 2849 ovs_mutex_init(&pmd->poll_mutex);
1c1e46ed
AW
2850 dpcls_init(&pmd->cls);
2851 cmap_init(&pmd->flow_table);
417e7e66 2852 ovs_list_init(&pmd->poll_list);
65f13b50
AW
2853 /* Init the 'flow_cache', since there is no
2854 * actual thread created for NON_PMD_CORE_ID. */
2855 if (core_id == NON_PMD_CORE_ID) {
2856 emc_cache_init(&pmd->flow_cache);
2857 }
2858 cmap_insert(&dp->poll_threads, CONST_CAST(struct cmap_node *, &pmd->node),
2859 hash_int(core_id, 0));
2860}
2861
1c1e46ed
AW
2862static void
2863dp_netdev_destroy_pmd(struct dp_netdev_pmd_thread *pmd)
2864{
2865 dp_netdev_pmd_flow_flush(pmd);
2866 dpcls_destroy(&pmd->cls);
2867 cmap_destroy(&pmd->flow_table);
2868 ovs_mutex_destroy(&pmd->flow_mutex);
2869 latch_destroy(&pmd->exit_latch);
2870 xpthread_cond_destroy(&pmd->cond);
2871 ovs_mutex_destroy(&pmd->cond_mutex);
ae7ad0a1 2872 ovs_mutex_destroy(&pmd->poll_mutex);
1c1e46ed
AW
2873 free(pmd);
2874}
2875
2876/* Stops the pmd thread, removes it from the 'dp->poll_threads',
2877 * and unrefs the struct. */
65f13b50 2878static void
e4e74c3a 2879dp_netdev_del_pmd(struct dp_netdev *dp, struct dp_netdev_pmd_thread *pmd)
6c3eee82 2880{
65f13b50 2881 /* Uninit the 'flow_cache' since there is
1c1e46ed 2882 * no actual thread to uninit it for NON_PMD_CORE_ID. */
65f13b50
AW
2883 if (pmd->core_id == NON_PMD_CORE_ID) {
2884 emc_cache_uninit(&pmd->flow_cache);
2885 } else {
2886 latch_set(&pmd->exit_latch);
2887 dp_netdev_reload_pmd__(pmd);
2888 ovs_numa_unpin_core(pmd->core_id);
2889 xpthread_join(pmd->thread, NULL);
2890 }
ae7ad0a1
IM
2891
2892 /* Unref all ports and free poll_list. */
cc245ce8 2893 dp_netdev_pmd_clear_poll_list(pmd);
ae7ad0a1 2894
e4e74c3a
AW
2895 /* Purges the 'pmd''s flows after stopping the thread, but before
2896 * destroying the flows, so that the flow stats can be collected. */
2897 if (dp->dp_purge_cb) {
2898 dp->dp_purge_cb(dp->dp_purge_aux, pmd->core_id);
2899 }
65f13b50 2900 cmap_remove(&pmd->dp->poll_threads, &pmd->node, hash_int(pmd->core_id, 0));
1c1e46ed 2901 dp_netdev_pmd_unref(pmd);
65f13b50 2902}
6c3eee82 2903
65f13b50
AW
2904/* Destroys all pmd threads. */
2905static void
2906dp_netdev_destroy_all_pmds(struct dp_netdev *dp)
2907{
2908 struct dp_netdev_pmd_thread *pmd;
d916785c
DDP
2909 struct dp_netdev_pmd_thread **pmd_list;
2910 size_t k = 0, n_pmds;
2911
2912 n_pmds = cmap_count(&dp->poll_threads);
2913 pmd_list = xcalloc(n_pmds, sizeof *pmd_list);
65f13b50
AW
2914
2915 CMAP_FOR_EACH (pmd, node, &dp->poll_threads) {
d916785c
DDP
2916 /* We cannot call dp_netdev_del_pmd(), since it alters
2917 * 'dp->poll_threads' (while we're iterating it) and it
2918 * might quiesce. */
2919 ovs_assert(k < n_pmds);
2920 pmd_list[k++] = pmd;
6c3eee82 2921 }
d916785c
DDP
2922
2923 for (size_t i = 0; i < k; i++) {
2924 dp_netdev_del_pmd(dp, pmd_list[i]);
2925 }
2926 free(pmd_list);
65f13b50 2927}
6c3eee82 2928
347ba9bb
IM
2929/* Deletes all pmd threads on numa node 'numa_id' and
2930 * fixes tx_qids of other threads to keep them sequential. */
65f13b50
AW
2931static void
2932dp_netdev_del_pmds_on_numa(struct dp_netdev *dp, int numa_id)
2933{
2934 struct dp_netdev_pmd_thread *pmd;
347ba9bb
IM
2935 int n_pmds_on_numa, n_pmds;
2936 int *free_idx, k = 0;
d916785c 2937 struct dp_netdev_pmd_thread **pmd_list;
347ba9bb
IM
2938
2939 n_pmds_on_numa = get_n_pmd_threads_on_numa(dp, numa_id);
d916785c
DDP
2940 free_idx = xcalloc(n_pmds_on_numa, sizeof *free_idx);
2941 pmd_list = xcalloc(n_pmds_on_numa, sizeof *pmd_list);
6c3eee82 2942
65f13b50 2943 CMAP_FOR_EACH (pmd, node, &dp->poll_threads) {
d916785c
DDP
2944 /* We cannot call dp_netdev_del_pmd(), since it alters
2945 * 'dp->poll_threads' (while we're iterating it) and it
2946 * might quiesce. */
65f13b50 2947 if (pmd->numa_id == numa_id) {
347ba9bb 2948 atomic_read_relaxed(&pmd->tx_qid, &free_idx[k]);
d916785c
DDP
2949 pmd_list[k] = pmd;
2950 ovs_assert(k < n_pmds_on_numa);
347ba9bb 2951 k++;
65f13b50 2952 }
6c3eee82 2953 }
347ba9bb 2954
d916785c
DDP
2955 for (int i = 0; i < k; i++) {
2956 dp_netdev_del_pmd(dp, pmd_list[i]);
2957 }
2958
347ba9bb
IM
2959 n_pmds = get_n_pmd_threads(dp);
2960 CMAP_FOR_EACH (pmd, node, &dp->poll_threads) {
2961 int old_tx_qid;
2962
2963 atomic_read_relaxed(&pmd->tx_qid, &old_tx_qid);
2964
2965 if (old_tx_qid >= n_pmds) {
2966 int new_tx_qid = free_idx[--k];
2967
2968 atomic_store_relaxed(&pmd->tx_qid, new_tx_qid);
2969 }
2970 }
2971
d916785c 2972 free(pmd_list);
347ba9bb 2973 free(free_idx);
65f13b50 2974}
6c3eee82 2975
cc245ce8
IM
2976/* Deletes all rx queues from pmd->poll_list. */
2977static void
2978dp_netdev_pmd_clear_poll_list(struct dp_netdev_pmd_thread *pmd)
2979{
2980 struct rxq_poll *poll;
2981
2982 ovs_mutex_lock(&pmd->poll_mutex);
2983 LIST_FOR_EACH_POP (poll, node, &pmd->poll_list) {
cc245ce8
IM
2984 free(poll);
2985 }
2986 pmd->poll_cnt = 0;
2987 ovs_mutex_unlock(&pmd->poll_mutex);
2988}
2989
2990/* Deletes all rx queues of 'port' from poll_list of pmd thread and
2991 * reloads it if poll_list was changed. */
2992static void
2993dp_netdev_del_port_from_pmd(struct dp_netdev_port *port,
2994 struct dp_netdev_pmd_thread *pmd)
2995{
2996 struct rxq_poll *poll, *next;
2997 bool found = false;
2998
2999 ovs_mutex_lock(&pmd->poll_mutex);
3000 LIST_FOR_EACH_SAFE (poll, next, node, &pmd->poll_list) {
3001 if (poll->port == port) {
3002 found = true;
417e7e66 3003 ovs_list_remove(&poll->node);
cc245ce8
IM
3004 pmd->poll_cnt--;
3005 free(poll);
3006 }
3007 }
3008 ovs_mutex_unlock(&pmd->poll_mutex);
3009 if (found) {
3010 dp_netdev_reload_pmd__(pmd);
3011 }
3012}
3013
3014/* Deletes all rx queues of 'port' from all pmd threads of dp and
3015 * reloads them if needed. */
3016static void
3017dp_netdev_del_port_from_all_pmds(struct dp_netdev *dp,
3018 struct dp_netdev_port *port)
3019{
3020 int numa_id = netdev_get_numa_id(port->netdev);
3021 struct dp_netdev_pmd_thread *pmd;
3022
3023 CMAP_FOR_EACH (pmd, node, &dp->poll_threads) {
3024 if (pmd->numa_id == numa_id) {
3025 dp_netdev_del_port_from_pmd(port, pmd);
3026 }
3027 }
3028}
3029
ae7ad0a1
IM
3030/* Returns the PMD thread on this numa node with the fewest rx queues to
3031 * poll. Returns NULL if there are no PMD threads on this numa node.
3032 * Can be called safely only by the main thread. */
3033static struct dp_netdev_pmd_thread *
3034dp_netdev_less_loaded_pmd_on_numa(struct dp_netdev *dp, int numa_id)
3035{
3036 int min_cnt = -1;
3037 struct dp_netdev_pmd_thread *pmd, *res = NULL;
3038
3039 CMAP_FOR_EACH (pmd, node, &dp->poll_threads) {
3040 if (pmd->numa_id == numa_id
3041 && (min_cnt > pmd->poll_cnt || res == NULL)) {
3042 min_cnt = pmd->poll_cnt;
3043 res = pmd;
3044 }
3045 }
3046
3047 return res;
3048}
3049
3050/* Adds rx queue to poll_list of PMD thread. */
3051static void
3052dp_netdev_add_rxq_to_pmd(struct dp_netdev_pmd_thread *pmd,
3053 struct dp_netdev_port *port, struct netdev_rxq *rx)
3054 OVS_REQUIRES(pmd->poll_mutex)
3055{
3056 struct rxq_poll *poll = xmalloc(sizeof *poll);
3057
ae7ad0a1
IM
3058 poll->port = port;
3059 poll->rx = rx;
3060
417e7e66 3061 ovs_list_push_back(&pmd->poll_list, &poll->node);
ae7ad0a1
IM
3062 pmd->poll_cnt++;
3063}
3064
cc245ce8
IM
3065/* Distributes all rx queues of 'port' between all PMD threads and reloads
3066 * them if needed. */
3067static void
3068dp_netdev_add_port_to_pmds(struct dp_netdev *dp, struct dp_netdev_port *port)
3069{
3070 int numa_id = netdev_get_numa_id(port->netdev);
3071 struct dp_netdev_pmd_thread *pmd;
3072 struct hmapx to_reload;
3073 struct hmapx_node *node;
3074 int i;
3075
3076 hmapx_init(&to_reload);
3077 /* Cannot create pmd threads for invalid numa node. */
3078 ovs_assert(ovs_numa_numa_id_is_valid(numa_id));
3079
490e82af 3080 for (i = 0; i < port->n_rxq; i++) {
cc245ce8
IM
3081 pmd = dp_netdev_less_loaded_pmd_on_numa(dp, numa_id);
3082 if (!pmd) {
3083 /* There is no pmd threads on this numa node. */
3084 dp_netdev_set_pmds_on_numa(dp, numa_id);
3085 /* Assigning of rx queues done. */
3086 break;
3087 }
3088
3089 ovs_mutex_lock(&pmd->poll_mutex);
3090 dp_netdev_add_rxq_to_pmd(pmd, port, port->rxq[i]);
3091 ovs_mutex_unlock(&pmd->poll_mutex);
3092
3093 hmapx_add(&to_reload, pmd);
3094 }
3095
3096 HMAPX_FOR_EACH (node, &to_reload) {
3097 pmd = (struct dp_netdev_pmd_thread *) node->data;
3098 dp_netdev_reload_pmd__(pmd);
3099 }
3100
3101 hmapx_destroy(&to_reload);
3102}
3103
65f13b50
AW
3104/* Checks the numa node id of 'netdev' and starts pmd threads for
3105 * the numa node. */
3106static void
3107dp_netdev_set_pmds_on_numa(struct dp_netdev *dp, int numa_id)
3108{
3109 int n_pmds;
e4cfed38 3110
65f13b50
AW
3111 if (!ovs_numa_numa_id_is_valid(numa_id)) {
3112 VLOG_ERR("Cannot create pmd threads due to numa id (%d)"
3113 "invalid", numa_id);
3114 return ;
3115 }
3116
3117 n_pmds = get_n_pmd_threads_on_numa(dp, numa_id);
3118
3119 /* If there are already pmd threads created for the numa node
3120 * that 'netdev' is on, do nothing. Else, creates the
3121 * pmd threads for the numa node. */
3122 if (!n_pmds) {
ae7ad0a1 3123 int can_have, n_unpinned, i, index = 0;
2aca813c 3124 struct dp_netdev_pmd_thread **pmds;
ae7ad0a1 3125 struct dp_netdev_port *port;
65f13b50
AW
3126
3127 n_unpinned = ovs_numa_get_n_unpinned_cores_on_numa(numa_id);
3128 if (!n_unpinned) {
3129 VLOG_ERR("Cannot create pmd threads due to out of unpinned "
27955e98 3130 "cores on numa node %d", numa_id);
65f13b50
AW
3131 return;
3132 }
6c3eee82 3133
f2eee189
AW
3134 /* If cpu mask is specified, uses all unpinned cores, otherwise
3135 * tries creating NR_PMD_THREADS pmd threads. */
3136 can_have = dp->pmd_cmask ? n_unpinned : MIN(n_unpinned, NR_PMD_THREADS);
2aca813c 3137 pmds = xzalloc(can_have * sizeof *pmds);
65f13b50 3138 for (i = 0; i < can_have; i++) {
bd5131ba 3139 unsigned core_id = ovs_numa_get_unpinned_core_on_numa(numa_id);
2aca813c
IM
3140 pmds[i] = xzalloc(sizeof **pmds);
3141 dp_netdev_configure_pmd(pmds[i], dp, i, core_id, numa_id);
3142 }
ae7ad0a1
IM
3143
3144 /* Distributes rx queues of this numa node between new pmd threads. */
3145 CMAP_FOR_EACH (port, node, &dp->ports) {
3146 if (netdev_is_pmd(port->netdev)
3147 && netdev_get_numa_id(port->netdev) == numa_id) {
490e82af 3148 for (i = 0; i < port->n_rxq; i++) {
ae7ad0a1
IM
3149 /* Make thread-safety analyser happy. */
3150 ovs_mutex_lock(&pmds[index]->poll_mutex);
3151 dp_netdev_add_rxq_to_pmd(pmds[index], port, port->rxq[i]);
3152 ovs_mutex_unlock(&pmds[index]->poll_mutex);
3153 index = (index + 1) % can_have;
3154 }
3155 }
3156 }
3157
3158 /* Actual start of pmd threads. */
2aca813c 3159 for (i = 0; i < can_have; i++) {
2aca813c 3160 pmds[i]->thread = ovs_thread_create("pmd", pmd_thread_main, pmds[i]);
65f13b50 3161 }
2aca813c 3162 free(pmds);
65f13b50 3163 VLOG_INFO("Created %d pmd threads on numa node %d", can_have, numa_id);
6c3eee82
BP
3164 }
3165}
e4cfed38 3166
6c3eee82 3167\f
f2eee189
AW
3168/* Called after pmd threads config change. Restarts pmd threads with
3169 * new configuration. */
3170static void
3171dp_netdev_reset_pmd_threads(struct dp_netdev *dp)
3172{
3173 struct dp_netdev_port *port;
3174
3175 CMAP_FOR_EACH (port, node, &dp->ports) {
3176 if (netdev_is_pmd(port->netdev)) {
3177 int numa_id = netdev_get_numa_id(port->netdev);
3178
3179 dp_netdev_set_pmds_on_numa(dp, numa_id);
3180 }
3181 }
3182}
3183
b5cbbcf6
AZ
3184static char *
3185dpif_netdev_get_datapath_version(void)
3186{
3187 return xstrdup("<built-in>");
3188}
3189
72865317 3190static void
1c1e46ed 3191dp_netdev_flow_used(struct dp_netdev_flow *netdev_flow, int cnt, int size,
11bfdadd 3192 uint16_t tcp_flags, long long now)
72865317 3193{
eb94da30 3194 uint16_t flags;
72865317 3195
eb94da30
DDP
3196 atomic_store_relaxed(&netdev_flow->stats.used, now);
3197 non_atomic_ullong_add(&netdev_flow->stats.packet_count, cnt);
3198 non_atomic_ullong_add(&netdev_flow->stats.byte_count, size);
3199 atomic_read_relaxed(&netdev_flow->stats.tcp_flags, &flags);
3200 flags |= tcp_flags;
3201 atomic_store_relaxed(&netdev_flow->stats.tcp_flags, flags);
51852a57
BP
3202}
3203
3204static void
1c1e46ed
AW
3205dp_netdev_count_packet(struct dp_netdev_pmd_thread *pmd,
3206 enum dp_stat_type type, int cnt)
51852a57 3207{
eb94da30 3208 non_atomic_ullong_add(&pmd->stats.n[type], cnt);
51852a57
BP
3209}

static int
dp_netdev_upcall(struct dp_netdev_pmd_thread *pmd, struct dp_packet *packet_,
                 struct flow *flow, struct flow_wildcards *wc, ovs_u128 *ufid,
                 enum dpif_upcall_type type, const struct nlattr *userdata,
                 struct ofpbuf *actions, struct ofpbuf *put_actions)
{
    struct dp_netdev *dp = pmd->dp;
    struct flow_tnl orig_tunnel;
    int err;

    if (OVS_UNLIKELY(!dp->upcall_cb)) {
        return ENODEV;
    }

    /* Upcall processing expects the Geneve options to be in the translated
     * format but we need to retain the raw format for datapath use. */
    orig_tunnel.flags = flow->tunnel.flags;
    if (flow->tunnel.flags & FLOW_TNL_F_UDPIF) {
        orig_tunnel.metadata.present.len = flow->tunnel.metadata.present.len;
        memcpy(orig_tunnel.metadata.opts.gnv, flow->tunnel.metadata.opts.gnv,
               flow->tunnel.metadata.present.len);
        err = tun_metadata_from_geneve_udpif(&orig_tunnel, &orig_tunnel,
                                             &flow->tunnel);
        if (err) {
            return err;
        }
    }

    if (OVS_UNLIKELY(!VLOG_DROP_DBG(&upcall_rl))) {
        struct ds ds = DS_EMPTY_INITIALIZER;
        char *packet_str;
        struct ofpbuf key;
        struct odp_flow_key_parms odp_parms = {
            .flow = flow,
            .mask = &wc->masks,
            .odp_in_port = flow->in_port.odp_port,
            .support = dp_netdev_support,
        };

        ofpbuf_init(&key, 0);
        odp_flow_key_from_flow(&odp_parms, &key);
        packet_str = ofp_packet_to_string(dp_packet_data(packet_),
                                          dp_packet_size(packet_));

        odp_flow_key_format(key.data, key.size, &ds);

        VLOG_DBG("%s: %s upcall:\n%s\n%s", dp->name,
                 dpif_upcall_type_to_string(type), ds_cstr(&ds), packet_str);

        ofpbuf_uninit(&key);
        free(packet_str);

        ds_destroy(&ds);
    }

    err = dp->upcall_cb(packet_, flow, ufid, pmd->core_id, type, userdata,
                        actions, wc, put_actions, dp->upcall_aux);
    if (err && err != ENOSPC) {
        return err;
    }

    /* Translate tunnel metadata masks to datapath format. */
    if (wc) {
        if (wc->masks.tunnel.metadata.present.map) {
            struct geneve_opt opts[TLV_TOT_OPT_SIZE /
                                   sizeof(struct geneve_opt)];

            if (orig_tunnel.flags & FLOW_TNL_F_UDPIF) {
                tun_metadata_to_geneve_udpif_mask(&flow->tunnel,
                                                  &wc->masks.tunnel,
                                                  orig_tunnel.metadata.opts.gnv,
                                                  orig_tunnel.metadata.present.len,
                                                  opts);
            } else {
                orig_tunnel.metadata.present.len = 0;
            }

            memset(&wc->masks.tunnel.metadata, 0,
                   sizeof wc->masks.tunnel.metadata);
            memcpy(&wc->masks.tunnel.metadata.opts.gnv, opts,
                   orig_tunnel.metadata.present.len);
        }
        wc->masks.tunnel.metadata.present.len = 0xff;
    }

    /* Restore tunnel metadata. We need to use the saved options to ensure
     * that any unknown options are not lost. The generated mask will have
     * the same structure, matching on types and lengths but wildcarding
     * option data we don't care about. */
    if (orig_tunnel.flags & FLOW_TNL_F_UDPIF) {
        memcpy(&flow->tunnel.metadata.opts.gnv, orig_tunnel.metadata.opts.gnv,
               orig_tunnel.metadata.present.len);
        flow->tunnel.metadata.present.len = orig_tunnel.metadata.present.len;
        flow->tunnel.flags |= FLOW_TNL_F_UDPIF;
    }

    return err;
}
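
/* Sketch (not upstream code) of the save/translate/restore pattern
 * dp_netdev_upcall() applies to the Geneve options above, reduced to its
 * shape: keep a private copy of the raw value, hand the translated form to
 * the callback, then put the raw form back so the datapath's view is
 * unchanged.  'raw_to_cooked' and 'cb' are hypothetical stand-ins. */
static int
example_translate_around_callback(int *field, int (*raw_to_cooked)(int),
                                  int (*cb)(int *))
{
    int saved = *field;             /* Save the raw representation. */
    int err;

    *field = raw_to_cooked(saved);  /* Callback sees only the cooked form. */
    err = cb(field);
    *field = saved;                 /* Restore the raw form for the datapath. */
    return err;
}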

static inline uint32_t
dpif_netdev_packet_get_rss_hash(struct dp_packet *packet,
                                const struct miniflow *mf)
{
    uint32_t hash, recirc_depth;

    if (OVS_LIKELY(dp_packet_rss_valid(packet))) {
        hash = dp_packet_get_rss_hash(packet);
    } else {
        hash = miniflow_hash_5tuple(mf, 0);
        dp_packet_set_rss_hash(packet, hash);
    }

    /* The RSS hash must account for the recirculation depth to avoid
     * collisions in the exact match cache. */
    recirc_depth = *recirc_depth_get_unsafe();
    if (OVS_UNLIKELY(recirc_depth)) {
        hash = hash_finish(hash, recirc_depth);
        dp_packet_set_rss_hash(packet, hash);
    }
    return hash;
}
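
/* Sketch (not upstream code): why dpif_netdev_packet_get_rss_hash() folds
 * the recirculation depth into the hash.  A packet re-entering the datapath
 * (e.g. after a tunnel pop) would otherwise carry the same RSS hash as
 * before and could alias its pre-recirculation entry in the exact match
 * cache.  hash_finish() is the same finisher used above. */
static inline uint32_t
example_emc_hash_for_depth(uint32_t rss_hash, uint32_t recirc_depth)
{
    /* Depth 0 keeps the NIC-provided hash; deeper passes perturb it. */
    return recirc_depth ? hash_finish(rss_hash, recirc_depth) : rss_hash;
}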

struct packet_batch_per_flow {
    unsigned int byte_count;
    uint16_t tcp_flags;
    struct dp_netdev_flow *flow;

    struct dp_packet_batch array;
};

static inline void
packet_batch_per_flow_update(struct packet_batch_per_flow *batch,
                             struct dp_packet *packet,
                             const struct miniflow *mf)
{
    batch->byte_count += dp_packet_size(packet);
    batch->tcp_flags |= miniflow_get_tcp_flags(mf);
    batch->array.packets[batch->array.count++] = packet;
}

static inline void
packet_batch_per_flow_init(struct packet_batch_per_flow *batch,
                           struct dp_netdev_flow *flow)
{
    flow->batch = batch;

    batch->flow = flow;
    dp_packet_batch_init(&batch->array);
    batch->byte_count = 0;
    batch->tcp_flags = 0;
}

static inline void
packet_batch_per_flow_execute(struct packet_batch_per_flow *batch,
                              struct dp_netdev_pmd_thread *pmd,
                              long long now)
{
    struct dp_netdev_actions *actions;
    struct dp_netdev_flow *flow = batch->flow;

    dp_netdev_flow_used(flow, batch->array.count, batch->byte_count,
                        batch->tcp_flags, now);

    actions = dp_netdev_flow_get_actions(flow);

    dp_netdev_execute_actions(pmd, &batch->array, true,
                              actions->actions, actions->size);
}

static inline void
dp_netdev_queue_batches(struct dp_packet *pkt,
                        struct dp_netdev_flow *flow, const struct miniflow *mf,
                        struct packet_batch_per_flow *batches, size_t *n_batches)
{
    struct packet_batch_per_flow *batch = flow->batch;

    if (OVS_UNLIKELY(!batch)) {
        batch = &batches[(*n_batches)++];
        packet_batch_per_flow_init(batch, flow);
    }

    packet_batch_per_flow_update(batch, pkt, mf);
}
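
/* Sketch (not upstream code) of the per-burst lifecycle of the batching
 * helpers above; in the datapath this loop is spread across emc_processing(),
 * fast_path_processing() and dp_netdev_input__().  'pkts', 'flows' and 'mfs'
 * are hypothetical parallel arrays with cnt <= NETDEV_MAX_BURST. */
static void
example_batched_burst(struct dp_netdev_pmd_thread *pmd,
                      struct dp_packet *pkts[], struct dp_netdev_flow *flows[],
                      const struct miniflow *mfs[], size_t cnt, long long now)
{
    struct packet_batch_per_flow batches[NETDEV_MAX_BURST];
    size_t n_batches = 0;
    size_t i;

    for (i = 0; i < cnt; i++) {
        /* Opens a batch lazily on the first packet of each flow. */
        dp_netdev_queue_batches(pkts[i], flows[i], mfs[i], batches,
                                &n_batches);
    }
    for (i = 0; i < n_batches; i++) {
        batches[i].flow->batch = NULL;  /* Unlink before executing. */
    }
    for (i = 0; i < n_batches; i++) {
        packet_batch_per_flow_execute(&batches[i], pmd, now);
    }
}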

/* Try to process all of the 'cnt' packets in 'packets' using only the exact
 * match cache 'pmd->flow_cache'. If a flow is not found for a packet
 * 'packets[i]', its miniflow is copied into 'keys' and the packet pointer is
 * moved to the beginning of the 'packets' array.
 *
 * The function returns the number of packets that need to be processed in the
 * 'packets' array (they have been moved to the beginning of the vector).
 *
 * If 'md_is_valid' is false, the metadata in 'packets' is not valid and must
 * be initialized by this function using 'port_no'.
 */
static inline size_t
emc_processing(struct dp_netdev_pmd_thread *pmd, struct dp_packet_batch *packets_,
               struct netdev_flow_key *keys,
               struct packet_batch_per_flow batches[], size_t *n_batches,
               bool md_is_valid, odp_port_t port_no)
{
    struct emc_cache *flow_cache = &pmd->flow_cache;
    struct netdev_flow_key *key = &keys[0];
    size_t i, n_missed = 0, n_dropped = 0;
    struct dp_packet **packets = packets_->packets;
    int cnt = packets_->count;

    for (i = 0; i < cnt; i++) {
        struct dp_netdev_flow *flow;
        struct dp_packet *packet = packets[i];

        if (OVS_UNLIKELY(dp_packet_size(packet) < ETH_HEADER_LEN)) {
            dp_packet_delete(packet);
            n_dropped++;
            continue;
        }

        if (i != cnt - 1) {
            /* Prefetch next packet data and metadata. */
            OVS_PREFETCH(dp_packet_data(packets[i+1]));
            pkt_metadata_prefetch_init(&packets[i+1]->md);
        }

        if (!md_is_valid) {
            pkt_metadata_init(&packet->md, port_no);
        }
        miniflow_extract(packet, &key->mf);
        key->len = 0; /* Not computed yet. */
        key->hash = dpif_netdev_packet_get_rss_hash(packet, &key->mf);

        flow = emc_lookup(flow_cache, key);
        if (OVS_LIKELY(flow)) {
            dp_netdev_queue_batches(packet, flow, &key->mf, batches,
                                    n_batches);
        } else {
            /* Exact match cache missed. Group missed packets together at
             * the beginning of the 'packets' array. */
            packets[n_missed] = packet;
            /* 'keys[n_missed]' contains the key of the current packet and it
             * must be returned to the caller. The next key should be extracted
             * to 'keys[n_missed + 1]'. */
            key = &keys[++n_missed];
        }
    }

    dp_netdev_count_packet(pmd, DP_STAT_EXACT_HIT, cnt - n_dropped - n_missed);

    return n_missed;
}
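
/* Sketch (plain C, not upstream code) of the in-place compaction pattern
 * emc_processing() uses: misses are copied toward the front of the array,
 * order preserved, so the caller can hand a dense prefix to the slow path. */
static size_t
example_compact_misses(int items[], size_t cnt, bool (*is_hit)(int))
{
    size_t n_missed = 0;
    size_t i;

    for (i = 0; i < cnt; i++) {
        if (!is_hit(items[i])) {
            items[n_missed++] = items[i];   /* Keep misses contiguous. */
        }
    }
    return n_missed;        /* How many entries still need processing. */
}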

static inline void
fast_path_processing(struct dp_netdev_pmd_thread *pmd,
                     struct dp_packet_batch *packets_,
                     struct netdev_flow_key *keys,
                     struct packet_batch_per_flow batches[], size_t *n_batches)
{
    int cnt = packets_->count;
#if !defined(__CHECKER__) && !defined(_WIN32)
    const size_t PKT_ARRAY_SIZE = cnt;
#else
    /* Sparse or MSVC doesn't like variable length arrays. */
    enum { PKT_ARRAY_SIZE = NETDEV_MAX_BURST };
#endif
    struct dp_packet **packets = packets_->packets;
    struct dpcls_rule *rules[PKT_ARRAY_SIZE];
    struct dp_netdev *dp = pmd->dp;
    struct emc_cache *flow_cache = &pmd->flow_cache;
    struct dp_packet_batch b;
    int miss_cnt = 0, lost_cnt = 0;
    bool any_miss;
    size_t i;

    for (i = 0; i < cnt; i++) {
        /* Key length is needed in all the cases, hash computed on demand. */
        keys[i].len = netdev_flow_key_size(miniflow_n_values(&keys[i].mf));
    }
    any_miss = !dpcls_lookup(&pmd->cls, keys, rules, cnt);
    if (OVS_UNLIKELY(any_miss) && !fat_rwlock_tryrdlock(&dp->upcall_rwlock)) {
        uint64_t actions_stub[512 / 8], slow_stub[512 / 8];
        struct ofpbuf actions, put_actions;
        ovs_u128 ufid;

        ofpbuf_use_stub(&actions, actions_stub, sizeof actions_stub);
        ofpbuf_use_stub(&put_actions, slow_stub, sizeof slow_stub);

        for (i = 0; i < cnt; i++) {
            struct dp_netdev_flow *netdev_flow;
            struct ofpbuf *add_actions;
            struct match match;
            int error;

            if (OVS_LIKELY(rules[i])) {
                continue;
            }

            /* It's possible that an earlier slow path execution installed
             * a rule covering this flow. In this case, it's a lot cheaper
             * to catch it here than execute a miss. */
            netdev_flow = dp_netdev_pmd_lookup_flow(pmd, &keys[i]);
            if (netdev_flow) {
                rules[i] = &netdev_flow->cr;
                continue;
            }

            miss_cnt++;

            match.tun_md.valid = false;
            miniflow_expand(&keys[i].mf, &match.flow);

            ofpbuf_clear(&actions);
            ofpbuf_clear(&put_actions);

            dpif_flow_hash(dp->dpif, &match.flow, sizeof match.flow, &ufid);
            error = dp_netdev_upcall(pmd, packets[i], &match.flow, &match.wc,
                                     &ufid, DPIF_UC_MISS, NULL, &actions,
                                     &put_actions);
            if (OVS_UNLIKELY(error && error != ENOSPC)) {
                dp_packet_delete(packets[i]);
                lost_cnt++;
                continue;
            }

            /* The Netlink encoding of datapath flow keys cannot express
             * wildcarding the presence of a VLAN tag. Instead, a missing VLAN
             * tag is interpreted as exact match on the fact that there is no
             * VLAN. Unless we refactor a lot of code that translates between
             * Netlink and struct flow representations, we have to do the same
             * here. */
            if (!match.wc.masks.vlan_tci) {
                match.wc.masks.vlan_tci = htons(0xffff);
            }

            /* We can't allow the packet batching in the next loop to execute
             * the actions. Otherwise, if there are any slow path actions,
             * we'll send the packet up twice. */
            packet_batch_init_packet(&b, packets[i]);
            dp_netdev_execute_actions(pmd, &b, true,
                                      actions.data, actions.size);

            add_actions = put_actions.size ? &put_actions : &actions;
            if (OVS_LIKELY(error != ENOSPC)) {
                /* XXX: There's a race window where a flow covering this packet
                 * could have already been installed since we last did the flow
                 * lookup before upcall. This could be solved by moving the
                 * mutex lock outside the loop, but that's an awful long time
                 * to be locking everyone out of making flow installs. If we
                 * move to a per-core classifier, it would be reasonable. */
                ovs_mutex_lock(&pmd->flow_mutex);
                netdev_flow = dp_netdev_pmd_lookup_flow(pmd, &keys[i]);
                if (OVS_LIKELY(!netdev_flow)) {
                    netdev_flow = dp_netdev_flow_add(pmd, &match, &ufid,
                                                     add_actions->data,
                                                     add_actions->size);
                }
                ovs_mutex_unlock(&pmd->flow_mutex);

                emc_insert(flow_cache, &keys[i], netdev_flow);
            }
        }

        ofpbuf_uninit(&actions);
        ofpbuf_uninit(&put_actions);
        fat_rwlock_unlock(&dp->upcall_rwlock);
        dp_netdev_count_packet(pmd, DP_STAT_LOST, lost_cnt);
    } else if (OVS_UNLIKELY(any_miss)) {
        for (i = 0; i < cnt; i++) {
            if (OVS_UNLIKELY(!rules[i])) {
                dp_packet_delete(packets[i]);
                lost_cnt++;
                miss_cnt++;
            }
        }
    }

    for (i = 0; i < cnt; i++) {
        struct dp_packet *packet = packets[i];
        struct dp_netdev_flow *flow;

        if (OVS_UNLIKELY(!rules[i])) {
            continue;
        }

        flow = dp_netdev_flow_cast(rules[i]);

        emc_insert(flow_cache, &keys[i], flow);
        dp_netdev_queue_batches(packet, flow, &keys[i].mf, batches, n_batches);
    }

    dp_netdev_count_packet(pmd, DP_STAT_MASKED_HIT, cnt - miss_cnt);
    dp_netdev_count_packet(pmd, DP_STAT_MISS, miss_cnt);
    dp_netdev_count_packet(pmd, DP_STAT_LOST, lost_cnt);
}

/* Packets enter the datapath from a port (or from recirculation) here.
 *
 * For performance reasons a caller may choose not to initialize the metadata
 * in 'packets': in this case 'md_is_valid' is false and this function needs
 * to initialize it using 'port_no'. If the metadata in 'packets' is already
 * valid, 'md_is_valid' must be true and 'port_no' will be ignored. */
static void
dp_netdev_input__(struct dp_netdev_pmd_thread *pmd,
                  struct dp_packet_batch *packets,
                  bool md_is_valid, odp_port_t port_no)
{
    int cnt = packets->count;
#if !defined(__CHECKER__) && !defined(_WIN32)
    const size_t PKT_ARRAY_SIZE = cnt;
#else
    /* Sparse or MSVC doesn't like variable length arrays. */
    enum { PKT_ARRAY_SIZE = NETDEV_MAX_BURST };
#endif
    struct netdev_flow_key keys[PKT_ARRAY_SIZE];
    struct packet_batch_per_flow batches[PKT_ARRAY_SIZE];
    long long now = time_msec();
    size_t newcnt, n_batches, i;

    n_batches = 0;
    newcnt = emc_processing(pmd, packets, keys, batches, &n_batches,
                            md_is_valid, port_no);
    if (OVS_UNLIKELY(newcnt)) {
        packets->count = newcnt;
        fast_path_processing(pmd, packets, keys, batches, &n_batches);
    }

    for (i = 0; i < n_batches; i++) {
        batches[i].flow->batch = NULL;
    }

    for (i = 0; i < n_batches; i++) {
        packet_batch_per_flow_execute(&batches[i], pmd, now);
    }
}

static void
dp_netdev_input(struct dp_netdev_pmd_thread *pmd,
                struct dp_packet_batch *packets,
                odp_port_t port_no)
{
    dp_netdev_input__(pmd, packets, false, port_no);
}

static void
dp_netdev_recirculate(struct dp_netdev_pmd_thread *pmd,
                      struct dp_packet_batch *packets)
{
    dp_netdev_input__(pmd, packets, true, 0);
}

struct dp_netdev_execute_aux {
    struct dp_netdev_pmd_thread *pmd;
};

static void
dpif_netdev_register_dp_purge_cb(struct dpif *dpif, dp_purge_callback *cb,
                                 void *aux)
{
    struct dp_netdev *dp = get_dp_netdev(dpif);
    dp->dp_purge_aux = aux;
    dp->dp_purge_cb = cb;
}

static void
dpif_netdev_register_upcall_cb(struct dpif *dpif, upcall_callback *cb,
                               void *aux)
{
    struct dp_netdev *dp = get_dp_netdev(dpif);
    dp->upcall_aux = aux;
    dp->upcall_cb = cb;
}

static int
push_tnl_action(const struct dp_netdev *dp,
                const struct nlattr *attr,
                struct dp_packet_batch *batch)
{
    struct dp_netdev_port *tun_port;
    const struct ovs_action_push_tnl *data;
    int err;

    data = nl_attr_get(attr);

    tun_port = dp_netdev_lookup_port(dp, u32_to_odp(data->tnl_port));
    if (!tun_port) {
        err = -EINVAL;
        goto error;
    }
    err = netdev_push_header(tun_port->netdev, batch, data);
    if (!err) {
        return 0;
    }
error:
    dp_packet_delete_batch(batch, true);
    return err;
}
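
/* Sketch (not upstream code) of the cleanup discipline in push_tnl_action()
 * above: every failure path, including a netdev_push_header() error, funnels
 * through one 'error:' label that frees the batch, so no exit can leak the
 * packets.  'lookup_ok' and 'push' are hypothetical stand-ins. */
static int
example_single_exit_cleanup(struct dp_packet_batch *batch, bool lookup_ok,
                            int (*push)(struct dp_packet_batch *))
{
    int err;

    if (!lookup_ok) {
        err = -EINVAL;
        goto error;
    }
    err = push(batch);
    if (!err) {
        return 0;           /* Success: ownership passes downstream. */
    }
error:
    dp_packet_delete_batch(batch, true);    /* One cleanup point. */
    return err;
}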

static void
dp_execute_cb(void *aux_, struct dp_packet_batch *packets_,
              const struct nlattr *a, bool may_steal)
    OVS_NO_THREAD_SAFETY_ANALYSIS
{
    struct dp_netdev_execute_aux *aux = aux_;
    uint32_t *depth = recirc_depth_get();
    struct dp_netdev_pmd_thread *pmd = aux->pmd;
    struct dp_netdev *dp = pmd->dp;
    int type = nl_attr_type(a);
    struct dp_netdev_port *p;

    switch ((enum ovs_action_attr)type) {
    case OVS_ACTION_ATTR_OUTPUT:
        p = dp_netdev_lookup_port(dp, u32_to_odp(nl_attr_get_u32(a)));
        if (OVS_LIKELY(p)) {
            int tx_qid;

            atomic_read_relaxed(&pmd->tx_qid, &tx_qid);

            netdev_send(p->netdev, tx_qid, packets_, may_steal);
            return;
        }
        break;

    case OVS_ACTION_ATTR_TUNNEL_PUSH:
        if (*depth < MAX_RECIRC_DEPTH) {
            struct dp_packet_batch tnl_pkt;
            int err;

            if (!may_steal) {
                dp_packet_batch_clone(&tnl_pkt, packets_);
                packets_ = &tnl_pkt;
            }

            err = push_tnl_action(dp, a, packets_);
            if (!err) {
                (*depth)++;
                dp_netdev_recirculate(pmd, packets_);
                (*depth)--;
            }
            return;
        }
        break;

    case OVS_ACTION_ATTR_TUNNEL_POP:
        if (*depth < MAX_RECIRC_DEPTH) {
            odp_port_t portno = u32_to_odp(nl_attr_get_u32(a));

            p = dp_netdev_lookup_port(dp, portno);
            if (p) {
                struct dp_packet_batch tnl_pkt;
                int i;

                if (!may_steal) {
                    dp_packet_batch_clone(&tnl_pkt, packets_);
                    packets_ = &tnl_pkt;
                }

                netdev_pop_header(p->netdev, packets_);
                if (!packets_->count) {
                    return;
                }

                for (i = 0; i < packets_->count; i++) {
                    packets_->packets[i]->md.in_port.odp_port = portno;
                }

                (*depth)++;
                dp_netdev_recirculate(pmd, packets_);
                (*depth)--;
                return;
            }
        }
        break;

    case OVS_ACTION_ATTR_USERSPACE:
        if (!fat_rwlock_tryrdlock(&dp->upcall_rwlock)) {
            struct dp_packet **packets = packets_->packets;
            const struct nlattr *userdata;
            struct ofpbuf actions;
            struct flow flow;
            ovs_u128 ufid;
            int i;

            userdata = nl_attr_find_nested(a, OVS_USERSPACE_ATTR_USERDATA);
            ofpbuf_init(&actions, 0);

            for (i = 0; i < packets_->count; i++) {
                int error;
                struct dp_packet_batch b;

                ofpbuf_clear(&actions);

                flow_extract(packets[i], &flow);
                dpif_flow_hash(dp->dpif, &flow, sizeof flow, &ufid);
                error = dp_netdev_upcall(pmd, packets[i], &flow, NULL, &ufid,
                                         DPIF_UC_ACTION, userdata, &actions,
                                         NULL);
                if (!error || error == ENOSPC) {
                    packet_batch_init_packet(&b, packets[i]);
                    dp_netdev_execute_actions(pmd, &b, may_steal,
                                              actions.data, actions.size);
                } else if (may_steal) {
                    dp_packet_delete(packets[i]);
                }
            }
            ofpbuf_uninit(&actions);
            fat_rwlock_unlock(&dp->upcall_rwlock);

            return;
        }
        break;

    case OVS_ACTION_ATTR_RECIRC:
        if (*depth < MAX_RECIRC_DEPTH) {
            struct dp_packet_batch recirc_pkts;
            int i;

            if (!may_steal) {
                dp_packet_batch_clone(&recirc_pkts, packets_);
                packets_ = &recirc_pkts;
            }

            for (i = 0; i < packets_->count; i++) {
                packets_->packets[i]->md.recirc_id = nl_attr_get_u32(a);
            }

            (*depth)++;
            dp_netdev_recirculate(pmd, packets_);
            (*depth)--;

            return;
        }

        VLOG_WARN("Packet dropped. Max recirculation depth exceeded.");
        break;

    case OVS_ACTION_ATTR_CT:
        /* If a flow with this action is slow-pathed, datapath assistance is
         * required to implement it. However, we don't support this action
         * in the userspace datapath. */
        VLOG_WARN("Cannot execute conntrack action in userspace.");
        break;

    case OVS_ACTION_ATTR_PUSH_VLAN:
    case OVS_ACTION_ATTR_POP_VLAN:
    case OVS_ACTION_ATTR_PUSH_MPLS:
    case OVS_ACTION_ATTR_POP_MPLS:
    case OVS_ACTION_ATTR_SET:
    case OVS_ACTION_ATTR_SET_MASKED:
    case OVS_ACTION_ATTR_SAMPLE:
    case OVS_ACTION_ATTR_HASH:
    case OVS_ACTION_ATTR_UNSPEC:
    case __OVS_ACTION_ATTR_MAX:
        OVS_NOT_REACHED();
    }

    dp_packet_delete_batch(packets_, may_steal);
}
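
/* Sketch (not upstream code) of the ownership rule dp_execute_cb() applies:
 * when the caller keeps the packets (!may_steal), destructive actions such
 * as tunnel push/pop or recirculation run on a clone so that the original
 * batch survives for any following actions.  'destructive_act' is a
 * hypothetical stand-in. */
static void
example_act_maybe_on_clone(struct dp_packet_batch *packets_, bool may_steal,
                           void (*destructive_act)(struct dp_packet_batch *))
{
    struct dp_packet_batch clone;

    if (!may_steal) {
        dp_packet_batch_clone(&clone, packets_);  /* Copies every packet. */
        packets_ = &clone;
    }
    destructive_act(packets_);  /* May now consume 'packets_' freely. */
}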

static void
dp_netdev_execute_actions(struct dp_netdev_pmd_thread *pmd,
                          struct dp_packet_batch *packets,
                          bool may_steal,
                          const struct nlattr *actions, size_t actions_len)
{
    struct dp_netdev_execute_aux aux = { pmd };

    odp_execute_actions(&aux, packets, may_steal, actions,
                        actions_len, dp_execute_cb);
}

const struct dpif_class dpif_netdev_class = {
    "netdev",
    dpif_netdev_init,
    dpif_netdev_enumerate,
    dpif_netdev_port_open_type,
    dpif_netdev_open,
    dpif_netdev_close,
    dpif_netdev_destroy,
    dpif_netdev_run,
    dpif_netdev_wait,
    dpif_netdev_get_stats,
    dpif_netdev_port_add,
    dpif_netdev_port_del,
    dpif_netdev_port_query_by_number,
    dpif_netdev_port_query_by_name,
    NULL,                       /* port_get_pid */
    dpif_netdev_port_dump_start,
    dpif_netdev_port_dump_next,
    dpif_netdev_port_dump_done,
    dpif_netdev_port_poll,
    dpif_netdev_port_poll_wait,
    dpif_netdev_flow_flush,
    dpif_netdev_flow_dump_create,
    dpif_netdev_flow_dump_destroy,
    dpif_netdev_flow_dump_thread_create,
    dpif_netdev_flow_dump_thread_destroy,
    dpif_netdev_flow_dump_next,
    dpif_netdev_operate,
    NULL,                       /* recv_set */
    NULL,                       /* handlers_set */
    dpif_netdev_pmd_set,
    dpif_netdev_queue_to_priority,
    NULL,                       /* recv */
    NULL,                       /* recv_wait */
    NULL,                       /* recv_purge */
    dpif_netdev_register_dp_purge_cb,
    dpif_netdev_register_upcall_cb,
    dpif_netdev_enable_upcall,
    dpif_netdev_disable_upcall,
    dpif_netdev_get_datapath_version,
    NULL,                       /* ct_dump_start */
    NULL,                       /* ct_dump_next */
    NULL,                       /* ct_dump_done */
    NULL,                       /* ct_flush */
};

static void
dpif_dummy_change_port_number(struct unixctl_conn *conn, int argc OVS_UNUSED,
                              const char *argv[], void *aux OVS_UNUSED)
{
    struct dp_netdev_port *old_port;
    struct dp_netdev_port *new_port;
    struct dp_netdev *dp;
    odp_port_t port_no;

    ovs_mutex_lock(&dp_netdev_mutex);
    dp = shash_find_data(&dp_netdevs, argv[1]);
    if (!dp || !dpif_netdev_class_is_dummy(dp->class)) {
        ovs_mutex_unlock(&dp_netdev_mutex);
        unixctl_command_reply_error(conn, "unknown datapath or not a dummy");
        return;
    }
    ovs_refcount_ref(&dp->ref_cnt);
    ovs_mutex_unlock(&dp_netdev_mutex);

    ovs_mutex_lock(&dp->port_mutex);
    if (get_port_by_name(dp, argv[2], &old_port)) {
        unixctl_command_reply_error(conn, "unknown port");
        goto exit;
    }

    port_no = u32_to_odp(atoi(argv[3]));
    if (!port_no || port_no == ODPP_NONE) {
        unixctl_command_reply_error(conn, "bad port number");
        goto exit;
    }
    if (dp_netdev_lookup_port(dp, port_no)) {
        unixctl_command_reply_error(conn, "port number already in use");
        goto exit;
    }

    /* Remove old port. */
    cmap_remove(&dp->ports, &old_port->node, hash_port_no(old_port->port_no));
    ovsrcu_postpone(free, old_port);

    /* Insert new port (cmap semantics mean we cannot re-insert 'old_port'). */
    new_port = xmemdup(old_port, sizeof *old_port);
    new_port->port_no = port_no;
    cmap_insert(&dp->ports, &new_port->node, hash_port_no(port_no));

    seq_change(dp->port_seq);
    unixctl_command_reply(conn, NULL);

exit:
    ovs_mutex_unlock(&dp->port_mutex);
    dp_netdev_unref(dp);
}
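
/* Sketch (not upstream code) of the replace-by-copy step above: concurrent
 * RCU readers may still be walking the port cmap, so the port is never
 * modified or re-inserted in place.  A copy carries the new number, and the
 * old node is freed only after a grace period. */
static struct dp_netdev_port *
example_rcu_replace_port(struct cmap *ports, struct dp_netdev_port *old_port,
                         odp_port_t new_port_no)
{
    struct dp_netdev_port *new_port = xmemdup(old_port, sizeof *old_port);

    new_port->port_no = new_port_no;
    cmap_remove(ports, &old_port->node, hash_port_no(old_port->port_no));
    cmap_insert(ports, &new_port->node, hash_port_no(new_port_no));
    ovsrcu_postpone(free, old_port);    /* Deferred until readers quiesce. */
    return new_port;
}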

static void
dpif_dummy_register__(const char *type)
{
    struct dpif_class *class;

    class = xmalloc(sizeof *class);
    *class = dpif_netdev_class;
    class->type = xstrdup(type);
    dp_register_provider(class);
}

static void
dpif_dummy_override(const char *type)
{
    int error;

    /*
     * Ignore EAFNOSUPPORT to allow --enable-dummy=system with
     * a userland-only build.  It's useful for the testsuite.
     */
    error = dp_unregister_provider(type);
    if (error == 0 || error == EAFNOSUPPORT) {
        dpif_dummy_register__(type);
    }
}

void
dpif_dummy_register(enum dummy_level level)
{
    if (level == DUMMY_OVERRIDE_ALL) {
        struct sset types;
        const char *type;

        sset_init(&types);
        dp_enumerate_types(&types);
        SSET_FOR_EACH (type, &types) {
            dpif_dummy_override(type);
        }
        sset_destroy(&types);
    } else if (level == DUMMY_OVERRIDE_SYSTEM) {
        dpif_dummy_override("system");
    }

    dpif_dummy_register__("dummy");

    unixctl_command_register("dpif-dummy/change-port-number",
                             "dp port new-number",
                             3, 3, dpif_dummy_change_port_number, NULL);
}
\f
/* Datapath Classifier. */

/* A set of rules that all have the same fields wildcarded. */
struct dpcls_subtable {
    /* The fields are only used by writers. */
    struct cmap_node cmap_node OVS_GUARDED; /* Within dpcls 'subtables_map'. */

    /* These fields are accessed by readers. */
    struct cmap rules;           /* Contains "struct dpcls_rule"s. */
    struct netdev_flow_key mask; /* Wildcards for fields (const). */
    /* 'mask' must be the last field, additional space is allocated here. */
};

/* Initializes 'cls' as a classifier that initially contains no classification
 * rules. */
static void
dpcls_init(struct dpcls *cls)
{
    cmap_init(&cls->subtables_map);
    pvector_init(&cls->subtables);
}

static void
dpcls_destroy_subtable(struct dpcls *cls, struct dpcls_subtable *subtable)
{
    pvector_remove(&cls->subtables, subtable);
    cmap_remove(&cls->subtables_map, &subtable->cmap_node,
                subtable->mask.hash);
    cmap_destroy(&subtable->rules);
    ovsrcu_postpone(free, subtable);
}

/* Destroys 'cls'.  Rules within 'cls', if any, are not freed; this is the
 * caller's responsibility.
 * May only be called after all the readers have been terminated. */
static void
dpcls_destroy(struct dpcls *cls)
{
    if (cls) {
        struct dpcls_subtable *subtable;

        CMAP_FOR_EACH (subtable, cmap_node, &cls->subtables_map) {
            ovs_assert(cmap_count(&subtable->rules) == 0);
            dpcls_destroy_subtable(cls, subtable);
        }
        cmap_destroy(&cls->subtables_map);
        pvector_destroy(&cls->subtables);
    }
}

static struct dpcls_subtable *
dpcls_create_subtable(struct dpcls *cls, const struct netdev_flow_key *mask)
{
    struct dpcls_subtable *subtable;

    /* Need to add one. */
    subtable = xmalloc(sizeof *subtable
                       - sizeof subtable->mask.mf + mask->len);
    cmap_init(&subtable->rules);
    netdev_flow_key_clone(&subtable->mask, mask);
    cmap_insert(&cls->subtables_map, &subtable->cmap_node, mask->hash);
    pvector_insert(&cls->subtables, subtable, 0);
    pvector_publish(&cls->subtables);

    return subtable;
}

static inline struct dpcls_subtable *
dpcls_find_subtable(struct dpcls *cls, const struct netdev_flow_key *mask)
{
    struct dpcls_subtable *subtable;

    CMAP_FOR_EACH_WITH_HASH (subtable, cmap_node, mask->hash,
                             &cls->subtables_map) {
        if (netdev_flow_key_equal(&subtable->mask, mask)) {
            return subtable;
        }
    }
    return dpcls_create_subtable(cls, mask);
}

/* Insert 'rule' into 'cls'. */
static void
dpcls_insert(struct dpcls *cls, struct dpcls_rule *rule,
             const struct netdev_flow_key *mask)
{
    struct dpcls_subtable *subtable = dpcls_find_subtable(cls, mask);

    rule->mask = &subtable->mask;
    cmap_insert(&subtable->rules, &rule->cmap_node, rule->flow.hash);
}

/* Removes 'rule' from 'cls', also destructing the 'rule'. */
static void
dpcls_remove(struct dpcls *cls, struct dpcls_rule *rule)
{
    struct dpcls_subtable *subtable;

    ovs_assert(rule->mask);

    INIT_CONTAINER(subtable, rule->mask, mask);

    if (cmap_remove(&subtable->rules, &rule->cmap_node, rule->flow.hash)
        == 0) {
        dpcls_destroy_subtable(cls, subtable);
        pvector_publish(&cls->subtables);
    }
}
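
/* Worked example (illustrative; the rules and masks are hypothetical): rules
 * that wildcard exactly the same fields share one subtable, so lookup cost
 * scales with the number of distinct masks rather than the number of rules. */
static void
example_subtable_sharing(struct dpcls *cls, struct dpcls_rule *r1,
                         struct dpcls_rule *r2, struct dpcls_rule *r3,
                         const struct netdev_flow_key *dst_only_mask,
                         const struct netdev_flow_key *src_only_mask)
{
    dpcls_insert(cls, r1, dst_only_mask);   /* Creates the first subtable. */
    dpcls_insert(cls, r2, dst_only_mask);   /* Same mask: subtable reused. */
    dpcls_insert(cls, r3, src_only_mask);   /* New mask: second subtable. */
}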

/* Returns true if 'target' satisfies 'key' in 'mask', that is, if for each
 * 1-bit in 'mask' the corresponding bits in 'key' and 'target' are the
 * same. */
static inline bool
dpcls_rule_matches_key(const struct dpcls_rule *rule,
                       const struct netdev_flow_key *target)
{
    const uint64_t *keyp = miniflow_get_values(&rule->flow.mf);
    const uint64_t *maskp = miniflow_get_values(&rule->mask->mf);
    uint64_t value;

    NETDEV_FLOW_KEY_FOR_EACH_IN_FLOWMAP(value, target, rule->flow.mf.map) {
        if (OVS_UNLIKELY((value & *maskp++) != *keyp++)) {
            return false;
        }
    }
    return true;
}
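
/* Sketch (plain C, not upstream code) of the comparison above: the rule's
 * flow values are stored pre-masked, so a match costs one AND and one
 * compare per 64-bit word actually present in the rule's miniflow. */
static bool
example_masked_match(const uint64_t key[], const uint64_t mask[],
                     const uint64_t target[], size_t n_words)
{
    size_t i;

    for (i = 0; i < n_words; i++) {
        if ((target[i] & mask[i]) != key[i]) {  /* key[i] is already masked. */
            return false;
        }
    }
    return true;
}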

/* For each miniflow in 'keys' performs a classifier lookup writing the result
 * into the corresponding slot in 'rules'.
 *
 * This function is optimized for use in the userspace datapath and therefore
 * does not implement a lot of features available in the standard
 * classifier_lookup() function.  Specifically, it does not implement
 * priorities, instead returning any rule which matches the flow.
 *
 * Returns true if all flows found a corresponding rule. */
static bool
dpcls_lookup(const struct dpcls *cls, const struct netdev_flow_key keys[],
             struct dpcls_rule **rules, const size_t cnt)
{
    /* The batch size 16 was experimentally found faster than 8 or 32. */
    typedef uint16_t map_type;
#define MAP_BITS (sizeof(map_type) * CHAR_BIT)

#if !defined(__CHECKER__) && !defined(_WIN32)
    const int N_MAPS = DIV_ROUND_UP(cnt, MAP_BITS);
#else
    enum { N_MAPS = DIV_ROUND_UP(NETDEV_MAX_BURST, MAP_BITS) };
#endif
    map_type maps[N_MAPS];
    struct dpcls_subtable *subtable;

    memset(maps, 0xff, sizeof maps);
    if (cnt % MAP_BITS) {
        maps[N_MAPS - 1] >>= MAP_BITS - cnt % MAP_BITS; /* Clear extra bits. */
    }
    memset(rules, 0, cnt * sizeof *rules);

    PVECTOR_FOR_EACH (subtable, &cls->subtables) {
        const struct netdev_flow_key *mkeys = keys;
        struct dpcls_rule **mrules = rules;
        map_type remains = 0;
        int m;

        BUILD_ASSERT_DECL(sizeof remains == sizeof *maps);

        for (m = 0; m < N_MAPS; m++, mkeys += MAP_BITS, mrules += MAP_BITS) {
            uint32_t hashes[MAP_BITS];
            const struct cmap_node *nodes[MAP_BITS];
            unsigned long map = maps[m];
            int i;

            if (!map) {
                continue; /* Skip empty maps. */
            }

            /* Compute hashes for the remaining keys. */
            ULLONG_FOR_EACH_1(i, map) {
                hashes[i] = netdev_flow_key_hash_in_mask(&mkeys[i],
                                                         &subtable->mask);
            }
            /* Lookup. */
            map = cmap_find_batch(&subtable->rules, map, hashes, nodes);
            /* Check results. */
            ULLONG_FOR_EACH_1(i, map) {
                struct dpcls_rule *rule;

                CMAP_NODE_FOR_EACH (rule, cmap_node, nodes[i]) {
                    if (OVS_LIKELY(dpcls_rule_matches_key(rule, &mkeys[i]))) {
                        mrules[i] = rule;
                        goto next;
                    }
                }
                ULLONG_SET0(map, i); /* Did not match. */
            next:
                ; /* Keep Sparse happy. */
            }
            maps[m] &= ~map; /* Clear the found rules. */
            remains |= maps[m];
        }
        if (!remains) {
            return true; /* All found. */
        }
    }
    return false; /* Some misses. */
}
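
/* Sketch (plain C, not upstream code) of the bitmap bookkeeping above: each
 * set bit in the map is a still-unresolved key.  A subtable clears the bits
 * it resolves, and the search stops as soon as no bits remain.  'lookup' is
 * a hypothetical stand-in for the per-subtable batch lookup. */
static uint16_t
example_bitmap_batch_lookup(uint16_t todo, int n_subtables,
                            bool (*lookup)(int subtable, int key))
{
    int s, i;

    for (s = 0; s < n_subtables && todo; s++) {
        uint16_t found = 0;

        for (i = 0; i < 16; i++) {
            if ((todo & (1u << i)) && lookup(s, i)) {
                found |= 1u << i;
            }
        }
        todo &= ~found;     /* Only misses go on to the next subtable. */
    }
    return todo;            /* Any bit still set is an overall miss. */
}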