]> git.proxmox.com Git - mirror_ovs.git/blame - lib/dpif-netdev.c
ofproto: Break out monitor deletion code
[mirror_ovs.git] / lib / dpif-netdev.c
CommitLineData
72865317 1/*
ff073a71 2 * Copyright (c) 2009, 2010, 2011, 2012, 2013, 2014 Nicira, Inc.
72865317
BP
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include <config.h>
18#include "dpif.h"
19
72865317
BP
20#include <ctype.h>
21#include <errno.h>
22#include <fcntl.h>
23#include <inttypes.h>
72865317 24#include <netinet/in.h>
9d82ec47 25#include <sys/socket.h>
7f3adc00 26#include <net/if.h>
cdee00fd 27#include <stdint.h>
72865317
BP
28#include <stdlib.h>
29#include <string.h>
30#include <sys/ioctl.h>
31#include <sys/stat.h>
72865317
BP
32#include <unistd.h>
33
2c0ea78f 34#include "classifier.h"
59e6d833 35#include "cmap.h"
72865317 36#include "csum.h"
614c4892 37#include "dpif.h"
72865317 38#include "dpif-provider.h"
614c4892 39#include "dummy.h"
36956a7d 40#include "dynamic-string.h"
72865317
BP
41#include "flow.h"
42#include "hmap.h"
6c3eee82 43#include "latch.h"
72865317 44#include "list.h"
8c301900 45#include "meta-flow.h"
72865317 46#include "netdev.h"
8617afff 47#include "netdev-dpdk.h"
de281153 48#include "netdev-vport.h"
cdee00fd 49#include "netlink.h"
f094af7b 50#include "odp-execute.h"
72865317
BP
51#include "odp-util.h"
52#include "ofp-print.h"
53#include "ofpbuf.h"
61e7deb1 54#include "ovs-rcu.h"
91088554 55#include "packet-dpif.h"
72865317
BP
56#include "packets.h"
57#include "poll-loop.h"
26c6b6cd 58#include "random.h"
d33ed218 59#include "seq.h"
462278db 60#include "shash.h"
0cbfe35d 61#include "sset.h"
72865317 62#include "timeval.h"
74cc3969 63#include "unixctl.h"
72865317 64#include "util.h"
72865317 65#include "vlog.h"
5136ce49 66
d98e6007 67VLOG_DEFINE_THIS_MODULE(dpif_netdev);
72865317 68
2c0ea78f
GS
69/* By default, choose a priority in the middle. */
70#define NETDEV_RULE_PRIORITY 0x8000
71
e4cfed38 72#define NR_THREADS 1
adcf00ba
AZ
73/* Use per thread recirc_depth to prevent recirculation loop. */
74#define MAX_RECIRC_DEPTH 5
75DEFINE_STATIC_PER_THREAD_DATA(uint32_t, recirc_depth, 0)
e4cfed38 76
72865317 77/* Configuration parameters. */
72865317
BP
78enum { MAX_FLOWS = 65536 }; /* Maximum number of flows in flow table. */
79
856081f6 80/* Queues. */
856081f6
BP
81enum { MAX_QUEUE_LEN = 128 }; /* Maximum number of packets per queue. */
82enum { QUEUE_MASK = MAX_QUEUE_LEN - 1 };
83BUILD_ASSERT_DECL(IS_POW2(MAX_QUEUE_LEN));
84
8a4e3a85
BP
85/* Protects against changes to 'dp_netdevs'. */
86static struct ovs_mutex dp_netdev_mutex = OVS_MUTEX_INITIALIZER;
87
88/* Contains all 'struct dp_netdev's. */
89static struct shash dp_netdevs OVS_GUARDED_BY(dp_netdev_mutex)
90 = SHASH_INITIALIZER(&dp_netdevs);
91
d88b629b
BP
92struct dp_netdev_upcall {
93 struct dpif_upcall upcall; /* Queued upcall information. */
94 struct ofpbuf buf; /* ofpbuf instance for upcall.packet. */
95};
96
63be20be 97/* A queue passing packets from a struct dp_netdev to its clients (handlers).
8a4e3a85
BP
98 *
99 *
100 * Thread-safety
101 * =============
102 *
63be20be
AW
103 * Any access at all requires the owning 'dp_netdev''s queue_rwlock and
104 * its own mutex. */
856081f6 105struct dp_netdev_queue {
63be20be
AW
106 struct ovs_mutex mutex;
107 struct seq *seq; /* Incremented whenever a packet is queued. */
f5126b57
BP
108 struct dp_netdev_upcall upcalls[MAX_QUEUE_LEN] OVS_GUARDED;
109 unsigned int head OVS_GUARDED;
110 unsigned int tail OVS_GUARDED;
856081f6
BP
111};
112
8a4e3a85
BP
113/* Datapath based on the network device interface from netdev.h.
114 *
115 *
116 * Thread-safety
117 * =============
118 *
119 * Some members, marked 'const', are immutable. Accessing other members
120 * requires synchronization, as noted in more detail below.
121 *
122 * Acquisition order is, from outermost to innermost:
123 *
124 * dp_netdev_mutex (global)
59e6d833 125 * port_mutex
8a4e3a85
BP
126 * flow_mutex
127 * cls.rwlock
63be20be 128 * queue_rwlock
8a4e3a85 129 */
72865317 130struct dp_netdev {
8a4e3a85
BP
131 const struct dpif_class *const class;
132 const char *const name;
6a8267c5
BP
133 struct ovs_refcount ref_cnt;
134 atomic_flag destroyed;
72865317 135
8a4e3a85
BP
136 /* Flows.
137 *
138 * Readers of 'cls' and 'flow_table' must take a 'cls->rwlock' read lock.
139 *
140 * Writers of 'cls' and 'flow_table' must take the 'flow_mutex' and then
141 * the 'cls->rwlock' write lock. (The outer 'flow_mutex' allows writers to
142 * atomically perform multiple operations on 'cls' and 'flow_table'.)
143 */
144 struct ovs_mutex flow_mutex;
145 struct classifier cls; /* Classifier. Protected by cls.rwlock. */
146 struct hmap flow_table OVS_GUARDED; /* Flow table. */
147
148 /* Queues.
149 *
63be20be
AW
150 * 'queue_rwlock' protects the modification of 'handler_queues' and
151 * 'n_handlers'. The queue elements are protected by its
152 * 'handler_queues''s mutex. */
153 struct fat_rwlock queue_rwlock;
154 struct dp_netdev_queue *handler_queues;
155 uint32_t n_handlers;
72865317 156
8a4e3a85
BP
157 /* Statistics.
158 *
51852a57
BP
159 * ovsthread_stats is internally synchronized. */
160 struct ovsthread_stats stats; /* Contains 'struct dp_netdev_stats *'. */
72865317 161
8a4e3a85
BP
162 /* Ports.
163 *
59e6d833
BP
164 * Protected by RCU. Take the mutex to add or remove ports. */
165 struct ovs_mutex port_mutex;
166 struct cmap ports;
d33ed218 167 struct seq *port_seq; /* Incremented whenever a port changes. */
6c3eee82
BP
168
169 /* Forwarding threads. */
170 struct latch exit_latch;
e4cfed38
PS
171 struct pmd_thread *pmd_threads;
172 size_t n_pmd_threads;
173 int pmd_count;
72865317
BP
174};
175
8a4e3a85 176static struct dp_netdev_port *dp_netdev_lookup_port(const struct dp_netdev *dp,
59e6d833 177 odp_port_t);
ff073a71 178
/* Per-datapath packet-disposition counters. */
enum dp_stat_type {
    DP_STAT_HIT,        /* Packets that matched in the flow table. */
    DP_STAT_MISS,       /* Packets that did not match. */
    DP_STAT_LOST,       /* Packets not passed up to the client. */
    DP_N_STATS
};
185
186/* Contained by struct dp_netdev's 'stats' member. */
187struct dp_netdev_stats {
188 struct ovs_mutex mutex; /* Protects 'n'. */
189
190 /* Indexed by DP_STAT_*, protected by 'mutex'. */
191 unsigned long long int n[DP_N_STATS] OVS_GUARDED;
192};
193
194
72865317
BP
195/* A port in a netdev-based datapath. */
196struct dp_netdev_port {
59e6d833 197 struct cmap_node node; /* Node in dp_netdev's 'ports'. */
ff073a71 198 odp_port_t port_no;
72865317 199 struct netdev *netdev;
4b609110 200 struct netdev_saved_flags *sf;
55c955bd 201 struct netdev_rxq **rxq;
b284085e 202 struct ovs_refcount ref_cnt;
0cbfe35d 203 char *type; /* Port type as requested by user. */
72865317
BP
204};
205
8cbf4f47
DDP
206
207/* Stores a miniflow */
208
209/* There are fields in the flow structure that we never use. Therefore we can
210 * save a few words of memory */
211#define NETDEV_KEY_BUF_SIZE_U32 (FLOW_U32S \
212 - FLOW_U32_SIZE(regs) \
213 - FLOW_U32_SIZE(metadata) \
214 )
215struct netdev_flow_key {
216 struct miniflow flow;
217 uint32_t buf[NETDEV_KEY_BUF_SIZE_U32];
218} key;
219
8a4e3a85
BP
220/* A flow in dp_netdev's 'flow_table'.
221 *
222 *
223 * Thread-safety
224 * =============
225 *
226 * Except near the beginning or ending of its lifespan, rule 'rule' belongs to
227 * its dp_netdev's classifier. The text below calls this classifier 'cls'.
228 *
229 * Motivation
230 * ----------
231 *
232 * The thread safety rules described here for "struct dp_netdev_flow" are
233 * motivated by two goals:
234 *
235 * - Prevent threads that read members of "struct dp_netdev_flow" from
236 * reading bad data due to changes by some thread concurrently modifying
237 * those members.
238 *
239 * - Prevent two threads making changes to members of a given "struct
240 * dp_netdev_flow" from interfering with each other.
241 *
242 *
243 * Rules
244 * -----
245 *
246 * A flow 'flow' may be accessed without a risk of being freed by code that
247 * holds a read-lock or write-lock on 'cls->rwlock' or that owns a reference to
248 * 'flow->ref_cnt' (or both). Code that needs to hold onto a flow for a while
249 * should take 'cls->rwlock', find the flow it needs, increment 'flow->ref_cnt'
250 * with dpif_netdev_flow_ref(), and drop 'cls->rwlock'.
251 *
252 * 'flow->ref_cnt' protects 'flow' from being freed. It doesn't protect the
253 * flow from being deleted from 'cls' (that's 'cls->rwlock') and it doesn't
45c626a3 254 * protect members of 'flow' from modification.
8a4e3a85
BP
255 *
256 * Some members, marked 'const', are immutable. Accessing other members
257 * requires synchronization, as noted in more detail below.
258 */
72865317 259struct dp_netdev_flow {
2c0ea78f 260 /* Packet classification. */
8a4e3a85 261 const struct cls_rule cr; /* In owning dp_netdev's 'cls'. */
2c0ea78f 262
8a4e3a85
BP
263 /* Hash table index by unmasked flow. */
264 const struct hmap_node node; /* In owning dp_netdev's 'flow_table'. */
265 const struct flow flow; /* The flow that created this entry. */
72865317 266
8a4e3a85
BP
267 /* Statistics.
268 *
269 * Reading or writing these members requires 'mutex'. */
679ba04c 270 struct ovsthread_stats stats; /* Contains "struct dp_netdev_flow_stats". */
8a4e3a85 271
45c626a3 272 /* Actions. */
61e7deb1 273 OVSRCU_TYPE(struct dp_netdev_actions *) actions;
72865317
BP
274};
275
61e7deb1 276static void dp_netdev_flow_free(struct dp_netdev_flow *);
8a4e3a85 277
679ba04c
BP
278/* Contained by struct dp_netdev_flow's 'stats' member. */
279struct dp_netdev_flow_stats {
280 struct ovs_mutex mutex; /* Guards all the other members. */
281
282 long long int used OVS_GUARDED; /* Last used time, in monotonic msecs. */
283 long long int packet_count OVS_GUARDED; /* Number of packets matched. */
284 long long int byte_count OVS_GUARDED; /* Number of bytes matched. */
285 uint16_t tcp_flags OVS_GUARDED; /* Bitwise-OR of seen tcp_flags values. */
286};
287
/* A set of datapath actions within a "struct dp_netdev_flow".
 *
 *
 * Thread-safety
 * =============
 *
 * A struct dp_netdev_actions 'actions' is protected with RCU. */
struct dp_netdev_actions {
    /* These members are immutable: they do not change during the struct's
     * lifetime. */
    struct nlattr *actions;     /* Sequence of OVS_ACTION_ATTR_* attributes. */
    unsigned int size;          /* Size of 'actions', in bytes. */
};
301
302struct dp_netdev_actions *dp_netdev_actions_create(const struct nlattr *,
303 size_t);
61e7deb1
BP
304struct dp_netdev_actions *dp_netdev_flow_get_actions(
305 const struct dp_netdev_flow *);
306static void dp_netdev_actions_free(struct dp_netdev_actions *);
a84cb64a 307
e4cfed38
PS
308/* PMD: Poll modes drivers. PMD accesses devices via polling to eliminate
309 * the performance overhead of interrupt processing. Therefore netdev can
310 * not implement rx-wait for these devices. dpif-netdev needs to poll
311 * these device to check for recv buffer. pmd-thread does polling for
312 * devices assigned to itself thread.
313 *
314 * DPDK used PMD for accessing NIC.
315 *
316 * A thread that receives packets from PMD ports, looks them up in the flow
317 * table, and executes the actions it finds.
318 **/
319struct pmd_thread {
6c3eee82
BP
320 struct dp_netdev *dp;
321 pthread_t thread;
e4cfed38
PS
322 int id;
323 atomic_uint change_seq;
6c3eee82
BP
324};
325
72865317
BP
326/* Interface to netdev-based datapath. */
327struct dpif_netdev {
328 struct dpif dpif;
329 struct dp_netdev *dp;
d33ed218 330 uint64_t last_port_seq;
72865317
BP
331};
332
8a4e3a85 333static int get_port_by_number(struct dp_netdev *dp, odp_port_t port_no,
59e6d833 334 struct dp_netdev_port **portp);
8a4e3a85 335static int get_port_by_name(struct dp_netdev *dp, const char *devname,
59e6d833 336 struct dp_netdev_port **portp);
8a4e3a85
BP
337static void dp_netdev_free(struct dp_netdev *)
338 OVS_REQUIRES(dp_netdev_mutex);
72865317 339static void dp_netdev_flow_flush(struct dp_netdev *);
8a4e3a85
BP
340static int do_add_port(struct dp_netdev *dp, const char *devname,
341 const char *type, odp_port_t port_no)
59e6d833 342 OVS_REQUIRES(dp->port_mutex);
c40b890f 343static void do_del_port(struct dp_netdev *dp, struct dp_netdev_port *)
59e6d833 344 OVS_REQUIRES(dp->port_mutex);
63be20be
AW
345static void dp_netdev_destroy_all_queues(struct dp_netdev *dp)
346 OVS_REQ_WRLOCK(dp->queue_rwlock);
614c4892
BP
347static int dpif_netdev_open(const struct dpif_class *, const char *name,
348 bool create, struct dpif **);
8cbf4f47
DDP
349static int dp_netdev_output_userspace(struct dp_netdev *dp, struct ofpbuf **,
350 int cnt, int queue_no, int type,
4f150744 351 const struct miniflow *,
e4cfed38 352 const struct nlattr *userdata);
8a4e3a85 353static void dp_netdev_execute_actions(struct dp_netdev *dp,
8cbf4f47
DDP
354 struct dpif_packet **, int c,
355 bool may_steal, struct pkt_metadata *,
4edb9ae9 356 const struct nlattr *actions,
e4cfed38 357 size_t actions_len);
91088554 358static void dp_netdev_port_input(struct dp_netdev *dp,
8cbf4f47
DDP
359 struct dpif_packet **packets, int cnt,
360 odp_port_t port_no);
e4cfed38
PS
361
362static void dp_netdev_set_pmd_threads(struct dp_netdev *, int n);
72865317
BP
363
364static struct dpif_netdev *
365dpif_netdev_cast(const struct dpif *dpif)
366{
cb22974d 367 ovs_assert(dpif->dpif_class->open == dpif_netdev_open);
72865317
BP
368 return CONTAINER_OF(dpif, struct dpif_netdev, dpif);
369}
370
371static struct dp_netdev *
372get_dp_netdev(const struct dpif *dpif)
373{
374 return dpif_netdev_cast(dpif)->dp;
375}
376
2197d7ab 377static int
2240af25
DDP
378dpif_netdev_enumerate(struct sset *all_dps,
379 const struct dpif_class *dpif_class)
2197d7ab
GL
380{
381 struct shash_node *node;
382
97be1538 383 ovs_mutex_lock(&dp_netdev_mutex);
2197d7ab 384 SHASH_FOR_EACH(node, &dp_netdevs) {
2240af25
DDP
385 struct dp_netdev *dp = node->data;
386 if (dpif_class != dp->class) {
387 /* 'dp_netdevs' contains both "netdev" and "dummy" dpifs.
388 * If the class doesn't match, skip this dpif. */
389 continue;
390 }
2197d7ab
GL
391 sset_add(all_dps, node->name);
392 }
97be1538 393 ovs_mutex_unlock(&dp_netdev_mutex);
5279f8fd 394
2197d7ab
GL
395 return 0;
396}
397
add90f6f
EJ
398static bool
399dpif_netdev_class_is_dummy(const struct dpif_class *class)
400{
401 return class != &dpif_netdev_class;
402}
403
/* Maps the user-requested port 'type' to the netdev type actually opened:
 * "internal" ports become "dummy" for dummy classes and "tap" otherwise;
 * any other type is passed through unchanged. */
static const char *
dpif_netdev_port_open_type(const struct dpif_class *class, const char *type)
{
    if (strcmp(type, "internal")) {
        return type;
    }
    return dpif_netdev_class_is_dummy(class) ? "dummy" : "tap";
}
411
72865317
BP
412static struct dpif *
413create_dpif_netdev(struct dp_netdev *dp)
414{
462278db 415 uint16_t netflow_id = hash_string(dp->name, 0);
72865317 416 struct dpif_netdev *dpif;
72865317 417
6a8267c5 418 ovs_refcount_ref(&dp->ref_cnt);
72865317 419
72865317 420 dpif = xmalloc(sizeof *dpif);
614c4892 421 dpif_init(&dpif->dpif, dp->class, dp->name, netflow_id >> 8, netflow_id);
72865317 422 dpif->dp = dp;
d33ed218 423 dpif->last_port_seq = seq_read(dp->port_seq);
72865317
BP
424
425 return &dpif->dpif;
426}
427
4e022ec0
AW
428/* Choose an unused, non-zero port number and return it on success.
429 * Return ODPP_NONE on failure. */
430static odp_port_t
e44768b7 431choose_port(struct dp_netdev *dp, const char *name)
59e6d833 432 OVS_REQUIRES(dp->port_mutex)
e44768b7 433{
4e022ec0 434 uint32_t port_no;
e44768b7
JP
435
436 if (dp->class != &dpif_netdev_class) {
437 const char *p;
438 int start_no = 0;
439
440 /* If the port name begins with "br", start the number search at
441 * 100 to make writing tests easier. */
442 if (!strncmp(name, "br", 2)) {
443 start_no = 100;
444 }
445
446 /* If the port name contains a number, try to assign that port number.
447 * This can make writing unit tests easier because port numbers are
448 * predictable. */
449 for (p = name; *p != '\0'; p++) {
450 if (isdigit((unsigned char) *p)) {
451 port_no = start_no + strtol(p, NULL, 10);
ff073a71
BP
452 if (port_no > 0 && port_no != odp_to_u32(ODPP_NONE)
453 && !dp_netdev_lookup_port(dp, u32_to_odp(port_no))) {
4e022ec0 454 return u32_to_odp(port_no);
e44768b7
JP
455 }
456 break;
457 }
458 }
459 }
460
ff073a71
BP
461 for (port_no = 1; port_no <= UINT16_MAX; port_no++) {
462 if (!dp_netdev_lookup_port(dp, u32_to_odp(port_no))) {
4e022ec0 463 return u32_to_odp(port_no);
e44768b7
JP
464 }
465 }
466
4e022ec0 467 return ODPP_NONE;
e44768b7
JP
468}
469
72865317 470static int
614c4892
BP
471create_dp_netdev(const char *name, const struct dpif_class *class,
472 struct dp_netdev **dpp)
8a4e3a85 473 OVS_REQUIRES(dp_netdev_mutex)
72865317
BP
474{
475 struct dp_netdev *dp;
476 int error;
72865317 477
462278db 478 dp = xzalloc(sizeof *dp);
8a4e3a85
BP
479 shash_add(&dp_netdevs, name, dp);
480
481 *CONST_CAST(const struct dpif_class **, &dp->class) = class;
482 *CONST_CAST(const char **, &dp->name) = xstrdup(name);
6a8267c5 483 ovs_refcount_init(&dp->ref_cnt);
1a65ba85 484 atomic_flag_clear(&dp->destroyed);
8a4e3a85
BP
485
486 ovs_mutex_init(&dp->flow_mutex);
487 classifier_init(&dp->cls, NULL);
488 hmap_init(&dp->flow_table);
489
63be20be 490 fat_rwlock_init(&dp->queue_rwlock);
ed27e010 491
51852a57 492 ovsthread_stats_init(&dp->stats);
ed27e010 493
59e6d833
BP
494 ovs_mutex_init(&dp->port_mutex);
495 cmap_init(&dp->ports);
d33ed218 496 dp->port_seq = seq_create();
6c3eee82 497 latch_init(&dp->exit_latch);
e44768b7 498
59e6d833 499 ovs_mutex_lock(&dp->port_mutex);
4e022ec0 500 error = do_add_port(dp, name, "internal", ODPP_LOCAL);
59e6d833 501 ovs_mutex_unlock(&dp->port_mutex);
72865317
BP
502 if (error) {
503 dp_netdev_free(dp);
462278db 504 return error;
72865317
BP
505 }
506
462278db 507 *dpp = dp;
72865317
BP
508 return 0;
509}
510
511static int
614c4892 512dpif_netdev_open(const struct dpif_class *class, const char *name,
4a387741 513 bool create, struct dpif **dpifp)
72865317 514{
462278db 515 struct dp_netdev *dp;
5279f8fd 516 int error;
462278db 517
97be1538 518 ovs_mutex_lock(&dp_netdev_mutex);
462278db
BP
519 dp = shash_find_data(&dp_netdevs, name);
520 if (!dp) {
5279f8fd 521 error = create ? create_dp_netdev(name, class, &dp) : ENODEV;
72865317 522 } else {
5279f8fd
BP
523 error = (dp->class != class ? EINVAL
524 : create ? EEXIST
525 : 0);
526 }
527 if (!error) {
528 *dpifp = create_dpif_netdev(dp);
72865317 529 }
97be1538 530 ovs_mutex_unlock(&dp_netdev_mutex);
462278db 531
5279f8fd 532 return error;
72865317
BP
533}
534
535static void
1ba530f4 536dp_netdev_purge_queues(struct dp_netdev *dp)
63be20be 537 OVS_REQ_WRLOCK(dp->queue_rwlock)
72865317
BP
538{
539 int i;
540
63be20be
AW
541 for (i = 0; i < dp->n_handlers; i++) {
542 struct dp_netdev_queue *q = &dp->handler_queues[i];
856081f6 543
63be20be 544 ovs_mutex_lock(&q->mutex);
1ba530f4 545 while (q->tail != q->head) {
d88b629b 546 struct dp_netdev_upcall *u = &q->upcalls[q->tail++ & QUEUE_MASK];
da546e07 547 ofpbuf_uninit(&u->upcall.packet);
d88b629b 548 ofpbuf_uninit(&u->buf);
856081f6 549 }
63be20be 550 ovs_mutex_unlock(&q->mutex);
72865317 551 }
1ba530f4
BP
552}
553
8a4e3a85
BP
554/* Requires dp_netdev_mutex so that we can't get a new reference to 'dp'
555 * through the 'dp_netdevs' shash while freeing 'dp'. */
1ba530f4
BP
556static void
557dp_netdev_free(struct dp_netdev *dp)
8a4e3a85 558 OVS_REQUIRES(dp_netdev_mutex)
1ba530f4 559{
59e6d833 560 struct dp_netdev_port *port;
51852a57
BP
561 struct dp_netdev_stats *bucket;
562 int i;
4ad28026 563
8a4e3a85
BP
564 shash_find_and_delete(&dp_netdevs, dp->name);
565
e4cfed38
PS
566 dp_netdev_set_pmd_threads(dp, 0);
567 free(dp->pmd_threads);
6c3eee82 568
1ba530f4 569 dp_netdev_flow_flush(dp);
59e6d833 570 ovs_mutex_lock(&dp->port_mutex);
a532e683 571 CMAP_FOR_EACH (port, node, &dp->ports) {
c40b890f 572 do_del_port(dp, port);
1ba530f4 573 }
59e6d833 574 ovs_mutex_unlock(&dp->port_mutex);
51852a57
BP
575
576 OVSTHREAD_STATS_FOR_EACH_BUCKET (bucket, i, &dp->stats) {
577 ovs_mutex_destroy(&bucket->mutex);
578 free_cacheline(bucket);
579 }
580 ovsthread_stats_destroy(&dp->stats);
f5126b57 581
63be20be
AW
582 fat_rwlock_wrlock(&dp->queue_rwlock);
583 dp_netdev_destroy_all_queues(dp);
584 fat_rwlock_unlock(&dp->queue_rwlock);
585
586 fat_rwlock_destroy(&dp->queue_rwlock);
f5126b57 587
2c0ea78f 588 classifier_destroy(&dp->cls);
72865317 589 hmap_destroy(&dp->flow_table);
8a4e3a85 590 ovs_mutex_destroy(&dp->flow_mutex);
d33ed218 591 seq_destroy(dp->port_seq);
59e6d833 592 cmap_destroy(&dp->ports);
6c3eee82 593 latch_destroy(&dp->exit_latch);
8a4e3a85 594 free(CONST_CAST(char *, dp->name));
72865317
BP
595 free(dp);
596}
597
8a4e3a85
BP
598static void
599dp_netdev_unref(struct dp_netdev *dp)
600{
601 if (dp) {
602 /* Take dp_netdev_mutex so that, if dp->ref_cnt falls to zero, we can't
603 * get a new reference to 'dp' through the 'dp_netdevs' shash. */
604 ovs_mutex_lock(&dp_netdev_mutex);
605 if (ovs_refcount_unref(&dp->ref_cnt) == 1) {
606 dp_netdev_free(dp);
607 }
608 ovs_mutex_unlock(&dp_netdev_mutex);
609 }
610}
611
/* dpif 'close' callback: releases this handle's datapath reference. */
static void
dpif_netdev_close(struct dpif *dpif)
{
    dp_netdev_unref(get_dp_netdev(dpif));
    free(dpif);
}
620
621static int
7dab847a 622dpif_netdev_destroy(struct dpif *dpif)
72865317
BP
623{
624 struct dp_netdev *dp = get_dp_netdev(dpif);
5279f8fd 625
6a8267c5
BP
626 if (!atomic_flag_test_and_set(&dp->destroyed)) {
627 if (ovs_refcount_unref(&dp->ref_cnt) == 1) {
628 /* Can't happen: 'dpif' still owns a reference to 'dp'. */
629 OVS_NOT_REACHED();
630 }
631 }
5279f8fd 632
72865317
BP
633 return 0;
634}
635
636static int
a8d9304d 637dpif_netdev_get_stats(const struct dpif *dpif, struct dpif_dp_stats *stats)
72865317
BP
638{
639 struct dp_netdev *dp = get_dp_netdev(dpif);
51852a57
BP
640 struct dp_netdev_stats *bucket;
641 size_t i;
5279f8fd 642
06f81620 643 fat_rwlock_rdlock(&dp->cls.rwlock);
f180c2e2 644 stats->n_flows = hmap_count(&dp->flow_table);
06f81620 645 fat_rwlock_unlock(&dp->cls.rwlock);
8a4e3a85 646
51852a57
BP
647 stats->n_hit = stats->n_missed = stats->n_lost = 0;
648 OVSTHREAD_STATS_FOR_EACH_BUCKET (bucket, i, &dp->stats) {
649 ovs_mutex_lock(&bucket->mutex);
650 stats->n_hit += bucket->n[DP_STAT_HIT];
651 stats->n_missed += bucket->n[DP_STAT_MISS];
652 stats->n_lost += bucket->n[DP_STAT_LOST];
653 ovs_mutex_unlock(&bucket->mutex);
654 }
1ce3fa06 655 stats->n_masks = UINT32_MAX;
847108dc 656 stats->n_mask_hit = UINT64_MAX;
5279f8fd 657
72865317
BP
658 return 0;
659}
660
e4cfed38
PS
661static void
662dp_netdev_reload_pmd_threads(struct dp_netdev *dp)
663{
664 int i;
665
666 for (i = 0; i < dp->n_pmd_threads; i++) {
667 struct pmd_thread *f = &dp->pmd_threads[i];
668 int id;
669
670 atomic_add(&f->change_seq, 1, &id);
671 }
672}
673
59e6d833
BP
674static uint32_t
675hash_port_no(odp_port_t port_no)
676{
677 return hash_int(odp_to_u32(port_no), 0);
678}
679
72865317 680static int
c3827f61 681do_add_port(struct dp_netdev *dp, const char *devname, const char *type,
4e022ec0 682 odp_port_t port_no)
59e6d833 683 OVS_REQUIRES(dp->port_mutex)
72865317 684{
4b609110 685 struct netdev_saved_flags *sf;
72865317
BP
686 struct dp_netdev_port *port;
687 struct netdev *netdev;
2499a8ce 688 enum netdev_flags flags;
0cbfe35d 689 const char *open_type;
72865317 690 int error;
55c955bd 691 int i;
72865317
BP
692
693 /* XXX reject devices already in some dp_netdev. */
694
695 /* Open and validate network device. */
0aeaabc8 696 open_type = dpif_netdev_port_open_type(dp->class, type);
0cbfe35d 697 error = netdev_open(devname, open_type, &netdev);
72865317
BP
698 if (error) {
699 return error;
700 }
72865317
BP
701 /* XXX reject non-Ethernet devices */
702
2499a8ce
AC
703 netdev_get_flags(netdev, &flags);
704 if (flags & NETDEV_LOOPBACK) {
705 VLOG_ERR("%s: cannot add a loopback device", devname);
706 netdev_close(netdev);
707 return EINVAL;
708 }
709
e4cfed38
PS
710 port = xzalloc(sizeof *port);
711 port->port_no = port_no;
712 port->netdev = netdev;
55c955bd 713 port->rxq = xmalloc(sizeof *port->rxq * netdev_n_rxq(netdev));
e4cfed38 714 port->type = xstrdup(type);
55c955bd
PS
715 for (i = 0; i < netdev_n_rxq(netdev); i++) {
716 error = netdev_rxq_open(netdev, &port->rxq[i], i);
717 if (error
718 && !(error == EOPNOTSUPP && dpif_netdev_class_is_dummy(dp->class))) {
719 VLOG_ERR("%s: cannot receive packets on this network device (%s)",
720 devname, ovs_strerror(errno));
721 netdev_close(netdev);
722 return error;
723 }
7b6b0ef4
BP
724 }
725
4b609110 726 error = netdev_turn_flags_on(netdev, NETDEV_PROMISC, &sf);
72865317 727 if (error) {
55c955bd
PS
728 for (i = 0; i < netdev_n_rxq(netdev); i++) {
729 netdev_rxq_close(port->rxq[i]);
730 }
72865317 731 netdev_close(netdev);
f7791740 732 free(port->rxq);
e4cfed38 733 free(port);
72865317
BP
734 return error;
735 }
4b609110 736 port->sf = sf;
e4cfed38
PS
737
738 if (netdev_is_pmd(netdev)) {
739 dp->pmd_count++;
740 dp_netdev_set_pmd_threads(dp, NR_THREADS);
741 dp_netdev_reload_pmd_threads(dp);
742 }
743 ovs_refcount_init(&port->ref_cnt);
72865317 744
59e6d833 745 cmap_insert(&dp->ports, &port->node, hash_port_no(port_no));
d33ed218 746 seq_change(dp->port_seq);
72865317
BP
747
748 return 0;
749}
750
247527db
BP
751static int
752dpif_netdev_port_add(struct dpif *dpif, struct netdev *netdev,
4e022ec0 753 odp_port_t *port_nop)
247527db
BP
754{
755 struct dp_netdev *dp = get_dp_netdev(dpif);
3aa30359
BP
756 char namebuf[NETDEV_VPORT_NAME_BUFSIZE];
757 const char *dpif_port;
4e022ec0 758 odp_port_t port_no;
5279f8fd 759 int error;
247527db 760
59e6d833 761 ovs_mutex_lock(&dp->port_mutex);
3aa30359 762 dpif_port = netdev_vport_get_dpif_port(netdev, namebuf, sizeof namebuf);
4e022ec0 763 if (*port_nop != ODPP_NONE) {
ff073a71
BP
764 port_no = *port_nop;
765 error = dp_netdev_lookup_port(dp, *port_nop) ? EBUSY : 0;
232dfa4a 766 } else {
3aa30359 767 port_no = choose_port(dp, dpif_port);
5279f8fd 768 error = port_no == ODPP_NONE ? EFBIG : 0;
232dfa4a 769 }
5279f8fd 770 if (!error) {
247527db 771 *port_nop = port_no;
5279f8fd 772 error = do_add_port(dp, dpif_port, netdev_get_type(netdev), port_no);
247527db 773 }
59e6d833 774 ovs_mutex_unlock(&dp->port_mutex);
5279f8fd
BP
775
776 return error;
72865317
BP
777}
778
779static int
4e022ec0 780dpif_netdev_port_del(struct dpif *dpif, odp_port_t port_no)
72865317
BP
781{
782 struct dp_netdev *dp = get_dp_netdev(dpif);
5279f8fd
BP
783 int error;
784
59e6d833 785 ovs_mutex_lock(&dp->port_mutex);
c40b890f
BP
786 if (port_no == ODPP_LOCAL) {
787 error = EINVAL;
788 } else {
789 struct dp_netdev_port *port;
790
791 error = get_port_by_number(dp, port_no, &port);
792 if (!error) {
793 do_del_port(dp, port);
794 }
795 }
59e6d833 796 ovs_mutex_unlock(&dp->port_mutex);
5279f8fd
BP
797
798 return error;
72865317
BP
799}
800
801static bool
4e022ec0 802is_valid_port_number(odp_port_t port_no)
72865317 803{
ff073a71
BP
804 return port_no != ODPP_NONE;
805}
806
807static struct dp_netdev_port *
808dp_netdev_lookup_port(const struct dp_netdev *dp, odp_port_t port_no)
809{
810 struct dp_netdev_port *port;
811
59e6d833 812 CMAP_FOR_EACH_WITH_HASH (port, node, hash_port_no(port_no), &dp->ports) {
ff073a71
BP
813 if (port->port_no == port_no) {
814 return port;
815 }
816 }
817 return NULL;
72865317
BP
818}
819
820static int
821get_port_by_number(struct dp_netdev *dp,
4e022ec0 822 odp_port_t port_no, struct dp_netdev_port **portp)
72865317
BP
823{
824 if (!is_valid_port_number(port_no)) {
825 *portp = NULL;
826 return EINVAL;
827 } else {
ff073a71 828 *portp = dp_netdev_lookup_port(dp, port_no);
72865317
BP
829 return *portp ? 0 : ENOENT;
830 }
831}
832
b284085e
PS
833static void
834port_ref(struct dp_netdev_port *port)
835{
836 if (port) {
837 ovs_refcount_ref(&port->ref_cnt);
838 }
839}
840
841static void
59e6d833 842port_destroy__(struct dp_netdev_port *port)
b284085e 843{
98de6beb 844 int n_rxq = netdev_n_rxq(port->netdev);
59e6d833 845 int i;
55c955bd 846
59e6d833
BP
847 netdev_close(port->netdev);
848 netdev_restore_flags(port->sf);
55c955bd 849
59e6d833
BP
850 for (i = 0; i < n_rxq; i++) {
851 netdev_rxq_close(port->rxq[i]);
852 }
853 free(port->rxq);
854 free(port->type);
855 free(port);
856}
857
858static void
859port_unref(struct dp_netdev_port *port)
860{
861 if (port && ovs_refcount_unref(&port->ref_cnt) == 1) {
862 ovsrcu_postpone(port_destroy__, port);
b284085e
PS
863 }
864}
865
72865317
BP
866static int
867get_port_by_name(struct dp_netdev *dp,
868 const char *devname, struct dp_netdev_port **portp)
59e6d833 869 OVS_REQUIRES(dp->port_mutex)
72865317
BP
870{
871 struct dp_netdev_port *port;
872
a532e683 873 CMAP_FOR_EACH (port, node, &dp->ports) {
3efb6063 874 if (!strcmp(netdev_get_name(port->netdev), devname)) {
72865317
BP
875 *portp = port;
876 return 0;
877 }
878 }
879 return ENOENT;
880}
881
c40b890f
BP
882static void
883do_del_port(struct dp_netdev *dp, struct dp_netdev_port *port)
59e6d833 884 OVS_REQUIRES(dp->port_mutex)
72865317 885{
c40b890f 886 cmap_remove(&dp->ports, &port->node, hash_odp_port(port->port_no));
d33ed218 887 seq_change(dp->port_seq);
e4cfed38
PS
888 if (netdev_is_pmd(port->netdev)) {
889 dp_netdev_reload_pmd_threads(dp);
890 }
72865317 891
b284085e 892 port_unref(port);
72865317
BP
893}
894
895static void
4c738a8d
BP
896answer_port_query(const struct dp_netdev_port *port,
897 struct dpif_port *dpif_port)
72865317 898{
3efb6063 899 dpif_port->name = xstrdup(netdev_get_name(port->netdev));
0cbfe35d 900 dpif_port->type = xstrdup(port->type);
4c738a8d 901 dpif_port->port_no = port->port_no;
72865317
BP
902}
903
904static int
4e022ec0 905dpif_netdev_port_query_by_number(const struct dpif *dpif, odp_port_t port_no,
4c738a8d 906 struct dpif_port *dpif_port)
72865317
BP
907{
908 struct dp_netdev *dp = get_dp_netdev(dpif);
909 struct dp_netdev_port *port;
910 int error;
911
912 error = get_port_by_number(dp, port_no, &port);
4afba28d 913 if (!error && dpif_port) {
4c738a8d 914 answer_port_query(port, dpif_port);
72865317 915 }
5279f8fd 916
72865317
BP
917 return error;
918}
919
920static int
921dpif_netdev_port_query_by_name(const struct dpif *dpif, const char *devname,
4c738a8d 922 struct dpif_port *dpif_port)
72865317
BP
923{
924 struct dp_netdev *dp = get_dp_netdev(dpif);
925 struct dp_netdev_port *port;
926 int error;
927
59e6d833 928 ovs_mutex_lock(&dp->port_mutex);
72865317 929 error = get_port_by_name(dp, devname, &port);
4afba28d 930 if (!error && dpif_port) {
4c738a8d 931 answer_port_query(port, dpif_port);
72865317 932 }
59e6d833 933 ovs_mutex_unlock(&dp->port_mutex);
5279f8fd 934
72865317
BP
935 return error;
936}
937
61e7deb1
BP
938static void
939dp_netdev_flow_free(struct dp_netdev_flow *flow)
940{
941 struct dp_netdev_flow_stats *bucket;
942 size_t i;
943
944 OVSTHREAD_STATS_FOR_EACH_BUCKET (bucket, i, &flow->stats) {
945 ovs_mutex_destroy(&bucket->mutex);
946 free_cacheline(bucket);
947 }
948 ovsthread_stats_destroy(&flow->stats);
949
950 cls_rule_destroy(CONST_CAST(struct cls_rule *, &flow->cr));
951 dp_netdev_actions_free(dp_netdev_flow_get_actions(flow));
61e7deb1
BP
952 free(flow);
953}
954
72865317 955static void
8a4e3a85
BP
956dp_netdev_remove_flow(struct dp_netdev *dp, struct dp_netdev_flow *flow)
957 OVS_REQ_WRLOCK(dp->cls.rwlock)
958 OVS_REQUIRES(dp->flow_mutex)
72865317 959{
8a4e3a85
BP
960 struct cls_rule *cr = CONST_CAST(struct cls_rule *, &flow->cr);
961 struct hmap_node *node = CONST_CAST(struct hmap_node *, &flow->node);
2c0ea78f 962
8a4e3a85
BP
963 classifier_remove(&dp->cls, cr);
964 hmap_remove(&dp->flow_table, node);
61e7deb1 965 ovsrcu_postpone(dp_netdev_flow_free, flow);
72865317
BP
966}
967
968static void
969dp_netdev_flow_flush(struct dp_netdev *dp)
970{
1763b4b8 971 struct dp_netdev_flow *netdev_flow, *next;
72865317 972
8a4e3a85 973 ovs_mutex_lock(&dp->flow_mutex);
06f81620 974 fat_rwlock_wrlock(&dp->cls.rwlock);
1763b4b8 975 HMAP_FOR_EACH_SAFE (netdev_flow, next, node, &dp->flow_table) {
8a4e3a85 976 dp_netdev_remove_flow(dp, netdev_flow);
72865317 977 }
06f81620 978 fat_rwlock_unlock(&dp->cls.rwlock);
8a4e3a85 979 ovs_mutex_unlock(&dp->flow_mutex);
72865317
BP
980}
981
/* dpif "flow_flush" callback: deletes all flows in 'dpif'. */
static int
dpif_netdev_flow_flush(struct dpif *dpif)
{
    dp_netdev_flow_flush(get_dp_netdev(dpif));
    return 0;
}
990
b0ec0f27 991struct dp_netdev_port_state {
59e6d833 992 struct cmap_position position;
4c738a8d 993 char *name;
b0ec0f27
BP
994};
995
996static int
997dpif_netdev_port_dump_start(const struct dpif *dpif OVS_UNUSED, void **statep)
998{
999 *statep = xzalloc(sizeof(struct dp_netdev_port_state));
1000 return 0;
1001}
1002
72865317 1003static int
b0ec0f27 1004dpif_netdev_port_dump_next(const struct dpif *dpif, void *state_,
4c738a8d 1005 struct dpif_port *dpif_port)
72865317 1006{
b0ec0f27 1007 struct dp_netdev_port_state *state = state_;
72865317 1008 struct dp_netdev *dp = get_dp_netdev(dpif);
59e6d833 1009 struct cmap_node *node;
ff073a71 1010 int retval;
72865317 1011
59e6d833 1012 node = cmap_next_position(&dp->ports, &state->position);
ff073a71
BP
1013 if (node) {
1014 struct dp_netdev_port *port;
5279f8fd 1015
ff073a71
BP
1016 port = CONTAINER_OF(node, struct dp_netdev_port, node);
1017
1018 free(state->name);
1019 state->name = xstrdup(netdev_get_name(port->netdev));
1020 dpif_port->name = state->name;
1021 dpif_port->type = port->type;
1022 dpif_port->port_no = port->port_no;
1023
1024 retval = 0;
1025 } else {
1026 retval = EOF;
72865317 1027 }
5279f8fd 1028
ff073a71 1029 return retval;
b0ec0f27
BP
1030}
1031
1032static int
4c738a8d 1033dpif_netdev_port_dump_done(const struct dpif *dpif OVS_UNUSED, void *state_)
b0ec0f27 1034{
4c738a8d
BP
1035 struct dp_netdev_port_state *state = state_;
1036 free(state->name);
b0ec0f27
BP
1037 free(state);
1038 return 0;
72865317
BP
1039}
1040
1041static int
67a4917b 1042dpif_netdev_port_poll(const struct dpif *dpif_, char **devnamep OVS_UNUSED)
72865317
BP
1043{
1044 struct dpif_netdev *dpif = dpif_netdev_cast(dpif_);
d33ed218 1045 uint64_t new_port_seq;
5279f8fd
BP
1046 int error;
1047
d33ed218
BP
1048 new_port_seq = seq_read(dpif->dp->port_seq);
1049 if (dpif->last_port_seq != new_port_seq) {
1050 dpif->last_port_seq = new_port_seq;
5279f8fd 1051 error = ENOBUFS;
72865317 1052 } else {
5279f8fd 1053 error = EAGAIN;
72865317 1054 }
5279f8fd
BP
1055
1056 return error;
72865317
BP
1057}
1058
1059static void
1060dpif_netdev_port_poll_wait(const struct dpif *dpif_)
1061{
1062 struct dpif_netdev *dpif = dpif_netdev_cast(dpif_);
5279f8fd 1063
d33ed218 1064 seq_wait(dpif->dp->port_seq, dpif->last_port_seq);
8a4e3a85
BP
1065}
1066
1067static struct dp_netdev_flow *
1068dp_netdev_flow_cast(const struct cls_rule *cr)
1069{
1070 return cr ? CONTAINER_OF(cr, struct dp_netdev_flow, cr) : NULL;
72865317
BP
1071}
1072
72865317 1073static struct dp_netdev_flow *
4f150744 1074dp_netdev_lookup_flow(const struct dp_netdev *dp, const struct miniflow *key)
8a4e3a85 1075 OVS_EXCLUDED(dp->cls.rwlock)
2c0ea78f 1076{
8a4e3a85 1077 struct dp_netdev_flow *netdev_flow;
4f150744 1078 struct cls_rule *rule;
2c0ea78f 1079
06f81620 1080 fat_rwlock_rdlock(&dp->cls.rwlock);
4f150744
JR
1081 rule = classifier_lookup_miniflow_first(&dp->cls, key);
1082 netdev_flow = dp_netdev_flow_cast(rule);
06f81620 1083 fat_rwlock_unlock(&dp->cls.rwlock);
2c0ea78f 1084
8a4e3a85 1085 return netdev_flow;
2c0ea78f
GS
1086}
1087
1088static struct dp_netdev_flow *
1089dp_netdev_find_flow(const struct dp_netdev *dp, const struct flow *flow)
8a4e3a85 1090 OVS_REQ_RDLOCK(dp->cls.rwlock)
72865317 1091{
1763b4b8 1092 struct dp_netdev_flow *netdev_flow;
72865317 1093
2c0ea78f 1094 HMAP_FOR_EACH_WITH_HASH (netdev_flow, node, flow_hash(flow, 0),
1763b4b8 1095 &dp->flow_table) {
2c0ea78f 1096 if (flow_equal(&netdev_flow->flow, flow)) {
61e7deb1 1097 return netdev_flow;
72865317
BP
1098 }
1099 }
8a4e3a85 1100
72865317
BP
1101 return NULL;
1102}
1103
1104static void
1763b4b8
GS
1105get_dpif_flow_stats(struct dp_netdev_flow *netdev_flow,
1106 struct dpif_flow_stats *stats)
feebdea2 1107{
679ba04c
BP
1108 struct dp_netdev_flow_stats *bucket;
1109 size_t i;
1110
1111 memset(stats, 0, sizeof *stats);
1112 OVSTHREAD_STATS_FOR_EACH_BUCKET (bucket, i, &netdev_flow->stats) {
1113 ovs_mutex_lock(&bucket->mutex);
1114 stats->n_packets += bucket->packet_count;
1115 stats->n_bytes += bucket->byte_count;
1116 stats->used = MAX(stats->used, bucket->used);
1117 stats->tcp_flags |= bucket->tcp_flags;
1118 ovs_mutex_unlock(&bucket->mutex);
1119 }
72865317
BP
1120}
1121
36956a7d 1122static int
8c301900
JR
1123dpif_netdev_mask_from_nlattrs(const struct nlattr *key, uint32_t key_len,
1124 const struct nlattr *mask_key,
1125 uint32_t mask_key_len, const struct flow *flow,
1126 struct flow *mask)
1127{
1128 if (mask_key_len) {
80e44883
BP
1129 enum odp_key_fitness fitness;
1130
1131 fitness = odp_flow_key_to_mask(mask_key, mask_key_len, mask, flow);
1132 if (fitness) {
8c301900
JR
1133 /* This should not happen: it indicates that
1134 * odp_flow_key_from_mask() and odp_flow_key_to_mask()
1135 * disagree on the acceptable form of a mask. Log the problem
1136 * as an error, with enough details to enable debugging. */
1137 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
1138
1139 if (!VLOG_DROP_ERR(&rl)) {
1140 struct ds s;
1141
1142 ds_init(&s);
1143 odp_flow_format(key, key_len, mask_key, mask_key_len, NULL, &s,
1144 true);
80e44883
BP
1145 VLOG_ERR("internal error parsing flow mask %s (%s)",
1146 ds_cstr(&s), odp_key_fitness_to_string(fitness));
8c301900
JR
1147 ds_destroy(&s);
1148 }
1149
1150 return EINVAL;
1151 }
8c301900
JR
1152 } else {
1153 enum mf_field_id id;
1154 /* No mask key, unwildcard everything except fields whose
1155 * prerequisities are not met. */
1156 memset(mask, 0x0, sizeof *mask);
1157
1158 for (id = 0; id < MFF_N_IDS; ++id) {
1159 /* Skip registers and metadata. */
1160 if (!(id >= MFF_REG0 && id < MFF_REG0 + FLOW_N_REGS)
1161 && id != MFF_METADATA) {
1162 const struct mf_field *mf = mf_from_id(id);
1163 if (mf_are_prereqs_ok(mf, flow)) {
1164 mf_mask_field(mf, mask);
1165 }
1166 }
1167 }
1168 }
1169
f3f750e5
BP
1170 /* Force unwildcard the in_port.
1171 *
1172 * We need to do this even in the case where we unwildcard "everything"
1173 * above because "everything" only includes the 16-bit OpenFlow port number
1174 * mask->in_port.ofp_port, which only covers half of the 32-bit datapath
1175 * port number mask->in_port.odp_port. */
1176 mask->in_port.odp_port = u32_to_odp(UINT32_MAX);
1177
8c301900
JR
1178 return 0;
1179}
1180
1181static int
1182dpif_netdev_flow_from_nlattrs(const struct nlattr *key, uint32_t key_len,
1183 struct flow *flow)
36956a7d 1184{
586ddea5
BP
1185 odp_port_t in_port;
1186
8c301900 1187 if (odp_flow_key_to_flow(key, key_len, flow)) {
36956a7d 1188 /* This should not happen: it indicates that odp_flow_key_from_flow()
8c301900
JR
1189 * and odp_flow_key_to_flow() disagree on the acceptable form of a
1190 * flow. Log the problem as an error, with enough details to enable
1191 * debugging. */
36956a7d
BP
1192 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
1193
1194 if (!VLOG_DROP_ERR(&rl)) {
1195 struct ds s;
1196
1197 ds_init(&s);
8c301900 1198 odp_flow_format(key, key_len, NULL, 0, NULL, &s, true);
36956a7d
BP
1199 VLOG_ERR("internal error parsing flow key %s", ds_cstr(&s));
1200 ds_destroy(&s);
1201 }
1202
1203 return EINVAL;
1204 }
1205
586ddea5
BP
1206 in_port = flow->in_port.odp_port;
1207 if (!is_valid_port_number(in_port) && in_port != ODPP_NONE) {
18886b60
BP
1208 return EINVAL;
1209 }
1210
36956a7d
BP
1211 return 0;
1212}
1213
72865317 1214static int
693c4a01 1215dpif_netdev_flow_get(const struct dpif *dpif,
feebdea2 1216 const struct nlattr *nl_key, size_t nl_key_len,
c97fb132 1217 struct ofpbuf **actionsp, struct dpif_flow_stats *stats)
72865317
BP
1218{
1219 struct dp_netdev *dp = get_dp_netdev(dpif);
1763b4b8 1220 struct dp_netdev_flow *netdev_flow;
bc4a05c6
BP
1221 struct flow key;
1222 int error;
36956a7d 1223
feebdea2 1224 error = dpif_netdev_flow_from_nlattrs(nl_key, nl_key_len, &key);
bc4a05c6
BP
1225 if (error) {
1226 return error;
1227 }
14608a15 1228
06f81620 1229 fat_rwlock_rdlock(&dp->cls.rwlock);
2c0ea78f 1230 netdev_flow = dp_netdev_find_flow(dp, &key);
06f81620 1231 fat_rwlock_unlock(&dp->cls.rwlock);
8a4e3a85 1232
1763b4b8 1233 if (netdev_flow) {
5279f8fd 1234 if (stats) {
1763b4b8 1235 get_dpif_flow_stats(netdev_flow, stats);
5279f8fd 1236 }
679ba04c 1237
5279f8fd 1238 if (actionsp) {
61e7deb1 1239 struct dp_netdev_actions *actions;
8a4e3a85 1240
61e7deb1 1241 actions = dp_netdev_flow_get_actions(netdev_flow);
8a4e3a85 1242 *actionsp = ofpbuf_clone_data(actions->actions, actions->size);
5279f8fd 1243 }
61e7deb1 1244 } else {
5279f8fd 1245 error = ENOENT;
72865317 1246 }
bc4a05c6 1247
5279f8fd 1248 return error;
72865317
BP
1249}
1250
72865317 1251static int
2c0ea78f
GS
1252dp_netdev_flow_add(struct dp_netdev *dp, const struct flow *flow,
1253 const struct flow_wildcards *wc,
1254 const struct nlattr *actions,
1255 size_t actions_len)
8a4e3a85 1256 OVS_REQUIRES(dp->flow_mutex)
72865317 1257{
1763b4b8 1258 struct dp_netdev_flow *netdev_flow;
2c0ea78f 1259 struct match match;
72865317 1260
1763b4b8 1261 netdev_flow = xzalloc(sizeof *netdev_flow);
8a4e3a85 1262 *CONST_CAST(struct flow *, &netdev_flow->flow) = *flow;
8a4e3a85 1263
679ba04c
BP
1264 ovsthread_stats_init(&netdev_flow->stats);
1265
61e7deb1
BP
1266 ovsrcu_set(&netdev_flow->actions,
1267 dp_netdev_actions_create(actions, actions_len));
2c0ea78f
GS
1268
1269 match_init(&match, flow, wc);
8a4e3a85
BP
1270 cls_rule_init(CONST_CAST(struct cls_rule *, &netdev_flow->cr),
1271 &match, NETDEV_RULE_PRIORITY);
06f81620 1272 fat_rwlock_wrlock(&dp->cls.rwlock);
8a4e3a85
BP
1273 classifier_insert(&dp->cls,
1274 CONST_CAST(struct cls_rule *, &netdev_flow->cr));
1275 hmap_insert(&dp->flow_table,
1276 CONST_CAST(struct hmap_node *, &netdev_flow->node),
1277 flow_hash(flow, 0));
06f81620 1278 fat_rwlock_unlock(&dp->cls.rwlock);
72865317 1279
72865317
BP
1280 return 0;
1281}
1282
1283static void
1763b4b8 1284clear_stats(struct dp_netdev_flow *netdev_flow)
72865317 1285{
679ba04c
BP
1286 struct dp_netdev_flow_stats *bucket;
1287 size_t i;
1288
1289 OVSTHREAD_STATS_FOR_EACH_BUCKET (bucket, i, &netdev_flow->stats) {
1290 ovs_mutex_lock(&bucket->mutex);
1291 bucket->used = 0;
1292 bucket->packet_count = 0;
1293 bucket->byte_count = 0;
1294 bucket->tcp_flags = 0;
1295 ovs_mutex_unlock(&bucket->mutex);
1296 }
72865317
BP
1297}
1298
1299static int
89625d1e 1300dpif_netdev_flow_put(struct dpif *dpif, const struct dpif_flow_put *put)
72865317
BP
1301{
1302 struct dp_netdev *dp = get_dp_netdev(dpif);
1763b4b8 1303 struct dp_netdev_flow *netdev_flow;
2c0ea78f 1304 struct flow flow;
4f150744 1305 struct miniflow miniflow;
2c0ea78f 1306 struct flow_wildcards wc;
36956a7d
BP
1307 int error;
1308
8c301900
JR
1309 error = dpif_netdev_flow_from_nlattrs(put->key, put->key_len, &flow);
1310 if (error) {
1311 return error;
1312 }
1313 error = dpif_netdev_mask_from_nlattrs(put->key, put->key_len,
1314 put->mask, put->mask_len,
1315 &flow, &wc.masks);
36956a7d
BP
1316 if (error) {
1317 return error;
1318 }
4f150744 1319 miniflow_init(&miniflow, &flow);
72865317 1320
8a4e3a85 1321 ovs_mutex_lock(&dp->flow_mutex);
4f150744 1322 netdev_flow = dp_netdev_lookup_flow(dp, &miniflow);
1763b4b8 1323 if (!netdev_flow) {
89625d1e 1324 if (put->flags & DPIF_FP_CREATE) {
72865317 1325 if (hmap_count(&dp->flow_table) < MAX_FLOWS) {
89625d1e
BP
1326 if (put->stats) {
1327 memset(put->stats, 0, sizeof *put->stats);
feebdea2 1328 }
2c0ea78f 1329 error = dp_netdev_flow_add(dp, &flow, &wc, put->actions,
5279f8fd 1330 put->actions_len);
72865317 1331 } else {
5279f8fd 1332 error = EFBIG;
72865317
BP
1333 }
1334 } else {
5279f8fd 1335 error = ENOENT;
72865317
BP
1336 }
1337 } else {
2c0ea78f
GS
1338 if (put->flags & DPIF_FP_MODIFY
1339 && flow_equal(&flow, &netdev_flow->flow)) {
8a4e3a85
BP
1340 struct dp_netdev_actions *new_actions;
1341 struct dp_netdev_actions *old_actions;
1342
1343 new_actions = dp_netdev_actions_create(put->actions,
1344 put->actions_len);
1345
61e7deb1
BP
1346 old_actions = dp_netdev_flow_get_actions(netdev_flow);
1347 ovsrcu_set(&netdev_flow->actions, new_actions);
679ba04c 1348
a84cb64a
BP
1349 if (put->stats) {
1350 get_dpif_flow_stats(netdev_flow, put->stats);
1351 }
1352 if (put->flags & DPIF_FP_ZERO_STATS) {
1353 clear_stats(netdev_flow);
72865317 1354 }
8a4e3a85 1355
61e7deb1 1356 ovsrcu_postpone(dp_netdev_actions_free, old_actions);
2c0ea78f 1357 } else if (put->flags & DPIF_FP_CREATE) {
5279f8fd 1358 error = EEXIST;
2c0ea78f
GS
1359 } else {
1360 /* Overlapping flow. */
1361 error = EINVAL;
72865317
BP
1362 }
1363 }
8a4e3a85 1364 ovs_mutex_unlock(&dp->flow_mutex);
5279f8fd
BP
1365
1366 return error;
72865317
BP
1367}
1368
72865317 1369static int
b99d3cee 1370dpif_netdev_flow_del(struct dpif *dpif, const struct dpif_flow_del *del)
72865317
BP
1371{
1372 struct dp_netdev *dp = get_dp_netdev(dpif);
1763b4b8 1373 struct dp_netdev_flow *netdev_flow;
14608a15 1374 struct flow key;
36956a7d
BP
1375 int error;
1376
b99d3cee 1377 error = dpif_netdev_flow_from_nlattrs(del->key, del->key_len, &key);
36956a7d
BP
1378 if (error) {
1379 return error;
1380 }
72865317 1381
8a4e3a85 1382 ovs_mutex_lock(&dp->flow_mutex);
06f81620 1383 fat_rwlock_wrlock(&dp->cls.rwlock);
2c0ea78f 1384 netdev_flow = dp_netdev_find_flow(dp, &key);
1763b4b8 1385 if (netdev_flow) {
b99d3cee 1386 if (del->stats) {
1763b4b8 1387 get_dpif_flow_stats(netdev_flow, del->stats);
feebdea2 1388 }
8a4e3a85 1389 dp_netdev_remove_flow(dp, netdev_flow);
72865317 1390 } else {
5279f8fd 1391 error = ENOENT;
72865317 1392 }
06f81620 1393 fat_rwlock_unlock(&dp->cls.rwlock);
8a4e3a85 1394 ovs_mutex_unlock(&dp->flow_mutex);
5279f8fd
BP
1395
1396 return error;
72865317
BP
1397}
1398
ac64794a
BP
1399struct dpif_netdev_flow_dump {
1400 struct dpif_flow_dump up;
e723fd32
JS
1401 uint32_t bucket;
1402 uint32_t offset;
d2ad7ef1
JS
1403 int status;
1404 struct ovs_mutex mutex;
e723fd32
JS
1405};
1406
ac64794a
BP
1407static struct dpif_netdev_flow_dump *
1408dpif_netdev_flow_dump_cast(struct dpif_flow_dump *dump)
72865317 1409{
ac64794a 1410 return CONTAINER_OF(dump, struct dpif_netdev_flow_dump, up);
e723fd32
JS
1411}
1412
ac64794a
BP
1413static struct dpif_flow_dump *
1414dpif_netdev_flow_dump_create(const struct dpif *dpif_)
e723fd32 1415{
ac64794a 1416 struct dpif_netdev_flow_dump *dump;
e723fd32 1417
ac64794a
BP
1418 dump = xmalloc(sizeof *dump);
1419 dpif_flow_dump_init(&dump->up, dpif_);
1420 dump->bucket = 0;
1421 dump->offset = 0;
1422 dump->status = 0;
1423 ovs_mutex_init(&dump->mutex);
1424
1425 return &dump->up;
e723fd32
JS
1426}
1427
1428static int
ac64794a 1429dpif_netdev_flow_dump_destroy(struct dpif_flow_dump *dump_)
e723fd32 1430{
ac64794a 1431 struct dpif_netdev_flow_dump *dump = dpif_netdev_flow_dump_cast(dump_);
e723fd32 1432
ac64794a
BP
1433 ovs_mutex_destroy(&dump->mutex);
1434 free(dump);
704a1e09
BP
1435 return 0;
1436}
1437
ac64794a
BP
1438struct dpif_netdev_flow_dump_thread {
1439 struct dpif_flow_dump_thread up;
1440 struct dpif_netdev_flow_dump *dump;
1441 struct odputil_keybuf keybuf;
1442 struct odputil_keybuf maskbuf;
1443};
1444
1445static struct dpif_netdev_flow_dump_thread *
1446dpif_netdev_flow_dump_thread_cast(struct dpif_flow_dump_thread *thread)
1447{
1448 return CONTAINER_OF(thread, struct dpif_netdev_flow_dump_thread, up);
1449}
1450
1451static struct dpif_flow_dump_thread *
1452dpif_netdev_flow_dump_thread_create(struct dpif_flow_dump *dump_)
1453{
1454 struct dpif_netdev_flow_dump *dump = dpif_netdev_flow_dump_cast(dump_);
1455 struct dpif_netdev_flow_dump_thread *thread;
1456
1457 thread = xmalloc(sizeof *thread);
1458 dpif_flow_dump_thread_init(&thread->up, &dump->up);
1459 thread->dump = dump;
1460 return &thread->up;
1461}
1462
/* Frees per-thread dump state. */
static void
dpif_netdev_flow_dump_thread_destroy(struct dpif_flow_dump_thread *thread_)
{
    free(dpif_netdev_flow_dump_thread_cast(thread_));
}
1471
61e7deb1 1472/* XXX the caller must use 'actions' without quiescing */
704a1e09 1473static int
ac64794a
BP
1474dpif_netdev_flow_dump_next(struct dpif_flow_dump_thread *thread_,
1475 struct dpif_flow *f, int max_flows OVS_UNUSED)
1476{
1477 struct dpif_netdev_flow_dump_thread *thread
1478 = dpif_netdev_flow_dump_thread_cast(thread_);
1479 struct dpif_netdev_flow_dump *dump = thread->dump;
1480 struct dpif_netdev *dpif = dpif_netdev_cast(thread->up.dpif);
1481 struct dp_netdev *dp = get_dp_netdev(&dpif->dpif);
1763b4b8 1482 struct dp_netdev_flow *netdev_flow;
fbfe01de 1483 struct flow_wildcards wc;
ac64794a
BP
1484 struct dp_netdev_actions *dp_actions;
1485 struct ofpbuf buf;
d2ad7ef1 1486 int error;
14608a15 1487
ac64794a
BP
1488 ovs_mutex_lock(&dump->mutex);
1489 error = dump->status;
d2ad7ef1
JS
1490 if (!error) {
1491 struct hmap_node *node;
1492
1493 fat_rwlock_rdlock(&dp->cls.rwlock);
ac64794a 1494 node = hmap_at_position(&dp->flow_table, &dump->bucket, &dump->offset);
d2ad7ef1
JS
1495 if (node) {
1496 netdev_flow = CONTAINER_OF(node, struct dp_netdev_flow, node);
d2ad7ef1
JS
1497 }
1498 fat_rwlock_unlock(&dp->cls.rwlock);
1499 if (!node) {
ac64794a 1500 dump->status = error = EOF;
d2ad7ef1 1501 }
8a4e3a85 1502 }
ac64794a 1503 ovs_mutex_unlock(&dump->mutex);
d2ad7ef1 1504 if (error) {
ac64794a 1505 return 0;
72865317 1506 }
704a1e09 1507
fbfe01de
AZ
1508 minimask_expand(&netdev_flow->cr.match.mask, &wc);
1509
ac64794a
BP
1510 /* Key. */
1511 ofpbuf_use_stack(&buf, &thread->keybuf, sizeof thread->keybuf);
1512 odp_flow_key_from_flow(&buf, &netdev_flow->flow, &wc.masks,
1513 netdev_flow->flow.in_port.odp_port, true);
1514 f->key = ofpbuf_data(&buf);
1515 f->key_len = ofpbuf_size(&buf);
1516
1517 /* Mask. */
1518 ofpbuf_use_stack(&buf, &thread->maskbuf, sizeof thread->maskbuf);
1519 odp_flow_key_from_mask(&buf, &wc.masks, &netdev_flow->flow,
1520 odp_to_u32(wc.masks.in_port.odp_port),
1521 SIZE_MAX, true);
1522 f->mask = ofpbuf_data(&buf);
1523 f->mask_len = ofpbuf_size(&buf);
45c626a3 1524
ac64794a
BP
1525 /* Actions. */
1526 dp_actions = dp_netdev_flow_get_actions(netdev_flow);
1527 f->actions = dp_actions->actions;
1528 f->actions_len = dp_actions->size;
704a1e09 1529
ac64794a
BP
1530 /* Stats. */
1531 get_dpif_flow_stats(netdev_flow, &f->stats);
feebdea2 1532
ac64794a 1533 return 1;
72865317
BP
1534}
1535
1536static int
758c456d 1537dpif_netdev_execute(struct dpif *dpif, struct dpif_execute *execute)
72865317
BP
1538{
1539 struct dp_netdev *dp = get_dp_netdev(dpif);
8cbf4f47 1540 struct dpif_packet packet, *pp;
758c456d 1541 struct pkt_metadata *md = &execute->md;
72865317 1542
1f317cb5
PS
1543 if (ofpbuf_size(execute->packet) < ETH_HEADER_LEN ||
1544 ofpbuf_size(execute->packet) > UINT16_MAX) {
72865317
BP
1545 return EINVAL;
1546 }
1547
91088554 1548 packet.ofpbuf = *execute->packet;
8cbf4f47 1549 pp = &packet;
91088554 1550
8cbf4f47 1551 dp_netdev_execute_actions(dp, &pp, 1, false, md,
df1e5a3b 1552 execute->actions, execute->actions_len);
8a4e3a85 1553
91088554
DDP
1554 /* Even though may_steal is set to false, some actions could modify or
1555 * reallocate the ofpbuf memory. We need to pass those changes to the
1556 * caller */
1557 *execute->packet = packet.ofpbuf;
1558
758c456d 1559 return 0;
72865317
BP
1560}
1561
63be20be
AW
1562static void
1563dp_netdev_destroy_all_queues(struct dp_netdev *dp)
1564 OVS_REQ_WRLOCK(dp->queue_rwlock)
1565{
1566 size_t i;
1567
1568 dp_netdev_purge_queues(dp);
1569
1570 for (i = 0; i < dp->n_handlers; i++) {
1571 struct dp_netdev_queue *q = &dp->handler_queues[i];
1572
1573 ovs_mutex_destroy(&q->mutex);
1574 seq_destroy(q->seq);
1575 }
1576 free(dp->handler_queues);
1577 dp->handler_queues = NULL;
1578 dp->n_handlers = 0;
1579}
1580
1581static void
1582dp_netdev_refresh_queues(struct dp_netdev *dp, uint32_t n_handlers)
1583 OVS_REQ_WRLOCK(dp->queue_rwlock)
1584{
1585 if (dp->n_handlers != n_handlers) {
1586 size_t i;
1587
1588 dp_netdev_destroy_all_queues(dp);
1589
1590 dp->n_handlers = n_handlers;
1591 dp->handler_queues = xzalloc(n_handlers * sizeof *dp->handler_queues);
1592
1593 for (i = 0; i < n_handlers; i++) {
1594 struct dp_netdev_queue *q = &dp->handler_queues[i];
1595
1596 ovs_mutex_init(&q->mutex);
1597 q->seq = seq_create();
1598 }
1599 }
1600}
1601
72865317 1602static int
63be20be 1603dpif_netdev_recv_set(struct dpif *dpif, bool enable)
72865317 1604{
63be20be
AW
1605 struct dp_netdev *dp = get_dp_netdev(dpif);
1606
1607 if ((dp->handler_queues != NULL) == enable) {
1608 return 0;
1609 }
1610
1611 fat_rwlock_wrlock(&dp->queue_rwlock);
1612 if (!enable) {
1613 dp_netdev_destroy_all_queues(dp);
1614 } else {
1615 dp_netdev_refresh_queues(dp, 1);
1616 }
1617 fat_rwlock_unlock(&dp->queue_rwlock);
1618
82272ede 1619 return 0;
72865317
BP
1620}
1621
1954e6bb 1622static int
63be20be 1623dpif_netdev_handlers_set(struct dpif *dpif, uint32_t n_handlers)
1954e6bb 1624{
63be20be
AW
1625 struct dp_netdev *dp = get_dp_netdev(dpif);
1626
1627 fat_rwlock_wrlock(&dp->queue_rwlock);
1628 if (dp->handler_queues) {
1629 dp_netdev_refresh_queues(dp, n_handlers);
1630 }
1631 fat_rwlock_unlock(&dp->queue_rwlock);
1632
1954e6bb
AW
1633 return 0;
1634}
1635
5bf93d67
EJ
1636static int
1637dpif_netdev_queue_to_priority(const struct dpif *dpif OVS_UNUSED,
1638 uint32_t queue_id, uint32_t *priority)
1639{
1640 *priority = queue_id;
1641 return 0;
1642}
1643
63be20be
AW
1644static bool
1645dp_netdev_recv_check(const struct dp_netdev *dp, const uint32_t handler_id)
1646 OVS_REQ_RDLOCK(dp->queue_rwlock)
72865317 1647{
63be20be 1648 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
72865317 1649
63be20be
AW
1650 if (!dp->handler_queues) {
1651 VLOG_WARN_RL(&rl, "receiving upcall disabled");
1652 return false;
72865317 1653 }
63be20be
AW
1654
1655 if (handler_id >= dp->n_handlers) {
1656 VLOG_WARN_RL(&rl, "handler index out of bound");
1657 return false;
1658 }
1659
1660 return true;
72865317
BP
1661}
1662
1663static int
63be20be 1664dpif_netdev_recv(struct dpif *dpif, uint32_t handler_id,
1954e6bb 1665 struct dpif_upcall *upcall, struct ofpbuf *buf)
72865317 1666{
f5126b57 1667 struct dp_netdev *dp = get_dp_netdev(dpif);
5279f8fd 1668 struct dp_netdev_queue *q;
63be20be
AW
1669 int error = 0;
1670
1671 fat_rwlock_rdlock(&dp->queue_rwlock);
5279f8fd 1672
63be20be
AW
1673 if (!dp_netdev_recv_check(dp, handler_id)) {
1674 error = EAGAIN;
1675 goto out;
1676 }
1677
1678 q = &dp->handler_queues[handler_id];
1679 ovs_mutex_lock(&q->mutex);
1680 if (q->head != q->tail) {
d88b629b
BP
1681 struct dp_netdev_upcall *u = &q->upcalls[q->tail++ & QUEUE_MASK];
1682
1683 *upcall = u->upcall;
b3907fbc 1684
90a7c55e 1685 ofpbuf_uninit(buf);
d88b629b 1686 *buf = u->buf;
72865317 1687 } else {
5279f8fd 1688 error = EAGAIN;
72865317 1689 }
63be20be
AW
1690 ovs_mutex_unlock(&q->mutex);
1691
1692out:
1693 fat_rwlock_unlock(&dp->queue_rwlock);
5279f8fd
BP
1694
1695 return error;
72865317
BP
1696}
1697
1698static void
63be20be 1699dpif_netdev_recv_wait(struct dpif *dpif, uint32_t handler_id)
72865317 1700{
d33ed218 1701 struct dp_netdev *dp = get_dp_netdev(dpif);
63be20be 1702 struct dp_netdev_queue *q;
d33ed218 1703 uint64_t seq;
5279f8fd 1704
63be20be
AW
1705 fat_rwlock_rdlock(&dp->queue_rwlock);
1706
1707 if (!dp_netdev_recv_check(dp, handler_id)) {
1708 goto out;
1709 }
1710
1711 q = &dp->handler_queues[handler_id];
1712 ovs_mutex_lock(&q->mutex);
1713 seq = seq_read(q->seq);
1714 if (q->head != q->tail) {
72865317 1715 poll_immediate_wake();
d33ed218 1716 } else {
63be20be 1717 seq_wait(q->seq, seq);
72865317 1718 }
63be20be
AW
1719
1720 ovs_mutex_unlock(&q->mutex);
1721
1722out:
1723 fat_rwlock_unlock(&dp->queue_rwlock);
72865317 1724}
1ba530f4
BP
1725
1726static void
1727dpif_netdev_recv_purge(struct dpif *dpif)
1728{
1729 struct dpif_netdev *dpif_netdev = dpif_netdev_cast(dpif);
f5126b57 1730
63be20be 1731 fat_rwlock_wrlock(&dpif_netdev->dp->queue_rwlock);
1ba530f4 1732 dp_netdev_purge_queues(dpif_netdev->dp);
63be20be 1733 fat_rwlock_unlock(&dpif_netdev->dp->queue_rwlock);
1ba530f4 1734}
72865317 1735\f
a84cb64a
BP
1736/* Creates and returns a new 'struct dp_netdev_actions', with a reference count
1737 * of 1, whose actions are a copy of from the 'ofpacts_len' bytes of
1738 * 'ofpacts'. */
1739struct dp_netdev_actions *
1740dp_netdev_actions_create(const struct nlattr *actions, size_t size)
1741{
1742 struct dp_netdev_actions *netdev_actions;
1743
1744 netdev_actions = xmalloc(sizeof *netdev_actions);
a84cb64a
BP
1745 netdev_actions->actions = xmemdup(actions, size);
1746 netdev_actions->size = size;
1747
1748 return netdev_actions;
1749}
1750
a84cb64a 1751struct dp_netdev_actions *
61e7deb1 1752dp_netdev_flow_get_actions(const struct dp_netdev_flow *flow)
a84cb64a 1753{
61e7deb1 1754 return ovsrcu_get(struct dp_netdev_actions *, &flow->actions);
a84cb64a
BP
1755}
1756
61e7deb1
BP
1757static void
1758dp_netdev_actions_free(struct dp_netdev_actions *actions)
a84cb64a 1759{
61e7deb1
BP
1760 free(actions->actions);
1761 free(actions);
a84cb64a
BP
1762}
1763\f
e4cfed38 1764
5794e276 1765static void
f7791740 1766dp_netdev_process_rxq_port(struct dp_netdev *dp,
e4cfed38 1767 struct dp_netdev_port *port,
f7791740 1768 struct netdev_rxq *rxq)
e4cfed38 1769{
8cbf4f47
DDP
1770 struct dpif_packet *packets[NETDEV_MAX_RX_BATCH];
1771 int error, cnt;
e4cfed38 1772
8cbf4f47 1773 error = netdev_rxq_recv(rxq, packets, &cnt);
e4cfed38 1774 if (!error) {
8cbf4f47 1775 dp_netdev_port_input(dp, packets, cnt, port->port_no);
e4cfed38
PS
1776 } else if (error != EAGAIN && error != EOPNOTSUPP) {
1777 static struct vlog_rate_limit rl
1778 = VLOG_RATE_LIMIT_INIT(1, 5);
1779
1780 VLOG_ERR_RL(&rl, "error receiving data from %s: %s",
1781 netdev_get_name(port->netdev),
1782 ovs_strerror(error));
1783 }
1784}
1785
1786static void
1787dpif_netdev_run(struct dpif *dpif)
1788{
1789 struct dp_netdev_port *port;
1790 struct dp_netdev *dp = get_dp_netdev(dpif);
1791
a532e683 1792 CMAP_FOR_EACH (port, node, &dp->ports) {
55c955bd
PS
1793 if (!netdev_is_pmd(port->netdev)) {
1794 int i;
1795
1796 for (i = 0; i < netdev_n_rxq(port->netdev); i++) {
1797 dp_netdev_process_rxq_port(dp, port, port->rxq[i]);
1798 }
e4cfed38
PS
1799 }
1800 }
e4cfed38
PS
1801}
1802
1803static void
1804dpif_netdev_wait(struct dpif *dpif)
1805{
1806 struct dp_netdev_port *port;
1807 struct dp_netdev *dp = get_dp_netdev(dpif);
1808
59e6d833 1809 ovs_mutex_lock(&dp_netdev_mutex);
a532e683 1810 CMAP_FOR_EACH (port, node, &dp->ports) {
55c955bd
PS
1811 if (!netdev_is_pmd(port->netdev)) {
1812 int i;
1813
1814 for (i = 0; i < netdev_n_rxq(port->netdev); i++) {
1815 netdev_rxq_wait(port->rxq[i]);
1816 }
e4cfed38
PS
1817 }
1818 }
59e6d833 1819 ovs_mutex_unlock(&dp_netdev_mutex);
e4cfed38
PS
1820}
1821
/* One rx queue polled by a pmd thread, plus the port that owns it (the
 * thread holds a reference on 'port' while the entry is live). */
struct rxq_poll {
    struct dp_netdev_port *port;
    struct netdev_rxq *rx;
};
1826
1827static int
1828pmd_load_queues(struct pmd_thread *f,
f7791740 1829 struct rxq_poll **ppoll_list, int poll_cnt)
e4cfed38
PS
1830{
1831 struct dp_netdev *dp = f->dp;
f7791740 1832 struct rxq_poll *poll_list = *ppoll_list;
e4cfed38
PS
1833 struct dp_netdev_port *port;
1834 int id = f->id;
1835 int index;
1836 int i;
1837
1838 /* Simple scheduler for netdev rx polling. */
e4cfed38
PS
1839 for (i = 0; i < poll_cnt; i++) {
1840 port_unref(poll_list[i].port);
1841 }
1842
1843 poll_cnt = 0;
1844 index = 0;
1845
a532e683 1846 CMAP_FOR_EACH (port, node, &f->dp->ports) {
e4cfed38 1847 if (netdev_is_pmd(port->netdev)) {
55c955bd
PS
1848 int i;
1849
1850 for (i = 0; i < netdev_n_rxq(port->netdev); i++) {
1851 if ((index % dp->n_pmd_threads) == id) {
1852 poll_list = xrealloc(poll_list, sizeof *poll_list * (poll_cnt + 1));
e4cfed38 1853
55c955bd
PS
1854 port_ref(port);
1855 poll_list[poll_cnt].port = port;
1856 poll_list[poll_cnt].rx = port->rxq[i];
1857 poll_cnt++;
1858 }
1859 index++;
e4cfed38 1860 }
e4cfed38
PS
1861 }
1862 }
1863
e4cfed38
PS
1864 *ppoll_list = poll_list;
1865 return poll_cnt;
1866}
1867
6c3eee82 1868static void *
e4cfed38 1869pmd_thread_main(void *f_)
6c3eee82 1870{
e4cfed38 1871 struct pmd_thread *f = f_;
6c3eee82 1872 struct dp_netdev *dp = f->dp;
e4cfed38 1873 unsigned int lc = 0;
f7791740 1874 struct rxq_poll *poll_list;
e4cfed38
PS
1875 unsigned int port_seq;
1876 int poll_cnt;
1877 int i;
6c3eee82 1878
e4cfed38
PS
1879 poll_cnt = 0;
1880 poll_list = NULL;
1881
8617afff 1882 pmd_thread_setaffinity_cpu(f->id);
e4cfed38
PS
1883reload:
1884 poll_cnt = pmd_load_queues(f, &poll_list, poll_cnt);
1885 atomic_read(&f->change_seq, &port_seq);
6c3eee82 1886
e4cfed38
PS
1887 for (;;) {
1888 unsigned int c_port_seq;
6c3eee82
BP
1889 int i;
1890
e4cfed38 1891 for (i = 0; i < poll_cnt; i++) {
55c955bd 1892 dp_netdev_process_rxq_port(dp, poll_list[i].port, poll_list[i].rx);
e4cfed38
PS
1893 }
1894
1895 if (lc++ > 1024) {
1896 ovsrcu_quiesce();
6c3eee82 1897
e4cfed38
PS
1898 /* TODO: need completely userspace based signaling method.
1899 * to keep this thread entirely in userspace.
1900 * For now using atomic counter. */
1901 lc = 0;
1902 atomic_read_explicit(&f->change_seq, &c_port_seq, memory_order_consume);
1903 if (c_port_seq != port_seq) {
6c3eee82
BP
1904 break;
1905 }
1906 }
e4cfed38 1907 }
6c3eee82 1908
e4cfed38
PS
1909 if (!latch_is_set(&f->dp->exit_latch)){
1910 goto reload;
1911 }
6c3eee82 1912
e4cfed38
PS
1913 for (i = 0; i < poll_cnt; i++) {
1914 port_unref(poll_list[i].port);
6c3eee82 1915 }
6c3eee82 1916
e4cfed38 1917 free(poll_list);
6c3eee82
BP
1918 return NULL;
1919}
1920
1921static void
e4cfed38 1922dp_netdev_set_pmd_threads(struct dp_netdev *dp, int n)
6c3eee82
BP
1923{
1924 int i;
1925
e4cfed38 1926 if (n == dp->n_pmd_threads) {
6c3eee82
BP
1927 return;
1928 }
1929
1930 /* Stop existing threads. */
1931 latch_set(&dp->exit_latch);
e4cfed38
PS
1932 dp_netdev_reload_pmd_threads(dp);
1933 for (i = 0; i < dp->n_pmd_threads; i++) {
1934 struct pmd_thread *f = &dp->pmd_threads[i];
6c3eee82
BP
1935
1936 xpthread_join(f->thread, NULL);
1937 }
1938 latch_poll(&dp->exit_latch);
e4cfed38 1939 free(dp->pmd_threads);
6c3eee82
BP
1940
1941 /* Start new threads. */
e4cfed38
PS
1942 dp->pmd_threads = xmalloc(n * sizeof *dp->pmd_threads);
1943 dp->n_pmd_threads = n;
1944
6c3eee82 1945 for (i = 0; i < n; i++) {
e4cfed38 1946 struct pmd_thread *f = &dp->pmd_threads[i];
6c3eee82
BP
1947
1948 f->dp = dp;
e4cfed38
PS
1949 f->id = i;
1950 atomic_store(&f->change_seq, 1);
1951
1952 /* Each thread will distribute all devices rx-queues among
1953 * themselves. */
8ba0a522 1954 f->thread = ovs_thread_create("pmd", pmd_thread_main, f);
6c3eee82
BP
1955 }
1956}
e4cfed38 1957
6c3eee82 1958\f
679ba04c
BP
1959static void *
1960dp_netdev_flow_stats_new_cb(void)
1961{
1962 struct dp_netdev_flow_stats *bucket = xzalloc_cacheline(sizeof *bucket);
1963 ovs_mutex_init(&bucket->mutex);
1964 return bucket;
1965}
1966
72865317 1967static void
1763b4b8 1968dp_netdev_flow_used(struct dp_netdev_flow *netdev_flow,
8cbf4f47
DDP
1969 int cnt, int size,
1970 uint16_t tcp_flags)
72865317 1971{
679ba04c
BP
1972 long long int now = time_msec();
1973 struct dp_netdev_flow_stats *bucket;
1974
1975 bucket = ovsthread_stats_bucket_get(&netdev_flow->stats,
1976 dp_netdev_flow_stats_new_cb);
1977
1978 ovs_mutex_lock(&bucket->mutex);
1979 bucket->used = MAX(now, bucket->used);
8cbf4f47
DDP
1980 bucket->packet_count += cnt;
1981 bucket->byte_count += size;
679ba04c
BP
1982 bucket->tcp_flags |= tcp_flags;
1983 ovs_mutex_unlock(&bucket->mutex);
72865317
BP
1984}
1985
51852a57
BP
1986static void *
1987dp_netdev_stats_new_cb(void)
1988{
1989 struct dp_netdev_stats *bucket = xzalloc_cacheline(sizeof *bucket);
1990 ovs_mutex_init(&bucket->mutex);
1991 return bucket;
1992}
1993
1994static void
8cbf4f47 1995dp_netdev_count_packet(struct dp_netdev *dp, enum dp_stat_type type, int cnt)
51852a57
BP
1996{
1997 struct dp_netdev_stats *bucket;
1998
1999 bucket = ovsthread_stats_bucket_get(&dp->stats, dp_netdev_stats_new_cb);
2000 ovs_mutex_lock(&bucket->mutex);
8cbf4f47 2001 bucket->n[type] += cnt;
51852a57
BP
2002 ovs_mutex_unlock(&bucket->mutex);
2003}
2004
8cbf4f47
DDP
/* A batch of packets that all matched the same flow, accumulated so that the
 * flow's actions can be executed once for the whole group instead of per
 * packet.  Filled by packet_batch_init()/packet_batch_update() and drained by
 * packet_batch_execute(). */
struct batch_pkt_execute {
    unsigned int packet_count;   /* Number of entries used in 'packets'. */
    unsigned int byte_count;     /* Sum of the packets' sizes, for stats. */
    uint16_t tcp_flags;          /* OR of the packets' TCP flags. */

    struct dp_netdev_flow *flow; /* Flow all batched packets matched. */

    struct dpif_packet *packets[NETDEV_MAX_RX_BATCH];
    struct pkt_metadata md;      /* Metadata copied from the first packet. */
};
2015
2016static inline void
2017packet_batch_update(struct batch_pkt_execute *batch,
2018 struct dpif_packet *packet, const struct miniflow *mf)
2019{
2020 batch->tcp_flags |= miniflow_get_tcp_flags(mf);
2021 batch->packets[batch->packet_count++] = packet;
2022 batch->byte_count += ofpbuf_size(&packet->ofpbuf);
2023}
2024
2025static inline void
2026packet_batch_init(struct batch_pkt_execute *batch, struct dp_netdev_flow *flow,
2027 struct dpif_packet *packet, struct pkt_metadata *md,
2028 const struct miniflow *mf)
2029{
2030 batch->flow = flow;
2031 batch->md = *md;
2032 batch->packets[0] = packet;
2033
2034 batch->packet_count = 0;
2035 batch->byte_count = 0;
2036 batch->tcp_flags = 0;
2037
2038 packet_batch_update(batch, packet, mf);
2039}
2040
2041static inline void
2042packet_batch_execute(struct batch_pkt_execute *batch, struct dp_netdev *dp)
2043{
2044 struct dp_netdev_actions *actions;
2045 struct dp_netdev_flow *flow = batch->flow;
2046
2047 dp_netdev_flow_used(batch->flow, batch->packet_count, batch->byte_count,
2048 batch->tcp_flags);
2049
2050 actions = dp_netdev_flow_get_actions(flow);
2051
2052 dp_netdev_execute_actions(dp, batch->packets,
2053 batch->packet_count, true, &batch->md,
2054 actions->actions, actions->size);
2055
2056 dp_netdev_count_packet(dp, DP_STAT_HIT, batch->packet_count);
2057}
2058
72865317 2059static void
8cbf4f47 2060dp_netdev_input(struct dp_netdev *dp, struct dpif_packet **packets, int cnt,
adcf00ba 2061 struct pkt_metadata *md)
72865317 2062{
8cbf4f47
DDP
2063 struct batch_pkt_execute batch;
2064
2065 struct netdev_flow_key key;
2066
2067 int i;
2068
2069 batch.flow = NULL;
2070
27bbe15d 2071 miniflow_initialize(&key.flow, key.buf);
4f150744 2072
8cbf4f47
DDP
2073 for (i = 0; i < cnt; i++) {
2074 struct dp_netdev_flow *netdev_flow;
2075 struct ofpbuf *buf = &packets[i]->ofpbuf;
2076
2077 if (ofpbuf_size(buf) < ETH_HEADER_LEN) {
2078 dpif_packet_delete(packets[i]);
2079 continue;
2080 }
a84cb64a 2081
8cbf4f47 2082 miniflow_extract(buf, md, &key.flow);
679ba04c 2083
8cbf4f47
DDP
2084 netdev_flow = dp_netdev_lookup_flow(dp, &key.flow);
2085
2086 if (netdev_flow) {
2087 if (!batch.flow) {
2088 packet_batch_init(&batch, netdev_flow, packets[i], md,
2089 &key.flow);
2090 } else if (batch.flow == netdev_flow) {
2091 packet_batch_update(&batch, packets[i], &key.flow);
2092 } else {
2093 packet_batch_execute(&batch, dp);
2094 packet_batch_init(&batch, netdev_flow, packets[i], md,
2095 &key.flow);
2096 }
2097 } else if (dp->handler_queues) {
2098 dp_netdev_count_packet(dp, DP_STAT_MISS, 1);
2099 dp_netdev_output_userspace(dp, &buf, 1,
2100 miniflow_hash_5tuple(&key.flow, 0)
2101 % dp->n_handlers,
2102 DPIF_UC_MISS, &key.flow, NULL);
2103 }
2104 }
2105
2106 if (batch.flow) {
2107 packet_batch_execute(&batch, dp);
72865317
BP
2108 }
2109}
2110
adcf00ba 2111static void
8cbf4f47
DDP
2112dp_netdev_port_input(struct dp_netdev *dp, struct dpif_packet **packets,
2113 int cnt, odp_port_t port_no)
adcf00ba
AZ
2114{
2115 uint32_t *recirc_depth = recirc_depth_get();
8cbf4f47 2116 struct pkt_metadata md = PKT_METADATA_INITIALIZER(port_no);
adcf00ba
AZ
2117
2118 *recirc_depth = 0;
8cbf4f47 2119 dp_netdev_input(dp, packets, cnt, &md);
adcf00ba
AZ
2120}
2121
/* Enqueues 'packet' as an upcall of 'type' (DPIF_UC_*) onto handler queue
 * 'q', serializing the ODP flow key from 'key' and attaching optional
 * 'userdata'.
 *
 * Takes ownership of the ofpbuf contents of '*packet' in all cases: on
 * success the ofpbuf struct is copied into the upcall (stealing its data);
 * if the queue is full, the packet is freed here.
 *
 * Returns 0 on success, ENOBUFS if the queue is full. */
static int
dp_netdev_queue_userspace_packet(struct dp_netdev_queue *q,
                                 struct ofpbuf *packet, int type,
                                 const struct miniflow *key,
                                 const struct nlattr *userdata)
OVS_REQUIRES(q->mutex)
{
    if (q->head - q->tail < MAX_QUEUE_LEN) {
        /* Ring buffer: head/tail are free-running counters, masked on use. */
        struct dp_netdev_upcall *u = &q->upcalls[q->head++ & QUEUE_MASK];
        struct dpif_upcall *upcall = &u->upcall;
        struct ofpbuf *buf = &u->buf;
        size_t buf_size;
        struct flow flow;

        upcall->type = type;

        /* Allocate buffer big enough for everything: the serialized flow key
         * plus (if present) the aligned userdata attribute. */
        buf_size = ODPUTIL_FLOW_KEY_BYTES;
        if (userdata) {
            buf_size += NLA_ALIGN(userdata->nla_len);
        }
        ofpbuf_init(buf, buf_size);

        /* Put ODP flow. */
        miniflow_expand(key, &flow);
        odp_flow_key_from_flow(buf, &flow, NULL, flow.in_port.odp_port, true);
        upcall->key = ofpbuf_data(buf);
        upcall->key_len = ofpbuf_size(buf);

        /* Put userdata. */
        if (userdata) {
            upcall->userdata = ofpbuf_put(buf, userdata,
                                          NLA_ALIGN(userdata->nla_len));
        }

        /* Shallow struct copy: the upcall now owns the packet's data. */
        upcall->packet = *packet;

        /* Wake up any thread waiting on the queue. */
        seq_change(q->seq);

        return 0;
    } else {
        /* Queue full: the packet is dropped (and freed) here. */
        ofpbuf_delete(packet);
        return ENOBUFS;
    }

}
2168
2169static int
2170dp_netdev_output_userspace(struct dp_netdev *dp, struct ofpbuf **packets,
2171 int cnt, int queue_no, int type,
2172 const struct miniflow *key,
2173 const struct nlattr *userdata)
2174{
2175 struct dp_netdev_queue *q;
2176 int error;
2177 int i;
2178
2179 fat_rwlock_rdlock(&dp->queue_rwlock);
2180 q = &dp->handler_queues[queue_no];
2181 ovs_mutex_lock(&q->mutex);
2182 for (i = 0; i < cnt; i++) {
2183 struct ofpbuf *packet = packets[i];
2184
2185 error = dp_netdev_queue_userspace_packet(q, packet, type, key,
2186 userdata);
2187 if (error == ENOBUFS) {
2188 dp_netdev_count_packet(dp, DP_STAT_LOST, 1);
2189 }
e995e3df 2190 }
63be20be
AW
2191 ovs_mutex_unlock(&q->mutex);
2192 fat_rwlock_unlock(&dp->queue_rwlock);
f5126b57
BP
2193
2194 return error;
72865317
BP
2195}
2196
9080a111
JR
/* Opaque context threaded through odp_execute_actions() into
 * dp_execute_cb(), carrying the datapath the actions run against. */
struct dp_netdev_execute_aux {
    struct dp_netdev *dp;
};
2200
/* Callback from odp_execute_actions(): applies the single datapath action 'a'
 * to the 'cnt' packets in 'packets' with shared metadata 'md'.
 *
 * If 'may_steal' is true, this callback may consume the packets; otherwise it
 * must leave them intact (cloning where it needs ownership). */
static void
dp_execute_cb(void *aux_, struct dpif_packet **packets, int cnt,
              struct pkt_metadata *md,
              const struct nlattr *a, bool may_steal)
    OVS_NO_THREAD_SAFETY_ANALYSIS
{
    struct dp_netdev_execute_aux *aux = aux_;
    int type = nl_attr_type(a);
    struct dp_netdev_port *p;
    uint32_t *depth = recirc_depth_get();
    int i;

    switch ((enum ovs_action_attr)type) {
    case OVS_ACTION_ATTR_OUTPUT:
        /* Transmit the whole batch on the target port, if it exists.
         * NOTE(review): if the port lookup fails and may_steal is true, the
         * packets do not appear to be freed here -- confirm. */
        p = dp_netdev_lookup_port(aux->dp, u32_to_odp(nl_attr_get_u32(a)));
        if (p) {
            netdev_send(p->netdev, packets, cnt, may_steal);
        }
        break;

    case OVS_ACTION_ATTR_USERSPACE: {
        const struct nlattr *userdata;
        struct netdev_flow_key key;

        userdata = nl_attr_find_nested(a, OVS_USERSPACE_ATTR_USERDATA);

        miniflow_initialize(&key.flow, key.buf);

        for (i = 0; i < cnt; i++) {
            struct ofpbuf *packet, *userspace_packet;

            packet = &packets[i]->ofpbuf;

            /* Re-extract the flow: the handler queue is chosen per packet by
             * a 5-tuple hash. */
            miniflow_extract(packet, md, &key.flow);

            /* Hand the original buffer to userspace only when we are allowed
             * to steal it; otherwise send a clone. */
            userspace_packet = may_steal ? packet : ofpbuf_clone(packet);

            dp_netdev_output_userspace(aux->dp, &userspace_packet, 1,
                                       miniflow_hash_5tuple(&key.flow, 0)
                                       % aux->dp->n_handlers,
                                       DPIF_UC_ACTION, &key.flow,
                                       userdata);
        }
        break;
    }

    case OVS_ACTION_ATTR_HASH: {
        const struct ovs_action_hash *hash_act;
        struct netdev_flow_key key;
        uint32_t hash;

        hash_act = nl_attr_get(a);

        miniflow_initialize(&key.flow, key.buf);

        for (i = 0; i < cnt; i++) {

            /* TODO: this is slow. Use RSS hash in the future */
            miniflow_extract(&packets[i]->ofpbuf, md, &key.flow);

            if (hash_act->hash_alg == OVS_HASH_ALG_L4) {
                /* Hash need not be symmetric, nor does it need to include
                 * L2 fields. */
                hash = miniflow_hash_5tuple(&key.flow, hash_act->hash_basis);
            } else {
                VLOG_WARN("Unknown hash algorithm specified "
                          "for the hash action.");
                hash = 2;
            }

            if (!hash) {
                hash = 1; /* 0 is not valid */
            }

            /* The shared metadata keeps only the first packet's hash; each
             * packet carries its own in 'dp_hash'. */
            if (i == 0) {
                md->dp_hash = hash;
            }
            packets[i]->dp_hash = hash;
        }
        break;
    }

    case OVS_ACTION_ATTR_RECIRC:
        if (*depth < MAX_RECIRC_DEPTH) {

            /* Bound recursion: recirculation re-enters dp_netdev_input(). */
            (*depth)++;
            for (i = 0; i < cnt; i++) {
                struct dpif_packet *recirc_pkt;
                struct pkt_metadata recirc_md = *md;

                recirc_pkt = (may_steal) ? packets[i]
                                    : dpif_packet_clone(packets[i]);

                recirc_md.recirc_id = nl_attr_get_u32(a);

                /* Hash is private to each packet */
                recirc_md.dp_hash = packets[i]->dp_hash;

                dp_netdev_input(aux->dp, &recirc_pkt, 1, &recirc_md);
            }
            (*depth)--;

            break;
        } else {
            /* NOTE(review): despite the message, the packets are not freed
             * here even when may_steal is true -- possible leak, confirm. */
            VLOG_WARN("Packet dropped. Max recirculation depth exceeded.");
        }
        break;

    /* These actions are implemented generically inside odp-execute and must
     * never reach this callback. */
    case OVS_ACTION_ATTR_PUSH_VLAN:
    case OVS_ACTION_ATTR_POP_VLAN:
    case OVS_ACTION_ATTR_PUSH_MPLS:
    case OVS_ACTION_ATTR_POP_MPLS:
    case OVS_ACTION_ATTR_SET:
    case OVS_ACTION_ATTR_SAMPLE:
    case OVS_ACTION_ATTR_UNSPEC:
    case __OVS_ACTION_ATTR_MAX:
        OVS_NOT_REACHED();
    }
}
2320
4edb9ae9 2321static void
8cbf4f47
DDP
2322dp_netdev_execute_actions(struct dp_netdev *dp,
2323 struct dpif_packet **packets, int cnt,
2324 bool may_steal, struct pkt_metadata *md,
9080a111 2325 const struct nlattr *actions, size_t actions_len)
72865317 2326{
8cbf4f47 2327 struct dp_netdev_execute_aux aux = {dp};
9080a111 2328
8cbf4f47
DDP
2329 odp_execute_actions(&aux, packets, cnt, may_steal, md, actions,
2330 actions_len, dp_execute_cb);
72865317
BP
2331}
2332
/* dpif provider vtable for the userspace ("netdev") datapath.  Entries are
 * positional per 'struct dpif_class'; NULL marks an unimplemented hook. */
const struct dpif_class dpif_netdev_class = {
    "netdev",
    dpif_netdev_enumerate,
    dpif_netdev_port_open_type,
    dpif_netdev_open,
    dpif_netdev_close,
    dpif_netdev_destroy,
    dpif_netdev_run,
    dpif_netdev_wait,
    dpif_netdev_get_stats,
    dpif_netdev_port_add,
    dpif_netdev_port_del,
    dpif_netdev_port_query_by_number,
    dpif_netdev_port_query_by_name,
    NULL,                       /* port_get_pid */
    dpif_netdev_port_dump_start,
    dpif_netdev_port_dump_next,
    dpif_netdev_port_dump_done,
    dpif_netdev_port_poll,
    dpif_netdev_port_poll_wait,
    dpif_netdev_flow_get,
    dpif_netdev_flow_put,
    dpif_netdev_flow_del,
    dpif_netdev_flow_flush,
    dpif_netdev_flow_dump_create,
    dpif_netdev_flow_dump_destroy,
    dpif_netdev_flow_dump_thread_create,
    dpif_netdev_flow_dump_thread_destroy,
    dpif_netdev_flow_dump_next,
    dpif_netdev_execute,
    NULL,                       /* operate */
    dpif_netdev_recv_set,
    dpif_netdev_handlers_set,
    dpif_netdev_queue_to_priority,
    dpif_netdev_recv,
    dpif_netdev_recv_wait,
    dpif_netdev_recv_purge,
};
614c4892 2371
74cc3969
BP
/* unixctl handler for "dpif-dummy/change-port-number DP PORT NEW-NUMBER":
 * renumbers port argv[2] of dummy datapath argv[1] to argv[3].
 *
 * Replies on 'conn' with an error string on failure, or an empty success
 * reply.  Only works on dummy datapaths. */
static void
dpif_dummy_change_port_number(struct unixctl_conn *conn, int argc OVS_UNUSED,
                              const char *argv[], void *aux OVS_UNUSED)
{
    struct dp_netdev_port *old_port;
    struct dp_netdev_port *new_port;
    struct dp_netdev *dp;
    odp_port_t port_no;

    /* Look up the datapath and take a reference so it cannot be destroyed
     * while we work on it after dropping 'dp_netdev_mutex'. */
    ovs_mutex_lock(&dp_netdev_mutex);
    dp = shash_find_data(&dp_netdevs, argv[1]);
    if (!dp || !dpif_netdev_class_is_dummy(dp->class)) {
        ovs_mutex_unlock(&dp_netdev_mutex);
        unixctl_command_reply_error(conn, "unknown datapath or not a dummy");
        return;
    }
    ovs_refcount_ref(&dp->ref_cnt);
    ovs_mutex_unlock(&dp_netdev_mutex);

    ovs_mutex_lock(&dp->port_mutex);
    if (get_port_by_name(dp, argv[2], &old_port)) {
        unixctl_command_reply_error(conn, "unknown port");
        goto exit;
    }

    /* NOTE(review): atoi() silently yields 0 on non-numeric input, which is
     * then rejected as "bad port number" -- acceptable, but strtoul would
     * give clearer diagnostics. */
    port_no = u32_to_odp(atoi(argv[3]));
    if (!port_no || port_no == ODPP_NONE) {
        unixctl_command_reply_error(conn, "bad port number");
        goto exit;
    }
    if (dp_netdev_lookup_port(dp, port_no)) {
        unixctl_command_reply_error(conn, "port number already in use");
        goto exit;
    }

    /* Remove old port.  ovsrcu_postpone() defers the free past an RCU grace
     * period, so reading 'old_port' below (for the copy) is still safe. */
    cmap_remove(&dp->ports, &old_port->node, hash_port_no(old_port->port_no));
    ovsrcu_postpone(free, old_port);

    /* Insert new port (cmap semantics mean we cannot re-insert 'old_port'). */
    new_port = xmemdup(old_port, sizeof *old_port);
    new_port->port_no = port_no;
    cmap_insert(&dp->ports, &new_port->node, hash_port_no(port_no));

    /* Notify port_poll() waiters of the change. */
    seq_change(dp->port_seq);
    unixctl_command_reply(conn, NULL);

exit:
    ovs_mutex_unlock(&dp->port_mutex);
    dp_netdev_unref(dp);
}
2423
c40b890f
BP
2424static void
2425dpif_dummy_delete_port(struct unixctl_conn *conn, int argc OVS_UNUSED,
2426 const char *argv[], void *aux OVS_UNUSED)
2427{
2428 struct dp_netdev_port *port;
2429 struct dp_netdev *dp;
2430
2431 ovs_mutex_lock(&dp_netdev_mutex);
2432 dp = shash_find_data(&dp_netdevs, argv[1]);
2433 if (!dp || !dpif_netdev_class_is_dummy(dp->class)) {
2434 ovs_mutex_unlock(&dp_netdev_mutex);
2435 unixctl_command_reply_error(conn, "unknown datapath or not a dummy");
2436 return;
2437 }
2438 ovs_refcount_ref(&dp->ref_cnt);
2439 ovs_mutex_unlock(&dp_netdev_mutex);
2440
2441 ovs_mutex_lock(&dp->port_mutex);
2442 if (get_port_by_name(dp, argv[2], &port)) {
2443 unixctl_command_reply_error(conn, "unknown port");
2444 } else if (port->port_no == ODPP_LOCAL) {
2445 unixctl_command_reply_error(conn, "can't delete local port");
2446 } else {
2447 do_del_port(dp, port);
2448 unixctl_command_reply(conn, NULL);
2449 }
2450 ovs_mutex_unlock(&dp->port_mutex);
2451
2452 dp_netdev_unref(dp);
2453}
2454
0cbfe35d
BP
2455static void
2456dpif_dummy_register__(const char *type)
2457{
2458 struct dpif_class *class;
2459
2460 class = xmalloc(sizeof *class);
2461 *class = dpif_netdev_class;
2462 class->type = xstrdup(type);
2463 dp_register_provider(class);
2464}
2465
614c4892 2466void
0cbfe35d 2467dpif_dummy_register(bool override)
614c4892 2468{
0cbfe35d
BP
2469 if (override) {
2470 struct sset types;
2471 const char *type;
2472
2473 sset_init(&types);
2474 dp_enumerate_types(&types);
2475 SSET_FOR_EACH (type, &types) {
2476 if (!dp_unregister_provider(type)) {
2477 dpif_dummy_register__(type);
2478 }
2479 }
2480 sset_destroy(&types);
614c4892 2481 }
0cbfe35d
BP
2482
2483 dpif_dummy_register__("dummy");
74cc3969
BP
2484
2485 unixctl_command_register("dpif-dummy/change-port-number",
2486 "DP PORT NEW-NUMBER",
2487 3, 3, dpif_dummy_change_port_number, NULL);
c40b890f
BP
2488 unixctl_command_register("dpif-dummy/delete-port", "DP PORT",
2489 2, 2, dpif_dummy_delete_port, NULL);
614c4892 2490}