]> git.proxmox.com Git - ovs.git/blame - lib/dpif-netdev.c
dpif-netdev: Make thread-safety much more granular.
[ovs.git] / lib / dpif-netdev.c
CommitLineData
72865317 1/*
ff073a71 2 * Copyright (c) 2009, 2010, 2011, 2012, 2013, 2014 Nicira, Inc.
72865317
BP
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include <config.h>
18#include "dpif.h"
19
72865317
BP
20#include <ctype.h>
21#include <errno.h>
22#include <fcntl.h>
23#include <inttypes.h>
72865317 24#include <netinet/in.h>
9d82ec47 25#include <sys/socket.h>
7f3adc00 26#include <net/if.h>
cdee00fd 27#include <stdint.h>
72865317
BP
28#include <stdlib.h>
29#include <string.h>
30#include <sys/ioctl.h>
31#include <sys/stat.h>
72865317
BP
32#include <unistd.h>
33
2c0ea78f 34#include "classifier.h"
72865317 35#include "csum.h"
614c4892 36#include "dpif.h"
72865317 37#include "dpif-provider.h"
614c4892 38#include "dummy.h"
36956a7d 39#include "dynamic-string.h"
72865317
BP
40#include "flow.h"
41#include "hmap.h"
42#include "list.h"
8c301900 43#include "meta-flow.h"
72865317 44#include "netdev.h"
de281153 45#include "netdev-vport.h"
cdee00fd 46#include "netlink.h"
f094af7b 47#include "odp-execute.h"
72865317
BP
48#include "odp-util.h"
49#include "ofp-print.h"
50#include "ofpbuf.h"
51#include "packets.h"
52#include "poll-loop.h"
26c6b6cd 53#include "random.h"
d33ed218 54#include "seq.h"
462278db 55#include "shash.h"
0cbfe35d 56#include "sset.h"
72865317 57#include "timeval.h"
74cc3969 58#include "unixctl.h"
72865317 59#include "util.h"
72865317 60#include "vlog.h"
5136ce49 61
d98e6007 62VLOG_DEFINE_THIS_MODULE(dpif_netdev);
72865317 63
2c0ea78f
GS
64/* By default, choose a priority in the middle. */
65#define NETDEV_RULE_PRIORITY 0x8000
66
72865317 67/* Configuration parameters. */
72865317
BP
68enum { MAX_FLOWS = 65536 }; /* Maximum number of flows in flow table. */
69
70/* Enough headroom to add a vlan tag, plus an extra 2 bytes to allow IP
71 * headers to be aligned on a 4-byte boundary. */
72enum { DP_NETDEV_HEADROOM = 2 + VLAN_HEADER_LEN };
73
856081f6
BP
74/* Queues. */
75enum { N_QUEUES = 2 }; /* Number of queues for dpif_recv(). */
76enum { MAX_QUEUE_LEN = 128 }; /* Maximum number of packets per queue. */
77enum { QUEUE_MASK = MAX_QUEUE_LEN - 1 };
78BUILD_ASSERT_DECL(IS_POW2(MAX_QUEUE_LEN));
79
8a4e3a85
BP
80/* Protects against changes to 'dp_netdevs'. */
81static struct ovs_mutex dp_netdev_mutex = OVS_MUTEX_INITIALIZER;
82
83/* Contains all 'struct dp_netdev's. */
84static struct shash dp_netdevs OVS_GUARDED_BY(dp_netdev_mutex)
85 = SHASH_INITIALIZER(&dp_netdevs);
86
d88b629b
BP
/* An upcall (a packet punted to userspace) queued inside a
 * dp_netdev_queue. */
struct dp_netdev_upcall {
    struct dpif_upcall upcall;  /* Queued upcall information. */
    struct ofpbuf buf;          /* ofpbuf instance for upcall.packet. */
};
91
8a4e3a85
BP
/* A queue passing packets from a struct dp_netdev to its clients.
 *
 *
 * Thread-safety
 * =============
 *
 * Any access at all requires the owning 'dp_netdev''s queue_mutex. */
struct dp_netdev_queue {
    /* Fixed-size ring buffer; MAX_QUEUE_LEN is a power of 2 so 'head' and
     * 'tail' can grow without bound and be masked with QUEUE_MASK on use. */
    struct dp_netdev_upcall upcalls[MAX_QUEUE_LEN] OVS_GUARDED;
    unsigned int head OVS_GUARDED;  /* Enqueue position. */
    unsigned int tail OVS_GUARDED;  /* Dequeue position; empty if head == tail. */
};
104
8a4e3a85
BP
/* Datapath based on the network device interface from netdev.h.
 *
 *
 * Thread-safety
 * =============
 *
 * Some members, marked 'const', are immutable.  Accessing other members
 * requires synchronization, as noted in more detail below.
 *
 * Acquisition order is, from outermost to innermost:
 *
 *    dp_netdev_mutex (global)
 *    port_rwlock
 *    flow_mutex
 *    cls.rwlock
 *    queue_mutex
 */
struct dp_netdev {
    const struct dpif_class *const class;
    const char *const name;
    struct ovs_refcount ref_cnt;    /* Held by each open dpif handle. */
    atomic_flag destroyed;          /* Set once by dpif_netdev_destroy(). */

    /* Flows.
     *
     * Readers of 'cls' and 'flow_table' must take a 'cls->rwlock' read lock.
     *
     * Writers of 'cls' and 'flow_table' must take the 'flow_mutex' and then
     * the 'cls->rwlock' write lock.  (The outer 'flow_mutex' allows writers to
     * atomically perform multiple operations on 'cls' and 'flow_table'.)
     */
    struct ovs_mutex flow_mutex;
    struct classifier cls;          /* Classifier.  Protected by cls.rwlock. */
    struct hmap flow_table OVS_GUARDED; /* Flow table. */

    /* Queues.
     *
     * Everything in 'queues' is protected by 'queue_mutex'. */
    struct ovs_mutex queue_mutex;
    struct dp_netdev_queue queues[N_QUEUES];
    struct seq *queue_seq;          /* Incremented whenever a packet is queued. */

    /* Statistics.
     *
     * ovsthread_counter is internally synchronized. */
    struct ovsthread_counter *n_hit;    /* Number of flow table matches. */
    struct ovsthread_counter *n_missed; /* Number of flow table misses. */
    struct ovsthread_counter *n_lost;   /* Number of misses not passed up. */

    /* Ports.
     *
     * Any lookup into 'ports' or any access to the dp_netdev_ports found
     * through 'ports' requires taking 'port_rwlock'. */
    struct ovs_rwlock port_rwlock;
    struct hmap ports OVS_GUARDED;  /* Keyed by hash of port number. */
    struct seq *port_seq;           /* Incremented whenever a port changes. */
};
162
8a4e3a85
BP
163static struct dp_netdev_port *dp_netdev_lookup_port(const struct dp_netdev *dp,
164 odp_port_t)
165 OVS_REQ_RDLOCK(dp->port_rwlock);
ff073a71 166
72865317
BP
/* A port in a netdev-based datapath. */
struct dp_netdev_port {
    struct hmap_node node;      /* Node in dp_netdev's 'ports'. */
    odp_port_t port_no;         /* Datapath port number. */
    struct netdev *netdev;      /* Underlying network device. */
    struct netdev_saved_flags *sf; /* Flags saved when promisc mode was set;
                                    * restored by do_del_port(). */
    struct netdev_rx *rx;       /* Receive handle from netdev_rx_open(). */
    char *type;                 /* Port type as requested by user. */
};
176
8a4e3a85
BP
/* A flow in dp_netdev's 'flow_table'.
 *
 *
 * Thread-safety
 * =============
 *
 * Except near the beginning or ending of its lifespan, rule 'rule' belongs to
 * its dp_netdev's classifier.  The text below calls this classifier 'cls'.
 *
 * Motivation
 * ----------
 *
 * The thread safety rules described here for "struct dp_netdev_flow" are
 * motivated by two goals:
 *
 *    - Prevent threads that read members of "struct dp_netdev_flow" from
 *      reading bad data due to changes by some thread concurrently modifying
 *      those members.
 *
 *    - Prevent two threads making changes to members of a given "struct
 *      dp_netdev_flow" from interfering with each other.
 *
 *
 * Rules
 * -----
 *
 * A flow 'flow' may be accessed without a risk of being freed by code that
 * holds a read-lock or write-lock on 'cls->rwlock' or that owns a reference to
 * 'flow->ref_cnt' (or both).  Code that needs to hold onto a flow for a while
 * should take 'cls->rwlock', find the flow it needs, increment 'flow->ref_cnt'
 * with dpif_netdev_flow_ref(), and drop 'cls->rwlock'.
 *
 * 'flow->ref_cnt' protects 'flow' from being freed.  It doesn't protect the
 * flow from being deleted from 'cls' (that's 'cls->rwlock') and it doesn't
 * protect members of 'flow' from modification (that's 'flow->mutex').
 *
 * 'flow->mutex' protects the members of 'flow' from modification.  It doesn't
 * protect the flow from being deleted from 'cls' (that's 'cls->rwlock') and it
 * doesn't prevent the flow from being freed (that's 'flow->ref_cnt').
 *
 * Some members, marked 'const', are immutable.  Accessing other members
 * requires synchronization, as noted in more detail below.
 */
struct dp_netdev_flow {
    /* Packet classification. */
    const struct cls_rule cr;   /* In owning dp_netdev's 'cls'. */

    /* Hash table index by unmasked flow. */
    const struct hmap_node node; /* In owning dp_netdev's 'flow_table'. */
    const struct flow flow;      /* The flow that created this entry. */

    /* Number of references.
     * The classifier owns one reference.
     * Any thread trying to keep a rule from being freed should hold its own
     * reference. */
    struct ovs_refcount ref_cnt;

    /* Protects members marked OVS_GUARDED.
     *
     * Acquire after datapath's flow_mutex. */
    struct ovs_mutex mutex OVS_ACQ_AFTER(dp_netdev_mutex);

    /* Statistics.
     *
     * Reading or writing these members requires 'mutex'. */
    long long int used OVS_GUARDED;         /* Last used time, in monotonic msecs. */
    long long int packet_count OVS_GUARDED; /* Number of packets matched. */
    long long int byte_count OVS_GUARDED;   /* Number of bytes matched. */
    uint16_t tcp_flags OVS_GUARDED;         /* Bitwise-OR of seen tcp_flags values. */

    /* Actions.
     *
     * Reading 'actions' requires 'mutex'.
     * Writing 'actions' requires 'mutex' and (to allow for transactions) the
     * datapath's flow_mutex. */
    struct dp_netdev_actions *actions OVS_GUARDED;
};
254
8a4e3a85
BP
255static struct dp_netdev_flow *dp_netdev_flow_ref(
256 const struct dp_netdev_flow *);
257static void dp_netdev_flow_unref(struct dp_netdev_flow *);
258
a84cb64a
BP
/* A set of datapath actions within a "struct dp_netdev_flow".
 *
 *
 * Thread-safety
 * =============
 *
 * A struct dp_netdev_actions 'actions' may be accessed without a risk of being
 * freed by code that holds a read-lock or write-lock on 'flow->mutex' (where
 * 'flow' is the dp_netdev_flow for which 'flow->actions == actions') or that
 * owns a reference to 'actions->ref_cnt' (or both). */
struct dp_netdev_actions {
    struct ovs_refcount ref_cnt;

    /* These members are immutable: they do not change during the struct's
     * lifetime.  */
    struct nlattr *actions;     /* Sequence of OVS_ACTION_ATTR_* attributes. */
    unsigned int size;          /* Size of 'actions', in bytes. */
};
277
278struct dp_netdev_actions *dp_netdev_actions_create(const struct nlattr *,
279 size_t);
280struct dp_netdev_actions *dp_netdev_actions_ref(
281 const struct dp_netdev_actions *);
282void dp_netdev_actions_unref(struct dp_netdev_actions *);
283
72865317
BP
/* Interface to netdev-based datapath.  One per open dpif handle; each holds
 * a reference to the shared underlying dp_netdev. */
struct dpif_netdev {
    struct dpif dpif;           /* Generic dpif state. */
    struct dp_netdev *dp;       /* Underlying datapath (counted reference). */
    uint64_t last_port_seq;     /* Last 'dp->port_seq' value seen by
                                 * dpif_netdev_port_poll(). */
};
290
8a4e3a85
BP
291static int get_port_by_number(struct dp_netdev *dp, odp_port_t port_no,
292 struct dp_netdev_port **portp)
293 OVS_REQ_RDLOCK(dp->port_rwlock);
294static int get_port_by_name(struct dp_netdev *dp, const char *devname,
295 struct dp_netdev_port **portp)
296 OVS_REQ_RDLOCK(dp->port_rwlock);
297static void dp_netdev_free(struct dp_netdev *)
298 OVS_REQUIRES(dp_netdev_mutex);
72865317 299static void dp_netdev_flow_flush(struct dp_netdev *);
8a4e3a85
BP
300static int do_add_port(struct dp_netdev *dp, const char *devname,
301 const char *type, odp_port_t port_no)
302 OVS_REQ_WRLOCK(dp->port_rwlock);
303static int do_del_port(struct dp_netdev *dp, odp_port_t port_no)
304 OVS_REQ_WRLOCK(dp->port_rwlock);
614c4892
BP
305static int dpif_netdev_open(const struct dpif_class *, const char *name,
306 bool create, struct dpif **);
f5126b57 307static int dp_netdev_output_userspace(struct dp_netdev *dp, struct ofpbuf *,
856081f6 308 int queue_no, const struct flow *,
f5126b57
BP
309 const struct nlattr *userdata)
310 OVS_EXCLUDED(dp->queue_mutex);
8a4e3a85
BP
311static void dp_netdev_execute_actions(struct dp_netdev *dp,
312 const struct flow *, struct ofpbuf *,
313 struct pkt_metadata *,
4edb9ae9 314 const struct nlattr *actions,
8a4e3a85
BP
315 size_t actions_len)
316 OVS_REQ_RDLOCK(dp->port_rwlock);
758c456d 317static void dp_netdev_port_input(struct dp_netdev *dp, struct ofpbuf *packet,
8a4e3a85
BP
318 struct pkt_metadata *)
319 OVS_REQ_RDLOCK(dp->port_rwlock);
72865317
BP
320
/* Returns the dpif_netdev that contains 'dpif'.  Asserts that 'dpif' really
 * belongs to this provider by checking its class's open function. */
static struct dpif_netdev *
dpif_netdev_cast(const struct dpif *dpif)
{
    ovs_assert(dpif->dpif_class->open == dpif_netdev_open);
    return CONTAINER_OF(dpif, struct dpif_netdev, dpif);
}
327
/* Returns the dp_netdev underlying 'dpif'. */
static struct dp_netdev *
get_dp_netdev(const struct dpif *dpif)
{
    return dpif_netdev_cast(dpif)->dp;
}
333
2197d7ab
GL
/* dpif callback: adds the name of every netdev datapath to 'all_dps'. */
static int
dpif_netdev_enumerate(struct sset *all_dps)
{
    struct shash_node *node;

    ovs_mutex_lock(&dp_netdev_mutex);
    SHASH_FOR_EACH(node, &dp_netdevs) {
        sset_add(all_dps, node->name);
    }
    ovs_mutex_unlock(&dp_netdev_mutex);

    return 0;
}
347
add90f6f
EJ
/* Returns true if 'class' is a variant of this provider other than the real
 * netdev class (i.e. a dummy class used for testing). */
static bool
dpif_netdev_class_is_dummy(const struct dpif_class *class)
{
    return class != &dpif_netdev_class;
}
353
0aeaabc8
JP
/* Maps the user-requested port 'type' to the netdev type actually opened:
 * "internal" ports become "dummy" netdevs under a dummy class, otherwise
 * "tap"; all other types pass through unchanged. */
static const char *
dpif_netdev_port_open_type(const struct dpif_class *class, const char *type)
{
    if (strcmp(type, "internal")) {
        return type;
    }
    return dpif_netdev_class_is_dummy(class) ? "dummy" : "tap";
}
361
72865317
BP
/* Creates and returns a new dpif handle for 'dp', taking a new reference to
 * 'dp' on the handle's behalf. */
static struct dpif *
create_dpif_netdev(struct dp_netdev *dp)
{
    uint16_t netflow_id = hash_string(dp->name, 0);
    struct dpif_netdev *dpif;

    ovs_refcount_ref(&dp->ref_cnt);

    dpif = xmalloc(sizeof *dpif);
    dpif_init(&dpif->dpif, dp->class, dp->name, netflow_id >> 8, netflow_id);
    dpif->dp = dp;
    /* Snapshot the port-change sequence so port_poll() only reports changes
     * made after this handle was opened. */
    dpif->last_port_seq = seq_read(dp->port_seq);

    return &dpif->dpif;
}
377
4e022ec0
AW
/* Choose an unused, non-zero port number and return it on success.
 * Return ODPP_NONE on failure. */
static odp_port_t
choose_port(struct dp_netdev *dp, const char *name)
    OVS_REQ_RDLOCK(dp->port_rwlock)
{
    uint32_t port_no;

    if (dp->class != &dpif_netdev_class) {
        const char *p;
        int start_no = 0;

        /* If the port name begins with "br", start the number search at
         * 100 to make writing tests easier. */
        if (!strncmp(name, "br", 2)) {
            start_no = 100;
        }

        /* If the port name contains a number, try to assign that port number.
         * This can make writing unit tests easier because port numbers are
         * predictable. */
        for (p = name; *p != '\0'; p++) {
            if (isdigit((unsigned char) *p)) {
                port_no = start_no + strtol(p, NULL, 10);
                if (port_no > 0 && port_no != odp_to_u32(ODPP_NONE)
                    && !dp_netdev_lookup_port(dp, u32_to_odp(port_no))) {
                    return u32_to_odp(port_no);
                }
                /* Only the first digit run is considered. */
                break;
            }
        }
    }

    /* Fall back to a linear scan for the lowest free port number. */
    for (port_no = 1; port_no <= UINT16_MAX; port_no++) {
        if (!dp_netdev_lookup_port(dp, u32_to_odp(port_no))) {
            return u32_to_odp(port_no);
        }
    }

    return ODPP_NONE;
}
419
/* Creates a new dp_netdev named 'name' under dpif class 'class', registers it
 * in 'dp_netdevs', and stores it in '*dpp'.  Returns 0 on success or a
 * positive errno value (in which case the partially built datapath is
 * freed). */
static int
create_dp_netdev(const char *name, const struct dpif_class *class,
                 struct dp_netdev **dpp)
    OVS_REQUIRES(dp_netdev_mutex)
{
    struct dp_netdev *dp;
    int error;
    int i;

    dp = xzalloc(sizeof *dp);
    shash_add(&dp_netdevs, name, dp);

    /* 'class' and 'name' are const members; casts are the one-time
     * initialization. */
    *CONST_CAST(const struct dpif_class **, &dp->class) = class;
    *CONST_CAST(const char **, &dp->name) = xstrdup(name);
    ovs_refcount_init(&dp->ref_cnt);
    atomic_flag_init(&dp->destroyed);

    ovs_mutex_init(&dp->flow_mutex);
    classifier_init(&dp->cls, NULL);
    hmap_init(&dp->flow_table);

    ovs_mutex_init(&dp->queue_mutex);
    /* Lock only to satisfy the OVS_GUARDED annotations on head/tail. */
    ovs_mutex_lock(&dp->queue_mutex);
    for (i = 0; i < N_QUEUES; i++) {
        dp->queues[i].head = dp->queues[i].tail = 0;
    }
    ovs_mutex_unlock(&dp->queue_mutex);
    dp->queue_seq = seq_create();

    dp->n_hit = ovsthread_counter_create();
    dp->n_missed = ovsthread_counter_create();
    dp->n_lost = ovsthread_counter_create();

    ovs_rwlock_init(&dp->port_rwlock);
    hmap_init(&dp->ports);
    dp->port_seq = seq_create();

    /* Every datapath gets an internal "local" port named after itself. */
    ovs_rwlock_wrlock(&dp->port_rwlock);
    error = do_add_port(dp, name, "internal", ODPP_LOCAL);
    ovs_rwlock_unlock(&dp->port_rwlock);
    if (error) {
        dp_netdev_free(dp);
        return error;
    }

    *dpp = dp;
    return 0;
}
468
469static int
614c4892 470dpif_netdev_open(const struct dpif_class *class, const char *name,
4a387741 471 bool create, struct dpif **dpifp)
72865317 472{
462278db 473 struct dp_netdev *dp;
5279f8fd 474 int error;
462278db 475
97be1538 476 ovs_mutex_lock(&dp_netdev_mutex);
462278db
BP
477 dp = shash_find_data(&dp_netdevs, name);
478 if (!dp) {
5279f8fd 479 error = create ? create_dp_netdev(name, class, &dp) : ENODEV;
72865317 480 } else {
5279f8fd
BP
481 error = (dp->class != class ? EINVAL
482 : create ? EEXIST
483 : 0);
484 }
485 if (!error) {
486 *dpifp = create_dpif_netdev(dp);
72865317 487 }
97be1538 488 ovs_mutex_unlock(&dp_netdev_mutex);
462278db 489
5279f8fd 490 return error;
72865317
BP
491}
492
/* Discards every queued upcall in 'dp', releasing the packet buffers. */
static void
dp_netdev_purge_queues(struct dp_netdev *dp)
{
    int i;

    ovs_mutex_lock(&dp->queue_mutex);
    for (i = 0; i < N_QUEUES; i++) {
        struct dp_netdev_queue *q = &dp->queues[i];

        /* Drain the ring; 'tail' catches up to 'head'. */
        while (q->tail != q->head) {
            struct dp_netdev_upcall *u = &q->upcalls[q->tail++ & QUEUE_MASK];
            ofpbuf_uninit(&u->upcall.packet);
            ofpbuf_uninit(&u->buf);
        }
    }
    ovs_mutex_unlock(&dp->queue_mutex);
}
510
8a4e3a85
BP
/* Requires dp_netdev_mutex so that we can't get a new reference to 'dp'
 * through the 'dp_netdevs' shash while freeing 'dp'. */
static void
dp_netdev_free(struct dp_netdev *dp)
    OVS_REQUIRES(dp_netdev_mutex)
{
    struct dp_netdev_port *port, *next;

    /* Unpublish 'dp' first so no new lookups can find it. */
    shash_find_and_delete(&dp_netdevs, dp->name);

    dp_netdev_flow_flush(dp);
    ovs_rwlock_wrlock(&dp->port_rwlock);
    HMAP_FOR_EACH_SAFE (port, next, node, &dp->ports) {
        do_del_port(dp, port->port_no);
    }
    ovs_rwlock_unlock(&dp->port_rwlock);
    ovsthread_counter_destroy(dp->n_hit);
    ovsthread_counter_destroy(dp->n_missed);
    ovsthread_counter_destroy(dp->n_lost);

    /* Drop any still-queued upcalls before tearing down the queue lock. */
    dp_netdev_purge_queues(dp);
    seq_destroy(dp->queue_seq);
    ovs_mutex_destroy(&dp->queue_mutex);

    classifier_destroy(&dp->cls);
    hmap_destroy(&dp->flow_table);
    ovs_mutex_destroy(&dp->flow_mutex);
    seq_destroy(dp->port_seq);
    hmap_destroy(&dp->ports);
    atomic_flag_destroy(&dp->destroyed);
    ovs_refcount_destroy(&dp->ref_cnt);
    free(CONST_CAST(char *, dp->name));
    free(dp);
}
545
8a4e3a85
BP
/* Releases one reference to 'dp' (a null 'dp' is a no-op), freeing it when
 * the last reference goes away. */
static void
dp_netdev_unref(struct dp_netdev *dp)
{
    if (dp) {
        /* Take dp_netdev_mutex so that, if dp->ref_cnt falls to zero, we can't
         * get a new reference to 'dp' through the 'dp_netdevs' shash. */
        ovs_mutex_lock(&dp_netdev_mutex);
        if (ovs_refcount_unref(&dp->ref_cnt) == 1) {
            dp_netdev_free(dp);
        }
        ovs_mutex_unlock(&dp_netdev_mutex);
    }
}
559
72865317
BP
/* dpif callback: closes this handle, dropping its reference to the shared
 * datapath and freeing the handle itself. */
static void
dpif_netdev_close(struct dpif *dpif)
{
    struct dp_netdev *dp = get_dp_netdev(dpif);

    dp_netdev_unref(dp);
    free(dpif);
}
568
/* dpif callback: marks the datapath for destruction.  Only the first caller
 * performs the unref; the atomic flag makes destruction idempotent. */
static int
dpif_netdev_destroy(struct dpif *dpif)
{
    struct dp_netdev *dp = get_dp_netdev(dpif);

    if (!atomic_flag_test_and_set(&dp->destroyed)) {
        if (ovs_refcount_unref(&dp->ref_cnt) == 1) {
            /* Can't happen: 'dpif' still owns a reference to 'dp'. */
            OVS_NOT_REACHED();
        }
    }

    return 0;
}
583
/* dpif callback: fills '*stats' with datapath-wide statistics. */
static int
dpif_netdev_get_stats(const struct dpif *dpif, struct dpif_dp_stats *stats)
{
    struct dp_netdev *dp = get_dp_netdev(dpif);

    /* 'flow_table' readers need the classifier read-lock. */
    ovs_rwlock_rdlock(&dp->cls.rwlock);
    stats->n_flows = hmap_count(&dp->flow_table);
    ovs_rwlock_unlock(&dp->cls.rwlock);

    stats->n_hit = ovsthread_counter_read(dp->n_hit);
    stats->n_missed = ovsthread_counter_read(dp->n_missed);
    stats->n_lost = ovsthread_counter_read(dp->n_lost);
    /* Mask stats are not tracked here; all-ones presumably means
     * "unsupported" to callers -- NOTE(review): confirm against dpif.h. */
    stats->n_masks = UINT32_MAX;
    stats->n_mask_hit = UINT64_MAX;

    return 0;
}
601
72865317 602static int
c3827f61 603do_add_port(struct dp_netdev *dp, const char *devname, const char *type,
4e022ec0 604 odp_port_t port_no)
8a4e3a85 605 OVS_REQ_WRLOCK(dp->port_rwlock)
72865317 606{
4b609110 607 struct netdev_saved_flags *sf;
72865317
BP
608 struct dp_netdev_port *port;
609 struct netdev *netdev;
796223f5 610 struct netdev_rx *rx;
2499a8ce 611 enum netdev_flags flags;
0cbfe35d 612 const char *open_type;
72865317
BP
613 int error;
614
615 /* XXX reject devices already in some dp_netdev. */
616
617 /* Open and validate network device. */
0aeaabc8 618 open_type = dpif_netdev_port_open_type(dp->class, type);
0cbfe35d 619 error = netdev_open(devname, open_type, &netdev);
72865317
BP
620 if (error) {
621 return error;
622 }
72865317
BP
623 /* XXX reject non-Ethernet devices */
624
2499a8ce
AC
625 netdev_get_flags(netdev, &flags);
626 if (flags & NETDEV_LOOPBACK) {
627 VLOG_ERR("%s: cannot add a loopback device", devname);
628 netdev_close(netdev);
629 return EINVAL;
630 }
631
796223f5 632 error = netdev_rx_open(netdev, &rx);
add90f6f
EJ
633 if (error
634 && !(error == EOPNOTSUPP && dpif_netdev_class_is_dummy(dp->class))) {
7b6b0ef4 635 VLOG_ERR("%s: cannot receive packets on this network device (%s)",
10a89ef0 636 devname, ovs_strerror(errno));
7b6b0ef4
BP
637 netdev_close(netdev);
638 return error;
639 }
640
4b609110 641 error = netdev_turn_flags_on(netdev, NETDEV_PROMISC, &sf);
72865317 642 if (error) {
796223f5 643 netdev_rx_close(rx);
72865317
BP
644 netdev_close(netdev);
645 return error;
646 }
647
648 port = xmalloc(sizeof *port);
649 port->port_no = port_no;
650 port->netdev = netdev;
4b609110 651 port->sf = sf;
796223f5 652 port->rx = rx;
0cbfe35d 653 port->type = xstrdup(type);
72865317 654
ff073a71 655 hmap_insert(&dp->ports, &port->node, hash_int(odp_to_u32(port_no), 0));
d33ed218 656 seq_change(dp->port_seq);
72865317
BP
657
658 return 0;
659}
660
247527db
BP
661static int
662dpif_netdev_port_add(struct dpif *dpif, struct netdev *netdev,
4e022ec0 663 odp_port_t *port_nop)
247527db
BP
664{
665 struct dp_netdev *dp = get_dp_netdev(dpif);
3aa30359
BP
666 char namebuf[NETDEV_VPORT_NAME_BUFSIZE];
667 const char *dpif_port;
4e022ec0 668 odp_port_t port_no;
5279f8fd 669 int error;
247527db 670
8a4e3a85 671 ovs_rwlock_wrlock(&dp->port_rwlock);
3aa30359 672 dpif_port = netdev_vport_get_dpif_port(netdev, namebuf, sizeof namebuf);
4e022ec0 673 if (*port_nop != ODPP_NONE) {
ff073a71
BP
674 port_no = *port_nop;
675 error = dp_netdev_lookup_port(dp, *port_nop) ? EBUSY : 0;
232dfa4a 676 } else {
3aa30359 677 port_no = choose_port(dp, dpif_port);
5279f8fd 678 error = port_no == ODPP_NONE ? EFBIG : 0;
232dfa4a 679 }
5279f8fd 680 if (!error) {
247527db 681 *port_nop = port_no;
5279f8fd 682 error = do_add_port(dp, dpif_port, netdev_get_type(netdev), port_no);
247527db 683 }
8a4e3a85 684 ovs_rwlock_unlock(&dp->port_rwlock);
5279f8fd
BP
685
686 return error;
72865317
BP
687}
688
689static int
4e022ec0 690dpif_netdev_port_del(struct dpif *dpif, odp_port_t port_no)
72865317
BP
691{
692 struct dp_netdev *dp = get_dp_netdev(dpif);
5279f8fd
BP
693 int error;
694
8a4e3a85 695 ovs_rwlock_wrlock(&dp->port_rwlock);
5279f8fd 696 error = port_no == ODPP_LOCAL ? EINVAL : do_del_port(dp, port_no);
8a4e3a85 697 ovs_rwlock_unlock(&dp->port_rwlock);
5279f8fd
BP
698
699 return error;
72865317
BP
700}
701
/* Returns true unless 'port_no' is the ODPP_NONE sentinel. */
static bool
is_valid_port_number(odp_port_t port_no)
{
    return port_no != ODPP_NONE;
}
707
708static struct dp_netdev_port *
709dp_netdev_lookup_port(const struct dp_netdev *dp, odp_port_t port_no)
8a4e3a85 710 OVS_REQ_RDLOCK(dp->port_rwlock)
ff073a71
BP
711{
712 struct dp_netdev_port *port;
713
714 HMAP_FOR_EACH_IN_BUCKET (port, node, hash_int(odp_to_u32(port_no), 0),
715 &dp->ports) {
716 if (port->port_no == port_no) {
717 return port;
718 }
719 }
720 return NULL;
72865317
BP
721}
722
723static int
724get_port_by_number(struct dp_netdev *dp,
4e022ec0 725 odp_port_t port_no, struct dp_netdev_port **portp)
8a4e3a85 726 OVS_REQ_RDLOCK(dp->port_rwlock)
72865317
BP
727{
728 if (!is_valid_port_number(port_no)) {
729 *portp = NULL;
730 return EINVAL;
731 } else {
ff073a71 732 *portp = dp_netdev_lookup_port(dp, port_no);
72865317
BP
733 return *portp ? 0 : ENOENT;
734 }
735}
736
737static int
738get_port_by_name(struct dp_netdev *dp,
739 const char *devname, struct dp_netdev_port **portp)
8a4e3a85 740 OVS_REQ_RDLOCK(dp->port_rwlock)
72865317
BP
741{
742 struct dp_netdev_port *port;
743
ff073a71 744 HMAP_FOR_EACH (port, node, &dp->ports) {
3efb6063 745 if (!strcmp(netdev_get_name(port->netdev), devname)) {
72865317
BP
746 *portp = port;
747 return 0;
748 }
749 }
750 return ENOENT;
751}
752
/* Removes port 'port_no' from 'dp', releasing all of its resources.
 * Returns 0 on success or a positive errno value if the port is invalid or
 * absent. */
static int
do_del_port(struct dp_netdev *dp, odp_port_t port_no)
    OVS_REQ_WRLOCK(dp->port_rwlock)
{
    struct dp_netdev_port *port;
    int error;

    error = get_port_by_number(dp, port_no, &port);
    if (error) {
        return error;
    }

    hmap_remove(&dp->ports, &port->node);
    seq_change(dp->port_seq);   /* Wake up port_poll() waiters. */

    netdev_close(port->netdev);
    netdev_restore_flags(port->sf);   /* Undo the promisc flag change. */
    netdev_rx_close(port->rx);
    free(port->type);
    free(port);

    return 0;
}
776
777static void
4c738a8d
BP
778answer_port_query(const struct dp_netdev_port *port,
779 struct dpif_port *dpif_port)
72865317 780{
3efb6063 781 dpif_port->name = xstrdup(netdev_get_name(port->netdev));
0cbfe35d 782 dpif_port->type = xstrdup(port->type);
4c738a8d 783 dpif_port->port_no = port->port_no;
72865317
BP
784}
785
786static int
4e022ec0 787dpif_netdev_port_query_by_number(const struct dpif *dpif, odp_port_t port_no,
4c738a8d 788 struct dpif_port *dpif_port)
72865317
BP
789{
790 struct dp_netdev *dp = get_dp_netdev(dpif);
791 struct dp_netdev_port *port;
792 int error;
793
8a4e3a85 794 ovs_rwlock_rdlock(&dp->port_rwlock);
72865317 795 error = get_port_by_number(dp, port_no, &port);
4afba28d 796 if (!error && dpif_port) {
4c738a8d 797 answer_port_query(port, dpif_port);
72865317 798 }
8a4e3a85 799 ovs_rwlock_unlock(&dp->port_rwlock);
5279f8fd 800
72865317
BP
801 return error;
802}
803
804static int
805dpif_netdev_port_query_by_name(const struct dpif *dpif, const char *devname,
4c738a8d 806 struct dpif_port *dpif_port)
72865317
BP
807{
808 struct dp_netdev *dp = get_dp_netdev(dpif);
809 struct dp_netdev_port *port;
810 int error;
811
8a4e3a85 812 ovs_rwlock_rdlock(&dp->port_rwlock);
72865317 813 error = get_port_by_name(dp, devname, &port);
4afba28d 814 if (!error && dpif_port) {
4c738a8d 815 answer_port_query(port, dpif_port);
72865317 816 }
8a4e3a85 817 ovs_rwlock_unlock(&dp->port_rwlock);
5279f8fd 818
72865317
BP
819 return error;
820}
821
/* Removes 'flow' from 'dp''s classifier and flow table, dropping the
 * reference that the classifier held.  Both locks are required so the two
 * structures change atomically. */
static void
dp_netdev_remove_flow(struct dp_netdev *dp, struct dp_netdev_flow *flow)
    OVS_REQ_WRLOCK(dp->cls.rwlock)
    OVS_REQUIRES(dp->flow_mutex)
{
    /* 'cr' and 'node' are declared const to discourage casual modification;
     * removal is the sanctioned mutation, so cast the const away here. */
    struct cls_rule *cr = CONST_CAST(struct cls_rule *, &flow->cr);
    struct hmap_node *node = CONST_CAST(struct hmap_node *, &flow->node);

    classifier_remove(&dp->cls, cr);
    hmap_remove(&dp->flow_table, node);
    dp_netdev_flow_unref(flow);   /* Drop the classifier's reference. */
}
834
/* Takes a new reference to 'flow_' (a null flow is a no-op) and returns it
 * as a non-const pointer for the caller's convenience. */
static struct dp_netdev_flow *
dp_netdev_flow_ref(const struct dp_netdev_flow *flow_)
{
    struct dp_netdev_flow *flow = CONST_CAST(struct dp_netdev_flow *, flow_);
    if (flow) {
        ovs_refcount_ref(&flow->ref_cnt);
    }
    return flow;
}
844
/* Releases one reference to 'flow' (a null flow is a no-op), destroying it
 * entirely when the last reference goes away. */
static void
dp_netdev_flow_unref(struct dp_netdev_flow *flow)
{
    if (flow && ovs_refcount_unref(&flow->ref_cnt) == 1) {
        cls_rule_destroy(CONST_CAST(struct cls_rule *, &flow->cr));
        /* Take 'mutex' to satisfy the OVS_GUARDED annotation on 'actions';
         * no other thread can still hold a reference at this point. */
        ovs_mutex_lock(&flow->mutex);
        dp_netdev_actions_unref(flow->actions);
        ovs_mutex_unlock(&flow->mutex);
        ovs_mutex_destroy(&flow->mutex);
        free(flow);
    }
}
857
/* Deletes every flow from 'dp'.  Takes 'flow_mutex' and then the classifier
 * write-lock, following the documented acquisition order. */
static void
dp_netdev_flow_flush(struct dp_netdev *dp)
{
    struct dp_netdev_flow *netdev_flow, *next;

    ovs_mutex_lock(&dp->flow_mutex);
    ovs_rwlock_wrlock(&dp->cls.rwlock);
    HMAP_FOR_EACH_SAFE (netdev_flow, next, node, &dp->flow_table) {
        dp_netdev_remove_flow(dp, netdev_flow);
    }
    ovs_rwlock_unlock(&dp->cls.rwlock);
    ovs_mutex_unlock(&dp->flow_mutex);
}
871
/* dpif callback: deletes all flows from the datapath. */
static int
dpif_netdev_flow_flush(struct dpif *dpif)
{
    struct dp_netdev *dp = get_dp_netdev(dpif);

    dp_netdev_flow_flush(dp);
    return 0;
}
880
/* Cursor for dpif_netdev_port_dump_*(). */
struct dp_netdev_port_state {
    uint32_t bucket;            /* hmap_at_position() cursor. */
    uint32_t offset;            /* hmap_at_position() cursor. */
    char *name;                 /* Owned copy of the last returned port name;
                                 * kept alive until the next iteration. */
};
886
/* dpif callback: begins a port dump; '*statep' receives a fresh cursor. */
static int
dpif_netdev_port_dump_start(const struct dpif *dpif OVS_UNUSED, void **statep)
{
    *statep = xzalloc(sizeof(struct dp_netdev_port_state));
    return 0;
}
893
/* dpif callback: stores the next port in the dump into '*dpif_port' and
 * returns 0, or returns EOF when no ports remain. */
static int
dpif_netdev_port_dump_next(const struct dpif *dpif, void *state_,
                           struct dpif_port *dpif_port)
{
    struct dp_netdev_port_state *state = state_;
    struct dp_netdev *dp = get_dp_netdev(dpif);
    struct hmap_node *node;
    int retval;

    ovs_rwlock_rdlock(&dp->port_rwlock);
    node = hmap_at_position(&dp->ports, &state->bucket, &state->offset);
    if (node) {
        struct dp_netdev_port *port;

        port = CONTAINER_OF(node, struct dp_netdev_port, node);

        /* The cursor owns a copy of the name so that the returned string
         * stays valid after the lock is released. */
        free(state->name);
        state->name = xstrdup(netdev_get_name(port->netdev));
        dpif_port->name = state->name;
        dpif_port->type = port->type;
        dpif_port->port_no = port->port_no;

        retval = 0;
    } else {
        retval = EOF;
    }
    ovs_rwlock_unlock(&dp->port_rwlock);

    return retval;
}
924
/* dpif callback: releases the cursor allocated by ..._dump_start(). */
static int
dpif_netdev_port_dump_done(const struct dpif *dpif OVS_UNUSED, void *state_)
{
    struct dp_netdev_port_state *state = state_;
    free(state->name);
    free(state);
    return 0;
}
933
934static int
67a4917b 935dpif_netdev_port_poll(const struct dpif *dpif_, char **devnamep OVS_UNUSED)
72865317
BP
936{
937 struct dpif_netdev *dpif = dpif_netdev_cast(dpif_);
d33ed218 938 uint64_t new_port_seq;
5279f8fd
BP
939 int error;
940
d33ed218
BP
941 new_port_seq = seq_read(dpif->dp->port_seq);
942 if (dpif->last_port_seq != new_port_seq) {
943 dpif->last_port_seq = new_port_seq;
5279f8fd 944 error = ENOBUFS;
72865317 945 } else {
5279f8fd 946 error = EAGAIN;
72865317 947 }
5279f8fd
BP
948
949 return error;
72865317
BP
950}
951
/* dpif callback: arranges to wake up when the port set changes from the
 * value this handle last observed. */
static void
dpif_netdev_port_poll_wait(const struct dpif *dpif_)
{
    struct dpif_netdev *dpif = dpif_netdev_cast(dpif_);

    seq_wait(dpif->dp->port_seq, dpif->last_port_seq);
}
959
960static struct dp_netdev_flow *
961dp_netdev_flow_cast(const struct cls_rule *cr)
962{
963 return cr ? CONTAINER_OF(cr, struct dp_netdev_flow, cr) : NULL;
72865317
BP
964}
965
72865317 966static struct dp_netdev_flow *
2c0ea78f 967dp_netdev_lookup_flow(const struct dp_netdev *dp, const struct flow *flow)
8a4e3a85 968 OVS_EXCLUDED(dp->cls.rwlock)
2c0ea78f 969{
8a4e3a85 970 struct dp_netdev_flow *netdev_flow;
2c0ea78f 971
8a4e3a85
BP
972 ovs_rwlock_rdlock(&dp->cls.rwlock);
973 netdev_flow = dp_netdev_flow_cast(classifier_lookup(&dp->cls, flow, NULL));
974 dp_netdev_flow_ref(netdev_flow);
2c0ea78f
GS
975 ovs_rwlock_unlock(&dp->cls.rwlock);
976
8a4e3a85 977 return netdev_flow;
2c0ea78f
GS
978}
979
980static struct dp_netdev_flow *
981dp_netdev_find_flow(const struct dp_netdev *dp, const struct flow *flow)
8a4e3a85 982 OVS_REQ_RDLOCK(dp->cls.rwlock)
72865317 983{
1763b4b8 984 struct dp_netdev_flow *netdev_flow;
72865317 985
2c0ea78f 986 HMAP_FOR_EACH_WITH_HASH (netdev_flow, node, flow_hash(flow, 0),
1763b4b8 987 &dp->flow_table) {
2c0ea78f 988 if (flow_equal(&netdev_flow->flow, flow)) {
8a4e3a85 989 return dp_netdev_flow_ref(netdev_flow);
72865317
BP
990 }
991 }
8a4e3a85 992
72865317
BP
993 return NULL;
994}
995
996static void
1763b4b8
GS
997get_dpif_flow_stats(struct dp_netdev_flow *netdev_flow,
998 struct dpif_flow_stats *stats)
8a4e3a85 999 OVS_REQ_RDLOCK(netdev_flow->mutex)
feebdea2 1000{
1763b4b8
GS
1001 stats->n_packets = netdev_flow->packet_count;
1002 stats->n_bytes = netdev_flow->byte_count;
1003 stats->used = netdev_flow->used;
1004 stats->tcp_flags = netdev_flow->tcp_flags;
72865317
BP
1005}
1006
36956a7d 1007static int
8c301900
JR
1008dpif_netdev_mask_from_nlattrs(const struct nlattr *key, uint32_t key_len,
1009 const struct nlattr *mask_key,
1010 uint32_t mask_key_len, const struct flow *flow,
1011 struct flow *mask)
1012{
1013 if (mask_key_len) {
1014 if (odp_flow_key_to_mask(mask_key, mask_key_len, mask, flow)) {
1015 /* This should not happen: it indicates that
1016 * odp_flow_key_from_mask() and odp_flow_key_to_mask()
1017 * disagree on the acceptable form of a mask. Log the problem
1018 * as an error, with enough details to enable debugging. */
1019 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
1020
1021 if (!VLOG_DROP_ERR(&rl)) {
1022 struct ds s;
1023
1024 ds_init(&s);
1025 odp_flow_format(key, key_len, mask_key, mask_key_len, NULL, &s,
1026 true);
1027 VLOG_ERR("internal error parsing flow mask %s", ds_cstr(&s));
1028 ds_destroy(&s);
1029 }
1030
1031 return EINVAL;
1032 }
1033 /* Force unwildcard the in_port. */
1034 mask->in_port.odp_port = u32_to_odp(UINT32_MAX);
1035 } else {
1036 enum mf_field_id id;
1037 /* No mask key, unwildcard everything except fields whose
1038 * prerequisities are not met. */
1039 memset(mask, 0x0, sizeof *mask);
1040
1041 for (id = 0; id < MFF_N_IDS; ++id) {
1042 /* Skip registers and metadata. */
1043 if (!(id >= MFF_REG0 && id < MFF_REG0 + FLOW_N_REGS)
1044 && id != MFF_METADATA) {
1045 const struct mf_field *mf = mf_from_id(id);
1046 if (mf_are_prereqs_ok(mf, flow)) {
1047 mf_mask_field(mf, mask);
1048 }
1049 }
1050 }
1051 }
1052
1053 return 0;
1054}
1055
1056static int
1057dpif_netdev_flow_from_nlattrs(const struct nlattr *key, uint32_t key_len,
1058 struct flow *flow)
36956a7d 1059{
586ddea5
BP
1060 odp_port_t in_port;
1061
8c301900 1062 if (odp_flow_key_to_flow(key, key_len, flow)) {
36956a7d 1063 /* This should not happen: it indicates that odp_flow_key_from_flow()
8c301900
JR
1064 * and odp_flow_key_to_flow() disagree on the acceptable form of a
1065 * flow. Log the problem as an error, with enough details to enable
1066 * debugging. */
36956a7d
BP
1067 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
1068
1069 if (!VLOG_DROP_ERR(&rl)) {
1070 struct ds s;
1071
1072 ds_init(&s);
8c301900 1073 odp_flow_format(key, key_len, NULL, 0, NULL, &s, true);
36956a7d
BP
1074 VLOG_ERR("internal error parsing flow key %s", ds_cstr(&s));
1075 ds_destroy(&s);
1076 }
1077
1078 return EINVAL;
1079 }
1080
586ddea5
BP
1081 in_port = flow->in_port.odp_port;
1082 if (!is_valid_port_number(in_port) && in_port != ODPP_NONE) {
18886b60
BP
1083 return EINVAL;
1084 }
1085
36956a7d
BP
1086 return 0;
1087}
1088
72865317 1089static int
693c4a01 1090dpif_netdev_flow_get(const struct dpif *dpif,
feebdea2 1091 const struct nlattr *nl_key, size_t nl_key_len,
c97fb132 1092 struct ofpbuf **actionsp, struct dpif_flow_stats *stats)
72865317
BP
1093{
1094 struct dp_netdev *dp = get_dp_netdev(dpif);
1763b4b8 1095 struct dp_netdev_flow *netdev_flow;
bc4a05c6
BP
1096 struct flow key;
1097 int error;
36956a7d 1098
feebdea2 1099 error = dpif_netdev_flow_from_nlattrs(nl_key, nl_key_len, &key);
bc4a05c6
BP
1100 if (error) {
1101 return error;
1102 }
14608a15 1103
8a4e3a85 1104 ovs_rwlock_rdlock(&dp->cls.rwlock);
2c0ea78f 1105 netdev_flow = dp_netdev_find_flow(dp, &key);
8a4e3a85
BP
1106 ovs_rwlock_unlock(&dp->cls.rwlock);
1107
1763b4b8 1108 if (netdev_flow) {
8a4e3a85
BP
1109 struct dp_netdev_actions *actions = NULL;
1110
1111 ovs_mutex_lock(&netdev_flow->mutex);
5279f8fd 1112 if (stats) {
1763b4b8 1113 get_dpif_flow_stats(netdev_flow, stats);
5279f8fd
BP
1114 }
1115 if (actionsp) {
8a4e3a85
BP
1116 actions = dp_netdev_actions_ref(netdev_flow->actions);
1117 }
1118 ovs_mutex_unlock(&netdev_flow->mutex);
1119
1120 dp_netdev_flow_unref(netdev_flow);
1121
1122 if (actionsp) {
1123 *actionsp = ofpbuf_clone_data(actions->actions, actions->size);
1124 dp_netdev_actions_unref(actions);
5279f8fd
BP
1125 }
1126 } else {
1127 error = ENOENT;
72865317 1128 }
bc4a05c6 1129
5279f8fd 1130 return error;
72865317
BP
1131}
1132
72865317 1133static int
2c0ea78f
GS
1134dp_netdev_flow_add(struct dp_netdev *dp, const struct flow *flow,
1135 const struct flow_wildcards *wc,
1136 const struct nlattr *actions,
1137 size_t actions_len)
8a4e3a85 1138 OVS_REQUIRES(dp->flow_mutex)
72865317 1139{
1763b4b8 1140 struct dp_netdev_flow *netdev_flow;
2c0ea78f 1141 struct match match;
72865317 1142
1763b4b8 1143 netdev_flow = xzalloc(sizeof *netdev_flow);
8a4e3a85
BP
1144 *CONST_CAST(struct flow *, &netdev_flow->flow) = *flow;
1145 ovs_refcount_init(&netdev_flow->ref_cnt);
1146
1147 ovs_mutex_init(&netdev_flow->mutex);
1148 ovs_mutex_lock(&netdev_flow->mutex);
1149
a84cb64a 1150 netdev_flow->actions = dp_netdev_actions_create(actions, actions_len);
2c0ea78f
GS
1151
1152 match_init(&match, flow, wc);
8a4e3a85
BP
1153 cls_rule_init(CONST_CAST(struct cls_rule *, &netdev_flow->cr),
1154 &match, NETDEV_RULE_PRIORITY);
2c0ea78f 1155 ovs_rwlock_wrlock(&dp->cls.rwlock);
8a4e3a85
BP
1156 classifier_insert(&dp->cls,
1157 CONST_CAST(struct cls_rule *, &netdev_flow->cr));
1158 hmap_insert(&dp->flow_table,
1159 CONST_CAST(struct hmap_node *, &netdev_flow->node),
1160 flow_hash(flow, 0));
2c0ea78f 1161 ovs_rwlock_unlock(&dp->cls.rwlock);
72865317 1162
8a4e3a85
BP
1163 ovs_mutex_unlock(&netdev_flow->mutex);
1164
72865317
BP
1165 return 0;
1166}
1167
1168static void
1763b4b8 1169clear_stats(struct dp_netdev_flow *netdev_flow)
8a4e3a85 1170 OVS_REQUIRES(netdev_flow->mutex)
72865317 1171{
1763b4b8
GS
1172 netdev_flow->used = 0;
1173 netdev_flow->packet_count = 0;
1174 netdev_flow->byte_count = 0;
1175 netdev_flow->tcp_flags = 0;
72865317
BP
1176}
1177
1178static int
89625d1e 1179dpif_netdev_flow_put(struct dpif *dpif, const struct dpif_flow_put *put)
72865317
BP
1180{
1181 struct dp_netdev *dp = get_dp_netdev(dpif);
1763b4b8 1182 struct dp_netdev_flow *netdev_flow;
2c0ea78f
GS
1183 struct flow flow;
1184 struct flow_wildcards wc;
36956a7d
BP
1185 int error;
1186
8c301900
JR
1187 error = dpif_netdev_flow_from_nlattrs(put->key, put->key_len, &flow);
1188 if (error) {
1189 return error;
1190 }
1191 error = dpif_netdev_mask_from_nlattrs(put->key, put->key_len,
1192 put->mask, put->mask_len,
1193 &flow, &wc.masks);
36956a7d
BP
1194 if (error) {
1195 return error;
1196 }
72865317 1197
8a4e3a85 1198 ovs_mutex_lock(&dp->flow_mutex);
2c0ea78f 1199 netdev_flow = dp_netdev_lookup_flow(dp, &flow);
1763b4b8 1200 if (!netdev_flow) {
89625d1e 1201 if (put->flags & DPIF_FP_CREATE) {
72865317 1202 if (hmap_count(&dp->flow_table) < MAX_FLOWS) {
89625d1e
BP
1203 if (put->stats) {
1204 memset(put->stats, 0, sizeof *put->stats);
feebdea2 1205 }
2c0ea78f 1206 error = dp_netdev_flow_add(dp, &flow, &wc, put->actions,
5279f8fd 1207 put->actions_len);
72865317 1208 } else {
5279f8fd 1209 error = EFBIG;
72865317
BP
1210 }
1211 } else {
5279f8fd 1212 error = ENOENT;
72865317
BP
1213 }
1214 } else {
2c0ea78f
GS
1215 if (put->flags & DPIF_FP_MODIFY
1216 && flow_equal(&flow, &netdev_flow->flow)) {
8a4e3a85
BP
1217 struct dp_netdev_actions *new_actions;
1218 struct dp_netdev_actions *old_actions;
1219
1220 new_actions = dp_netdev_actions_create(put->actions,
1221 put->actions_len);
1222
1223 ovs_mutex_lock(&netdev_flow->mutex);
1224 old_actions = netdev_flow->actions;
1225 netdev_flow->actions = new_actions;
a84cb64a
BP
1226 if (put->stats) {
1227 get_dpif_flow_stats(netdev_flow, put->stats);
1228 }
1229 if (put->flags & DPIF_FP_ZERO_STATS) {
1230 clear_stats(netdev_flow);
72865317 1231 }
8a4e3a85
BP
1232 ovs_mutex_unlock(&netdev_flow->mutex);
1233
1234 dp_netdev_actions_unref(old_actions);
2c0ea78f 1235 } else if (put->flags & DPIF_FP_CREATE) {
5279f8fd 1236 error = EEXIST;
2c0ea78f
GS
1237 } else {
1238 /* Overlapping flow. */
1239 error = EINVAL;
72865317 1240 }
8a4e3a85 1241 dp_netdev_flow_unref(netdev_flow);
72865317 1242 }
8a4e3a85 1243 ovs_mutex_unlock(&dp->flow_mutex);
5279f8fd
BP
1244
1245 return error;
72865317
BP
1246}
1247
72865317 1248static int
b99d3cee 1249dpif_netdev_flow_del(struct dpif *dpif, const struct dpif_flow_del *del)
72865317
BP
1250{
1251 struct dp_netdev *dp = get_dp_netdev(dpif);
1763b4b8 1252 struct dp_netdev_flow *netdev_flow;
14608a15 1253 struct flow key;
36956a7d
BP
1254 int error;
1255
b99d3cee 1256 error = dpif_netdev_flow_from_nlattrs(del->key, del->key_len, &key);
36956a7d
BP
1257 if (error) {
1258 return error;
1259 }
72865317 1260
8a4e3a85
BP
1261 ovs_mutex_lock(&dp->flow_mutex);
1262 ovs_rwlock_wrlock(&dp->cls.rwlock);
2c0ea78f 1263 netdev_flow = dp_netdev_find_flow(dp, &key);
1763b4b8 1264 if (netdev_flow) {
b99d3cee 1265 if (del->stats) {
8a4e3a85 1266 ovs_mutex_lock(&netdev_flow->mutex);
1763b4b8 1267 get_dpif_flow_stats(netdev_flow, del->stats);
8a4e3a85 1268 ovs_mutex_unlock(&netdev_flow->mutex);
feebdea2 1269 }
8a4e3a85 1270 dp_netdev_remove_flow(dp, netdev_flow);
72865317 1271 } else {
5279f8fd 1272 error = ENOENT;
72865317 1273 }
8a4e3a85
BP
1274 ovs_rwlock_unlock(&dp->cls.rwlock);
1275 ovs_mutex_unlock(&dp->flow_mutex);
5279f8fd
BP
1276
1277 return error;
72865317
BP
1278}
1279
704a1e09
BP
1280struct dp_netdev_flow_state {
1281 uint32_t bucket;
1282 uint32_t offset;
a84cb64a 1283 struct dp_netdev_actions *actions;
19cf4069 1284 struct odputil_keybuf keybuf;
2c0ea78f 1285 struct odputil_keybuf maskbuf;
c97fb132 1286 struct dpif_flow_stats stats;
704a1e09
BP
1287};
1288
72865317 1289static int
704a1e09 1290dpif_netdev_flow_dump_start(const struct dpif *dpif OVS_UNUSED, void **statep)
72865317 1291{
feebdea2
BP
1292 struct dp_netdev_flow_state *state;
1293
1294 *statep = state = xmalloc(sizeof *state);
1295 state->bucket = 0;
1296 state->offset = 0;
1297 state->actions = NULL;
704a1e09
BP
1298 return 0;
1299}
1300
1301static int
1302dpif_netdev_flow_dump_next(const struct dpif *dpif, void *state_,
feebdea2 1303 const struct nlattr **key, size_t *key_len,
e6cc0bab 1304 const struct nlattr **mask, size_t *mask_len,
feebdea2 1305 const struct nlattr **actions, size_t *actions_len,
c97fb132 1306 const struct dpif_flow_stats **stats)
704a1e09
BP
1307{
1308 struct dp_netdev_flow_state *state = state_;
72865317 1309 struct dp_netdev *dp = get_dp_netdev(dpif);
1763b4b8 1310 struct dp_netdev_flow *netdev_flow;
704a1e09 1311 struct hmap_node *node;
14608a15 1312
8a4e3a85 1313 ovs_rwlock_rdlock(&dp->cls.rwlock);
704a1e09 1314 node = hmap_at_position(&dp->flow_table, &state->bucket, &state->offset);
8a4e3a85
BP
1315 if (node) {
1316 netdev_flow = CONTAINER_OF(node, struct dp_netdev_flow, node);
1317 dp_netdev_flow_ref(netdev_flow);
1318 }
1319 ovs_rwlock_unlock(&dp->cls.rwlock);
704a1e09
BP
1320 if (!node) {
1321 return EOF;
72865317 1322 }
704a1e09 1323
feebdea2
BP
1324 if (key) {
1325 struct ofpbuf buf;
1326
19cf4069 1327 ofpbuf_use_stack(&buf, &state->keybuf, sizeof state->keybuf);
2c0ea78f
GS
1328 odp_flow_key_from_flow(&buf, &netdev_flow->flow,
1329 netdev_flow->flow.in_port.odp_port);
36956a7d 1330
feebdea2
BP
1331 *key = buf.data;
1332 *key_len = buf.size;
1333 }
1334
2c0ea78f
GS
1335 if (key && mask) {
1336 struct ofpbuf buf;
1337 struct flow_wildcards wc;
1338
1339 ofpbuf_use_stack(&buf, &state->maskbuf, sizeof state->maskbuf);
1340 minimask_expand(&netdev_flow->cr.match.mask, &wc);
1341 odp_flow_key_from_mask(&buf, &wc.masks, &netdev_flow->flow,
1342 odp_to_u32(wc.masks.in_port.odp_port));
1343
1344 *mask = buf.data;
1345 *mask_len = buf.size;
e6cc0bab
AZ
1346 }
1347
8a4e3a85 1348 if (actions || stats) {
a84cb64a 1349 dp_netdev_actions_unref(state->actions);
8a4e3a85 1350 state->actions = NULL;
feebdea2 1351
8a4e3a85
BP
1352 ovs_mutex_lock(&netdev_flow->mutex);
1353 if (actions) {
1354 state->actions = dp_netdev_actions_ref(netdev_flow->actions);
1355 *actions = state->actions->actions;
1356 *actions_len = state->actions->size;
1357 }
1358 if (stats) {
1359 get_dpif_flow_stats(netdev_flow, &state->stats);
1360 *stats = &state->stats;
1361 }
1362 ovs_mutex_unlock(&netdev_flow->mutex);
feebdea2 1363 }
704a1e09 1364
8a4e3a85
BP
1365 dp_netdev_flow_unref(netdev_flow);
1366
704a1e09
BP
1367 return 0;
1368}
1369
1370static int
feebdea2 1371dpif_netdev_flow_dump_done(const struct dpif *dpif OVS_UNUSED, void *state_)
704a1e09 1372{
feebdea2
BP
1373 struct dp_netdev_flow_state *state = state_;
1374
a84cb64a 1375 dp_netdev_actions_unref(state->actions);
704a1e09
BP
1376 free(state);
1377 return 0;
72865317
BP
1378}
1379
1380static int
758c456d 1381dpif_netdev_execute(struct dpif *dpif, struct dpif_execute *execute)
72865317
BP
1382{
1383 struct dp_netdev *dp = get_dp_netdev(dpif);
758c456d
JR
1384 struct pkt_metadata *md = &execute->md;
1385 struct flow key;
72865317 1386
89625d1e
BP
1387 if (execute->packet->size < ETH_HEADER_LEN ||
1388 execute->packet->size > UINT16_MAX) {
72865317
BP
1389 return EINVAL;
1390 }
1391
758c456d
JR
1392 /* Extract flow key. */
1393 flow_extract(execute->packet, md->skb_priority, md->pkt_mark, &md->tunnel,
1394 (union flow_in_port *)&md->in_port, &key);
8a4e3a85
BP
1395
1396 ovs_rwlock_rdlock(&dp->port_rwlock);
758c456d
JR
1397 dp_netdev_execute_actions(dp, &key, execute->packet, md, execute->actions,
1398 execute->actions_len);
8a4e3a85
BP
1399 ovs_rwlock_unlock(&dp->port_rwlock);
1400
758c456d 1401 return 0;
72865317
BP
1402}
1403
1404static int
a12b3ead 1405dpif_netdev_recv_set(struct dpif *dpif OVS_UNUSED, bool enable OVS_UNUSED)
72865317 1406{
82272ede 1407 return 0;
72865317
BP
1408}
1409
5bf93d67
EJ
1410static int
1411dpif_netdev_queue_to_priority(const struct dpif *dpif OVS_UNUSED,
1412 uint32_t queue_id, uint32_t *priority)
1413{
1414 *priority = queue_id;
1415 return 0;
1416}
1417
856081f6 1418static struct dp_netdev_queue *
f5126b57
BP
1419find_nonempty_queue(struct dp_netdev *dp)
1420 OVS_REQUIRES(dp->queue_mutex)
72865317 1421{
72865317
BP
1422 int i;
1423
1424 for (i = 0; i < N_QUEUES; i++) {
856081f6 1425 struct dp_netdev_queue *q = &dp->queues[i];
a12b3ead 1426 if (q->head != q->tail) {
856081f6 1427 return q;
72865317
BP
1428 }
1429 }
856081f6 1430 return NULL;
72865317
BP
1431}
1432
1433static int
90a7c55e
BP
1434dpif_netdev_recv(struct dpif *dpif, struct dpif_upcall *upcall,
1435 struct ofpbuf *buf)
72865317 1436{
f5126b57 1437 struct dp_netdev *dp = get_dp_netdev(dpif);
5279f8fd
BP
1438 struct dp_netdev_queue *q;
1439 int error;
1440
f5126b57
BP
1441 ovs_mutex_lock(&dp->queue_mutex);
1442 q = find_nonempty_queue(dp);
856081f6 1443 if (q) {
d88b629b
BP
1444 struct dp_netdev_upcall *u = &q->upcalls[q->tail++ & QUEUE_MASK];
1445
1446 *upcall = u->upcall;
b3907fbc 1447
90a7c55e 1448 ofpbuf_uninit(buf);
d88b629b 1449 *buf = u->buf;
90a7c55e 1450
5279f8fd 1451 error = 0;
72865317 1452 } else {
5279f8fd 1453 error = EAGAIN;
72865317 1454 }
f5126b57 1455 ovs_mutex_unlock(&dp->queue_mutex);
5279f8fd
BP
1456
1457 return error;
72865317
BP
1458}
1459
1460static void
1461dpif_netdev_recv_wait(struct dpif *dpif)
1462{
d33ed218
BP
1463 struct dp_netdev *dp = get_dp_netdev(dpif);
1464 uint64_t seq;
5279f8fd 1465
f5126b57 1466 ovs_mutex_lock(&dp->queue_mutex);
d33ed218 1467 seq = seq_read(dp->queue_seq);
f5126b57 1468 if (find_nonempty_queue(dp)) {
72865317 1469 poll_immediate_wake();
d33ed218
BP
1470 } else {
1471 seq_wait(dp->queue_seq, seq);
72865317 1472 }
f5126b57 1473 ovs_mutex_unlock(&dp->queue_mutex);
72865317 1474}
1ba530f4
BP
1475
1476static void
1477dpif_netdev_recv_purge(struct dpif *dpif)
1478{
1479 struct dpif_netdev *dpif_netdev = dpif_netdev_cast(dpif);
f5126b57 1480
1ba530f4
BP
1481 dp_netdev_purge_queues(dpif_netdev->dp);
1482}
72865317 1483\f
a84cb64a
BP
1484/* Creates and returns a new 'struct dp_netdev_actions', with a reference count
1485 * of 1, whose actions are a copy of from the 'ofpacts_len' bytes of
1486 * 'ofpacts'. */
1487struct dp_netdev_actions *
1488dp_netdev_actions_create(const struct nlattr *actions, size_t size)
1489{
1490 struct dp_netdev_actions *netdev_actions;
1491
1492 netdev_actions = xmalloc(sizeof *netdev_actions);
1493 ovs_refcount_init(&netdev_actions->ref_cnt);
1494 netdev_actions->actions = xmemdup(actions, size);
1495 netdev_actions->size = size;
1496
1497 return netdev_actions;
1498}
1499
1500/* Increments 'actions''s refcount. */
1501struct dp_netdev_actions *
1502dp_netdev_actions_ref(const struct dp_netdev_actions *actions_)
1503{
1504 struct dp_netdev_actions *actions;
1505
1506 actions = CONST_CAST(struct dp_netdev_actions *, actions_);
1507 if (actions) {
1508 ovs_refcount_ref(&actions->ref_cnt);
1509 }
1510 return actions;
1511}
1512
1513/* Decrements 'actions''s refcount and frees 'actions' if the refcount reaches
1514 * 0. */
1515void
1516dp_netdev_actions_unref(struct dp_netdev_actions *actions)
1517{
1518 if (actions && ovs_refcount_unref(&actions->ref_cnt) == 1) {
1519 free(actions->actions);
1520 free(actions);
1521 }
1522}
1523\f
72865317 1524static void
1763b4b8
GS
1525dp_netdev_flow_used(struct dp_netdev_flow *netdev_flow,
1526 const struct ofpbuf *packet)
8a4e3a85 1527 OVS_REQUIRES(netdev_flow->mutex)
72865317 1528{
1763b4b8
GS
1529 netdev_flow->used = time_msec();
1530 netdev_flow->packet_count++;
1531 netdev_flow->byte_count += packet->size;
2c0ea78f 1532 netdev_flow->tcp_flags |= packet_get_tcp_flags(packet, &netdev_flow->flow);
72865317
BP
1533}
1534
1535static void
758c456d
JR
1536dp_netdev_port_input(struct dp_netdev *dp, struct ofpbuf *packet,
1537 struct pkt_metadata *md)
8a4e3a85 1538 OVS_REQ_RDLOCK(dp->port_rwlock)
72865317 1539{
1763b4b8 1540 struct dp_netdev_flow *netdev_flow;
14608a15 1541 struct flow key;
72865317 1542
1805876e
BP
1543 if (packet->size < ETH_HEADER_LEN) {
1544 return;
1545 }
758c456d
JR
1546 flow_extract(packet, md->skb_priority, md->pkt_mark, &md->tunnel,
1547 (union flow_in_port *)&md->in_port, &key);
1763b4b8
GS
1548 netdev_flow = dp_netdev_lookup_flow(dp, &key);
1549 if (netdev_flow) {
a84cb64a
BP
1550 struct dp_netdev_actions *actions;
1551
8a4e3a85 1552 ovs_mutex_lock(&netdev_flow->mutex);
1763b4b8 1553 dp_netdev_flow_used(netdev_flow, packet);
a84cb64a 1554 actions = dp_netdev_actions_ref(netdev_flow->actions);
8a4e3a85
BP
1555 ovs_mutex_unlock(&netdev_flow->mutex);
1556
758c456d 1557 dp_netdev_execute_actions(dp, &key, packet, md,
a84cb64a
BP
1558 actions->actions, actions->size);
1559 dp_netdev_actions_unref(actions);
ed27e010 1560 ovsthread_counter_inc(dp->n_hit, 1);
72865317 1561 } else {
ed27e010 1562 ovsthread_counter_inc(dp->n_missed, 1);
e995e3df 1563 dp_netdev_output_userspace(dp, packet, DPIF_UC_MISS, &key, NULL);
72865317
BP
1564 }
1565}
1566
1567static void
640e1b20 1568dpif_netdev_run(struct dpif *dpif)
72865317 1569{
640e1b20 1570 struct dp_netdev_port *port;
5279f8fd 1571 struct dp_netdev *dp;
72865317 1572 struct ofpbuf packet;
72865317 1573
5279f8fd 1574 dp = get_dp_netdev(dpif);
da546e07
JR
1575 ofpbuf_init(&packet, 0);
1576
8a4e3a85 1577 ovs_rwlock_rdlock(&dp->port_rwlock);
ff073a71 1578 HMAP_FOR_EACH (port, node, &dp->ports) {
5c8d2fca 1579 int buf_size;
640e1b20 1580 int error;
5c8d2fca
BP
1581 int mtu;
1582
1583 error = netdev_get_mtu(port->netdev, &mtu);
1584 if (error) {
1585 mtu = ETH_PAYLOAD_MAX;
1586 }
1587 buf_size = DP_NETDEV_HEADROOM + VLAN_ETH_HEADER_LEN + mtu;
640e1b20 1588
640e1b20 1589 ofpbuf_clear(&packet);
da546e07 1590 ofpbuf_reserve_with_tailroom(&packet, DP_NETDEV_HEADROOM, buf_size);
640e1b20 1591
796223f5 1592 error = port->rx ? netdev_rx_recv(port->rx, &packet) : EOPNOTSUPP;
640e1b20 1593 if (!error) {
758c456d
JR
1594 struct pkt_metadata md = PKT_METADATA_INITIALIZER(port->port_no);
1595 dp_netdev_port_input(dp, &packet, &md);
640e1b20
BP
1596 } else if (error != EAGAIN && error != EOPNOTSUPP) {
1597 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
3aa30359 1598
640e1b20 1599 VLOG_ERR_RL(&rl, "error receiving data from %s: %s",
10a89ef0 1600 netdev_get_name(port->netdev), ovs_strerror(error));
72865317
BP
1601 }
1602 }
8a4e3a85
BP
1603 ovs_rwlock_unlock(&dp->port_rwlock);
1604
72865317
BP
1605 ofpbuf_uninit(&packet);
1606}
1607
1608static void
640e1b20 1609dpif_netdev_wait(struct dpif *dpif)
72865317 1610{
640e1b20 1611 struct dp_netdev_port *port;
462278db 1612
5279f8fd
BP
1613 /* There is a race here, if thread A calls dpif_netdev_wait(dpif) and
1614 * thread B calls dpif_port_add(dpif) or dpif_port_remove(dpif) before
1615 * A makes it to poll_block().
1616 *
1617 * But I think it doesn't matter:
1618 *
1619 * - In the dpif_port_add() case, A will not wake up when a packet
1620 * arrives on the new port, but this would also happen if the
1621 * ordering were reversed.
1622 *
1623 * - In the dpif_port_remove() case, A might wake up spuriously, but
1624 * that is harmless. */
1625
97be1538 1626 ovs_mutex_lock(&dp_netdev_mutex);
ff073a71 1627 HMAP_FOR_EACH (port, node, &get_dp_netdev(dpif)->ports) {
796223f5
BP
1628 if (port->rx) {
1629 netdev_rx_wait(port->rx);
1630 }
72865317 1631 }
97be1538 1632 ovs_mutex_unlock(&dp_netdev_mutex);
72865317
BP
1633}
1634
72865317 1635static int
da546e07 1636dp_netdev_output_userspace(struct dp_netdev *dp, struct ofpbuf *packet,
e995e3df
BP
1637 int queue_no, const struct flow *flow,
1638 const struct nlattr *userdata)
f5126b57 1639 OVS_EXCLUDED(dp->queue_mutex)
72865317 1640{
856081f6 1641 struct dp_netdev_queue *q = &dp->queues[queue_no];
f5126b57
BP
1642 int error;
1643
1644 ovs_mutex_lock(&dp->queue_mutex);
e995e3df
BP
1645 if (q->head - q->tail < MAX_QUEUE_LEN) {
1646 struct dp_netdev_upcall *u = &q->upcalls[q->head++ & QUEUE_MASK];
1647 struct dpif_upcall *upcall = &u->upcall;
1648 struct ofpbuf *buf = &u->buf;
1649 size_t buf_size;
1650
1651 upcall->type = queue_no;
1652
1653 /* Allocate buffer big enough for everything. */
da546e07 1654 buf_size = ODPUTIL_FLOW_KEY_BYTES;
e995e3df
BP
1655 if (userdata) {
1656 buf_size += NLA_ALIGN(userdata->nla_len);
1657 }
1658 ofpbuf_init(buf, buf_size);
72865317 1659
e995e3df 1660 /* Put ODP flow. */
4e022ec0 1661 odp_flow_key_from_flow(buf, flow, flow->in_port.odp_port);
e995e3df
BP
1662 upcall->key = buf->data;
1663 upcall->key_len = buf->size;
d88b629b 1664
e995e3df
BP
1665 /* Put userdata. */
1666 if (userdata) {
1667 upcall->userdata = ofpbuf_put(buf, userdata,
1668 NLA_ALIGN(userdata->nla_len));
1669 }
856081f6 1670
da546e07
JR
1671 /* Steal packet data. */
1672 ovs_assert(packet->source == OFPBUF_MALLOC);
1673 upcall->packet = *packet;
1674 ofpbuf_use(packet, NULL, 0);
856081f6 1675
d33ed218
BP
1676 seq_change(dp->queue_seq);
1677
f5126b57 1678 error = 0;
e995e3df 1679 } else {
ed27e010 1680 ovsthread_counter_inc(dp->n_lost, 1);
f5126b57 1681 error = ENOBUFS;
e995e3df 1682 }
f5126b57
BP
1683 ovs_mutex_unlock(&dp->queue_mutex);
1684
1685 return error;
72865317
BP
1686}
1687
/* Context threaded through odp_execute_actions() into dp_execute_cb(). */
struct dp_netdev_execute_aux {
    struct dp_netdev *dp;       /* Datapath executing the actions. */
    const struct flow *key;     /* Flow key of the packet being processed. */
};
1692
1693static void
758c456d
JR
1694dp_execute_cb(void *aux_, struct ofpbuf *packet,
1695 const struct pkt_metadata *md OVS_UNUSED,
09f9da0b 1696 const struct nlattr *a, bool may_steal)
8a4e3a85 1697 OVS_NO_THREAD_SAFETY_ANALYSIS
9080a111
JR
1698{
1699 struct dp_netdev_execute_aux *aux = aux_;
09f9da0b 1700 int type = nl_attr_type(a);
8a4e3a85 1701 struct dp_netdev_port *p;
9080a111 1702
09f9da0b
JR
1703 switch ((enum ovs_action_attr)type) {
1704 case OVS_ACTION_ATTR_OUTPUT:
8a4e3a85
BP
1705 p = dp_netdev_lookup_port(aux->dp, u32_to_odp(nl_attr_get_u32(a)));
1706 if (p) {
1707 netdev_send(p->netdev, packet);
1708 }
09f9da0b
JR
1709 break;
1710
1711 case OVS_ACTION_ATTR_USERSPACE: {
1712 const struct nlattr *userdata;
4fc65926 1713
09f9da0b 1714 userdata = nl_attr_find_nested(a, OVS_USERSPACE_ATTR_USERDATA);
da546e07 1715
09f9da0b
JR
1716 /* Make a copy if we are not allowed to steal the packet's data. */
1717 if (!may_steal) {
1718 packet = ofpbuf_clone_with_headroom(packet, DP_NETDEV_HEADROOM);
1719 }
1720 dp_netdev_output_userspace(aux->dp, packet, DPIF_UC_ACTION, aux->key,
1721 userdata);
1722 if (!may_steal) {
1723 ofpbuf_uninit(packet);
1724 }
1725 break;
da546e07 1726 }
09f9da0b
JR
1727 case OVS_ACTION_ATTR_PUSH_VLAN:
1728 case OVS_ACTION_ATTR_POP_VLAN:
1729 case OVS_ACTION_ATTR_PUSH_MPLS:
1730 case OVS_ACTION_ATTR_POP_MPLS:
1731 case OVS_ACTION_ATTR_SET:
1732 case OVS_ACTION_ATTR_SAMPLE:
1733 case OVS_ACTION_ATTR_UNSPEC:
1734 case __OVS_ACTION_ATTR_MAX:
1735 OVS_NOT_REACHED();
da546e07 1736 }
98403001
BP
1737}
1738
4edb9ae9 1739static void
9080a111 1740dp_netdev_execute_actions(struct dp_netdev *dp, const struct flow *key,
758c456d 1741 struct ofpbuf *packet, struct pkt_metadata *md,
9080a111 1742 const struct nlattr *actions, size_t actions_len)
8a4e3a85 1743 OVS_REQ_RDLOCK(dp->port_rwlock)
72865317 1744{
9080a111 1745 struct dp_netdev_execute_aux aux = {dp, key};
9080a111 1746
758c456d 1747 odp_execute_actions(&aux, packet, md, actions, actions_len, dp_execute_cb);
72865317
BP
1748}
1749
1750const struct dpif_class dpif_netdev_class = {
72865317 1751 "netdev",
2197d7ab 1752 dpif_netdev_enumerate,
0aeaabc8 1753 dpif_netdev_port_open_type,
72865317
BP
1754 dpif_netdev_open,
1755 dpif_netdev_close,
7dab847a 1756 dpif_netdev_destroy,
640e1b20
BP
1757 dpif_netdev_run,
1758 dpif_netdev_wait,
72865317 1759 dpif_netdev_get_stats,
72865317
BP
1760 dpif_netdev_port_add,
1761 dpif_netdev_port_del,
1762 dpif_netdev_port_query_by_number,
1763 dpif_netdev_port_query_by_name,
98403001 1764 NULL, /* port_get_pid */
b0ec0f27
BP
1765 dpif_netdev_port_dump_start,
1766 dpif_netdev_port_dump_next,
1767 dpif_netdev_port_dump_done,
72865317
BP
1768 dpif_netdev_port_poll,
1769 dpif_netdev_port_poll_wait,
72865317
BP
1770 dpif_netdev_flow_get,
1771 dpif_netdev_flow_put,
1772 dpif_netdev_flow_del,
1773 dpif_netdev_flow_flush,
704a1e09
BP
1774 dpif_netdev_flow_dump_start,
1775 dpif_netdev_flow_dump_next,
1776 dpif_netdev_flow_dump_done,
72865317 1777 dpif_netdev_execute,
6bc60024 1778 NULL, /* operate */
a12b3ead 1779 dpif_netdev_recv_set,
5bf93d67 1780 dpif_netdev_queue_to_priority,
72865317
BP
1781 dpif_netdev_recv,
1782 dpif_netdev_recv_wait,
1ba530f4 1783 dpif_netdev_recv_purge,
72865317 1784};
614c4892 1785
74cc3969
BP
1786static void
1787dpif_dummy_change_port_number(struct unixctl_conn *conn, int argc OVS_UNUSED,
1788 const char *argv[], void *aux OVS_UNUSED)
1789{
1790 struct dp_netdev_port *port;
1791 struct dp_netdev *dp;
ff073a71 1792 odp_port_t port_no;
74cc3969 1793
8a4e3a85 1794 ovs_mutex_lock(&dp_netdev_mutex);
74cc3969
BP
1795 dp = shash_find_data(&dp_netdevs, argv[1]);
1796 if (!dp || !dpif_netdev_class_is_dummy(dp->class)) {
8a4e3a85 1797 ovs_mutex_unlock(&dp_netdev_mutex);
74cc3969
BP
1798 unixctl_command_reply_error(conn, "unknown datapath or not a dummy");
1799 return;
1800 }
8a4e3a85
BP
1801 ovs_refcount_ref(&dp->ref_cnt);
1802 ovs_mutex_unlock(&dp_netdev_mutex);
74cc3969 1803
8a4e3a85 1804 ovs_rwlock_wrlock(&dp->port_rwlock);
74cc3969
BP
1805 if (get_port_by_name(dp, argv[2], &port)) {
1806 unixctl_command_reply_error(conn, "unknown port");
8a4e3a85 1807 goto exit;
74cc3969
BP
1808 }
1809
ff073a71
BP
1810 port_no = u32_to_odp(atoi(argv[3]));
1811 if (!port_no || port_no == ODPP_NONE) {
74cc3969 1812 unixctl_command_reply_error(conn, "bad port number");
8a4e3a85 1813 goto exit;
74cc3969 1814 }
ff073a71 1815 if (dp_netdev_lookup_port(dp, port_no)) {
74cc3969 1816 unixctl_command_reply_error(conn, "port number already in use");
8a4e3a85 1817 goto exit;
74cc3969 1818 }
ff073a71
BP
1819 hmap_remove(&dp->ports, &port->node);
1820 port->port_no = port_no;
1821 hmap_insert(&dp->ports, &port->node, hash_int(odp_to_u32(port_no), 0));
d33ed218 1822 seq_change(dp->port_seq);
74cc3969 1823 unixctl_command_reply(conn, NULL);
8a4e3a85
BP
1824
1825exit:
1826 ovs_rwlock_unlock(&dp->port_rwlock);
1827 dp_netdev_unref(dp);
74cc3969
BP
1828}
1829
0cbfe35d
BP
1830static void
1831dpif_dummy_register__(const char *type)
1832{
1833 struct dpif_class *class;
1834
1835 class = xmalloc(sizeof *class);
1836 *class = dpif_netdev_class;
1837 class->type = xstrdup(type);
1838 dp_register_provider(class);
1839}
1840
614c4892 1841void
0cbfe35d 1842dpif_dummy_register(bool override)
614c4892 1843{
0cbfe35d
BP
1844 if (override) {
1845 struct sset types;
1846 const char *type;
1847
1848 sset_init(&types);
1849 dp_enumerate_types(&types);
1850 SSET_FOR_EACH (type, &types) {
1851 if (!dp_unregister_provider(type)) {
1852 dpif_dummy_register__(type);
1853 }
1854 }
1855 sset_destroy(&types);
614c4892 1856 }
0cbfe35d
BP
1857
1858 dpif_dummy_register__("dummy");
74cc3969
BP
1859
1860 unixctl_command_register("dpif-dummy/change-port-number",
1861 "DP PORT NEW-NUMBER",
1862 3, 3, dpif_dummy_change_port_number, NULL);
614c4892 1863}